diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,257542 @@ +{ + "best_metric": 0.442775160074234, + "best_model_checkpoint": "./mobilenetv2-typecoffee/checkpoint-15288", + "epoch": 100.0, + "eval_steps": 500, + "global_step": 36400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0027472527472527475, + "grad_norm": 13.516402244567871, + "learning_rate": 4.999862637362638e-05, + "loss": 1.7311, + "step": 1 + }, + { + "epoch": 0.005494505494505495, + "grad_norm": 13.50833511352539, + "learning_rate": 4.999725274725275e-05, + "loss": 1.78, + "step": 2 + }, + { + "epoch": 0.008241758241758242, + "grad_norm": 14.083168029785156, + "learning_rate": 4.9995879120879126e-05, + "loss": 1.857, + "step": 3 + }, + { + "epoch": 0.01098901098901099, + "grad_norm": 13.746472358703613, + "learning_rate": 4.9994505494505496e-05, + "loss": 1.7061, + "step": 4 + }, + { + "epoch": 0.013736263736263736, + "grad_norm": 13.229864120483398, + "learning_rate": 4.999313186813187e-05, + "loss": 1.6826, + "step": 5 + }, + { + "epoch": 0.016483516483516484, + "grad_norm": 13.011897087097168, + "learning_rate": 4.999175824175825e-05, + "loss": 1.7305, + "step": 6 + }, + { + "epoch": 0.019230769230769232, + "grad_norm": 12.674878120422363, + "learning_rate": 4.999038461538462e-05, + "loss": 1.7142, + "step": 7 + }, + { + "epoch": 0.02197802197802198, + "grad_norm": 13.460692405700684, + "learning_rate": 4.998901098901099e-05, + "loss": 1.6394, + "step": 8 + }, + { + "epoch": 0.024725274725274724, + "grad_norm": 12.764488220214844, + "learning_rate": 4.998763736263736e-05, + "loss": 1.6332, + "step": 9 + }, + { + "epoch": 0.027472527472527472, + "grad_norm": 13.359317779541016, + "learning_rate": 4.9986263736263736e-05, + "loss": 1.7292, + "step": 10 + }, + { + "epoch": 0.03021978021978022, + "grad_norm": 12.742820739746094, + "learning_rate": 4.998489010989011e-05, + "loss": 1.744, + "step": 11 + }, + { + "epoch": 0.03296703296703297, + "grad_norm": 12.366043090820312, + "learning_rate": 4.998351648351648e-05, + "loss": 1.6316, + "step": 12 + }, + { + "epoch": 0.03571428571428571, + "grad_norm": 12.863287925720215, + "learning_rate": 4.998214285714286e-05, + "loss": 1.7296, + "step": 13 + }, + { + "epoch": 0.038461538461538464, + "grad_norm": 12.822843551635742, + "learning_rate": 4.998076923076923e-05, + "loss": 1.5025, + "step": 14 + }, + { + "epoch": 0.04120879120879121, + "grad_norm": 12.968554496765137, + "learning_rate": 4.997939560439561e-05, + "loss": 1.5565, + "step": 15 + }, + { + "epoch": 0.04395604395604396, + "grad_norm": 13.954378128051758, + "learning_rate": 4.9978021978021984e-05, + "loss": 1.6606, + "step": 16 + }, + { + "epoch": 0.046703296703296704, + "grad_norm": 14.500865936279297, + "learning_rate": 4.9976648351648354e-05, + "loss": 1.7252, + "step": 17 + }, + { + "epoch": 0.04945054945054945, + "grad_norm": 13.718748092651367, + "learning_rate": 4.997527472527473e-05, + "loss": 1.6986, + "step": 18 + }, + { + "epoch": 0.0521978021978022, + "grad_norm": 13.256901741027832, + "learning_rate": 4.99739010989011e-05, + "loss": 1.6943, + "step": 19 + }, + { + "epoch": 0.054945054945054944, + "grad_norm": 12.886876106262207, + "learning_rate": 4.997252747252748e-05, + "loss": 1.6545, + "step": 20 + }, + { + "epoch": 0.057692307692307696, + "grad_norm": 11.920572280883789, + "learning_rate": 4.9971153846153854e-05, + "loss": 1.531, + "step": 21 + }, + { + "epoch": 0.06043956043956044, + "grad_norm": 12.23888111114502, + "learning_rate": 4.9969780219780224e-05, + "loss": 1.5823, + "step": 22 + }, + { + "epoch": 0.06318681318681318, + "grad_norm": 12.419584274291992, + "learning_rate": 4.9968406593406594e-05, + "loss": 1.6556, + "step": 23 + }, + { + "epoch": 0.06593406593406594, + "grad_norm": 13.013021469116211, + "learning_rate": 4.9967032967032964e-05, + "loss": 1.6547, + "step": 24 + }, + { + "epoch": 0.06868131868131869, + "grad_norm": 13.076851844787598, + "learning_rate": 4.996565934065934e-05, + "loss": 1.7104, + "step": 25 + }, + { + "epoch": 0.07142857142857142, + "grad_norm": 13.007224082946777, + "learning_rate": 4.996428571428572e-05, + "loss": 1.6054, + "step": 26 + }, + { + "epoch": 0.07417582417582418, + "grad_norm": 11.650979995727539, + "learning_rate": 4.996291208791209e-05, + "loss": 1.5647, + "step": 27 + }, + { + "epoch": 0.07692307692307693, + "grad_norm": 13.023259162902832, + "learning_rate": 4.9961538461538465e-05, + "loss": 1.6317, + "step": 28 + }, + { + "epoch": 0.07967032967032966, + "grad_norm": 14.476143836975098, + "learning_rate": 4.9960164835164835e-05, + "loss": 1.692, + "step": 29 + }, + { + "epoch": 0.08241758241758242, + "grad_norm": 11.546948432922363, + "learning_rate": 4.995879120879121e-05, + "loss": 1.4278, + "step": 30 + }, + { + "epoch": 0.08516483516483517, + "grad_norm": 12.801678657531738, + "learning_rate": 4.995741758241759e-05, + "loss": 1.5507, + "step": 31 + }, + { + "epoch": 0.08791208791208792, + "grad_norm": 13.441140174865723, + "learning_rate": 4.995604395604396e-05, + "loss": 1.6674, + "step": 32 + }, + { + "epoch": 0.09065934065934066, + "grad_norm": 12.270824432373047, + "learning_rate": 4.9954670329670335e-05, + "loss": 1.4943, + "step": 33 + }, + { + "epoch": 0.09340659340659341, + "grad_norm": 15.14696216583252, + "learning_rate": 4.9953296703296705e-05, + "loss": 1.5174, + "step": 34 + }, + { + "epoch": 0.09615384615384616, + "grad_norm": 12.303487777709961, + "learning_rate": 4.995192307692308e-05, + "loss": 1.5101, + "step": 35 + }, + { + "epoch": 0.0989010989010989, + "grad_norm": 12.349699020385742, + "learning_rate": 4.995054945054946e-05, + "loss": 1.5756, + "step": 36 + }, + { + "epoch": 0.10164835164835165, + "grad_norm": 12.265460968017578, + "learning_rate": 4.994917582417583e-05, + "loss": 1.51, + "step": 37 + }, + { + "epoch": 0.1043956043956044, + "grad_norm": 12.868760108947754, + "learning_rate": 4.99478021978022e-05, + "loss": 1.4994, + "step": 38 + }, + { + "epoch": 0.10714285714285714, + "grad_norm": 12.516128540039062, + "learning_rate": 4.994642857142857e-05, + "loss": 1.5822, + "step": 39 + }, + { + "epoch": 0.10989010989010989, + "grad_norm": 12.572957038879395, + "learning_rate": 4.9945054945054945e-05, + "loss": 1.4568, + "step": 40 + }, + { + "epoch": 0.11263736263736264, + "grad_norm": 12.583097457885742, + "learning_rate": 4.994368131868132e-05, + "loss": 1.5306, + "step": 41 + }, + { + "epoch": 0.11538461538461539, + "grad_norm": 13.80907917022705, + "learning_rate": 4.994230769230769e-05, + "loss": 1.575, + "step": 42 + }, + { + "epoch": 0.11813186813186813, + "grad_norm": 12.799637794494629, + "learning_rate": 4.994093406593407e-05, + "loss": 1.5361, + "step": 43 + }, + { + "epoch": 0.12087912087912088, + "grad_norm": 16.221487045288086, + "learning_rate": 4.993956043956044e-05, + "loss": 1.5785, + "step": 44 + }, + { + "epoch": 0.12362637362637363, + "grad_norm": 12.808794021606445, + "learning_rate": 4.9938186813186816e-05, + "loss": 1.5527, + "step": 45 + }, + { + "epoch": 0.12637362637362637, + "grad_norm": 13.194719314575195, + "learning_rate": 4.993681318681319e-05, + "loss": 1.6387, + "step": 46 + }, + { + "epoch": 0.12912087912087913, + "grad_norm": 12.497005462646484, + "learning_rate": 4.993543956043956e-05, + "loss": 1.6702, + "step": 47 + }, + { + "epoch": 0.13186813186813187, + "grad_norm": 13.4653959274292, + "learning_rate": 4.993406593406594e-05, + "loss": 1.6484, + "step": 48 + }, + { + "epoch": 0.1346153846153846, + "grad_norm": 11.670938491821289, + "learning_rate": 4.993269230769231e-05, + "loss": 1.5333, + "step": 49 + }, + { + "epoch": 0.13736263736263737, + "grad_norm": 14.452205657958984, + "learning_rate": 4.9931318681318686e-05, + "loss": 1.7723, + "step": 50 + }, + { + "epoch": 0.1401098901098901, + "grad_norm": 12.1572265625, + "learning_rate": 4.992994505494506e-05, + "loss": 1.5525, + "step": 51 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 12.52499008178711, + "learning_rate": 4.992857142857143e-05, + "loss": 1.6005, + "step": 52 + }, + { + "epoch": 0.14560439560439561, + "grad_norm": 13.8184232711792, + "learning_rate": 4.99271978021978e-05, + "loss": 1.541, + "step": 53 + }, + { + "epoch": 0.14835164835164835, + "grad_norm": 13.710570335388184, + "learning_rate": 4.992582417582417e-05, + "loss": 1.548, + "step": 54 + }, + { + "epoch": 0.1510989010989011, + "grad_norm": 13.722309112548828, + "learning_rate": 4.992445054945055e-05, + "loss": 1.4404, + "step": 55 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 12.658994674682617, + "learning_rate": 4.992307692307693e-05, + "loss": 1.6399, + "step": 56 + }, + { + "epoch": 0.1565934065934066, + "grad_norm": 12.47728443145752, + "learning_rate": 4.99217032967033e-05, + "loss": 1.4379, + "step": 57 + }, + { + "epoch": 0.15934065934065933, + "grad_norm": 11.535859107971191, + "learning_rate": 4.9920329670329674e-05, + "loss": 1.453, + "step": 58 + }, + { + "epoch": 0.1620879120879121, + "grad_norm": 12.683633804321289, + "learning_rate": 4.9918956043956044e-05, + "loss": 1.456, + "step": 59 + }, + { + "epoch": 0.16483516483516483, + "grad_norm": 12.23169994354248, + "learning_rate": 4.991758241758242e-05, + "loss": 1.4451, + "step": 60 + }, + { + "epoch": 0.16758241758241757, + "grad_norm": 12.935940742492676, + "learning_rate": 4.99162087912088e-05, + "loss": 1.5276, + "step": 61 + }, + { + "epoch": 0.17032967032967034, + "grad_norm": 11.704452514648438, + "learning_rate": 4.991483516483517e-05, + "loss": 1.4297, + "step": 62 + }, + { + "epoch": 0.17307692307692307, + "grad_norm": 12.67447280883789, + "learning_rate": 4.9913461538461544e-05, + "loss": 1.5739, + "step": 63 + }, + { + "epoch": 0.17582417582417584, + "grad_norm": 12.724445343017578, + "learning_rate": 4.9912087912087914e-05, + "loss": 1.5773, + "step": 64 + }, + { + "epoch": 0.17857142857142858, + "grad_norm": 15.099902153015137, + "learning_rate": 4.991071428571429e-05, + "loss": 1.5436, + "step": 65 + }, + { + "epoch": 0.1813186813186813, + "grad_norm": 12.049002647399902, + "learning_rate": 4.990934065934067e-05, + "loss": 1.4732, + "step": 66 + }, + { + "epoch": 0.18406593406593408, + "grad_norm": 12.514328956604004, + "learning_rate": 4.990796703296704e-05, + "loss": 1.3326, + "step": 67 + }, + { + "epoch": 0.18681318681318682, + "grad_norm": 13.557246208190918, + "learning_rate": 4.990659340659341e-05, + "loss": 1.5094, + "step": 68 + }, + { + "epoch": 0.18956043956043955, + "grad_norm": 12.062057495117188, + "learning_rate": 4.990521978021978e-05, + "loss": 1.4109, + "step": 69 + }, + { + "epoch": 0.19230769230769232, + "grad_norm": 11.953624725341797, + "learning_rate": 4.9903846153846154e-05, + "loss": 1.3774, + "step": 70 + }, + { + "epoch": 0.19505494505494506, + "grad_norm": 13.797765731811523, + "learning_rate": 4.990247252747253e-05, + "loss": 1.5027, + "step": 71 + }, + { + "epoch": 0.1978021978021978, + "grad_norm": 15.15893840789795, + "learning_rate": 4.99010989010989e-05, + "loss": 1.6184, + "step": 72 + }, + { + "epoch": 0.20054945054945056, + "grad_norm": 11.799866676330566, + "learning_rate": 4.989972527472528e-05, + "loss": 1.2954, + "step": 73 + }, + { + "epoch": 0.2032967032967033, + "grad_norm": 15.066256523132324, + "learning_rate": 4.989835164835165e-05, + "loss": 1.4465, + "step": 74 + }, + { + "epoch": 0.20604395604395603, + "grad_norm": 11.84368896484375, + "learning_rate": 4.9896978021978025e-05, + "loss": 1.3322, + "step": 75 + }, + { + "epoch": 0.2087912087912088, + "grad_norm": 12.824220657348633, + "learning_rate": 4.98956043956044e-05, + "loss": 1.436, + "step": 76 + }, + { + "epoch": 0.21153846153846154, + "grad_norm": 12.8259859085083, + "learning_rate": 4.989423076923077e-05, + "loss": 1.3874, + "step": 77 + }, + { + "epoch": 0.21428571428571427, + "grad_norm": 15.821691513061523, + "learning_rate": 4.989285714285715e-05, + "loss": 1.612, + "step": 78 + }, + { + "epoch": 0.21703296703296704, + "grad_norm": 11.686238288879395, + "learning_rate": 4.989148351648352e-05, + "loss": 1.4303, + "step": 79 + }, + { + "epoch": 0.21978021978021978, + "grad_norm": 13.501056671142578, + "learning_rate": 4.9890109890109895e-05, + "loss": 1.6459, + "step": 80 + }, + { + "epoch": 0.22252747252747251, + "grad_norm": 12.655521392822266, + "learning_rate": 4.988873626373627e-05, + "loss": 1.4863, + "step": 81 + }, + { + "epoch": 0.22527472527472528, + "grad_norm": 10.908514022827148, + "learning_rate": 4.988736263736264e-05, + "loss": 1.2733, + "step": 82 + }, + { + "epoch": 0.22802197802197802, + "grad_norm": 13.178753852844238, + "learning_rate": 4.988598901098901e-05, + "loss": 1.4765, + "step": 83 + }, + { + "epoch": 0.23076923076923078, + "grad_norm": 12.382018089294434, + "learning_rate": 4.988461538461538e-05, + "loss": 1.3182, + "step": 84 + }, + { + "epoch": 0.23351648351648352, + "grad_norm": 13.1803617477417, + "learning_rate": 4.988324175824176e-05, + "loss": 1.5388, + "step": 85 + }, + { + "epoch": 0.23626373626373626, + "grad_norm": 11.801189422607422, + "learning_rate": 4.9881868131868136e-05, + "loss": 1.4949, + "step": 86 + }, + { + "epoch": 0.23901098901098902, + "grad_norm": 13.375236511230469, + "learning_rate": 4.9880494505494506e-05, + "loss": 1.5357, + "step": 87 + }, + { + "epoch": 0.24175824175824176, + "grad_norm": 13.158304214477539, + "learning_rate": 4.987912087912088e-05, + "loss": 1.4327, + "step": 88 + }, + { + "epoch": 0.2445054945054945, + "grad_norm": 12.852231979370117, + "learning_rate": 4.987774725274725e-05, + "loss": 1.5601, + "step": 89 + }, + { + "epoch": 0.24725274725274726, + "grad_norm": 13.288238525390625, + "learning_rate": 4.987637362637363e-05, + "loss": 1.3393, + "step": 90 + }, + { + "epoch": 0.25, + "grad_norm": 15.076499938964844, + "learning_rate": 4.9875000000000006e-05, + "loss": 1.603, + "step": 91 + }, + { + "epoch": 0.25274725274725274, + "grad_norm": 13.988117218017578, + "learning_rate": 4.9873626373626376e-05, + "loss": 1.4507, + "step": 92 + }, + { + "epoch": 0.2554945054945055, + "grad_norm": 12.459961891174316, + "learning_rate": 4.987225274725275e-05, + "loss": 1.3539, + "step": 93 + }, + { + "epoch": 0.25824175824175827, + "grad_norm": 13.012224197387695, + "learning_rate": 4.987087912087912e-05, + "loss": 1.5481, + "step": 94 + }, + { + "epoch": 0.260989010989011, + "grad_norm": 11.4223051071167, + "learning_rate": 4.98695054945055e-05, + "loss": 1.3972, + "step": 95 + }, + { + "epoch": 0.26373626373626374, + "grad_norm": 12.893906593322754, + "learning_rate": 4.9868131868131877e-05, + "loss": 1.5514, + "step": 96 + }, + { + "epoch": 0.2664835164835165, + "grad_norm": 12.922992706298828, + "learning_rate": 4.9866758241758247e-05, + "loss": 1.3461, + "step": 97 + }, + { + "epoch": 0.2692307692307692, + "grad_norm": 12.530813217163086, + "learning_rate": 4.9865384615384617e-05, + "loss": 1.4464, + "step": 98 + }, + { + "epoch": 0.27197802197802196, + "grad_norm": 12.441610336303711, + "learning_rate": 4.986401098901099e-05, + "loss": 1.2611, + "step": 99 + }, + { + "epoch": 0.27472527472527475, + "grad_norm": 12.940747261047363, + "learning_rate": 4.9862637362637363e-05, + "loss": 1.5798, + "step": 100 + }, + { + "epoch": 0.2774725274725275, + "grad_norm": 12.027348518371582, + "learning_rate": 4.986126373626374e-05, + "loss": 1.2468, + "step": 101 + }, + { + "epoch": 0.2802197802197802, + "grad_norm": 13.154647827148438, + "learning_rate": 4.985989010989011e-05, + "loss": 1.4009, + "step": 102 + }, + { + "epoch": 0.28296703296703296, + "grad_norm": 12.83053207397461, + "learning_rate": 4.985851648351649e-05, + "loss": 1.2386, + "step": 103 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 14.09329891204834, + "learning_rate": 4.985714285714286e-05, + "loss": 1.5146, + "step": 104 + }, + { + "epoch": 0.28846153846153844, + "grad_norm": 12.58833122253418, + "learning_rate": 4.9855769230769234e-05, + "loss": 1.5287, + "step": 105 + }, + { + "epoch": 0.29120879120879123, + "grad_norm": 13.696698188781738, + "learning_rate": 4.985439560439561e-05, + "loss": 1.4682, + "step": 106 + }, + { + "epoch": 0.29395604395604397, + "grad_norm": 13.522098541259766, + "learning_rate": 4.985302197802198e-05, + "loss": 1.5164, + "step": 107 + }, + { + "epoch": 0.2967032967032967, + "grad_norm": 13.479467391967773, + "learning_rate": 4.985164835164836e-05, + "loss": 1.6247, + "step": 108 + }, + { + "epoch": 0.29945054945054944, + "grad_norm": 11.77881145477295, + "learning_rate": 4.985027472527473e-05, + "loss": 1.1867, + "step": 109 + }, + { + "epoch": 0.3021978021978022, + "grad_norm": 14.815271377563477, + "learning_rate": 4.9848901098901104e-05, + "loss": 1.4617, + "step": 110 + }, + { + "epoch": 0.30494505494505497, + "grad_norm": 12.069361686706543, + "learning_rate": 4.984752747252748e-05, + "loss": 1.3522, + "step": 111 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 12.298908233642578, + "learning_rate": 4.984615384615385e-05, + "loss": 1.376, + "step": 112 + }, + { + "epoch": 0.31043956043956045, + "grad_norm": 14.535429954528809, + "learning_rate": 4.984478021978022e-05, + "loss": 1.4294, + "step": 113 + }, + { + "epoch": 0.3131868131868132, + "grad_norm": 12.44373607635498, + "learning_rate": 4.984340659340659e-05, + "loss": 1.3302, + "step": 114 + }, + { + "epoch": 0.3159340659340659, + "grad_norm": 13.111770629882812, + "learning_rate": 4.984203296703297e-05, + "loss": 1.4638, + "step": 115 + }, + { + "epoch": 0.31868131868131866, + "grad_norm": 12.009635925292969, + "learning_rate": 4.984065934065934e-05, + "loss": 1.3433, + "step": 116 + }, + { + "epoch": 0.32142857142857145, + "grad_norm": 14.058640480041504, + "learning_rate": 4.9839285714285715e-05, + "loss": 1.2244, + "step": 117 + }, + { + "epoch": 0.3241758241758242, + "grad_norm": 12.061566352844238, + "learning_rate": 4.983791208791209e-05, + "loss": 1.2818, + "step": 118 + }, + { + "epoch": 0.3269230769230769, + "grad_norm": 13.256675720214844, + "learning_rate": 4.983653846153846e-05, + "loss": 1.4435, + "step": 119 + }, + { + "epoch": 0.32967032967032966, + "grad_norm": 12.799558639526367, + "learning_rate": 4.983516483516484e-05, + "loss": 1.3195, + "step": 120 + }, + { + "epoch": 0.3324175824175824, + "grad_norm": 12.824915885925293, + "learning_rate": 4.983379120879121e-05, + "loss": 1.4413, + "step": 121 + }, + { + "epoch": 0.33516483516483514, + "grad_norm": 16.187307357788086, + "learning_rate": 4.9832417582417585e-05, + "loss": 1.5511, + "step": 122 + }, + { + "epoch": 0.33791208791208793, + "grad_norm": 13.432246208190918, + "learning_rate": 4.983104395604396e-05, + "loss": 1.4831, + "step": 123 + }, + { + "epoch": 0.34065934065934067, + "grad_norm": 12.779187202453613, + "learning_rate": 4.982967032967033e-05, + "loss": 1.2416, + "step": 124 + }, + { + "epoch": 0.3434065934065934, + "grad_norm": 13.138362884521484, + "learning_rate": 4.982829670329671e-05, + "loss": 1.3916, + "step": 125 + }, + { + "epoch": 0.34615384615384615, + "grad_norm": 14.558107376098633, + "learning_rate": 4.982692307692308e-05, + "loss": 1.5555, + "step": 126 + }, + { + "epoch": 0.3489010989010989, + "grad_norm": 13.535820007324219, + "learning_rate": 4.9825549450549456e-05, + "loss": 1.1587, + "step": 127 + }, + { + "epoch": 0.3516483516483517, + "grad_norm": 12.253538131713867, + "learning_rate": 4.9824175824175826e-05, + "loss": 1.2514, + "step": 128 + }, + { + "epoch": 0.3543956043956044, + "grad_norm": 14.23134994506836, + "learning_rate": 4.9822802197802196e-05, + "loss": 1.5628, + "step": 129 + }, + { + "epoch": 0.35714285714285715, + "grad_norm": 11.922521591186523, + "learning_rate": 4.982142857142857e-05, + "loss": 1.2573, + "step": 130 + }, + { + "epoch": 0.3598901098901099, + "grad_norm": 12.457552909851074, + "learning_rate": 4.982005494505494e-05, + "loss": 1.4074, + "step": 131 + }, + { + "epoch": 0.3626373626373626, + "grad_norm": 13.143856048583984, + "learning_rate": 4.981868131868132e-05, + "loss": 1.3309, + "step": 132 + }, + { + "epoch": 0.36538461538461536, + "grad_norm": 12.44284439086914, + "learning_rate": 4.9817307692307696e-05, + "loss": 1.2031, + "step": 133 + }, + { + "epoch": 0.36813186813186816, + "grad_norm": 13.035314559936523, + "learning_rate": 4.9815934065934066e-05, + "loss": 1.3246, + "step": 134 + }, + { + "epoch": 0.3708791208791209, + "grad_norm": 13.495064735412598, + "learning_rate": 4.981456043956044e-05, + "loss": 1.4785, + "step": 135 + }, + { + "epoch": 0.37362637362637363, + "grad_norm": 12.069003105163574, + "learning_rate": 4.981318681318681e-05, + "loss": 1.3912, + "step": 136 + }, + { + "epoch": 0.37637362637362637, + "grad_norm": 12.189469337463379, + "learning_rate": 4.981181318681319e-05, + "loss": 1.1949, + "step": 137 + }, + { + "epoch": 0.3791208791208791, + "grad_norm": 13.716899871826172, + "learning_rate": 4.9810439560439566e-05, + "loss": 1.2921, + "step": 138 + }, + { + "epoch": 0.38186813186813184, + "grad_norm": 12.059004783630371, + "learning_rate": 4.9809065934065936e-05, + "loss": 1.3811, + "step": 139 + }, + { + "epoch": 0.38461538461538464, + "grad_norm": 12.158488273620605, + "learning_rate": 4.980769230769231e-05, + "loss": 1.2554, + "step": 140 + }, + { + "epoch": 0.3873626373626374, + "grad_norm": 14.014302253723145, + "learning_rate": 4.980631868131868e-05, + "loss": 1.4857, + "step": 141 + }, + { + "epoch": 0.3901098901098901, + "grad_norm": 12.896697998046875, + "learning_rate": 4.980494505494506e-05, + "loss": 1.292, + "step": 142 + }, + { + "epoch": 0.39285714285714285, + "grad_norm": 11.72409439086914, + "learning_rate": 4.980357142857143e-05, + "loss": 1.2699, + "step": 143 + }, + { + "epoch": 0.3956043956043956, + "grad_norm": 14.239765167236328, + "learning_rate": 4.98021978021978e-05, + "loss": 1.3115, + "step": 144 + }, + { + "epoch": 0.3983516483516483, + "grad_norm": 12.384309768676758, + "learning_rate": 4.980082417582418e-05, + "loss": 1.3682, + "step": 145 + }, + { + "epoch": 0.4010989010989011, + "grad_norm": 13.06794261932373, + "learning_rate": 4.979945054945055e-05, + "loss": 1.4768, + "step": 146 + }, + { + "epoch": 0.40384615384615385, + "grad_norm": 11.605112075805664, + "learning_rate": 4.9798076923076924e-05, + "loss": 1.3358, + "step": 147 + }, + { + "epoch": 0.4065934065934066, + "grad_norm": 13.667404174804688, + "learning_rate": 4.97967032967033e-05, + "loss": 1.3878, + "step": 148 + }, + { + "epoch": 0.40934065934065933, + "grad_norm": 12.230318069458008, + "learning_rate": 4.979532967032967e-05, + "loss": 1.3088, + "step": 149 + }, + { + "epoch": 0.41208791208791207, + "grad_norm": 11.67712116241455, + "learning_rate": 4.979395604395605e-05, + "loss": 1.2681, + "step": 150 + }, + { + "epoch": 0.41483516483516486, + "grad_norm": 11.725312232971191, + "learning_rate": 4.979258241758242e-05, + "loss": 1.3038, + "step": 151 + }, + { + "epoch": 0.4175824175824176, + "grad_norm": 14.911850929260254, + "learning_rate": 4.9791208791208794e-05, + "loss": 1.1986, + "step": 152 + }, + { + "epoch": 0.42032967032967034, + "grad_norm": 11.87808895111084, + "learning_rate": 4.978983516483517e-05, + "loss": 1.2149, + "step": 153 + }, + { + "epoch": 0.4230769230769231, + "grad_norm": 12.430316925048828, + "learning_rate": 4.978846153846154e-05, + "loss": 1.4283, + "step": 154 + }, + { + "epoch": 0.4258241758241758, + "grad_norm": 12.250998497009277, + "learning_rate": 4.978708791208792e-05, + "loss": 1.199, + "step": 155 + }, + { + "epoch": 0.42857142857142855, + "grad_norm": 12.45465087890625, + "learning_rate": 4.978571428571429e-05, + "loss": 1.2422, + "step": 156 + }, + { + "epoch": 0.43131868131868134, + "grad_norm": 11.275678634643555, + "learning_rate": 4.9784340659340665e-05, + "loss": 1.1442, + "step": 157 + }, + { + "epoch": 0.4340659340659341, + "grad_norm": 12.533612251281738, + "learning_rate": 4.9782967032967035e-05, + "loss": 1.1885, + "step": 158 + }, + { + "epoch": 0.4368131868131868, + "grad_norm": 14.511796951293945, + "learning_rate": 4.9781593406593405e-05, + "loss": 1.3324, + "step": 159 + }, + { + "epoch": 0.43956043956043955, + "grad_norm": 14.797943115234375, + "learning_rate": 4.978021978021978e-05, + "loss": 1.4123, + "step": 160 + }, + { + "epoch": 0.4423076923076923, + "grad_norm": 13.161177635192871, + "learning_rate": 4.977884615384615e-05, + "loss": 1.169, + "step": 161 + }, + { + "epoch": 0.44505494505494503, + "grad_norm": 11.798635482788086, + "learning_rate": 4.977747252747253e-05, + "loss": 1.3257, + "step": 162 + }, + { + "epoch": 0.4478021978021978, + "grad_norm": 13.556073188781738, + "learning_rate": 4.9776098901098905e-05, + "loss": 1.3191, + "step": 163 + }, + { + "epoch": 0.45054945054945056, + "grad_norm": 12.81802749633789, + "learning_rate": 4.9774725274725275e-05, + "loss": 1.5242, + "step": 164 + }, + { + "epoch": 0.4532967032967033, + "grad_norm": 13.031112670898438, + "learning_rate": 4.977335164835165e-05, + "loss": 1.2916, + "step": 165 + }, + { + "epoch": 0.45604395604395603, + "grad_norm": 12.677579879760742, + "learning_rate": 4.977197802197802e-05, + "loss": 1.348, + "step": 166 + }, + { + "epoch": 0.45879120879120877, + "grad_norm": 14.021271705627441, + "learning_rate": 4.97706043956044e-05, + "loss": 1.4676, + "step": 167 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 11.131810188293457, + "learning_rate": 4.9769230769230775e-05, + "loss": 1.1932, + "step": 168 + }, + { + "epoch": 0.4642857142857143, + "grad_norm": 11.51614761352539, + "learning_rate": 4.9767857142857145e-05, + "loss": 1.1887, + "step": 169 + }, + { + "epoch": 0.46703296703296704, + "grad_norm": 14.350001335144043, + "learning_rate": 4.976648351648352e-05, + "loss": 1.2675, + "step": 170 + }, + { + "epoch": 0.4697802197802198, + "grad_norm": 12.874588012695312, + "learning_rate": 4.976510989010989e-05, + "loss": 1.2461, + "step": 171 + }, + { + "epoch": 0.4725274725274725, + "grad_norm": 15.342337608337402, + "learning_rate": 4.976373626373627e-05, + "loss": 1.3302, + "step": 172 + }, + { + "epoch": 0.47527472527472525, + "grad_norm": 12.044695854187012, + "learning_rate": 4.976236263736264e-05, + "loss": 1.1938, + "step": 173 + }, + { + "epoch": 0.47802197802197804, + "grad_norm": 13.522141456604004, + "learning_rate": 4.976098901098901e-05, + "loss": 1.3605, + "step": 174 + }, + { + "epoch": 0.4807692307692308, + "grad_norm": 13.934388160705566, + "learning_rate": 4.9759615384615386e-05, + "loss": 1.281, + "step": 175 + }, + { + "epoch": 0.4835164835164835, + "grad_norm": 14.610892295837402, + "learning_rate": 4.9758241758241756e-05, + "loss": 1.5905, + "step": 176 + }, + { + "epoch": 0.48626373626373626, + "grad_norm": 12.21035099029541, + "learning_rate": 4.975686813186813e-05, + "loss": 1.1944, + "step": 177 + }, + { + "epoch": 0.489010989010989, + "grad_norm": 12.940444946289062, + "learning_rate": 4.975549450549451e-05, + "loss": 1.2502, + "step": 178 + }, + { + "epoch": 0.49175824175824173, + "grad_norm": 13.871164321899414, + "learning_rate": 4.975412087912088e-05, + "loss": 1.3019, + "step": 179 + }, + { + "epoch": 0.4945054945054945, + "grad_norm": 11.72516918182373, + "learning_rate": 4.9752747252747256e-05, + "loss": 1.113, + "step": 180 + }, + { + "epoch": 0.49725274725274726, + "grad_norm": 11.151795387268066, + "learning_rate": 4.9751373626373626e-05, + "loss": 1.1394, + "step": 181 + }, + { + "epoch": 0.5, + "grad_norm": 13.247220993041992, + "learning_rate": 4.975e-05, + "loss": 1.2306, + "step": 182 + }, + { + "epoch": 0.5027472527472527, + "grad_norm": 13.819487571716309, + "learning_rate": 4.974862637362638e-05, + "loss": 1.6427, + "step": 183 + }, + { + "epoch": 0.5054945054945055, + "grad_norm": 12.38183879852295, + "learning_rate": 4.974725274725275e-05, + "loss": 1.4177, + "step": 184 + }, + { + "epoch": 0.5082417582417582, + "grad_norm": 13.996675491333008, + "learning_rate": 4.974587912087913e-05, + "loss": 1.4148, + "step": 185 + }, + { + "epoch": 0.510989010989011, + "grad_norm": 12.657551765441895, + "learning_rate": 4.97445054945055e-05, + "loss": 1.2581, + "step": 186 + }, + { + "epoch": 0.5137362637362637, + "grad_norm": 12.917963027954102, + "learning_rate": 4.9743131868131874e-05, + "loss": 1.1903, + "step": 187 + }, + { + "epoch": 0.5164835164835165, + "grad_norm": 12.060873985290527, + "learning_rate": 4.9741758241758244e-05, + "loss": 1.2416, + "step": 188 + }, + { + "epoch": 0.5192307692307693, + "grad_norm": 10.783863067626953, + "learning_rate": 4.9740384615384614e-05, + "loss": 1.1382, + "step": 189 + }, + { + "epoch": 0.521978021978022, + "grad_norm": 14.203141212463379, + "learning_rate": 4.973901098901099e-05, + "loss": 1.1885, + "step": 190 + }, + { + "epoch": 0.5247252747252747, + "grad_norm": 13.567035675048828, + "learning_rate": 4.973763736263736e-05, + "loss": 1.1115, + "step": 191 + }, + { + "epoch": 0.5274725274725275, + "grad_norm": 12.713512420654297, + "learning_rate": 4.973626373626374e-05, + "loss": 1.1606, + "step": 192 + }, + { + "epoch": 0.5302197802197802, + "grad_norm": 14.753971099853516, + "learning_rate": 4.9734890109890114e-05, + "loss": 1.5217, + "step": 193 + }, + { + "epoch": 0.532967032967033, + "grad_norm": 14.14857292175293, + "learning_rate": 4.9733516483516484e-05, + "loss": 1.3511, + "step": 194 + }, + { + "epoch": 0.5357142857142857, + "grad_norm": 16.696805953979492, + "learning_rate": 4.973214285714286e-05, + "loss": 1.4337, + "step": 195 + }, + { + "epoch": 0.5384615384615384, + "grad_norm": 14.357763290405273, + "learning_rate": 4.973076923076923e-05, + "loss": 1.201, + "step": 196 + }, + { + "epoch": 0.5412087912087912, + "grad_norm": 14.800238609313965, + "learning_rate": 4.972939560439561e-05, + "loss": 1.2152, + "step": 197 + }, + { + "epoch": 0.5439560439560439, + "grad_norm": 12.142005920410156, + "learning_rate": 4.9728021978021984e-05, + "loss": 1.3219, + "step": 198 + }, + { + "epoch": 0.5467032967032966, + "grad_norm": 12.052074432373047, + "learning_rate": 4.9726648351648354e-05, + "loss": 1.297, + "step": 199 + }, + { + "epoch": 0.5494505494505495, + "grad_norm": 13.272930145263672, + "learning_rate": 4.972527472527473e-05, + "loss": 1.2462, + "step": 200 + }, + { + "epoch": 0.5521978021978022, + "grad_norm": 12.327548027038574, + "learning_rate": 4.97239010989011e-05, + "loss": 1.3133, + "step": 201 + }, + { + "epoch": 0.554945054945055, + "grad_norm": 12.976692199707031, + "learning_rate": 4.972252747252748e-05, + "loss": 1.5068, + "step": 202 + }, + { + "epoch": 0.5576923076923077, + "grad_norm": 11.903977394104004, + "learning_rate": 4.972115384615385e-05, + "loss": 0.9695, + "step": 203 + }, + { + "epoch": 0.5604395604395604, + "grad_norm": 13.539390563964844, + "learning_rate": 4.971978021978022e-05, + "loss": 1.453, + "step": 204 + }, + { + "epoch": 0.5631868131868132, + "grad_norm": 14.996646881103516, + "learning_rate": 4.9718406593406595e-05, + "loss": 1.3669, + "step": 205 + }, + { + "epoch": 0.5659340659340659, + "grad_norm": 10.879257202148438, + "learning_rate": 4.9717032967032965e-05, + "loss": 1.0256, + "step": 206 + }, + { + "epoch": 0.5686813186813187, + "grad_norm": 16.553760528564453, + "learning_rate": 4.971565934065934e-05, + "loss": 1.5602, + "step": 207 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 12.089762687683105, + "learning_rate": 4.971428571428572e-05, + "loss": 1.0228, + "step": 208 + }, + { + "epoch": 0.5741758241758241, + "grad_norm": 18.567848205566406, + "learning_rate": 4.971291208791209e-05, + "loss": 1.5854, + "step": 209 + }, + { + "epoch": 0.5769230769230769, + "grad_norm": 11.29760456085205, + "learning_rate": 4.9711538461538465e-05, + "loss": 1.126, + "step": 210 + }, + { + "epoch": 0.5796703296703297, + "grad_norm": 12.527050971984863, + "learning_rate": 4.9710164835164835e-05, + "loss": 1.2975, + "step": 211 + }, + { + "epoch": 0.5824175824175825, + "grad_norm": 17.650117874145508, + "learning_rate": 4.970879120879121e-05, + "loss": 1.4217, + "step": 212 + }, + { + "epoch": 0.5851648351648352, + "grad_norm": 17.627912521362305, + "learning_rate": 4.970741758241759e-05, + "loss": 1.363, + "step": 213 + }, + { + "epoch": 0.5879120879120879, + "grad_norm": 16.94883918762207, + "learning_rate": 4.970604395604396e-05, + "loss": 1.4507, + "step": 214 + }, + { + "epoch": 0.5906593406593407, + "grad_norm": 10.512198448181152, + "learning_rate": 4.9704670329670336e-05, + "loss": 0.8709, + "step": 215 + }, + { + "epoch": 0.5934065934065934, + "grad_norm": 10.884783744812012, + "learning_rate": 4.9703296703296706e-05, + "loss": 0.9726, + "step": 216 + }, + { + "epoch": 0.5961538461538461, + "grad_norm": 14.142125129699707, + "learning_rate": 4.970192307692308e-05, + "loss": 1.1908, + "step": 217 + }, + { + "epoch": 0.5989010989010989, + "grad_norm": 12.062798500061035, + "learning_rate": 4.970054945054945e-05, + "loss": 1.2414, + "step": 218 + }, + { + "epoch": 0.6016483516483516, + "grad_norm": 13.77719497680664, + "learning_rate": 4.969917582417582e-05, + "loss": 1.6119, + "step": 219 + }, + { + "epoch": 0.6043956043956044, + "grad_norm": 13.4640531539917, + "learning_rate": 4.96978021978022e-05, + "loss": 1.0931, + "step": 220 + }, + { + "epoch": 0.6071428571428571, + "grad_norm": 13.955570220947266, + "learning_rate": 4.969642857142857e-05, + "loss": 1.5054, + "step": 221 + }, + { + "epoch": 0.6098901098901099, + "grad_norm": 14.22704029083252, + "learning_rate": 4.9695054945054946e-05, + "loss": 1.3818, + "step": 222 + }, + { + "epoch": 0.6126373626373627, + "grad_norm": 19.0242977142334, + "learning_rate": 4.969368131868132e-05, + "loss": 1.4329, + "step": 223 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 12.214743614196777, + "learning_rate": 4.969230769230769e-05, + "loss": 1.1809, + "step": 224 + }, + { + "epoch": 0.6181318681318682, + "grad_norm": 12.167357444763184, + "learning_rate": 4.969093406593407e-05, + "loss": 1.2359, + "step": 225 + }, + { + "epoch": 0.6208791208791209, + "grad_norm": 13.232084274291992, + "learning_rate": 4.968956043956044e-05, + "loss": 1.1439, + "step": 226 + }, + { + "epoch": 0.6236263736263736, + "grad_norm": 13.372308731079102, + "learning_rate": 4.9688186813186817e-05, + "loss": 1.1788, + "step": 227 + }, + { + "epoch": 0.6263736263736264, + "grad_norm": 12.322187423706055, + "learning_rate": 4.968681318681319e-05, + "loss": 1.2933, + "step": 228 + }, + { + "epoch": 0.6291208791208791, + "grad_norm": 14.432149887084961, + "learning_rate": 4.968543956043956e-05, + "loss": 1.51, + "step": 229 + }, + { + "epoch": 0.6318681318681318, + "grad_norm": 13.499879837036133, + "learning_rate": 4.968406593406594e-05, + "loss": 1.3589, + "step": 230 + }, + { + "epoch": 0.6346153846153846, + "grad_norm": 13.425796508789062, + "learning_rate": 4.968269230769231e-05, + "loss": 1.0121, + "step": 231 + }, + { + "epoch": 0.6373626373626373, + "grad_norm": 12.99256420135498, + "learning_rate": 4.968131868131869e-05, + "loss": 1.1344, + "step": 232 + }, + { + "epoch": 0.6401098901098901, + "grad_norm": 12.99930477142334, + "learning_rate": 4.967994505494506e-05, + "loss": 1.1103, + "step": 233 + }, + { + "epoch": 0.6428571428571429, + "grad_norm": 14.280091285705566, + "learning_rate": 4.967857142857143e-05, + "loss": 1.1722, + "step": 234 + }, + { + "epoch": 0.6456043956043956, + "grad_norm": 10.4708251953125, + "learning_rate": 4.9677197802197804e-05, + "loss": 0.9083, + "step": 235 + }, + { + "epoch": 0.6483516483516484, + "grad_norm": 11.97358226776123, + "learning_rate": 4.9675824175824174e-05, + "loss": 1.113, + "step": 236 + }, + { + "epoch": 0.6510989010989011, + "grad_norm": 13.000057220458984, + "learning_rate": 4.967445054945055e-05, + "loss": 1.183, + "step": 237 + }, + { + "epoch": 0.6538461538461539, + "grad_norm": 16.211002349853516, + "learning_rate": 4.967307692307693e-05, + "loss": 1.4628, + "step": 238 + }, + { + "epoch": 0.6565934065934066, + "grad_norm": 16.407642364501953, + "learning_rate": 4.96717032967033e-05, + "loss": 1.2723, + "step": 239 + }, + { + "epoch": 0.6593406593406593, + "grad_norm": 12.823528289794922, + "learning_rate": 4.9670329670329674e-05, + "loss": 1.0943, + "step": 240 + }, + { + "epoch": 0.6620879120879121, + "grad_norm": 12.36283016204834, + "learning_rate": 4.9668956043956044e-05, + "loss": 0.8886, + "step": 241 + }, + { + "epoch": 0.6648351648351648, + "grad_norm": 13.023324966430664, + "learning_rate": 4.966758241758242e-05, + "loss": 0.9862, + "step": 242 + }, + { + "epoch": 0.6675824175824175, + "grad_norm": 13.140905380249023, + "learning_rate": 4.96662087912088e-05, + "loss": 1.196, + "step": 243 + }, + { + "epoch": 0.6703296703296703, + "grad_norm": 15.506550788879395, + "learning_rate": 4.966483516483517e-05, + "loss": 1.3054, + "step": 244 + }, + { + "epoch": 0.6730769230769231, + "grad_norm": 18.001991271972656, + "learning_rate": 4.9663461538461545e-05, + "loss": 1.0568, + "step": 245 + }, + { + "epoch": 0.6758241758241759, + "grad_norm": 12.075765609741211, + "learning_rate": 4.9662087912087915e-05, + "loss": 1.1766, + "step": 246 + }, + { + "epoch": 0.6785714285714286, + "grad_norm": 14.609705924987793, + "learning_rate": 4.966071428571429e-05, + "loss": 1.4124, + "step": 247 + }, + { + "epoch": 0.6813186813186813, + "grad_norm": 13.146650314331055, + "learning_rate": 4.965934065934066e-05, + "loss": 1.3021, + "step": 248 + }, + { + "epoch": 0.6840659340659341, + "grad_norm": 13.227266311645508, + "learning_rate": 4.965796703296703e-05, + "loss": 1.1751, + "step": 249 + }, + { + "epoch": 0.6868131868131868, + "grad_norm": 14.904523849487305, + "learning_rate": 4.965659340659341e-05, + "loss": 1.1216, + "step": 250 + }, + { + "epoch": 0.6895604395604396, + "grad_norm": 15.363250732421875, + "learning_rate": 4.965521978021978e-05, + "loss": 1.3997, + "step": 251 + }, + { + "epoch": 0.6923076923076923, + "grad_norm": 11.2948637008667, + "learning_rate": 4.9653846153846155e-05, + "loss": 1.0006, + "step": 252 + }, + { + "epoch": 0.695054945054945, + "grad_norm": 15.876336097717285, + "learning_rate": 4.965247252747253e-05, + "loss": 1.2268, + "step": 253 + }, + { + "epoch": 0.6978021978021978, + "grad_norm": 12.424973487854004, + "learning_rate": 4.96510989010989e-05, + "loss": 1.2241, + "step": 254 + }, + { + "epoch": 0.7005494505494505, + "grad_norm": 13.094992637634277, + "learning_rate": 4.964972527472528e-05, + "loss": 1.4037, + "step": 255 + }, + { + "epoch": 0.7032967032967034, + "grad_norm": 18.13215446472168, + "learning_rate": 4.964835164835165e-05, + "loss": 1.2561, + "step": 256 + }, + { + "epoch": 0.7060439560439561, + "grad_norm": 14.863910675048828, + "learning_rate": 4.9646978021978026e-05, + "loss": 1.4577, + "step": 257 + }, + { + "epoch": 0.7087912087912088, + "grad_norm": 11.463693618774414, + "learning_rate": 4.96456043956044e-05, + "loss": 0.9695, + "step": 258 + }, + { + "epoch": 0.7115384615384616, + "grad_norm": 18.00654411315918, + "learning_rate": 4.964423076923077e-05, + "loss": 1.9507, + "step": 259 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 14.758578300476074, + "learning_rate": 4.964285714285715e-05, + "loss": 1.4791, + "step": 260 + }, + { + "epoch": 0.717032967032967, + "grad_norm": 16.33698081970215, + "learning_rate": 4.964148351648352e-05, + "loss": 1.7511, + "step": 261 + }, + { + "epoch": 0.7197802197802198, + "grad_norm": 11.242210388183594, + "learning_rate": 4.9640109890109896e-05, + "loss": 0.9481, + "step": 262 + }, + { + "epoch": 0.7225274725274725, + "grad_norm": 13.557633399963379, + "learning_rate": 4.9638736263736266e-05, + "loss": 1.265, + "step": 263 + }, + { + "epoch": 0.7252747252747253, + "grad_norm": 14.435017585754395, + "learning_rate": 4.9637362637362636e-05, + "loss": 1.1416, + "step": 264 + }, + { + "epoch": 0.728021978021978, + "grad_norm": 13.887308120727539, + "learning_rate": 4.963598901098901e-05, + "loss": 1.1217, + "step": 265 + }, + { + "epoch": 0.7307692307692307, + "grad_norm": 12.614677429199219, + "learning_rate": 4.963461538461538e-05, + "loss": 0.9798, + "step": 266 + }, + { + "epoch": 0.7335164835164835, + "grad_norm": 13.42813491821289, + "learning_rate": 4.963324175824176e-05, + "loss": 1.3921, + "step": 267 + }, + { + "epoch": 0.7362637362637363, + "grad_norm": 11.980626106262207, + "learning_rate": 4.9631868131868136e-05, + "loss": 1.1155, + "step": 268 + }, + { + "epoch": 0.739010989010989, + "grad_norm": 13.069533348083496, + "learning_rate": 4.9630494505494506e-05, + "loss": 1.1136, + "step": 269 + }, + { + "epoch": 0.7417582417582418, + "grad_norm": 14.360024452209473, + "learning_rate": 4.962912087912088e-05, + "loss": 1.5151, + "step": 270 + }, + { + "epoch": 0.7445054945054945, + "grad_norm": 16.212425231933594, + "learning_rate": 4.962774725274725e-05, + "loss": 1.4831, + "step": 271 + }, + { + "epoch": 0.7472527472527473, + "grad_norm": 13.636710166931152, + "learning_rate": 4.962637362637363e-05, + "loss": 1.276, + "step": 272 + }, + { + "epoch": 0.75, + "grad_norm": 13.438370704650879, + "learning_rate": 4.962500000000001e-05, + "loss": 1.4229, + "step": 273 + }, + { + "epoch": 0.7527472527472527, + "grad_norm": 12.851740837097168, + "learning_rate": 4.962362637362638e-05, + "loss": 0.9778, + "step": 274 + }, + { + "epoch": 0.7554945054945055, + "grad_norm": 10.95182991027832, + "learning_rate": 4.9622252747252754e-05, + "loss": 1.0257, + "step": 275 + }, + { + "epoch": 0.7582417582417582, + "grad_norm": 12.108832359313965, + "learning_rate": 4.9620879120879124e-05, + "loss": 1.1062, + "step": 276 + }, + { + "epoch": 0.760989010989011, + "grad_norm": 12.497129440307617, + "learning_rate": 4.96195054945055e-05, + "loss": 1.0353, + "step": 277 + }, + { + "epoch": 0.7637362637362637, + "grad_norm": 10.693946838378906, + "learning_rate": 4.961813186813187e-05, + "loss": 0.9575, + "step": 278 + }, + { + "epoch": 0.7664835164835165, + "grad_norm": 13.327488899230957, + "learning_rate": 4.961675824175824e-05, + "loss": 1.1272, + "step": 279 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 16.538677215576172, + "learning_rate": 4.961538461538462e-05, + "loss": 1.2135, + "step": 280 + }, + { + "epoch": 0.771978021978022, + "grad_norm": 12.675247192382812, + "learning_rate": 4.961401098901099e-05, + "loss": 1.1351, + "step": 281 + }, + { + "epoch": 0.7747252747252747, + "grad_norm": 14.176576614379883, + "learning_rate": 4.9612637362637364e-05, + "loss": 0.9831, + "step": 282 + }, + { + "epoch": 0.7774725274725275, + "grad_norm": 12.505339622497559, + "learning_rate": 4.961126373626374e-05, + "loss": 1.0079, + "step": 283 + }, + { + "epoch": 0.7802197802197802, + "grad_norm": 13.41912841796875, + "learning_rate": 4.960989010989011e-05, + "loss": 1.2841, + "step": 284 + }, + { + "epoch": 0.782967032967033, + "grad_norm": 15.405747413635254, + "learning_rate": 4.960851648351649e-05, + "loss": 1.4589, + "step": 285 + }, + { + "epoch": 0.7857142857142857, + "grad_norm": 14.830168724060059, + "learning_rate": 4.960714285714286e-05, + "loss": 1.1131, + "step": 286 + }, + { + "epoch": 0.7884615384615384, + "grad_norm": 12.2350492477417, + "learning_rate": 4.9605769230769235e-05, + "loss": 0.9871, + "step": 287 + }, + { + "epoch": 0.7912087912087912, + "grad_norm": 15.194928169250488, + "learning_rate": 4.960439560439561e-05, + "loss": 1.2591, + "step": 288 + }, + { + "epoch": 0.7939560439560439, + "grad_norm": 11.617773056030273, + "learning_rate": 4.960302197802198e-05, + "loss": 1.0174, + "step": 289 + }, + { + "epoch": 0.7967032967032966, + "grad_norm": 11.775501251220703, + "learning_rate": 4.960164835164836e-05, + "loss": 1.1272, + "step": 290 + }, + { + "epoch": 0.7994505494505495, + "grad_norm": 12.565820693969727, + "learning_rate": 4.960027472527473e-05, + "loss": 1.1552, + "step": 291 + }, + { + "epoch": 0.8021978021978022, + "grad_norm": 15.879376411437988, + "learning_rate": 4.9598901098901105e-05, + "loss": 1.1779, + "step": 292 + }, + { + "epoch": 0.804945054945055, + "grad_norm": 11.463175773620605, + "learning_rate": 4.9597527472527475e-05, + "loss": 0.9241, + "step": 293 + }, + { + "epoch": 0.8076923076923077, + "grad_norm": 14.219273567199707, + "learning_rate": 4.9596153846153845e-05, + "loss": 1.1506, + "step": 294 + }, + { + "epoch": 0.8104395604395604, + "grad_norm": 14.836417198181152, + "learning_rate": 4.959478021978022e-05, + "loss": 1.1408, + "step": 295 + }, + { + "epoch": 0.8131868131868132, + "grad_norm": 15.848272323608398, + "learning_rate": 4.959340659340659e-05, + "loss": 1.507, + "step": 296 + }, + { + "epoch": 0.8159340659340659, + "grad_norm": 12.348753929138184, + "learning_rate": 4.959203296703297e-05, + "loss": 1.1947, + "step": 297 + }, + { + "epoch": 0.8186813186813187, + "grad_norm": 13.824447631835938, + "learning_rate": 4.9590659340659345e-05, + "loss": 1.3132, + "step": 298 + }, + { + "epoch": 0.8214285714285714, + "grad_norm": 14.521097183227539, + "learning_rate": 4.9589285714285715e-05, + "loss": 0.8815, + "step": 299 + }, + { + "epoch": 0.8241758241758241, + "grad_norm": 12.139167785644531, + "learning_rate": 4.958791208791209e-05, + "loss": 1.2005, + "step": 300 + }, + { + "epoch": 0.8269230769230769, + "grad_norm": 12.179774284362793, + "learning_rate": 4.958653846153846e-05, + "loss": 1.0885, + "step": 301 + }, + { + "epoch": 0.8296703296703297, + "grad_norm": 14.044488906860352, + "learning_rate": 4.958516483516484e-05, + "loss": 1.2339, + "step": 302 + }, + { + "epoch": 0.8324175824175825, + "grad_norm": 13.817596435546875, + "learning_rate": 4.9583791208791216e-05, + "loss": 1.2841, + "step": 303 + }, + { + "epoch": 0.8351648351648352, + "grad_norm": 12.731148719787598, + "learning_rate": 4.9582417582417586e-05, + "loss": 1.2255, + "step": 304 + }, + { + "epoch": 0.8379120879120879, + "grad_norm": 13.918963432312012, + "learning_rate": 4.958104395604396e-05, + "loss": 1.1174, + "step": 305 + }, + { + "epoch": 0.8406593406593407, + "grad_norm": 14.402066230773926, + "learning_rate": 4.957967032967033e-05, + "loss": 1.3091, + "step": 306 + }, + { + "epoch": 0.8434065934065934, + "grad_norm": 16.559663772583008, + "learning_rate": 4.957829670329671e-05, + "loss": 1.3103, + "step": 307 + }, + { + "epoch": 0.8461538461538461, + "grad_norm": 12.439542770385742, + "learning_rate": 4.957692307692308e-05, + "loss": 1.1716, + "step": 308 + }, + { + "epoch": 0.8489010989010989, + "grad_norm": 11.982641220092773, + "learning_rate": 4.957554945054945e-05, + "loss": 0.91, + "step": 309 + }, + { + "epoch": 0.8516483516483516, + "grad_norm": 13.215062141418457, + "learning_rate": 4.9574175824175826e-05, + "loss": 1.2112, + "step": 310 + }, + { + "epoch": 0.8543956043956044, + "grad_norm": 15.647346496582031, + "learning_rate": 4.9572802197802196e-05, + "loss": 1.148, + "step": 311 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 13.073627471923828, + "learning_rate": 4.957142857142857e-05, + "loss": 1.2032, + "step": 312 + }, + { + "epoch": 0.8598901098901099, + "grad_norm": 11.098077774047852, + "learning_rate": 4.957005494505495e-05, + "loss": 1.0263, + "step": 313 + }, + { + "epoch": 0.8626373626373627, + "grad_norm": 11.923196792602539, + "learning_rate": 4.956868131868132e-05, + "loss": 1.022, + "step": 314 + }, + { + "epoch": 0.8653846153846154, + "grad_norm": 11.19662857055664, + "learning_rate": 4.95673076923077e-05, + "loss": 1.0551, + "step": 315 + }, + { + "epoch": 0.8681318681318682, + "grad_norm": 12.351297378540039, + "learning_rate": 4.956593406593407e-05, + "loss": 1.0769, + "step": 316 + }, + { + "epoch": 0.8708791208791209, + "grad_norm": 14.358241081237793, + "learning_rate": 4.9564560439560444e-05, + "loss": 1.1228, + "step": 317 + }, + { + "epoch": 0.8736263736263736, + "grad_norm": 17.78407859802246, + "learning_rate": 4.956318681318682e-05, + "loss": 1.237, + "step": 318 + }, + { + "epoch": 0.8763736263736264, + "grad_norm": 13.241104125976562, + "learning_rate": 4.956181318681319e-05, + "loss": 1.3023, + "step": 319 + }, + { + "epoch": 0.8791208791208791, + "grad_norm": 14.029000282287598, + "learning_rate": 4.956043956043957e-05, + "loss": 1.3069, + "step": 320 + }, + { + "epoch": 0.8818681318681318, + "grad_norm": 11.988253593444824, + "learning_rate": 4.955906593406594e-05, + "loss": 0.8275, + "step": 321 + }, + { + "epoch": 0.8846153846153846, + "grad_norm": 14.511198997497559, + "learning_rate": 4.955769230769231e-05, + "loss": 1.283, + "step": 322 + }, + { + "epoch": 0.8873626373626373, + "grad_norm": 15.990909576416016, + "learning_rate": 4.9556318681318684e-05, + "loss": 1.3439, + "step": 323 + }, + { + "epoch": 0.8901098901098901, + "grad_norm": 16.27536964416504, + "learning_rate": 4.9554945054945054e-05, + "loss": 1.3876, + "step": 324 + }, + { + "epoch": 0.8928571428571429, + "grad_norm": 12.053900718688965, + "learning_rate": 4.955357142857143e-05, + "loss": 1.134, + "step": 325 + }, + { + "epoch": 0.8956043956043956, + "grad_norm": 11.105413436889648, + "learning_rate": 4.95521978021978e-05, + "loss": 0.9763, + "step": 326 + }, + { + "epoch": 0.8983516483516484, + "grad_norm": 16.664575576782227, + "learning_rate": 4.955082417582418e-05, + "loss": 1.3536, + "step": 327 + }, + { + "epoch": 0.9010989010989011, + "grad_norm": 13.745959281921387, + "learning_rate": 4.9549450549450554e-05, + "loss": 1.2556, + "step": 328 + }, + { + "epoch": 0.9038461538461539, + "grad_norm": 13.9169921875, + "learning_rate": 4.9548076923076924e-05, + "loss": 1.0755, + "step": 329 + }, + { + "epoch": 0.9065934065934066, + "grad_norm": 13.389812469482422, + "learning_rate": 4.95467032967033e-05, + "loss": 1.187, + "step": 330 + }, + { + "epoch": 0.9093406593406593, + "grad_norm": 13.849236488342285, + "learning_rate": 4.954532967032967e-05, + "loss": 1.1472, + "step": 331 + }, + { + "epoch": 0.9120879120879121, + "grad_norm": 15.92530632019043, + "learning_rate": 4.954395604395605e-05, + "loss": 1.2395, + "step": 332 + }, + { + "epoch": 0.9148351648351648, + "grad_norm": 19.10356330871582, + "learning_rate": 4.9542582417582425e-05, + "loss": 1.3378, + "step": 333 + }, + { + "epoch": 0.9175824175824175, + "grad_norm": 13.76838493347168, + "learning_rate": 4.9541208791208795e-05, + "loss": 1.0543, + "step": 334 + }, + { + "epoch": 0.9203296703296703, + "grad_norm": 12.017414093017578, + "learning_rate": 4.953983516483517e-05, + "loss": 1.0384, + "step": 335 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 13.505117416381836, + "learning_rate": 4.953846153846154e-05, + "loss": 1.1675, + "step": 336 + }, + { + "epoch": 0.9258241758241759, + "grad_norm": 12.914880752563477, + "learning_rate": 4.953708791208791e-05, + "loss": 1.1729, + "step": 337 + }, + { + "epoch": 0.9285714285714286, + "grad_norm": 13.212347984313965, + "learning_rate": 4.953571428571429e-05, + "loss": 1.1706, + "step": 338 + }, + { + "epoch": 0.9313186813186813, + "grad_norm": 14.061457633972168, + "learning_rate": 4.953434065934066e-05, + "loss": 1.1899, + "step": 339 + }, + { + "epoch": 0.9340659340659341, + "grad_norm": 12.597298622131348, + "learning_rate": 4.9532967032967035e-05, + "loss": 1.0956, + "step": 340 + }, + { + "epoch": 0.9368131868131868, + "grad_norm": 15.991598129272461, + "learning_rate": 4.9531593406593405e-05, + "loss": 1.5897, + "step": 341 + }, + { + "epoch": 0.9395604395604396, + "grad_norm": 14.518169403076172, + "learning_rate": 4.953021978021978e-05, + "loss": 1.5809, + "step": 342 + }, + { + "epoch": 0.9423076923076923, + "grad_norm": 13.618841171264648, + "learning_rate": 4.952884615384615e-05, + "loss": 1.17, + "step": 343 + }, + { + "epoch": 0.945054945054945, + "grad_norm": 13.592436790466309, + "learning_rate": 4.952747252747253e-05, + "loss": 0.9368, + "step": 344 + }, + { + "epoch": 0.9478021978021978, + "grad_norm": 12.92720890045166, + "learning_rate": 4.9526098901098906e-05, + "loss": 0.923, + "step": 345 + }, + { + "epoch": 0.9505494505494505, + "grad_norm": 13.718347549438477, + "learning_rate": 4.9524725274725276e-05, + "loss": 1.2033, + "step": 346 + }, + { + "epoch": 0.9532967032967034, + "grad_norm": 12.583293914794922, + "learning_rate": 4.952335164835165e-05, + "loss": 1.0558, + "step": 347 + }, + { + "epoch": 0.9560439560439561, + "grad_norm": 12.682695388793945, + "learning_rate": 4.952197802197802e-05, + "loss": 0.9811, + "step": 348 + }, + { + "epoch": 0.9587912087912088, + "grad_norm": 14.353730201721191, + "learning_rate": 4.95206043956044e-05, + "loss": 1.3375, + "step": 349 + }, + { + "epoch": 0.9615384615384616, + "grad_norm": 16.087244033813477, + "learning_rate": 4.9519230769230776e-05, + "loss": 1.3843, + "step": 350 + }, + { + "epoch": 0.9642857142857143, + "grad_norm": 13.338921546936035, + "learning_rate": 4.9517857142857146e-05, + "loss": 0.9273, + "step": 351 + }, + { + "epoch": 0.967032967032967, + "grad_norm": 12.162032127380371, + "learning_rate": 4.9516483516483516e-05, + "loss": 0.9515, + "step": 352 + }, + { + "epoch": 0.9697802197802198, + "grad_norm": 13.984648704528809, + "learning_rate": 4.951510989010989e-05, + "loss": 1.2313, + "step": 353 + }, + { + "epoch": 0.9725274725274725, + "grad_norm": 14.791245460510254, + "learning_rate": 4.951373626373626e-05, + "loss": 1.3765, + "step": 354 + }, + { + "epoch": 0.9752747252747253, + "grad_norm": 12.36400032043457, + "learning_rate": 4.951236263736264e-05, + "loss": 0.9808, + "step": 355 + }, + { + "epoch": 0.978021978021978, + "grad_norm": 13.742558479309082, + "learning_rate": 4.951098901098901e-05, + "loss": 1.3154, + "step": 356 + }, + { + "epoch": 0.9807692307692307, + "grad_norm": 17.41370964050293, + "learning_rate": 4.9509615384615387e-05, + "loss": 1.6093, + "step": 357 + }, + { + "epoch": 0.9835164835164835, + "grad_norm": 13.075835227966309, + "learning_rate": 4.9508241758241757e-05, + "loss": 1.1804, + "step": 358 + }, + { + "epoch": 0.9862637362637363, + "grad_norm": 11.341201782226562, + "learning_rate": 4.950686813186813e-05, + "loss": 0.9428, + "step": 359 + }, + { + "epoch": 0.989010989010989, + "grad_norm": 12.028609275817871, + "learning_rate": 4.950549450549451e-05, + "loss": 0.8991, + "step": 360 + }, + { + "epoch": 0.9917582417582418, + "grad_norm": 12.979469299316406, + "learning_rate": 4.950412087912088e-05, + "loss": 1.0204, + "step": 361 + }, + { + "epoch": 0.9945054945054945, + "grad_norm": 12.184111595153809, + "learning_rate": 4.950274725274726e-05, + "loss": 0.987, + "step": 362 + }, + { + "epoch": 0.9972527472527473, + "grad_norm": 12.266473770141602, + "learning_rate": 4.950137362637363e-05, + "loss": 0.988, + "step": 363 + }, + { + "epoch": 1.0, + "grad_norm": 29.803991317749023, + "learning_rate": 4.9500000000000004e-05, + "loss": 1.8339, + "step": 364 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.38154269972451793, + "eval_f1": 0.3479919912332152, + "eval_f1_DuraRiadoRio_64x64": 0.2644628099173554, + "eval_f1_Mole_64x64": 0.41852487135506006, + "eval_f1_Quebrado_64x64": 0.6692307692307692, + "eval_f1_RiadoRio_64x64": 0.21052631578947367, + "eval_f1_RioFechado_64x64": 0.17721518987341772, + "eval_loss": 1.3718713521957397, + "eval_precision": 0.5230799705087645, + "eval_precision_DuraRiadoRio_64x64": 0.32653061224489793, + "eval_precision_Mole_64x64": 0.27790432801822323, + "eval_precision_Quebrado_64x64": 0.75, + "eval_precision_RiadoRio_64x64": 0.38596491228070173, + "eval_precision_RioFechado_64x64": 0.875, + "eval_recall": 0.3833879005024298, + "eval_recall_DuraRiadoRio_64x64": 0.2222222222222222, + "eval_recall_Mole_64x64": 0.8472222222222222, + "eval_recall_Quebrado_64x64": 0.6041666666666666, + "eval_recall_RiadoRio_64x64": 0.14473684210526316, + "eval_recall_RioFechado_64x64": 0.09859154929577464, + "eval_runtime": 1.7245, + "eval_samples_per_second": 420.996, + "eval_steps_per_second": 26.675, + "step": 364 + }, + { + "epoch": 1.0027472527472527, + "grad_norm": 14.17491626739502, + "learning_rate": 4.949862637362638e-05, + "loss": 1.0803, + "step": 365 + }, + { + "epoch": 1.0054945054945055, + "grad_norm": 12.86688232421875, + "learning_rate": 4.949725274725275e-05, + "loss": 1.1198, + "step": 366 + }, + { + "epoch": 1.0082417582417582, + "grad_norm": 14.655556678771973, + "learning_rate": 4.949587912087912e-05, + "loss": 1.5094, + "step": 367 + }, + { + "epoch": 1.010989010989011, + "grad_norm": 14.993326187133789, + "learning_rate": 4.94945054945055e-05, + "loss": 1.0825, + "step": 368 + }, + { + "epoch": 1.0137362637362637, + "grad_norm": 10.798080444335938, + "learning_rate": 4.949313186813187e-05, + "loss": 0.8606, + "step": 369 + }, + { + "epoch": 1.0164835164835164, + "grad_norm": 11.318215370178223, + "learning_rate": 4.9491758241758244e-05, + "loss": 0.9726, + "step": 370 + }, + { + "epoch": 1.0192307692307692, + "grad_norm": 15.56840705871582, + "learning_rate": 4.9490384615384614e-05, + "loss": 1.3926, + "step": 371 + }, + { + "epoch": 1.021978021978022, + "grad_norm": 11.465319633483887, + "learning_rate": 4.948901098901099e-05, + "loss": 0.96, + "step": 372 + }, + { + "epoch": 1.0247252747252746, + "grad_norm": 12.496397972106934, + "learning_rate": 4.948763736263736e-05, + "loss": 0.8687, + "step": 373 + }, + { + "epoch": 1.0274725274725274, + "grad_norm": 11.222525596618652, + "learning_rate": 4.948626373626374e-05, + "loss": 0.7935, + "step": 374 + }, + { + "epoch": 1.0302197802197801, + "grad_norm": 12.04660415649414, + "learning_rate": 4.9484890109890115e-05, + "loss": 0.941, + "step": 375 + }, + { + "epoch": 1.032967032967033, + "grad_norm": 13.465983390808105, + "learning_rate": 4.9483516483516485e-05, + "loss": 0.987, + "step": 376 + }, + { + "epoch": 1.0357142857142858, + "grad_norm": 14.882142066955566, + "learning_rate": 4.948214285714286e-05, + "loss": 1.2558, + "step": 377 + }, + { + "epoch": 1.0384615384615385, + "grad_norm": 13.53616714477539, + "learning_rate": 4.948076923076923e-05, + "loss": 1.2066, + "step": 378 + }, + { + "epoch": 1.0412087912087913, + "grad_norm": 11.295904159545898, + "learning_rate": 4.947939560439561e-05, + "loss": 0.9612, + "step": 379 + }, + { + "epoch": 1.043956043956044, + "grad_norm": 11.085485458374023, + "learning_rate": 4.9478021978021985e-05, + "loss": 0.8353, + "step": 380 + }, + { + "epoch": 1.0467032967032968, + "grad_norm": 9.396191596984863, + "learning_rate": 4.9476648351648355e-05, + "loss": 0.7348, + "step": 381 + }, + { + "epoch": 1.0494505494505495, + "grad_norm": 15.378769874572754, + "learning_rate": 4.9475274725274725e-05, + "loss": 1.0531, + "step": 382 + }, + { + "epoch": 1.0521978021978022, + "grad_norm": 15.522170066833496, + "learning_rate": 4.94739010989011e-05, + "loss": 1.3437, + "step": 383 + }, + { + "epoch": 1.054945054945055, + "grad_norm": 14.2041597366333, + "learning_rate": 4.947252747252747e-05, + "loss": 0.9724, + "step": 384 + }, + { + "epoch": 1.0576923076923077, + "grad_norm": 12.831459045410156, + "learning_rate": 4.947115384615385e-05, + "loss": 0.957, + "step": 385 + }, + { + "epoch": 1.0604395604395604, + "grad_norm": 11.640055656433105, + "learning_rate": 4.946978021978022e-05, + "loss": 0.7842, + "step": 386 + }, + { + "epoch": 1.0631868131868132, + "grad_norm": 14.89058780670166, + "learning_rate": 4.9468406593406596e-05, + "loss": 1.0534, + "step": 387 + }, + { + "epoch": 1.065934065934066, + "grad_norm": 14.356468200683594, + "learning_rate": 4.9467032967032966e-05, + "loss": 1.0286, + "step": 388 + }, + { + "epoch": 1.0686813186813187, + "grad_norm": 14.518092155456543, + "learning_rate": 4.946565934065934e-05, + "loss": 1.03, + "step": 389 + }, + { + "epoch": 1.0714285714285714, + "grad_norm": 14.63350772857666, + "learning_rate": 4.946428571428572e-05, + "loss": 1.2807, + "step": 390 + }, + { + "epoch": 1.0741758241758241, + "grad_norm": 16.172054290771484, + "learning_rate": 4.946291208791209e-05, + "loss": 1.3147, + "step": 391 + }, + { + "epoch": 1.0769230769230769, + "grad_norm": 16.344392776489258, + "learning_rate": 4.9461538461538466e-05, + "loss": 1.0753, + "step": 392 + }, + { + "epoch": 1.0796703296703296, + "grad_norm": 13.30858039855957, + "learning_rate": 4.9460164835164836e-05, + "loss": 1.1334, + "step": 393 + }, + { + "epoch": 1.0824175824175823, + "grad_norm": 17.12913703918457, + "learning_rate": 4.945879120879121e-05, + "loss": 1.0669, + "step": 394 + }, + { + "epoch": 1.085164835164835, + "grad_norm": 16.27476692199707, + "learning_rate": 4.945741758241759e-05, + "loss": 1.0231, + "step": 395 + }, + { + "epoch": 1.0879120879120878, + "grad_norm": 13.754328727722168, + "learning_rate": 4.945604395604396e-05, + "loss": 1.1964, + "step": 396 + }, + { + "epoch": 1.0906593406593406, + "grad_norm": 14.205765724182129, + "learning_rate": 4.945467032967033e-05, + "loss": 1.1541, + "step": 397 + }, + { + "epoch": 1.0934065934065935, + "grad_norm": 13.636202812194824, + "learning_rate": 4.9453296703296706e-05, + "loss": 0.8955, + "step": 398 + }, + { + "epoch": 1.0961538461538463, + "grad_norm": 12.4942626953125, + "learning_rate": 4.9451923076923076e-05, + "loss": 0.8572, + "step": 399 + }, + { + "epoch": 1.098901098901099, + "grad_norm": 13.547833442687988, + "learning_rate": 4.945054945054945e-05, + "loss": 1.1157, + "step": 400 + }, + { + "epoch": 1.1016483516483517, + "grad_norm": 13.875629425048828, + "learning_rate": 4.944917582417582e-05, + "loss": 1.0884, + "step": 401 + }, + { + "epoch": 1.1043956043956045, + "grad_norm": 12.455392837524414, + "learning_rate": 4.94478021978022e-05, + "loss": 0.8355, + "step": 402 + }, + { + "epoch": 1.1071428571428572, + "grad_norm": 14.214324951171875, + "learning_rate": 4.944642857142857e-05, + "loss": 1.0699, + "step": 403 + }, + { + "epoch": 1.10989010989011, + "grad_norm": 12.622101783752441, + "learning_rate": 4.944505494505495e-05, + "loss": 1.1517, + "step": 404 + }, + { + "epoch": 1.1126373626373627, + "grad_norm": 15.251612663269043, + "learning_rate": 4.9443681318681324e-05, + "loss": 0.7824, + "step": 405 + }, + { + "epoch": 1.1153846153846154, + "grad_norm": 10.773489952087402, + "learning_rate": 4.9442307692307694e-05, + "loss": 0.8309, + "step": 406 + }, + { + "epoch": 1.1181318681318682, + "grad_norm": 14.612451553344727, + "learning_rate": 4.944093406593407e-05, + "loss": 1.0018, + "step": 407 + }, + { + "epoch": 1.120879120879121, + "grad_norm": 10.332158088684082, + "learning_rate": 4.943956043956044e-05, + "loss": 0.6761, + "step": 408 + }, + { + "epoch": 1.1236263736263736, + "grad_norm": 14.610616683959961, + "learning_rate": 4.943818681318682e-05, + "loss": 0.9998, + "step": 409 + }, + { + "epoch": 1.1263736263736264, + "grad_norm": 17.433673858642578, + "learning_rate": 4.9436813186813194e-05, + "loss": 1.2386, + "step": 410 + }, + { + "epoch": 1.129120879120879, + "grad_norm": 14.200654983520508, + "learning_rate": 4.9435439560439564e-05, + "loss": 0.9608, + "step": 411 + }, + { + "epoch": 1.1318681318681318, + "grad_norm": 15.899948120117188, + "learning_rate": 4.9434065934065934e-05, + "loss": 1.0951, + "step": 412 + }, + { + "epoch": 1.1346153846153846, + "grad_norm": 13.238754272460938, + "learning_rate": 4.943269230769231e-05, + "loss": 1.0165, + "step": 413 + }, + { + "epoch": 1.1373626373626373, + "grad_norm": 13.81921100616455, + "learning_rate": 4.943131868131868e-05, + "loss": 1.0604, + "step": 414 + }, + { + "epoch": 1.14010989010989, + "grad_norm": 15.54361343383789, + "learning_rate": 4.942994505494506e-05, + "loss": 1.1974, + "step": 415 + }, + { + "epoch": 1.1428571428571428, + "grad_norm": 13.35658073425293, + "learning_rate": 4.942857142857143e-05, + "loss": 0.9885, + "step": 416 + }, + { + "epoch": 1.1456043956043955, + "grad_norm": 11.619772911071777, + "learning_rate": 4.9427197802197805e-05, + "loss": 0.8121, + "step": 417 + }, + { + "epoch": 1.1483516483516483, + "grad_norm": 13.04605484008789, + "learning_rate": 4.9425824175824175e-05, + "loss": 0.972, + "step": 418 + }, + { + "epoch": 1.151098901098901, + "grad_norm": 12.954437255859375, + "learning_rate": 4.942445054945055e-05, + "loss": 1.027, + "step": 419 + }, + { + "epoch": 1.1538461538461537, + "grad_norm": 11.040879249572754, + "learning_rate": 4.942307692307693e-05, + "loss": 0.7719, + "step": 420 + }, + { + "epoch": 1.1565934065934065, + "grad_norm": 11.79328727722168, + "learning_rate": 4.94217032967033e-05, + "loss": 0.8993, + "step": 421 + }, + { + "epoch": 1.1593406593406592, + "grad_norm": 14.387459754943848, + "learning_rate": 4.9420329670329675e-05, + "loss": 1.2277, + "step": 422 + }, + { + "epoch": 1.1620879120879122, + "grad_norm": 13.467272758483887, + "learning_rate": 4.9418956043956045e-05, + "loss": 0.9163, + "step": 423 + }, + { + "epoch": 1.164835164835165, + "grad_norm": 15.86023235321045, + "learning_rate": 4.941758241758242e-05, + "loss": 1.4535, + "step": 424 + }, + { + "epoch": 1.1675824175824177, + "grad_norm": 13.005478858947754, + "learning_rate": 4.94162087912088e-05, + "loss": 1.0039, + "step": 425 + }, + { + "epoch": 1.1703296703296704, + "grad_norm": 11.835919380187988, + "learning_rate": 4.941483516483517e-05, + "loss": 0.754, + "step": 426 + }, + { + "epoch": 1.1730769230769231, + "grad_norm": 13.56119441986084, + "learning_rate": 4.941346153846154e-05, + "loss": 1.0107, + "step": 427 + }, + { + "epoch": 1.1758241758241759, + "grad_norm": 12.75005054473877, + "learning_rate": 4.9412087912087915e-05, + "loss": 0.8787, + "step": 428 + }, + { + "epoch": 1.1785714285714286, + "grad_norm": 11.35076904296875, + "learning_rate": 4.9410714285714285e-05, + "loss": 0.9069, + "step": 429 + }, + { + "epoch": 1.1813186813186813, + "grad_norm": 12.188347816467285, + "learning_rate": 4.940934065934066e-05, + "loss": 0.9617, + "step": 430 + }, + { + "epoch": 1.184065934065934, + "grad_norm": 14.984000205993652, + "learning_rate": 4.940796703296703e-05, + "loss": 1.262, + "step": 431 + }, + { + "epoch": 1.1868131868131868, + "grad_norm": 15.461816787719727, + "learning_rate": 4.940659340659341e-05, + "loss": 1.1587, + "step": 432 + }, + { + "epoch": 1.1895604395604396, + "grad_norm": 11.14546012878418, + "learning_rate": 4.940521978021978e-05, + "loss": 0.8538, + "step": 433 + }, + { + "epoch": 1.1923076923076923, + "grad_norm": 14.947819709777832, + "learning_rate": 4.9403846153846156e-05, + "loss": 1.0603, + "step": 434 + }, + { + "epoch": 1.195054945054945, + "grad_norm": 15.00871753692627, + "learning_rate": 4.940247252747253e-05, + "loss": 1.2403, + "step": 435 + }, + { + "epoch": 1.1978021978021978, + "grad_norm": 18.2979736328125, + "learning_rate": 4.94010989010989e-05, + "loss": 1.2324, + "step": 436 + }, + { + "epoch": 1.2005494505494505, + "grad_norm": 12.331123352050781, + "learning_rate": 4.939972527472528e-05, + "loss": 0.8005, + "step": 437 + }, + { + "epoch": 1.2032967032967032, + "grad_norm": 13.06849193572998, + "learning_rate": 4.939835164835165e-05, + "loss": 0.9749, + "step": 438 + }, + { + "epoch": 1.206043956043956, + "grad_norm": 15.320960998535156, + "learning_rate": 4.9396978021978026e-05, + "loss": 1.1477, + "step": 439 + }, + { + "epoch": 1.2087912087912087, + "grad_norm": 14.352633476257324, + "learning_rate": 4.93956043956044e-05, + "loss": 0.992, + "step": 440 + }, + { + "epoch": 1.2115384615384615, + "grad_norm": 13.437933921813965, + "learning_rate": 4.939423076923077e-05, + "loss": 1.024, + "step": 441 + }, + { + "epoch": 1.2142857142857142, + "grad_norm": 12.138522148132324, + "learning_rate": 4.939285714285714e-05, + "loss": 0.8616, + "step": 442 + }, + { + "epoch": 1.2170329670329672, + "grad_norm": 16.701122283935547, + "learning_rate": 4.939148351648352e-05, + "loss": 0.951, + "step": 443 + }, + { + "epoch": 1.2197802197802199, + "grad_norm": 17.711597442626953, + "learning_rate": 4.939010989010989e-05, + "loss": 1.5573, + "step": 444 + }, + { + "epoch": 1.2225274725274726, + "grad_norm": 12.251205444335938, + "learning_rate": 4.938873626373627e-05, + "loss": 1.0305, + "step": 445 + }, + { + "epoch": 1.2252747252747254, + "grad_norm": 14.39975643157959, + "learning_rate": 4.938736263736264e-05, + "loss": 1.2632, + "step": 446 + }, + { + "epoch": 1.228021978021978, + "grad_norm": 14.338571548461914, + "learning_rate": 4.9385989010989014e-05, + "loss": 1.1573, + "step": 447 + }, + { + "epoch": 1.2307692307692308, + "grad_norm": 13.37582015991211, + "learning_rate": 4.9384615384615384e-05, + "loss": 0.9812, + "step": 448 + }, + { + "epoch": 1.2335164835164836, + "grad_norm": 11.633881568908691, + "learning_rate": 4.938324175824176e-05, + "loss": 0.6662, + "step": 449 + }, + { + "epoch": 1.2362637362637363, + "grad_norm": 11.860138893127441, + "learning_rate": 4.938186813186814e-05, + "loss": 0.8937, + "step": 450 + }, + { + "epoch": 1.239010989010989, + "grad_norm": 12.602147102355957, + "learning_rate": 4.938049450549451e-05, + "loss": 0.8658, + "step": 451 + }, + { + "epoch": 1.2417582417582418, + "grad_norm": 12.168277740478516, + "learning_rate": 4.9379120879120884e-05, + "loss": 0.9541, + "step": 452 + }, + { + "epoch": 1.2445054945054945, + "grad_norm": 15.841530799865723, + "learning_rate": 4.9377747252747254e-05, + "loss": 1.2242, + "step": 453 + }, + { + "epoch": 1.2472527472527473, + "grad_norm": 12.353517532348633, + "learning_rate": 4.937637362637363e-05, + "loss": 0.8505, + "step": 454 + }, + { + "epoch": 1.25, + "grad_norm": 11.365479469299316, + "learning_rate": 4.937500000000001e-05, + "loss": 1.0973, + "step": 455 + }, + { + "epoch": 1.2527472527472527, + "grad_norm": 11.430685043334961, + "learning_rate": 4.937362637362638e-05, + "loss": 0.9623, + "step": 456 + }, + { + "epoch": 1.2554945054945055, + "grad_norm": 15.648473739624023, + "learning_rate": 4.937225274725275e-05, + "loss": 1.1287, + "step": 457 + }, + { + "epoch": 1.2582417582417582, + "grad_norm": 14.671048164367676, + "learning_rate": 4.9370879120879124e-05, + "loss": 1.0811, + "step": 458 + }, + { + "epoch": 1.260989010989011, + "grad_norm": 13.037151336669922, + "learning_rate": 4.9369505494505494e-05, + "loss": 1.1383, + "step": 459 + }, + { + "epoch": 1.2637362637362637, + "grad_norm": 16.10312843322754, + "learning_rate": 4.936813186813187e-05, + "loss": 1.3018, + "step": 460 + }, + { + "epoch": 1.2664835164835164, + "grad_norm": 12.13269329071045, + "learning_rate": 4.936675824175824e-05, + "loss": 0.8989, + "step": 461 + }, + { + "epoch": 1.2692307692307692, + "grad_norm": 16.787141799926758, + "learning_rate": 4.936538461538462e-05, + "loss": 1.2155, + "step": 462 + }, + { + "epoch": 1.271978021978022, + "grad_norm": 11.445819854736328, + "learning_rate": 4.936401098901099e-05, + "loss": 0.7243, + "step": 463 + }, + { + "epoch": 1.2747252747252746, + "grad_norm": 16.20384979248047, + "learning_rate": 4.9362637362637365e-05, + "loss": 1.1944, + "step": 464 + }, + { + "epoch": 1.2774725274725274, + "grad_norm": 13.06916332244873, + "learning_rate": 4.936126373626374e-05, + "loss": 0.9477, + "step": 465 + }, + { + "epoch": 1.2802197802197801, + "grad_norm": 14.169821739196777, + "learning_rate": 4.935989010989011e-05, + "loss": 1.2076, + "step": 466 + }, + { + "epoch": 1.2829670329670328, + "grad_norm": 14.01050853729248, + "learning_rate": 4.935851648351649e-05, + "loss": 0.9372, + "step": 467 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 13.220412254333496, + "learning_rate": 4.935714285714286e-05, + "loss": 0.8549, + "step": 468 + }, + { + "epoch": 1.2884615384615383, + "grad_norm": 10.686951637268066, + "learning_rate": 4.9355769230769235e-05, + "loss": 0.8134, + "step": 469 + }, + { + "epoch": 1.2912087912087913, + "grad_norm": 18.644081115722656, + "learning_rate": 4.935439560439561e-05, + "loss": 1.4186, + "step": 470 + }, + { + "epoch": 1.293956043956044, + "grad_norm": 12.916885375976562, + "learning_rate": 4.935302197802198e-05, + "loss": 0.9017, + "step": 471 + }, + { + "epoch": 1.2967032967032968, + "grad_norm": 11.507798194885254, + "learning_rate": 4.935164835164835e-05, + "loss": 0.9913, + "step": 472 + }, + { + "epoch": 1.2994505494505495, + "grad_norm": 9.725370407104492, + "learning_rate": 4.935027472527473e-05, + "loss": 0.631, + "step": 473 + }, + { + "epoch": 1.3021978021978022, + "grad_norm": 11.093947410583496, + "learning_rate": 4.93489010989011e-05, + "loss": 0.826, + "step": 474 + }, + { + "epoch": 1.304945054945055, + "grad_norm": 14.288691520690918, + "learning_rate": 4.9347527472527476e-05, + "loss": 1.2129, + "step": 475 + }, + { + "epoch": 1.3076923076923077, + "grad_norm": 12.213302612304688, + "learning_rate": 4.9346153846153846e-05, + "loss": 0.8121, + "step": 476 + }, + { + "epoch": 1.3104395604395604, + "grad_norm": 11.825850486755371, + "learning_rate": 4.934478021978022e-05, + "loss": 0.8932, + "step": 477 + }, + { + "epoch": 1.3131868131868132, + "grad_norm": 14.238340377807617, + "learning_rate": 4.934340659340659e-05, + "loss": 1.1046, + "step": 478 + }, + { + "epoch": 1.315934065934066, + "grad_norm": 14.643025398254395, + "learning_rate": 4.934203296703297e-05, + "loss": 1.0349, + "step": 479 + }, + { + "epoch": 1.3186813186813187, + "grad_norm": 11.061711311340332, + "learning_rate": 4.9340659340659346e-05, + "loss": 0.6943, + "step": 480 + }, + { + "epoch": 1.3214285714285714, + "grad_norm": 12.622522354125977, + "learning_rate": 4.9339285714285716e-05, + "loss": 0.9309, + "step": 481 + }, + { + "epoch": 1.3241758241758241, + "grad_norm": 11.826495170593262, + "learning_rate": 4.933791208791209e-05, + "loss": 0.7992, + "step": 482 + }, + { + "epoch": 1.3269230769230769, + "grad_norm": 14.41885757446289, + "learning_rate": 4.933653846153846e-05, + "loss": 0.9834, + "step": 483 + }, + { + "epoch": 1.3296703296703296, + "grad_norm": 14.250594139099121, + "learning_rate": 4.933516483516484e-05, + "loss": 1.1422, + "step": 484 + }, + { + "epoch": 1.3324175824175823, + "grad_norm": 14.57355785369873, + "learning_rate": 4.9333791208791217e-05, + "loss": 1.114, + "step": 485 + }, + { + "epoch": 1.335164835164835, + "grad_norm": 17.44304084777832, + "learning_rate": 4.9332417582417587e-05, + "loss": 1.2301, + "step": 486 + }, + { + "epoch": 1.337912087912088, + "grad_norm": 17.827959060668945, + "learning_rate": 4.9331043956043957e-05, + "loss": 1.0385, + "step": 487 + }, + { + "epoch": 1.3406593406593408, + "grad_norm": 13.437519073486328, + "learning_rate": 4.932967032967033e-05, + "loss": 1.022, + "step": 488 + }, + { + "epoch": 1.3434065934065935, + "grad_norm": 13.552084922790527, + "learning_rate": 4.93282967032967e-05, + "loss": 1.0113, + "step": 489 + }, + { + "epoch": 1.3461538461538463, + "grad_norm": 12.065842628479004, + "learning_rate": 4.932692307692308e-05, + "loss": 1.1027, + "step": 490 + }, + { + "epoch": 1.348901098901099, + "grad_norm": 16.76594352722168, + "learning_rate": 4.932554945054945e-05, + "loss": 1.3781, + "step": 491 + }, + { + "epoch": 1.3516483516483517, + "grad_norm": 13.956107139587402, + "learning_rate": 4.932417582417583e-05, + "loss": 1.158, + "step": 492 + }, + { + "epoch": 1.3543956043956045, + "grad_norm": 12.385986328125, + "learning_rate": 4.93228021978022e-05, + "loss": 0.9955, + "step": 493 + }, + { + "epoch": 1.3571428571428572, + "grad_norm": 12.213274002075195, + "learning_rate": 4.9321428571428574e-05, + "loss": 0.8575, + "step": 494 + }, + { + "epoch": 1.35989010989011, + "grad_norm": 14.825958251953125, + "learning_rate": 4.932005494505495e-05, + "loss": 0.9842, + "step": 495 + }, + { + "epoch": 1.3626373626373627, + "grad_norm": 13.17214584350586, + "learning_rate": 4.931868131868132e-05, + "loss": 0.9298, + "step": 496 + }, + { + "epoch": 1.3653846153846154, + "grad_norm": 10.92165756225586, + "learning_rate": 4.93173076923077e-05, + "loss": 0.7532, + "step": 497 + }, + { + "epoch": 1.3681318681318682, + "grad_norm": 14.239852905273438, + "learning_rate": 4.931593406593407e-05, + "loss": 1.0607, + "step": 498 + }, + { + "epoch": 1.370879120879121, + "grad_norm": 13.789603233337402, + "learning_rate": 4.9314560439560444e-05, + "loss": 0.9122, + "step": 499 + }, + { + "epoch": 1.3736263736263736, + "grad_norm": 17.80086898803711, + "learning_rate": 4.931318681318682e-05, + "loss": 1.1592, + "step": 500 + }, + { + "epoch": 1.3763736263736264, + "grad_norm": 11.1934814453125, + "learning_rate": 4.931181318681319e-05, + "loss": 0.785, + "step": 501 + }, + { + "epoch": 1.379120879120879, + "grad_norm": 14.073812484741211, + "learning_rate": 4.931043956043956e-05, + "loss": 1.0332, + "step": 502 + }, + { + "epoch": 1.3818681318681318, + "grad_norm": 13.99670124053955, + "learning_rate": 4.930906593406593e-05, + "loss": 1.0051, + "step": 503 + }, + { + "epoch": 1.3846153846153846, + "grad_norm": 12.557366371154785, + "learning_rate": 4.930769230769231e-05, + "loss": 0.8401, + "step": 504 + }, + { + "epoch": 1.3873626373626373, + "grad_norm": 17.1882381439209, + "learning_rate": 4.9306318681318685e-05, + "loss": 1.1213, + "step": 505 + }, + { + "epoch": 1.39010989010989, + "grad_norm": 14.94882583618164, + "learning_rate": 4.9304945054945055e-05, + "loss": 1.3381, + "step": 506 + }, + { + "epoch": 1.3928571428571428, + "grad_norm": 13.517932891845703, + "learning_rate": 4.930357142857143e-05, + "loss": 1.0114, + "step": 507 + }, + { + "epoch": 1.3956043956043955, + "grad_norm": 14.304312705993652, + "learning_rate": 4.93021978021978e-05, + "loss": 1.0328, + "step": 508 + }, + { + "epoch": 1.3983516483516483, + "grad_norm": 16.124372482299805, + "learning_rate": 4.930082417582418e-05, + "loss": 0.8891, + "step": 509 + }, + { + "epoch": 1.401098901098901, + "grad_norm": 15.793384552001953, + "learning_rate": 4.9299450549450555e-05, + "loss": 1.2808, + "step": 510 + }, + { + "epoch": 1.4038461538461537, + "grad_norm": 13.833414077758789, + "learning_rate": 4.9298076923076925e-05, + "loss": 1.0065, + "step": 511 + }, + { + "epoch": 1.4065934065934065, + "grad_norm": 16.483694076538086, + "learning_rate": 4.92967032967033e-05, + "loss": 1.3036, + "step": 512 + }, + { + "epoch": 1.4093406593406592, + "grad_norm": 14.468986511230469, + "learning_rate": 4.929532967032967e-05, + "loss": 1.0203, + "step": 513 + }, + { + "epoch": 1.412087912087912, + "grad_norm": 12.371633529663086, + "learning_rate": 4.929395604395605e-05, + "loss": 0.9305, + "step": 514 + }, + { + "epoch": 1.414835164835165, + "grad_norm": 12.541175842285156, + "learning_rate": 4.9292582417582425e-05, + "loss": 0.9739, + "step": 515 + }, + { + "epoch": 1.4175824175824177, + "grad_norm": 12.92735767364502, + "learning_rate": 4.9291208791208796e-05, + "loss": 0.9523, + "step": 516 + }, + { + "epoch": 1.4203296703296704, + "grad_norm": 16.795650482177734, + "learning_rate": 4.9289835164835166e-05, + "loss": 1.0457, + "step": 517 + }, + { + "epoch": 1.4230769230769231, + "grad_norm": 10.211971282958984, + "learning_rate": 4.9288461538461536e-05, + "loss": 0.7211, + "step": 518 + }, + { + "epoch": 1.4258241758241759, + "grad_norm": 11.089685440063477, + "learning_rate": 4.928708791208791e-05, + "loss": 0.5792, + "step": 519 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 13.438864707946777, + "learning_rate": 4.928571428571429e-05, + "loss": 0.892, + "step": 520 + }, + { + "epoch": 1.4313186813186813, + "grad_norm": 14.244488716125488, + "learning_rate": 4.928434065934066e-05, + "loss": 0.9487, + "step": 521 + }, + { + "epoch": 1.434065934065934, + "grad_norm": 14.9314546585083, + "learning_rate": 4.9282967032967036e-05, + "loss": 1.0176, + "step": 522 + }, + { + "epoch": 1.4368131868131868, + "grad_norm": 13.39484691619873, + "learning_rate": 4.9281593406593406e-05, + "loss": 0.9644, + "step": 523 + }, + { + "epoch": 1.4395604395604396, + "grad_norm": 15.605340003967285, + "learning_rate": 4.928021978021978e-05, + "loss": 0.7266, + "step": 524 + }, + { + "epoch": 1.4423076923076923, + "grad_norm": 12.895183563232422, + "learning_rate": 4.927884615384616e-05, + "loss": 0.7988, + "step": 525 + }, + { + "epoch": 1.445054945054945, + "grad_norm": 14.091665267944336, + "learning_rate": 4.927747252747253e-05, + "loss": 1.1359, + "step": 526 + }, + { + "epoch": 1.4478021978021978, + "grad_norm": 11.960042953491211, + "learning_rate": 4.9276098901098906e-05, + "loss": 0.8192, + "step": 527 + }, + { + "epoch": 1.4505494505494505, + "grad_norm": 11.263321876525879, + "learning_rate": 4.9274725274725276e-05, + "loss": 0.6781, + "step": 528 + }, + { + "epoch": 1.4532967032967032, + "grad_norm": 12.074165344238281, + "learning_rate": 4.927335164835165e-05, + "loss": 0.6897, + "step": 529 + }, + { + "epoch": 1.456043956043956, + "grad_norm": 13.154492378234863, + "learning_rate": 4.927197802197803e-05, + "loss": 1.0047, + "step": 530 + }, + { + "epoch": 1.4587912087912087, + "grad_norm": 15.21131420135498, + "learning_rate": 4.92706043956044e-05, + "loss": 1.209, + "step": 531 + }, + { + "epoch": 1.4615384615384617, + "grad_norm": 11.427879333496094, + "learning_rate": 4.926923076923077e-05, + "loss": 0.7791, + "step": 532 + }, + { + "epoch": 1.4642857142857144, + "grad_norm": 15.92352294921875, + "learning_rate": 4.926785714285714e-05, + "loss": 1.1257, + "step": 533 + }, + { + "epoch": 1.4670329670329672, + "grad_norm": 14.753470420837402, + "learning_rate": 4.926648351648352e-05, + "loss": 1.085, + "step": 534 + }, + { + "epoch": 1.4697802197802199, + "grad_norm": 15.146732330322266, + "learning_rate": 4.9265109890109894e-05, + "loss": 1.2331, + "step": 535 + }, + { + "epoch": 1.4725274725274726, + "grad_norm": 14.771814346313477, + "learning_rate": 4.9263736263736264e-05, + "loss": 1.0124, + "step": 536 + }, + { + "epoch": 1.4752747252747254, + "grad_norm": 11.911209106445312, + "learning_rate": 4.926236263736264e-05, + "loss": 0.7897, + "step": 537 + }, + { + "epoch": 1.478021978021978, + "grad_norm": 11.75105094909668, + "learning_rate": 4.926098901098901e-05, + "loss": 0.7401, + "step": 538 + }, + { + "epoch": 1.4807692307692308, + "grad_norm": 9.964615821838379, + "learning_rate": 4.925961538461539e-05, + "loss": 0.6737, + "step": 539 + }, + { + "epoch": 1.4835164835164836, + "grad_norm": 15.68148136138916, + "learning_rate": 4.9258241758241764e-05, + "loss": 1.1853, + "step": 540 + }, + { + "epoch": 1.4862637362637363, + "grad_norm": 18.900915145874023, + "learning_rate": 4.9256868131868134e-05, + "loss": 1.223, + "step": 541 + }, + { + "epoch": 1.489010989010989, + "grad_norm": 13.366312980651855, + "learning_rate": 4.925549450549451e-05, + "loss": 0.8809, + "step": 542 + }, + { + "epoch": 1.4917582417582418, + "grad_norm": 17.383546829223633, + "learning_rate": 4.925412087912088e-05, + "loss": 0.945, + "step": 543 + }, + { + "epoch": 1.4945054945054945, + "grad_norm": 12.225083351135254, + "learning_rate": 4.925274725274726e-05, + "loss": 0.955, + "step": 544 + }, + { + "epoch": 1.4972527472527473, + "grad_norm": 16.622787475585938, + "learning_rate": 4.9251373626373634e-05, + "loss": 1.1935, + "step": 545 + }, + { + "epoch": 1.5, + "grad_norm": 16.079137802124023, + "learning_rate": 4.9250000000000004e-05, + "loss": 1.3983, + "step": 546 + }, + { + "epoch": 1.5027472527472527, + "grad_norm": 12.870593070983887, + "learning_rate": 4.9248626373626375e-05, + "loss": 0.8438, + "step": 547 + }, + { + "epoch": 1.5054945054945055, + "grad_norm": 18.58379364013672, + "learning_rate": 4.9247252747252745e-05, + "loss": 1.0192, + "step": 548 + }, + { + "epoch": 1.5082417582417582, + "grad_norm": 12.368755340576172, + "learning_rate": 4.924587912087912e-05, + "loss": 0.8382, + "step": 549 + }, + { + "epoch": 1.510989010989011, + "grad_norm": 13.04057788848877, + "learning_rate": 4.92445054945055e-05, + "loss": 0.8619, + "step": 550 + }, + { + "epoch": 1.5137362637362637, + "grad_norm": 13.337457656860352, + "learning_rate": 4.924313186813187e-05, + "loss": 0.854, + "step": 551 + }, + { + "epoch": 1.5164835164835164, + "grad_norm": 10.52949333190918, + "learning_rate": 4.9241758241758245e-05, + "loss": 0.6295, + "step": 552 + }, + { + "epoch": 1.5192307692307692, + "grad_norm": 16.870134353637695, + "learning_rate": 4.9240384615384615e-05, + "loss": 1.2892, + "step": 553 + }, + { + "epoch": 1.521978021978022, + "grad_norm": 12.490053176879883, + "learning_rate": 4.923901098901099e-05, + "loss": 0.8777, + "step": 554 + }, + { + "epoch": 1.5247252747252746, + "grad_norm": 14.534534454345703, + "learning_rate": 4.923763736263737e-05, + "loss": 1.048, + "step": 555 + }, + { + "epoch": 1.5274725274725274, + "grad_norm": 14.638370513916016, + "learning_rate": 4.923626373626374e-05, + "loss": 1.0859, + "step": 556 + }, + { + "epoch": 1.5302197802197801, + "grad_norm": 12.649492263793945, + "learning_rate": 4.9234890109890115e-05, + "loss": 0.9233, + "step": 557 + }, + { + "epoch": 1.5329670329670328, + "grad_norm": 18.330829620361328, + "learning_rate": 4.9233516483516485e-05, + "loss": 1.5453, + "step": 558 + }, + { + "epoch": 1.5357142857142856, + "grad_norm": 12.236997604370117, + "learning_rate": 4.923214285714286e-05, + "loss": 0.8016, + "step": 559 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 16.245891571044922, + "learning_rate": 4.923076923076924e-05, + "loss": 1.0698, + "step": 560 + }, + { + "epoch": 1.541208791208791, + "grad_norm": 11.395062446594238, + "learning_rate": 4.922939560439561e-05, + "loss": 0.7669, + "step": 561 + }, + { + "epoch": 1.5439560439560438, + "grad_norm": 18.08130645751953, + "learning_rate": 4.922802197802198e-05, + "loss": 1.0224, + "step": 562 + }, + { + "epoch": 1.5467032967032965, + "grad_norm": 16.82439422607422, + "learning_rate": 4.922664835164835e-05, + "loss": 1.2514, + "step": 563 + }, + { + "epoch": 1.5494505494505495, + "grad_norm": 16.74081802368164, + "learning_rate": 4.9225274725274726e-05, + "loss": 1.1183, + "step": 564 + }, + { + "epoch": 1.5521978021978022, + "grad_norm": 12.541986465454102, + "learning_rate": 4.92239010989011e-05, + "loss": 0.9558, + "step": 565 + }, + { + "epoch": 1.554945054945055, + "grad_norm": 10.529799461364746, + "learning_rate": 4.922252747252747e-05, + "loss": 0.6651, + "step": 566 + }, + { + "epoch": 1.5576923076923077, + "grad_norm": 13.910016059875488, + "learning_rate": 4.922115384615385e-05, + "loss": 0.8057, + "step": 567 + }, + { + "epoch": 1.5604395604395604, + "grad_norm": 16.625730514526367, + "learning_rate": 4.921978021978022e-05, + "loss": 1.4927, + "step": 568 + }, + { + "epoch": 1.5631868131868132, + "grad_norm": 12.33542537689209, + "learning_rate": 4.9218406593406596e-05, + "loss": 0.8768, + "step": 569 + }, + { + "epoch": 1.565934065934066, + "grad_norm": 16.421432495117188, + "learning_rate": 4.9217032967032966e-05, + "loss": 1.2546, + "step": 570 + }, + { + "epoch": 1.5686813186813187, + "grad_norm": 11.040077209472656, + "learning_rate": 4.921565934065934e-05, + "loss": 0.7031, + "step": 571 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 17.488351821899414, + "learning_rate": 4.921428571428572e-05, + "loss": 1.3635, + "step": 572 + }, + { + "epoch": 1.5741758241758241, + "grad_norm": 14.215925216674805, + "learning_rate": 4.921291208791209e-05, + "loss": 0.9309, + "step": 573 + }, + { + "epoch": 1.5769230769230769, + "grad_norm": 11.967530250549316, + "learning_rate": 4.921153846153847e-05, + "loss": 0.7594, + "step": 574 + }, + { + "epoch": 1.5796703296703298, + "grad_norm": 11.360936164855957, + "learning_rate": 4.921016483516484e-05, + "loss": 0.8565, + "step": 575 + }, + { + "epoch": 1.5824175824175826, + "grad_norm": 10.664443969726562, + "learning_rate": 4.9208791208791213e-05, + "loss": 0.5121, + "step": 576 + }, + { + "epoch": 1.5851648351648353, + "grad_norm": 12.280547142028809, + "learning_rate": 4.9207417582417583e-05, + "loss": 0.7178, + "step": 577 + }, + { + "epoch": 1.587912087912088, + "grad_norm": 13.645966529846191, + "learning_rate": 4.9206043956043954e-05, + "loss": 0.829, + "step": 578 + }, + { + "epoch": 1.5906593406593408, + "grad_norm": 13.885424613952637, + "learning_rate": 4.920467032967033e-05, + "loss": 1.0811, + "step": 579 + }, + { + "epoch": 1.5934065934065935, + "grad_norm": 15.32419490814209, + "learning_rate": 4.92032967032967e-05, + "loss": 1.1879, + "step": 580 + }, + { + "epoch": 1.5961538461538463, + "grad_norm": 14.892571449279785, + "learning_rate": 4.920192307692308e-05, + "loss": 1.2431, + "step": 581 + }, + { + "epoch": 1.598901098901099, + "grad_norm": 14.806314468383789, + "learning_rate": 4.9200549450549454e-05, + "loss": 0.998, + "step": 582 + }, + { + "epoch": 1.6016483516483517, + "grad_norm": 10.595271110534668, + "learning_rate": 4.9199175824175824e-05, + "loss": 0.7397, + "step": 583 + }, + { + "epoch": 1.6043956043956045, + "grad_norm": 14.160755157470703, + "learning_rate": 4.91978021978022e-05, + "loss": 1.0243, + "step": 584 + }, + { + "epoch": 1.6071428571428572, + "grad_norm": 15.59274673461914, + "learning_rate": 4.919642857142857e-05, + "loss": 1.1937, + "step": 585 + }, + { + "epoch": 1.60989010989011, + "grad_norm": 14.971797943115234, + "learning_rate": 4.919505494505495e-05, + "loss": 1.0648, + "step": 586 + }, + { + "epoch": 1.6126373626373627, + "grad_norm": 17.057024002075195, + "learning_rate": 4.9193681318681324e-05, + "loss": 0.9388, + "step": 587 + }, + { + "epoch": 1.6153846153846154, + "grad_norm": 18.27959442138672, + "learning_rate": 4.9192307692307694e-05, + "loss": 1.074, + "step": 588 + }, + { + "epoch": 1.6181318681318682, + "grad_norm": 15.982646942138672, + "learning_rate": 4.919093406593407e-05, + "loss": 1.1204, + "step": 589 + }, + { + "epoch": 1.620879120879121, + "grad_norm": 12.534232139587402, + "learning_rate": 4.918956043956044e-05, + "loss": 0.7654, + "step": 590 + }, + { + "epoch": 1.6236263736263736, + "grad_norm": 15.864424705505371, + "learning_rate": 4.918818681318682e-05, + "loss": 0.9195, + "step": 591 + }, + { + "epoch": 1.6263736263736264, + "grad_norm": 11.60084056854248, + "learning_rate": 4.918681318681319e-05, + "loss": 0.7315, + "step": 592 + }, + { + "epoch": 1.629120879120879, + "grad_norm": 18.22762107849121, + "learning_rate": 4.918543956043956e-05, + "loss": 0.9887, + "step": 593 + }, + { + "epoch": 1.6318681318681318, + "grad_norm": 13.252777099609375, + "learning_rate": 4.9184065934065935e-05, + "loss": 0.7898, + "step": 594 + }, + { + "epoch": 1.6346153846153846, + "grad_norm": 13.764241218566895, + "learning_rate": 4.9182692307692305e-05, + "loss": 1.0918, + "step": 595 + }, + { + "epoch": 1.6373626373626373, + "grad_norm": 19.76540756225586, + "learning_rate": 4.918131868131868e-05, + "loss": 1.2285, + "step": 596 + }, + { + "epoch": 1.64010989010989, + "grad_norm": 13.403697967529297, + "learning_rate": 4.917994505494506e-05, + "loss": 0.9104, + "step": 597 + }, + { + "epoch": 1.6428571428571428, + "grad_norm": 16.228788375854492, + "learning_rate": 4.917857142857143e-05, + "loss": 0.8499, + "step": 598 + }, + { + "epoch": 1.6456043956043955, + "grad_norm": 18.22837257385254, + "learning_rate": 4.9177197802197805e-05, + "loss": 1.312, + "step": 599 + }, + { + "epoch": 1.6483516483516483, + "grad_norm": 11.977128028869629, + "learning_rate": 4.9175824175824175e-05, + "loss": 0.7148, + "step": 600 + }, + { + "epoch": 1.651098901098901, + "grad_norm": 12.385074615478516, + "learning_rate": 4.917445054945055e-05, + "loss": 0.819, + "step": 601 + }, + { + "epoch": 1.6538461538461537, + "grad_norm": 16.1881103515625, + "learning_rate": 4.917307692307693e-05, + "loss": 0.8116, + "step": 602 + }, + { + "epoch": 1.6565934065934065, + "grad_norm": 13.113801002502441, + "learning_rate": 4.91717032967033e-05, + "loss": 0.8589, + "step": 603 + }, + { + "epoch": 1.6593406593406592, + "grad_norm": 10.80451488494873, + "learning_rate": 4.9170329670329676e-05, + "loss": 0.7087, + "step": 604 + }, + { + "epoch": 1.662087912087912, + "grad_norm": 14.168054580688477, + "learning_rate": 4.9168956043956046e-05, + "loss": 0.8326, + "step": 605 + }, + { + "epoch": 1.6648351648351647, + "grad_norm": 14.355937957763672, + "learning_rate": 4.916758241758242e-05, + "loss": 0.8712, + "step": 606 + }, + { + "epoch": 1.6675824175824174, + "grad_norm": 16.29913902282715, + "learning_rate": 4.916620879120879e-05, + "loss": 1.2868, + "step": 607 + }, + { + "epoch": 1.6703296703296702, + "grad_norm": 13.048500061035156, + "learning_rate": 4.916483516483516e-05, + "loss": 0.8957, + "step": 608 + }, + { + "epoch": 1.6730769230769231, + "grad_norm": 15.820258140563965, + "learning_rate": 4.916346153846154e-05, + "loss": 1.0711, + "step": 609 + }, + { + "epoch": 1.6758241758241759, + "grad_norm": 17.44540023803711, + "learning_rate": 4.916208791208791e-05, + "loss": 1.5724, + "step": 610 + }, + { + "epoch": 1.6785714285714286, + "grad_norm": 12.891575813293457, + "learning_rate": 4.9160714285714286e-05, + "loss": 0.7993, + "step": 611 + }, + { + "epoch": 1.6813186813186813, + "grad_norm": 12.063701629638672, + "learning_rate": 4.915934065934066e-05, + "loss": 0.7247, + "step": 612 + }, + { + "epoch": 1.684065934065934, + "grad_norm": 13.876007080078125, + "learning_rate": 4.915796703296703e-05, + "loss": 0.9938, + "step": 613 + }, + { + "epoch": 1.6868131868131868, + "grad_norm": 12.459601402282715, + "learning_rate": 4.915659340659341e-05, + "loss": 0.7557, + "step": 614 + }, + { + "epoch": 1.6895604395604396, + "grad_norm": 16.901342391967773, + "learning_rate": 4.915521978021978e-05, + "loss": 1.2106, + "step": 615 + }, + { + "epoch": 1.6923076923076923, + "grad_norm": 12.761078834533691, + "learning_rate": 4.9153846153846157e-05, + "loss": 0.8908, + "step": 616 + }, + { + "epoch": 1.695054945054945, + "grad_norm": 14.561929702758789, + "learning_rate": 4.915247252747253e-05, + "loss": 0.8782, + "step": 617 + }, + { + "epoch": 1.6978021978021978, + "grad_norm": 11.594861030578613, + "learning_rate": 4.91510989010989e-05, + "loss": 0.7337, + "step": 618 + }, + { + "epoch": 1.7005494505494505, + "grad_norm": 15.159793853759766, + "learning_rate": 4.914972527472528e-05, + "loss": 0.8274, + "step": 619 + }, + { + "epoch": 1.7032967032967035, + "grad_norm": 17.591114044189453, + "learning_rate": 4.914835164835165e-05, + "loss": 1.0228, + "step": 620 + }, + { + "epoch": 1.7060439560439562, + "grad_norm": 10.075901985168457, + "learning_rate": 4.914697802197803e-05, + "loss": 0.7047, + "step": 621 + }, + { + "epoch": 1.708791208791209, + "grad_norm": 12.816974639892578, + "learning_rate": 4.91456043956044e-05, + "loss": 0.7655, + "step": 622 + }, + { + "epoch": 1.7115384615384617, + "grad_norm": 15.445180892944336, + "learning_rate": 4.914423076923077e-05, + "loss": 0.8676, + "step": 623 + }, + { + "epoch": 1.7142857142857144, + "grad_norm": 13.472342491149902, + "learning_rate": 4.9142857142857144e-05, + "loss": 0.8446, + "step": 624 + }, + { + "epoch": 1.7170329670329672, + "grad_norm": 17.288734436035156, + "learning_rate": 4.9141483516483514e-05, + "loss": 1.0831, + "step": 625 + }, + { + "epoch": 1.7197802197802199, + "grad_norm": 15.569014549255371, + "learning_rate": 4.914010989010989e-05, + "loss": 1.164, + "step": 626 + }, + { + "epoch": 1.7225274725274726, + "grad_norm": 12.012418746948242, + "learning_rate": 4.913873626373627e-05, + "loss": 0.7473, + "step": 627 + }, + { + "epoch": 1.7252747252747254, + "grad_norm": 13.129042625427246, + "learning_rate": 4.913736263736264e-05, + "loss": 0.8753, + "step": 628 + }, + { + "epoch": 1.728021978021978, + "grad_norm": 13.439285278320312, + "learning_rate": 4.9135989010989014e-05, + "loss": 0.9143, + "step": 629 + }, + { + "epoch": 1.7307692307692308, + "grad_norm": 10.649282455444336, + "learning_rate": 4.9134615384615384e-05, + "loss": 0.7657, + "step": 630 + }, + { + "epoch": 1.7335164835164836, + "grad_norm": 18.59752082824707, + "learning_rate": 4.913324175824176e-05, + "loss": 0.9512, + "step": 631 + }, + { + "epoch": 1.7362637362637363, + "grad_norm": 13.395218849182129, + "learning_rate": 4.913186813186814e-05, + "loss": 0.9723, + "step": 632 + }, + { + "epoch": 1.739010989010989, + "grad_norm": 15.271066665649414, + "learning_rate": 4.913049450549451e-05, + "loss": 0.7591, + "step": 633 + }, + { + "epoch": 1.7417582417582418, + "grad_norm": 14.563886642456055, + "learning_rate": 4.9129120879120885e-05, + "loss": 1.0891, + "step": 634 + }, + { + "epoch": 1.7445054945054945, + "grad_norm": 11.815345764160156, + "learning_rate": 4.9127747252747255e-05, + "loss": 0.725, + "step": 635 + }, + { + "epoch": 1.7472527472527473, + "grad_norm": 14.605222702026367, + "learning_rate": 4.912637362637363e-05, + "loss": 0.7759, + "step": 636 + }, + { + "epoch": 1.75, + "grad_norm": 16.246078491210938, + "learning_rate": 4.9125e-05, + "loss": 1.0729, + "step": 637 + }, + { + "epoch": 1.7527472527472527, + "grad_norm": 12.467345237731934, + "learning_rate": 4.912362637362637e-05, + "loss": 0.8337, + "step": 638 + }, + { + "epoch": 1.7554945054945055, + "grad_norm": 13.441892623901367, + "learning_rate": 4.912225274725275e-05, + "loss": 0.7531, + "step": 639 + }, + { + "epoch": 1.7582417582417582, + "grad_norm": 15.514660835266113, + "learning_rate": 4.912087912087912e-05, + "loss": 0.8712, + "step": 640 + }, + { + "epoch": 1.760989010989011, + "grad_norm": 17.754772186279297, + "learning_rate": 4.9119505494505495e-05, + "loss": 0.9181, + "step": 641 + }, + { + "epoch": 1.7637362637362637, + "grad_norm": 15.864236831665039, + "learning_rate": 4.911813186813187e-05, + "loss": 0.902, + "step": 642 + }, + { + "epoch": 1.7664835164835164, + "grad_norm": 14.439709663391113, + "learning_rate": 4.911675824175824e-05, + "loss": 1.0384, + "step": 643 + }, + { + "epoch": 1.7692307692307692, + "grad_norm": 16.74623680114746, + "learning_rate": 4.911538461538462e-05, + "loss": 1.4127, + "step": 644 + }, + { + "epoch": 1.771978021978022, + "grad_norm": 14.693897247314453, + "learning_rate": 4.911401098901099e-05, + "loss": 0.9445, + "step": 645 + }, + { + "epoch": 1.7747252747252746, + "grad_norm": 15.952995300292969, + "learning_rate": 4.9112637362637366e-05, + "loss": 0.9889, + "step": 646 + }, + { + "epoch": 1.7774725274725274, + "grad_norm": 16.430118560791016, + "learning_rate": 4.911126373626374e-05, + "loss": 1.0045, + "step": 647 + }, + { + "epoch": 1.7802197802197801, + "grad_norm": 13.572482109069824, + "learning_rate": 4.910989010989011e-05, + "loss": 0.8652, + "step": 648 + }, + { + "epoch": 1.7829670329670328, + "grad_norm": 17.57451057434082, + "learning_rate": 4.910851648351649e-05, + "loss": 1.3607, + "step": 649 + }, + { + "epoch": 1.7857142857142856, + "grad_norm": 17.398996353149414, + "learning_rate": 4.910714285714286e-05, + "loss": 1.4115, + "step": 650 + }, + { + "epoch": 1.7884615384615383, + "grad_norm": 14.078177452087402, + "learning_rate": 4.9105769230769236e-05, + "loss": 1.0906, + "step": 651 + }, + { + "epoch": 1.791208791208791, + "grad_norm": 13.810065269470215, + "learning_rate": 4.9104395604395606e-05, + "loss": 0.927, + "step": 652 + }, + { + "epoch": 1.7939560439560438, + "grad_norm": 17.08163833618164, + "learning_rate": 4.9103021978021976e-05, + "loss": 1.2235, + "step": 653 + }, + { + "epoch": 1.7967032967032965, + "grad_norm": 16.817909240722656, + "learning_rate": 4.910164835164835e-05, + "loss": 1.1177, + "step": 654 + }, + { + "epoch": 1.7994505494505495, + "grad_norm": 18.605131149291992, + "learning_rate": 4.910027472527472e-05, + "loss": 1.0151, + "step": 655 + }, + { + "epoch": 1.8021978021978022, + "grad_norm": 13.628056526184082, + "learning_rate": 4.90989010989011e-05, + "loss": 0.8417, + "step": 656 + }, + { + "epoch": 1.804945054945055, + "grad_norm": 15.426694869995117, + "learning_rate": 4.9097527472527476e-05, + "loss": 1.0167, + "step": 657 + }, + { + "epoch": 1.8076923076923077, + "grad_norm": 12.28169059753418, + "learning_rate": 4.9096153846153846e-05, + "loss": 0.7166, + "step": 658 + }, + { + "epoch": 1.8104395604395604, + "grad_norm": 10.902162551879883, + "learning_rate": 4.909478021978022e-05, + "loss": 0.6771, + "step": 659 + }, + { + "epoch": 1.8131868131868132, + "grad_norm": 14.485595703125, + "learning_rate": 4.909340659340659e-05, + "loss": 0.8103, + "step": 660 + }, + { + "epoch": 1.815934065934066, + "grad_norm": 16.625680923461914, + "learning_rate": 4.909203296703297e-05, + "loss": 0.9675, + "step": 661 + }, + { + "epoch": 1.8186813186813187, + "grad_norm": 16.281145095825195, + "learning_rate": 4.909065934065935e-05, + "loss": 0.7537, + "step": 662 + }, + { + "epoch": 1.8214285714285714, + "grad_norm": 13.992392539978027, + "learning_rate": 4.908928571428572e-05, + "loss": 0.9987, + "step": 663 + }, + { + "epoch": 1.8241758241758241, + "grad_norm": 10.338507652282715, + "learning_rate": 4.9087912087912094e-05, + "loss": 0.6471, + "step": 664 + }, + { + "epoch": 1.8269230769230769, + "grad_norm": 13.219951629638672, + "learning_rate": 4.9086538461538464e-05, + "loss": 0.792, + "step": 665 + }, + { + "epoch": 1.8296703296703298, + "grad_norm": 14.557419776916504, + "learning_rate": 4.908516483516484e-05, + "loss": 1.2207, + "step": 666 + }, + { + "epoch": 1.8324175824175826, + "grad_norm": 10.96453857421875, + "learning_rate": 4.908379120879121e-05, + "loss": 0.5564, + "step": 667 + }, + { + "epoch": 1.8351648351648353, + "grad_norm": 17.667984008789062, + "learning_rate": 4.908241758241758e-05, + "loss": 1.2312, + "step": 668 + }, + { + "epoch": 1.837912087912088, + "grad_norm": 12.212926864624023, + "learning_rate": 4.908104395604396e-05, + "loss": 0.8006, + "step": 669 + }, + { + "epoch": 1.8406593406593408, + "grad_norm": 11.723919868469238, + "learning_rate": 4.907967032967033e-05, + "loss": 0.7859, + "step": 670 + }, + { + "epoch": 1.8434065934065935, + "grad_norm": 14.895100593566895, + "learning_rate": 4.9078296703296704e-05, + "loss": 1.1862, + "step": 671 + }, + { + "epoch": 1.8461538461538463, + "grad_norm": 17.532503128051758, + "learning_rate": 4.907692307692308e-05, + "loss": 1.1274, + "step": 672 + }, + { + "epoch": 1.848901098901099, + "grad_norm": 14.556373596191406, + "learning_rate": 4.907554945054945e-05, + "loss": 0.8673, + "step": 673 + }, + { + "epoch": 1.8516483516483517, + "grad_norm": 11.593340873718262, + "learning_rate": 4.907417582417583e-05, + "loss": 0.7741, + "step": 674 + }, + { + "epoch": 1.8543956043956045, + "grad_norm": 7.981894493103027, + "learning_rate": 4.90728021978022e-05, + "loss": 0.4608, + "step": 675 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 14.61449146270752, + "learning_rate": 4.9071428571428574e-05, + "loss": 0.8281, + "step": 676 + }, + { + "epoch": 1.85989010989011, + "grad_norm": 12.939298629760742, + "learning_rate": 4.907005494505495e-05, + "loss": 0.8541, + "step": 677 + }, + { + "epoch": 1.8626373626373627, + "grad_norm": 18.484939575195312, + "learning_rate": 4.906868131868132e-05, + "loss": 1.5581, + "step": 678 + }, + { + "epoch": 1.8653846153846154, + "grad_norm": 19.75174903869629, + "learning_rate": 4.90673076923077e-05, + "loss": 1.1044, + "step": 679 + }, + { + "epoch": 1.8681318681318682, + "grad_norm": 15.469444274902344, + "learning_rate": 4.906593406593407e-05, + "loss": 0.8034, + "step": 680 + }, + { + "epoch": 1.870879120879121, + "grad_norm": 14.439960479736328, + "learning_rate": 4.9064560439560445e-05, + "loss": 0.7601, + "step": 681 + }, + { + "epoch": 1.8736263736263736, + "grad_norm": 15.985133171081543, + "learning_rate": 4.9063186813186815e-05, + "loss": 0.8306, + "step": 682 + }, + { + "epoch": 1.8763736263736264, + "grad_norm": 18.385499954223633, + "learning_rate": 4.9061813186813185e-05, + "loss": 1.0995, + "step": 683 + }, + { + "epoch": 1.879120879120879, + "grad_norm": 15.034536361694336, + "learning_rate": 4.906043956043956e-05, + "loss": 0.9415, + "step": 684 + }, + { + "epoch": 1.8818681318681318, + "grad_norm": 13.285090446472168, + "learning_rate": 4.905906593406593e-05, + "loss": 0.8707, + "step": 685 + }, + { + "epoch": 1.8846153846153846, + "grad_norm": 12.888044357299805, + "learning_rate": 4.905769230769231e-05, + "loss": 0.8778, + "step": 686 + }, + { + "epoch": 1.8873626373626373, + "grad_norm": 12.58847713470459, + "learning_rate": 4.9056318681318685e-05, + "loss": 0.7669, + "step": 687 + }, + { + "epoch": 1.89010989010989, + "grad_norm": 15.361771583557129, + "learning_rate": 4.9054945054945055e-05, + "loss": 1.0766, + "step": 688 + }, + { + "epoch": 1.8928571428571428, + "grad_norm": 12.137139320373535, + "learning_rate": 4.905357142857143e-05, + "loss": 0.7843, + "step": 689 + }, + { + "epoch": 1.8956043956043955, + "grad_norm": 16.310148239135742, + "learning_rate": 4.90521978021978e-05, + "loss": 1.146, + "step": 690 + }, + { + "epoch": 1.8983516483516483, + "grad_norm": 14.975325584411621, + "learning_rate": 4.905082417582418e-05, + "loss": 1.0571, + "step": 691 + }, + { + "epoch": 1.901098901098901, + "grad_norm": 13.53234577178955, + "learning_rate": 4.9049450549450556e-05, + "loss": 0.9264, + "step": 692 + }, + { + "epoch": 1.9038461538461537, + "grad_norm": 14.745759963989258, + "learning_rate": 4.9048076923076926e-05, + "loss": 1.1279, + "step": 693 + }, + { + "epoch": 1.9065934065934065, + "grad_norm": 19.44516372680664, + "learning_rate": 4.90467032967033e-05, + "loss": 0.8396, + "step": 694 + }, + { + "epoch": 1.9093406593406592, + "grad_norm": 11.296976089477539, + "learning_rate": 4.904532967032967e-05, + "loss": 0.6381, + "step": 695 + }, + { + "epoch": 1.912087912087912, + "grad_norm": 13.871928215026855, + "learning_rate": 4.904395604395605e-05, + "loss": 0.8156, + "step": 696 + }, + { + "epoch": 1.9148351648351647, + "grad_norm": 17.789159774780273, + "learning_rate": 4.904258241758242e-05, + "loss": 0.9806, + "step": 697 + }, + { + "epoch": 1.9175824175824174, + "grad_norm": 13.611194610595703, + "learning_rate": 4.904120879120879e-05, + "loss": 0.8379, + "step": 698 + }, + { + "epoch": 1.9203296703296702, + "grad_norm": 18.277278900146484, + "learning_rate": 4.9039835164835166e-05, + "loss": 0.9719, + "step": 699 + }, + { + "epoch": 1.9230769230769231, + "grad_norm": 15.55429744720459, + "learning_rate": 4.9038461538461536e-05, + "loss": 0.8935, + "step": 700 + }, + { + "epoch": 1.9258241758241759, + "grad_norm": 15.091839790344238, + "learning_rate": 4.903708791208791e-05, + "loss": 0.8134, + "step": 701 + }, + { + "epoch": 1.9285714285714286, + "grad_norm": 10.63100814819336, + "learning_rate": 4.903571428571429e-05, + "loss": 0.6999, + "step": 702 + }, + { + "epoch": 1.9313186813186813, + "grad_norm": 13.978720664978027, + "learning_rate": 4.903434065934066e-05, + "loss": 0.8142, + "step": 703 + }, + { + "epoch": 1.934065934065934, + "grad_norm": 18.451929092407227, + "learning_rate": 4.903296703296704e-05, + "loss": 1.1733, + "step": 704 + }, + { + "epoch": 1.9368131868131868, + "grad_norm": 14.86965560913086, + "learning_rate": 4.903159340659341e-05, + "loss": 1.311, + "step": 705 + }, + { + "epoch": 1.9395604395604396, + "grad_norm": 16.120586395263672, + "learning_rate": 4.9030219780219783e-05, + "loss": 1.1684, + "step": 706 + }, + { + "epoch": 1.9423076923076923, + "grad_norm": 12.484301567077637, + "learning_rate": 4.902884615384616e-05, + "loss": 0.832, + "step": 707 + }, + { + "epoch": 1.945054945054945, + "grad_norm": 12.823892593383789, + "learning_rate": 4.902747252747253e-05, + "loss": 0.8975, + "step": 708 + }, + { + "epoch": 1.9478021978021978, + "grad_norm": 21.365388870239258, + "learning_rate": 4.902609890109891e-05, + "loss": 1.568, + "step": 709 + }, + { + "epoch": 1.9505494505494505, + "grad_norm": 17.321611404418945, + "learning_rate": 4.902472527472528e-05, + "loss": 1.0219, + "step": 710 + }, + { + "epoch": 1.9532967032967035, + "grad_norm": 16.332338333129883, + "learning_rate": 4.9023351648351654e-05, + "loss": 1.1899, + "step": 711 + }, + { + "epoch": 1.9560439560439562, + "grad_norm": 13.939593315124512, + "learning_rate": 4.9021978021978024e-05, + "loss": 0.8961, + "step": 712 + }, + { + "epoch": 1.958791208791209, + "grad_norm": 19.184755325317383, + "learning_rate": 4.9020604395604394e-05, + "loss": 1.4537, + "step": 713 + }, + { + "epoch": 1.9615384615384617, + "grad_norm": 17.170454025268555, + "learning_rate": 4.901923076923077e-05, + "loss": 1.1605, + "step": 714 + }, + { + "epoch": 1.9642857142857144, + "grad_norm": 16.786540985107422, + "learning_rate": 4.901785714285714e-05, + "loss": 1.4111, + "step": 715 + }, + { + "epoch": 1.9670329670329672, + "grad_norm": 11.020231246948242, + "learning_rate": 4.901648351648352e-05, + "loss": 0.7406, + "step": 716 + }, + { + "epoch": 1.9697802197802199, + "grad_norm": 10.655393600463867, + "learning_rate": 4.9015109890109894e-05, + "loss": 0.5822, + "step": 717 + }, + { + "epoch": 1.9725274725274726, + "grad_norm": 15.19691276550293, + "learning_rate": 4.9013736263736264e-05, + "loss": 1.1043, + "step": 718 + }, + { + "epoch": 1.9752747252747254, + "grad_norm": 15.200854301452637, + "learning_rate": 4.901236263736264e-05, + "loss": 0.9383, + "step": 719 + }, + { + "epoch": 1.978021978021978, + "grad_norm": 16.68987464904785, + "learning_rate": 4.901098901098901e-05, + "loss": 0.9569, + "step": 720 + }, + { + "epoch": 1.9807692307692308, + "grad_norm": 11.252931594848633, + "learning_rate": 4.900961538461539e-05, + "loss": 0.6422, + "step": 721 + }, + { + "epoch": 1.9835164835164836, + "grad_norm": 15.121378898620605, + "learning_rate": 4.9008241758241765e-05, + "loss": 1.0679, + "step": 722 + }, + { + "epoch": 1.9862637362637363, + "grad_norm": 18.289308547973633, + "learning_rate": 4.9006868131868135e-05, + "loss": 1.1245, + "step": 723 + }, + { + "epoch": 1.989010989010989, + "grad_norm": 19.985984802246094, + "learning_rate": 4.900549450549451e-05, + "loss": 1.0259, + "step": 724 + }, + { + "epoch": 1.9917582417582418, + "grad_norm": 15.956352233886719, + "learning_rate": 4.900412087912088e-05, + "loss": 1.2179, + "step": 725 + }, + { + "epoch": 1.9945054945054945, + "grad_norm": 12.467228889465332, + "learning_rate": 4.900274725274726e-05, + "loss": 0.9213, + "step": 726 + }, + { + "epoch": 1.9972527472527473, + "grad_norm": 14.039835929870605, + "learning_rate": 4.900137362637363e-05, + "loss": 1.0264, + "step": 727 + }, + { + "epoch": 2.0, + "grad_norm": 27.63059425354004, + "learning_rate": 4.9e-05, + "loss": 0.8841, + "step": 728 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.49586776859504134, + "eval_f1": 0.4689883449772257, + "eval_f1_DuraRiadoRio_64x64": 0.4359861591695502, + "eval_f1_Mole_64x64": 0.375, + "eval_f1_Quebrado_64x64": 0.39344262295081966, + "eval_f1_RiadoRio_64x64": 0.5653495440729484, + "eval_f1_RioFechado_64x64": 0.5751633986928104, + "eval_loss": 1.2401933670043945, + "eval_precision": 0.6098774392000262, + "eval_precision_DuraRiadoRio_64x64": 0.43448275862068964, + "eval_precision_Mole_64x64": 0.75, + "eval_precision_Quebrado_64x64": 0.9230769230769231, + "eval_precision_RiadoRio_64x64": 0.5254237288135594, + "eval_precision_RioFechado_64x64": 0.416403785488959, + "eval_recall": 0.4957839140103781, + "eval_recall_DuraRiadoRio_64x64": 0.4375, + "eval_recall_Mole_64x64": 0.25, + "eval_recall_Quebrado_64x64": 0.25, + "eval_recall_RiadoRio_64x64": 0.6118421052631579, + "eval_recall_RioFechado_64x64": 0.9295774647887324, + "eval_runtime": 1.7158, + "eval_samples_per_second": 423.122, + "eval_steps_per_second": 26.809, + "step": 728 + }, + { + "epoch": 2.0027472527472527, + "grad_norm": 13.839991569519043, + "learning_rate": 4.8998626373626375e-05, + "loss": 0.8849, + "step": 729 + }, + { + "epoch": 2.0054945054945055, + "grad_norm": 14.493867874145508, + "learning_rate": 4.8997252747252745e-05, + "loss": 0.8649, + "step": 730 + }, + { + "epoch": 2.008241758241758, + "grad_norm": 12.919429779052734, + "learning_rate": 4.899587912087912e-05, + "loss": 0.7161, + "step": 731 + }, + { + "epoch": 2.010989010989011, + "grad_norm": 15.703041076660156, + "learning_rate": 4.89945054945055e-05, + "loss": 1.0097, + "step": 732 + }, + { + "epoch": 2.0137362637362637, + "grad_norm": 15.223114013671875, + "learning_rate": 4.899313186813187e-05, + "loss": 0.8743, + "step": 733 + }, + { + "epoch": 2.0164835164835164, + "grad_norm": 14.48355770111084, + "learning_rate": 4.8991758241758246e-05, + "loss": 0.8671, + "step": 734 + }, + { + "epoch": 2.019230769230769, + "grad_norm": 13.781757354736328, + "learning_rate": 4.8990384615384616e-05, + "loss": 0.9068, + "step": 735 + }, + { + "epoch": 2.021978021978022, + "grad_norm": 12.924257278442383, + "learning_rate": 4.898901098901099e-05, + "loss": 1.059, + "step": 736 + }, + { + "epoch": 2.0247252747252746, + "grad_norm": 11.37004280090332, + "learning_rate": 4.898763736263737e-05, + "loss": 0.6828, + "step": 737 + }, + { + "epoch": 2.0274725274725274, + "grad_norm": 16.088638305664062, + "learning_rate": 4.898626373626374e-05, + "loss": 1.1748, + "step": 738 + }, + { + "epoch": 2.03021978021978, + "grad_norm": 13.707465171813965, + "learning_rate": 4.8984890109890116e-05, + "loss": 0.8144, + "step": 739 + }, + { + "epoch": 2.032967032967033, + "grad_norm": 12.819602966308594, + "learning_rate": 4.8983516483516486e-05, + "loss": 0.6678, + "step": 740 + }, + { + "epoch": 2.0357142857142856, + "grad_norm": 15.411751747131348, + "learning_rate": 4.898214285714286e-05, + "loss": 1.135, + "step": 741 + }, + { + "epoch": 2.0384615384615383, + "grad_norm": 8.798617362976074, + "learning_rate": 4.898076923076923e-05, + "loss": 0.4512, + "step": 742 + }, + { + "epoch": 2.041208791208791, + "grad_norm": 14.797220230102539, + "learning_rate": 4.89793956043956e-05, + "loss": 0.921, + "step": 743 + }, + { + "epoch": 2.043956043956044, + "grad_norm": 16.92374610900879, + "learning_rate": 4.897802197802198e-05, + "loss": 1.0015, + "step": 744 + }, + { + "epoch": 2.0467032967032965, + "grad_norm": 12.514242172241211, + "learning_rate": 4.897664835164835e-05, + "loss": 0.7263, + "step": 745 + }, + { + "epoch": 2.0494505494505493, + "grad_norm": 13.792859077453613, + "learning_rate": 4.8975274725274727e-05, + "loss": 0.7286, + "step": 746 + }, + { + "epoch": 2.052197802197802, + "grad_norm": 13.910897254943848, + "learning_rate": 4.89739010989011e-05, + "loss": 0.8719, + "step": 747 + }, + { + "epoch": 2.0549450549450547, + "grad_norm": 11.216545104980469, + "learning_rate": 4.897252747252747e-05, + "loss": 0.5822, + "step": 748 + }, + { + "epoch": 2.0576923076923075, + "grad_norm": 14.844161987304688, + "learning_rate": 4.897115384615385e-05, + "loss": 0.9536, + "step": 749 + }, + { + "epoch": 2.0604395604395602, + "grad_norm": 10.143104553222656, + "learning_rate": 4.896978021978022e-05, + "loss": 0.5746, + "step": 750 + }, + { + "epoch": 2.063186813186813, + "grad_norm": 17.105009078979492, + "learning_rate": 4.89684065934066e-05, + "loss": 1.0978, + "step": 751 + }, + { + "epoch": 2.065934065934066, + "grad_norm": 12.81464672088623, + "learning_rate": 4.8967032967032974e-05, + "loss": 0.5759, + "step": 752 + }, + { + "epoch": 2.068681318681319, + "grad_norm": 19.364221572875977, + "learning_rate": 4.8965659340659344e-05, + "loss": 1.2548, + "step": 753 + }, + { + "epoch": 2.0714285714285716, + "grad_norm": 15.993997573852539, + "learning_rate": 4.896428571428572e-05, + "loss": 1.1226, + "step": 754 + }, + { + "epoch": 2.0741758241758244, + "grad_norm": 13.118683815002441, + "learning_rate": 4.896291208791209e-05, + "loss": 0.8014, + "step": 755 + }, + { + "epoch": 2.076923076923077, + "grad_norm": 11.864962577819824, + "learning_rate": 4.896153846153847e-05, + "loss": 0.785, + "step": 756 + }, + { + "epoch": 2.07967032967033, + "grad_norm": 13.37844181060791, + "learning_rate": 4.896016483516484e-05, + "loss": 0.9472, + "step": 757 + }, + { + "epoch": 2.0824175824175826, + "grad_norm": 13.807975769042969, + "learning_rate": 4.895879120879121e-05, + "loss": 0.6593, + "step": 758 + }, + { + "epoch": 2.0851648351648353, + "grad_norm": 20.25714111328125, + "learning_rate": 4.8957417582417584e-05, + "loss": 1.3876, + "step": 759 + }, + { + "epoch": 2.087912087912088, + "grad_norm": 14.539628982543945, + "learning_rate": 4.8956043956043954e-05, + "loss": 0.9957, + "step": 760 + }, + { + "epoch": 2.090659340659341, + "grad_norm": 10.845990180969238, + "learning_rate": 4.895467032967033e-05, + "loss": 0.5918, + "step": 761 + }, + { + "epoch": 2.0934065934065935, + "grad_norm": 13.149925231933594, + "learning_rate": 4.895329670329671e-05, + "loss": 0.824, + "step": 762 + }, + { + "epoch": 2.0961538461538463, + "grad_norm": 18.609325408935547, + "learning_rate": 4.895192307692308e-05, + "loss": 1.0679, + "step": 763 + }, + { + "epoch": 2.098901098901099, + "grad_norm": 23.580394744873047, + "learning_rate": 4.8950549450549455e-05, + "loss": 0.9223, + "step": 764 + }, + { + "epoch": 2.1016483516483517, + "grad_norm": 15.532552719116211, + "learning_rate": 4.8949175824175825e-05, + "loss": 0.8007, + "step": 765 + }, + { + "epoch": 2.1043956043956045, + "grad_norm": 13.095273971557617, + "learning_rate": 4.89478021978022e-05, + "loss": 0.6157, + "step": 766 + }, + { + "epoch": 2.107142857142857, + "grad_norm": 18.198017120361328, + "learning_rate": 4.894642857142858e-05, + "loss": 1.2445, + "step": 767 + }, + { + "epoch": 2.10989010989011, + "grad_norm": 9.121940612792969, + "learning_rate": 4.894505494505495e-05, + "loss": 0.5051, + "step": 768 + }, + { + "epoch": 2.1126373626373627, + "grad_norm": 13.471160888671875, + "learning_rate": 4.8943681318681325e-05, + "loss": 0.7788, + "step": 769 + }, + { + "epoch": 2.1153846153846154, + "grad_norm": 12.567544937133789, + "learning_rate": 4.8942307692307695e-05, + "loss": 0.8603, + "step": 770 + }, + { + "epoch": 2.118131868131868, + "grad_norm": 16.859485626220703, + "learning_rate": 4.894093406593407e-05, + "loss": 1.1168, + "step": 771 + }, + { + "epoch": 2.120879120879121, + "grad_norm": 11.0952730178833, + "learning_rate": 4.893956043956044e-05, + "loss": 0.5584, + "step": 772 + }, + { + "epoch": 2.1236263736263736, + "grad_norm": 13.90368938446045, + "learning_rate": 4.893818681318681e-05, + "loss": 0.9899, + "step": 773 + }, + { + "epoch": 2.1263736263736264, + "grad_norm": 9.325477600097656, + "learning_rate": 4.893681318681319e-05, + "loss": 0.4581, + "step": 774 + }, + { + "epoch": 2.129120879120879, + "grad_norm": 15.407636642456055, + "learning_rate": 4.893543956043956e-05, + "loss": 0.8064, + "step": 775 + }, + { + "epoch": 2.131868131868132, + "grad_norm": 14.110698699951172, + "learning_rate": 4.8934065934065935e-05, + "loss": 0.9144, + "step": 776 + }, + { + "epoch": 2.1346153846153846, + "grad_norm": 11.225390434265137, + "learning_rate": 4.893269230769231e-05, + "loss": 0.6849, + "step": 777 + }, + { + "epoch": 2.1373626373626373, + "grad_norm": 13.062077522277832, + "learning_rate": 4.893131868131868e-05, + "loss": 0.7625, + "step": 778 + }, + { + "epoch": 2.14010989010989, + "grad_norm": 17.763256072998047, + "learning_rate": 4.892994505494506e-05, + "loss": 1.2523, + "step": 779 + }, + { + "epoch": 2.142857142857143, + "grad_norm": 18.24871063232422, + "learning_rate": 4.892857142857143e-05, + "loss": 1.3185, + "step": 780 + }, + { + "epoch": 2.1456043956043955, + "grad_norm": 16.251325607299805, + "learning_rate": 4.8927197802197806e-05, + "loss": 1.2612, + "step": 781 + }, + { + "epoch": 2.1483516483516483, + "grad_norm": 14.06808853149414, + "learning_rate": 4.892582417582418e-05, + "loss": 0.784, + "step": 782 + }, + { + "epoch": 2.151098901098901, + "grad_norm": 16.099300384521484, + "learning_rate": 4.892445054945055e-05, + "loss": 1.057, + "step": 783 + }, + { + "epoch": 2.1538461538461537, + "grad_norm": 14.571100234985352, + "learning_rate": 4.892307692307693e-05, + "loss": 1.1082, + "step": 784 + }, + { + "epoch": 2.1565934065934065, + "grad_norm": 13.731108665466309, + "learning_rate": 4.89217032967033e-05, + "loss": 0.6891, + "step": 785 + }, + { + "epoch": 2.159340659340659, + "grad_norm": 11.7400484085083, + "learning_rate": 4.8920329670329676e-05, + "loss": 0.66, + "step": 786 + }, + { + "epoch": 2.162087912087912, + "grad_norm": 15.554512023925781, + "learning_rate": 4.8918956043956046e-05, + "loss": 1.104, + "step": 787 + }, + { + "epoch": 2.1648351648351647, + "grad_norm": 12.28312873840332, + "learning_rate": 4.8917582417582416e-05, + "loss": 0.8456, + "step": 788 + }, + { + "epoch": 2.1675824175824174, + "grad_norm": 11.424721717834473, + "learning_rate": 4.891620879120879e-05, + "loss": 0.5117, + "step": 789 + }, + { + "epoch": 2.17032967032967, + "grad_norm": 15.744207382202148, + "learning_rate": 4.891483516483516e-05, + "loss": 0.8396, + "step": 790 + }, + { + "epoch": 2.173076923076923, + "grad_norm": 12.07422924041748, + "learning_rate": 4.891346153846154e-05, + "loss": 0.6027, + "step": 791 + }, + { + "epoch": 2.1758241758241756, + "grad_norm": 13.351359367370605, + "learning_rate": 4.891208791208792e-05, + "loss": 0.7559, + "step": 792 + }, + { + "epoch": 2.1785714285714284, + "grad_norm": 12.532447814941406, + "learning_rate": 4.891071428571429e-05, + "loss": 0.6455, + "step": 793 + }, + { + "epoch": 2.181318681318681, + "grad_norm": 11.860299110412598, + "learning_rate": 4.8909340659340664e-05, + "loss": 0.7421, + "step": 794 + }, + { + "epoch": 2.1840659340659343, + "grad_norm": 18.47911834716797, + "learning_rate": 4.8907967032967034e-05, + "loss": 1.1109, + "step": 795 + }, + { + "epoch": 2.186813186813187, + "grad_norm": 16.987333297729492, + "learning_rate": 4.890659340659341e-05, + "loss": 0.8468, + "step": 796 + }, + { + "epoch": 2.1895604395604398, + "grad_norm": 15.131301879882812, + "learning_rate": 4.890521978021978e-05, + "loss": 0.9226, + "step": 797 + }, + { + "epoch": 2.1923076923076925, + "grad_norm": 16.03067398071289, + "learning_rate": 4.890384615384616e-05, + "loss": 0.9329, + "step": 798 + }, + { + "epoch": 2.1950549450549453, + "grad_norm": 14.166631698608398, + "learning_rate": 4.8902472527472534e-05, + "loss": 0.7408, + "step": 799 + }, + { + "epoch": 2.197802197802198, + "grad_norm": 13.666008949279785, + "learning_rate": 4.8901098901098904e-05, + "loss": 0.8383, + "step": 800 + }, + { + "epoch": 2.2005494505494507, + "grad_norm": 16.801513671875, + "learning_rate": 4.889972527472528e-05, + "loss": 1.0845, + "step": 801 + }, + { + "epoch": 2.2032967032967035, + "grad_norm": 14.821956634521484, + "learning_rate": 4.889835164835165e-05, + "loss": 0.836, + "step": 802 + }, + { + "epoch": 2.206043956043956, + "grad_norm": 12.674059867858887, + "learning_rate": 4.889697802197802e-05, + "loss": 0.8005, + "step": 803 + }, + { + "epoch": 2.208791208791209, + "grad_norm": 14.945211410522461, + "learning_rate": 4.88956043956044e-05, + "loss": 0.7641, + "step": 804 + }, + { + "epoch": 2.2115384615384617, + "grad_norm": 14.277778625488281, + "learning_rate": 4.889423076923077e-05, + "loss": 0.9594, + "step": 805 + }, + { + "epoch": 2.2142857142857144, + "grad_norm": 13.073183059692383, + "learning_rate": 4.8892857142857144e-05, + "loss": 0.7294, + "step": 806 + }, + { + "epoch": 2.217032967032967, + "grad_norm": 10.08071517944336, + "learning_rate": 4.8891483516483514e-05, + "loss": 0.5698, + "step": 807 + }, + { + "epoch": 2.21978021978022, + "grad_norm": 12.676898002624512, + "learning_rate": 4.889010989010989e-05, + "loss": 0.6444, + "step": 808 + }, + { + "epoch": 2.2225274725274726, + "grad_norm": 12.784205436706543, + "learning_rate": 4.888873626373627e-05, + "loss": 0.7646, + "step": 809 + }, + { + "epoch": 2.2252747252747254, + "grad_norm": 20.880861282348633, + "learning_rate": 4.888736263736264e-05, + "loss": 1.4264, + "step": 810 + }, + { + "epoch": 2.228021978021978, + "grad_norm": 12.561858177185059, + "learning_rate": 4.8885989010989015e-05, + "loss": 0.6993, + "step": 811 + }, + { + "epoch": 2.230769230769231, + "grad_norm": 14.236583709716797, + "learning_rate": 4.8884615384615385e-05, + "loss": 0.86, + "step": 812 + }, + { + "epoch": 2.2335164835164836, + "grad_norm": 12.301734924316406, + "learning_rate": 4.888324175824176e-05, + "loss": 0.7132, + "step": 813 + }, + { + "epoch": 2.2362637362637363, + "grad_norm": 16.14687156677246, + "learning_rate": 4.888186813186814e-05, + "loss": 0.9032, + "step": 814 + }, + { + "epoch": 2.239010989010989, + "grad_norm": 12.52413272857666, + "learning_rate": 4.888049450549451e-05, + "loss": 1.0828, + "step": 815 + }, + { + "epoch": 2.241758241758242, + "grad_norm": 18.2390079498291, + "learning_rate": 4.8879120879120885e-05, + "loss": 1.0223, + "step": 816 + }, + { + "epoch": 2.2445054945054945, + "grad_norm": 14.129022598266602, + "learning_rate": 4.8877747252747255e-05, + "loss": 0.7888, + "step": 817 + }, + { + "epoch": 2.2472527472527473, + "grad_norm": 16.830909729003906, + "learning_rate": 4.8876373626373625e-05, + "loss": 1.1451, + "step": 818 + }, + { + "epoch": 2.25, + "grad_norm": 13.034226417541504, + "learning_rate": 4.8875e-05, + "loss": 0.7324, + "step": 819 + }, + { + "epoch": 2.2527472527472527, + "grad_norm": 11.640145301818848, + "learning_rate": 4.887362637362637e-05, + "loss": 0.7866, + "step": 820 + }, + { + "epoch": 2.2554945054945055, + "grad_norm": 16.305139541625977, + "learning_rate": 4.887225274725275e-05, + "loss": 0.7398, + "step": 821 + }, + { + "epoch": 2.258241758241758, + "grad_norm": 11.075858116149902, + "learning_rate": 4.887087912087912e-05, + "loss": 0.6475, + "step": 822 + }, + { + "epoch": 2.260989010989011, + "grad_norm": 14.66039752960205, + "learning_rate": 4.8869505494505496e-05, + "loss": 0.8369, + "step": 823 + }, + { + "epoch": 2.2637362637362637, + "grad_norm": 14.055329322814941, + "learning_rate": 4.886813186813187e-05, + "loss": 0.8919, + "step": 824 + }, + { + "epoch": 2.2664835164835164, + "grad_norm": 10.416074752807617, + "learning_rate": 4.886675824175824e-05, + "loss": 0.4739, + "step": 825 + }, + { + "epoch": 2.269230769230769, + "grad_norm": 13.299088478088379, + "learning_rate": 4.886538461538462e-05, + "loss": 0.7542, + "step": 826 + }, + { + "epoch": 2.271978021978022, + "grad_norm": 13.634106636047363, + "learning_rate": 4.886401098901099e-05, + "loss": 0.7716, + "step": 827 + }, + { + "epoch": 2.2747252747252746, + "grad_norm": 13.48167610168457, + "learning_rate": 4.8862637362637366e-05, + "loss": 0.8367, + "step": 828 + }, + { + "epoch": 2.2774725274725274, + "grad_norm": 13.779614448547363, + "learning_rate": 4.886126373626374e-05, + "loss": 0.7804, + "step": 829 + }, + { + "epoch": 2.28021978021978, + "grad_norm": 16.530071258544922, + "learning_rate": 4.885989010989011e-05, + "loss": 1.4536, + "step": 830 + }, + { + "epoch": 2.282967032967033, + "grad_norm": 10.801733016967773, + "learning_rate": 4.885851648351649e-05, + "loss": 0.5837, + "step": 831 + }, + { + "epoch": 2.2857142857142856, + "grad_norm": 14.244390487670898, + "learning_rate": 4.885714285714286e-05, + "loss": 0.9777, + "step": 832 + }, + { + "epoch": 2.2884615384615383, + "grad_norm": 14.532998085021973, + "learning_rate": 4.885576923076923e-05, + "loss": 1.0237, + "step": 833 + }, + { + "epoch": 2.291208791208791, + "grad_norm": 16.765897750854492, + "learning_rate": 4.885439560439561e-05, + "loss": 1.091, + "step": 834 + }, + { + "epoch": 2.293956043956044, + "grad_norm": 20.355451583862305, + "learning_rate": 4.885302197802198e-05, + "loss": 1.2008, + "step": 835 + }, + { + "epoch": 2.2967032967032965, + "grad_norm": 18.521778106689453, + "learning_rate": 4.8851648351648353e-05, + "loss": 1.4271, + "step": 836 + }, + { + "epoch": 2.2994505494505493, + "grad_norm": 18.355037689208984, + "learning_rate": 4.8850274725274723e-05, + "loss": 1.1275, + "step": 837 + }, + { + "epoch": 2.302197802197802, + "grad_norm": 18.554166793823242, + "learning_rate": 4.88489010989011e-05, + "loss": 1.4262, + "step": 838 + }, + { + "epoch": 2.3049450549450547, + "grad_norm": 16.31891632080078, + "learning_rate": 4.884752747252748e-05, + "loss": 0.9104, + "step": 839 + }, + { + "epoch": 2.3076923076923075, + "grad_norm": 14.046340942382812, + "learning_rate": 4.884615384615385e-05, + "loss": 1.1359, + "step": 840 + }, + { + "epoch": 2.3104395604395602, + "grad_norm": 10.372947692871094, + "learning_rate": 4.8844780219780224e-05, + "loss": 0.5521, + "step": 841 + }, + { + "epoch": 2.313186813186813, + "grad_norm": 12.761919021606445, + "learning_rate": 4.8843406593406594e-05, + "loss": 0.749, + "step": 842 + }, + { + "epoch": 2.3159340659340657, + "grad_norm": 11.07174015045166, + "learning_rate": 4.884203296703297e-05, + "loss": 0.4177, + "step": 843 + }, + { + "epoch": 2.3186813186813184, + "grad_norm": 14.696747779846191, + "learning_rate": 4.884065934065935e-05, + "loss": 0.7909, + "step": 844 + }, + { + "epoch": 2.3214285714285716, + "grad_norm": 13.083575248718262, + "learning_rate": 4.883928571428572e-05, + "loss": 0.6001, + "step": 845 + }, + { + "epoch": 2.3241758241758244, + "grad_norm": 14.954764366149902, + "learning_rate": 4.8837912087912094e-05, + "loss": 1.2585, + "step": 846 + }, + { + "epoch": 2.326923076923077, + "grad_norm": 14.06807804107666, + "learning_rate": 4.8836538461538464e-05, + "loss": 0.8092, + "step": 847 + }, + { + "epoch": 2.32967032967033, + "grad_norm": 13.653632164001465, + "learning_rate": 4.8835164835164834e-05, + "loss": 0.8354, + "step": 848 + }, + { + "epoch": 2.3324175824175826, + "grad_norm": 13.591744422912598, + "learning_rate": 4.883379120879121e-05, + "loss": 0.8118, + "step": 849 + }, + { + "epoch": 2.3351648351648353, + "grad_norm": 12.615340232849121, + "learning_rate": 4.883241758241758e-05, + "loss": 0.6381, + "step": 850 + }, + { + "epoch": 2.337912087912088, + "grad_norm": 14.650327682495117, + "learning_rate": 4.883104395604396e-05, + "loss": 0.9262, + "step": 851 + }, + { + "epoch": 2.340659340659341, + "grad_norm": 13.956536293029785, + "learning_rate": 4.882967032967033e-05, + "loss": 0.9912, + "step": 852 + }, + { + "epoch": 2.3434065934065935, + "grad_norm": 14.316728591918945, + "learning_rate": 4.8828296703296705e-05, + "loss": 0.8551, + "step": 853 + }, + { + "epoch": 2.3461538461538463, + "grad_norm": 15.669833183288574, + "learning_rate": 4.882692307692308e-05, + "loss": 0.8033, + "step": 854 + }, + { + "epoch": 2.348901098901099, + "grad_norm": 11.91400146484375, + "learning_rate": 4.882554945054945e-05, + "loss": 0.5719, + "step": 855 + }, + { + "epoch": 2.3516483516483517, + "grad_norm": 11.092728614807129, + "learning_rate": 4.882417582417583e-05, + "loss": 0.5596, + "step": 856 + }, + { + "epoch": 2.3543956043956045, + "grad_norm": 13.004595756530762, + "learning_rate": 4.88228021978022e-05, + "loss": 0.7587, + "step": 857 + }, + { + "epoch": 2.357142857142857, + "grad_norm": 13.08729076385498, + "learning_rate": 4.8821428571428575e-05, + "loss": 0.7618, + "step": 858 + }, + { + "epoch": 2.35989010989011, + "grad_norm": 15.382063865661621, + "learning_rate": 4.882005494505495e-05, + "loss": 1.0221, + "step": 859 + }, + { + "epoch": 2.3626373626373627, + "grad_norm": 10.956058502197266, + "learning_rate": 4.881868131868132e-05, + "loss": 0.6194, + "step": 860 + }, + { + "epoch": 2.3653846153846154, + "grad_norm": 23.39991569519043, + "learning_rate": 4.88173076923077e-05, + "loss": 1.3119, + "step": 861 + }, + { + "epoch": 2.368131868131868, + "grad_norm": 12.41798210144043, + "learning_rate": 4.881593406593407e-05, + "loss": 0.5732, + "step": 862 + }, + { + "epoch": 2.370879120879121, + "grad_norm": 15.839483261108398, + "learning_rate": 4.881456043956044e-05, + "loss": 0.9392, + "step": 863 + }, + { + "epoch": 2.3736263736263736, + "grad_norm": 13.072545051574707, + "learning_rate": 4.8813186813186816e-05, + "loss": 0.7473, + "step": 864 + }, + { + "epoch": 2.3763736263736264, + "grad_norm": 16.205459594726562, + "learning_rate": 4.8811813186813186e-05, + "loss": 1.1731, + "step": 865 + }, + { + "epoch": 2.379120879120879, + "grad_norm": 15.398651123046875, + "learning_rate": 4.881043956043956e-05, + "loss": 0.8844, + "step": 866 + }, + { + "epoch": 2.381868131868132, + "grad_norm": 13.73119831085205, + "learning_rate": 4.880906593406593e-05, + "loss": 0.8661, + "step": 867 + }, + { + "epoch": 2.3846153846153846, + "grad_norm": 12.167360305786133, + "learning_rate": 4.880769230769231e-05, + "loss": 0.6573, + "step": 868 + }, + { + "epoch": 2.3873626373626373, + "grad_norm": 19.73334503173828, + "learning_rate": 4.8806318681318686e-05, + "loss": 1.0304, + "step": 869 + }, + { + "epoch": 2.39010989010989, + "grad_norm": 14.809198379516602, + "learning_rate": 4.8804945054945056e-05, + "loss": 0.8151, + "step": 870 + }, + { + "epoch": 2.392857142857143, + "grad_norm": 12.415457725524902, + "learning_rate": 4.880357142857143e-05, + "loss": 0.6135, + "step": 871 + }, + { + "epoch": 2.3956043956043955, + "grad_norm": 15.576911926269531, + "learning_rate": 4.88021978021978e-05, + "loss": 0.9186, + "step": 872 + }, + { + "epoch": 2.3983516483516483, + "grad_norm": 19.388874053955078, + "learning_rate": 4.880082417582418e-05, + "loss": 1.0617, + "step": 873 + }, + { + "epoch": 2.401098901098901, + "grad_norm": 11.338384628295898, + "learning_rate": 4.8799450549450556e-05, + "loss": 0.6287, + "step": 874 + }, + { + "epoch": 2.4038461538461537, + "grad_norm": 11.025239944458008, + "learning_rate": 4.8798076923076926e-05, + "loss": 0.6368, + "step": 875 + }, + { + "epoch": 2.4065934065934065, + "grad_norm": 18.58313751220703, + "learning_rate": 4.87967032967033e-05, + "loss": 0.9994, + "step": 876 + }, + { + "epoch": 2.409340659340659, + "grad_norm": 16.242887496948242, + "learning_rate": 4.879532967032967e-05, + "loss": 0.8415, + "step": 877 + }, + { + "epoch": 2.412087912087912, + "grad_norm": 19.367265701293945, + "learning_rate": 4.879395604395604e-05, + "loss": 1.0393, + "step": 878 + }, + { + "epoch": 2.4148351648351647, + "grad_norm": 17.655603408813477, + "learning_rate": 4.879258241758242e-05, + "loss": 1.1194, + "step": 879 + }, + { + "epoch": 2.4175824175824174, + "grad_norm": 13.590378761291504, + "learning_rate": 4.879120879120879e-05, + "loss": 0.9483, + "step": 880 + }, + { + "epoch": 2.42032967032967, + "grad_norm": 16.559337615966797, + "learning_rate": 4.878983516483517e-05, + "loss": 0.9681, + "step": 881 + }, + { + "epoch": 2.423076923076923, + "grad_norm": 14.033143043518066, + "learning_rate": 4.878846153846154e-05, + "loss": 0.8093, + "step": 882 + }, + { + "epoch": 2.4258241758241756, + "grad_norm": 18.925565719604492, + "learning_rate": 4.8787087912087914e-05, + "loss": 1.4123, + "step": 883 + }, + { + "epoch": 2.4285714285714284, + "grad_norm": 10.690312385559082, + "learning_rate": 4.878571428571429e-05, + "loss": 0.5591, + "step": 884 + }, + { + "epoch": 2.4313186813186816, + "grad_norm": 17.831926345825195, + "learning_rate": 4.878434065934066e-05, + "loss": 0.9981, + "step": 885 + }, + { + "epoch": 2.4340659340659343, + "grad_norm": 12.36201286315918, + "learning_rate": 4.878296703296704e-05, + "loss": 0.6042, + "step": 886 + }, + { + "epoch": 2.436813186813187, + "grad_norm": 13.248072624206543, + "learning_rate": 4.878159340659341e-05, + "loss": 0.7653, + "step": 887 + }, + { + "epoch": 2.4395604395604398, + "grad_norm": 13.58764934539795, + "learning_rate": 4.8780219780219784e-05, + "loss": 0.6259, + "step": 888 + }, + { + "epoch": 2.4423076923076925, + "grad_norm": 21.34435272216797, + "learning_rate": 4.877884615384616e-05, + "loss": 1.5696, + "step": 889 + }, + { + "epoch": 2.4450549450549453, + "grad_norm": 12.219600677490234, + "learning_rate": 4.877747252747253e-05, + "loss": 0.6909, + "step": 890 + }, + { + "epoch": 2.447802197802198, + "grad_norm": 19.6954288482666, + "learning_rate": 4.877609890109891e-05, + "loss": 1.0773, + "step": 891 + }, + { + "epoch": 2.4505494505494507, + "grad_norm": 10.840039253234863, + "learning_rate": 4.877472527472528e-05, + "loss": 0.6073, + "step": 892 + }, + { + "epoch": 2.4532967032967035, + "grad_norm": 16.162158966064453, + "learning_rate": 4.877335164835165e-05, + "loss": 1.0898, + "step": 893 + }, + { + "epoch": 2.456043956043956, + "grad_norm": 15.647418975830078, + "learning_rate": 4.8771978021978025e-05, + "loss": 1.1801, + "step": 894 + }, + { + "epoch": 2.458791208791209, + "grad_norm": 10.635377883911133, + "learning_rate": 4.8770604395604395e-05, + "loss": 0.5553, + "step": 895 + }, + { + "epoch": 2.4615384615384617, + "grad_norm": 17.356985092163086, + "learning_rate": 4.876923076923077e-05, + "loss": 1.1817, + "step": 896 + }, + { + "epoch": 2.4642857142857144, + "grad_norm": 11.16988754272461, + "learning_rate": 4.876785714285714e-05, + "loss": 0.6261, + "step": 897 + }, + { + "epoch": 2.467032967032967, + "grad_norm": 13.564579010009766, + "learning_rate": 4.876648351648352e-05, + "loss": 0.6362, + "step": 898 + }, + { + "epoch": 2.46978021978022, + "grad_norm": 13.431307792663574, + "learning_rate": 4.8765109890109895e-05, + "loss": 0.6184, + "step": 899 + }, + { + "epoch": 2.4725274725274726, + "grad_norm": 16.410036087036133, + "learning_rate": 4.8763736263736265e-05, + "loss": 1.1592, + "step": 900 + }, + { + "epoch": 2.4752747252747254, + "grad_norm": 14.282124519348145, + "learning_rate": 4.876236263736264e-05, + "loss": 0.7655, + "step": 901 + }, + { + "epoch": 2.478021978021978, + "grad_norm": 14.608485221862793, + "learning_rate": 4.876098901098901e-05, + "loss": 0.8013, + "step": 902 + }, + { + "epoch": 2.480769230769231, + "grad_norm": 17.4757080078125, + "learning_rate": 4.875961538461539e-05, + "loss": 0.975, + "step": 903 + }, + { + "epoch": 2.4835164835164836, + "grad_norm": 13.9658842086792, + "learning_rate": 4.8758241758241765e-05, + "loss": 0.8931, + "step": 904 + }, + { + "epoch": 2.4862637362637363, + "grad_norm": 14.800570487976074, + "learning_rate": 4.8756868131868135e-05, + "loss": 1.0848, + "step": 905 + }, + { + "epoch": 2.489010989010989, + "grad_norm": 13.978639602661133, + "learning_rate": 4.875549450549451e-05, + "loss": 0.6323, + "step": 906 + }, + { + "epoch": 2.491758241758242, + "grad_norm": 13.278600692749023, + "learning_rate": 4.875412087912088e-05, + "loss": 0.7134, + "step": 907 + }, + { + "epoch": 2.4945054945054945, + "grad_norm": 11.600781440734863, + "learning_rate": 4.875274725274725e-05, + "loss": 0.5576, + "step": 908 + }, + { + "epoch": 2.4972527472527473, + "grad_norm": 14.091300010681152, + "learning_rate": 4.875137362637363e-05, + "loss": 0.6932, + "step": 909 + }, + { + "epoch": 2.5, + "grad_norm": 17.729522705078125, + "learning_rate": 4.875e-05, + "loss": 1.1725, + "step": 910 + }, + { + "epoch": 2.5027472527472527, + "grad_norm": 10.997565269470215, + "learning_rate": 4.8748626373626376e-05, + "loss": 0.5312, + "step": 911 + }, + { + "epoch": 2.5054945054945055, + "grad_norm": 15.676445960998535, + "learning_rate": 4.8747252747252746e-05, + "loss": 0.846, + "step": 912 + }, + { + "epoch": 2.508241758241758, + "grad_norm": 14.0894775390625, + "learning_rate": 4.874587912087912e-05, + "loss": 0.7365, + "step": 913 + }, + { + "epoch": 2.510989010989011, + "grad_norm": 12.919700622558594, + "learning_rate": 4.87445054945055e-05, + "loss": 0.8069, + "step": 914 + }, + { + "epoch": 2.5137362637362637, + "grad_norm": 13.74242877960205, + "learning_rate": 4.874313186813187e-05, + "loss": 0.9692, + "step": 915 + }, + { + "epoch": 2.5164835164835164, + "grad_norm": 10.249710083007812, + "learning_rate": 4.8741758241758246e-05, + "loss": 0.515, + "step": 916 + }, + { + "epoch": 2.519230769230769, + "grad_norm": 13.359102249145508, + "learning_rate": 4.8740384615384616e-05, + "loss": 0.6638, + "step": 917 + }, + { + "epoch": 2.521978021978022, + "grad_norm": 14.396571159362793, + "learning_rate": 4.873901098901099e-05, + "loss": 0.8507, + "step": 918 + }, + { + "epoch": 2.5247252747252746, + "grad_norm": 22.028263092041016, + "learning_rate": 4.873763736263737e-05, + "loss": 1.1355, + "step": 919 + }, + { + "epoch": 2.5274725274725274, + "grad_norm": 14.955309867858887, + "learning_rate": 4.873626373626374e-05, + "loss": 0.8437, + "step": 920 + }, + { + "epoch": 2.53021978021978, + "grad_norm": 11.971925735473633, + "learning_rate": 4.873489010989012e-05, + "loss": 0.5783, + "step": 921 + }, + { + "epoch": 2.532967032967033, + "grad_norm": 14.91650390625, + "learning_rate": 4.873351648351649e-05, + "loss": 0.8866, + "step": 922 + }, + { + "epoch": 2.5357142857142856, + "grad_norm": 12.280522346496582, + "learning_rate": 4.873214285714286e-05, + "loss": 0.6777, + "step": 923 + }, + { + "epoch": 2.5384615384615383, + "grad_norm": 13.0282564163208, + "learning_rate": 4.8730769230769234e-05, + "loss": 0.832, + "step": 924 + }, + { + "epoch": 2.541208791208791, + "grad_norm": 14.164655685424805, + "learning_rate": 4.8729395604395604e-05, + "loss": 0.7761, + "step": 925 + }, + { + "epoch": 2.543956043956044, + "grad_norm": 19.346784591674805, + "learning_rate": 4.872802197802198e-05, + "loss": 1.6059, + "step": 926 + }, + { + "epoch": 2.5467032967032965, + "grad_norm": 12.846574783325195, + "learning_rate": 4.872664835164835e-05, + "loss": 0.7207, + "step": 927 + }, + { + "epoch": 2.5494505494505493, + "grad_norm": 18.93380355834961, + "learning_rate": 4.872527472527473e-05, + "loss": 1.1442, + "step": 928 + }, + { + "epoch": 2.552197802197802, + "grad_norm": 10.87706470489502, + "learning_rate": 4.8723901098901104e-05, + "loss": 0.6411, + "step": 929 + }, + { + "epoch": 2.5549450549450547, + "grad_norm": 10.07974910736084, + "learning_rate": 4.8722527472527474e-05, + "loss": 0.4389, + "step": 930 + }, + { + "epoch": 2.5576923076923075, + "grad_norm": 15.179009437561035, + "learning_rate": 4.872115384615385e-05, + "loss": 0.9018, + "step": 931 + }, + { + "epoch": 2.5604395604395602, + "grad_norm": 9.651642799377441, + "learning_rate": 4.871978021978022e-05, + "loss": 0.4279, + "step": 932 + }, + { + "epoch": 2.563186813186813, + "grad_norm": 12.315526008605957, + "learning_rate": 4.87184065934066e-05, + "loss": 0.602, + "step": 933 + }, + { + "epoch": 2.5659340659340657, + "grad_norm": 11.946396827697754, + "learning_rate": 4.8717032967032974e-05, + "loss": 0.6542, + "step": 934 + }, + { + "epoch": 2.5686813186813184, + "grad_norm": 14.62000846862793, + "learning_rate": 4.8715659340659344e-05, + "loss": 0.6958, + "step": 935 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 13.927973747253418, + "learning_rate": 4.8714285714285714e-05, + "loss": 0.7677, + "step": 936 + }, + { + "epoch": 2.574175824175824, + "grad_norm": 17.85807228088379, + "learning_rate": 4.871291208791209e-05, + "loss": 0.9615, + "step": 937 + }, + { + "epoch": 2.5769230769230766, + "grad_norm": 11.025675773620605, + "learning_rate": 4.871153846153846e-05, + "loss": 0.5344, + "step": 938 + }, + { + "epoch": 2.57967032967033, + "grad_norm": 16.4997615814209, + "learning_rate": 4.871016483516484e-05, + "loss": 0.9222, + "step": 939 + }, + { + "epoch": 2.5824175824175826, + "grad_norm": 18.1002254486084, + "learning_rate": 4.870879120879121e-05, + "loss": 0.8736, + "step": 940 + }, + { + "epoch": 2.5851648351648353, + "grad_norm": 15.034292221069336, + "learning_rate": 4.8707417582417585e-05, + "loss": 0.994, + "step": 941 + }, + { + "epoch": 2.587912087912088, + "grad_norm": 12.981252670288086, + "learning_rate": 4.8706043956043955e-05, + "loss": 0.6472, + "step": 942 + }, + { + "epoch": 2.590659340659341, + "grad_norm": 14.668790817260742, + "learning_rate": 4.870467032967033e-05, + "loss": 0.7417, + "step": 943 + }, + { + "epoch": 2.5934065934065935, + "grad_norm": 10.248544692993164, + "learning_rate": 4.870329670329671e-05, + "loss": 0.3604, + "step": 944 + }, + { + "epoch": 2.5961538461538463, + "grad_norm": 18.196813583374023, + "learning_rate": 4.870192307692308e-05, + "loss": 1.0836, + "step": 945 + }, + { + "epoch": 2.598901098901099, + "grad_norm": 12.476591110229492, + "learning_rate": 4.8700549450549455e-05, + "loss": 0.612, + "step": 946 + }, + { + "epoch": 2.6016483516483517, + "grad_norm": 13.208456039428711, + "learning_rate": 4.8699175824175825e-05, + "loss": 0.7995, + "step": 947 + }, + { + "epoch": 2.6043956043956045, + "grad_norm": 11.311752319335938, + "learning_rate": 4.86978021978022e-05, + "loss": 0.5085, + "step": 948 + }, + { + "epoch": 2.607142857142857, + "grad_norm": 12.848376274108887, + "learning_rate": 4.869642857142858e-05, + "loss": 0.7199, + "step": 949 + }, + { + "epoch": 2.60989010989011, + "grad_norm": 18.88039207458496, + "learning_rate": 4.869505494505495e-05, + "loss": 1.2337, + "step": 950 + }, + { + "epoch": 2.6126373626373627, + "grad_norm": 17.01807975769043, + "learning_rate": 4.869368131868132e-05, + "loss": 0.6467, + "step": 951 + }, + { + "epoch": 2.6153846153846154, + "grad_norm": 16.820377349853516, + "learning_rate": 4.8692307692307696e-05, + "loss": 0.8148, + "step": 952 + }, + { + "epoch": 2.618131868131868, + "grad_norm": 16.092241287231445, + "learning_rate": 4.8690934065934066e-05, + "loss": 0.811, + "step": 953 + }, + { + "epoch": 2.620879120879121, + "grad_norm": 13.59615707397461, + "learning_rate": 4.868956043956044e-05, + "loss": 0.852, + "step": 954 + }, + { + "epoch": 2.6236263736263736, + "grad_norm": 13.780264854431152, + "learning_rate": 4.868818681318681e-05, + "loss": 0.6501, + "step": 955 + }, + { + "epoch": 2.6263736263736264, + "grad_norm": 20.365699768066406, + "learning_rate": 4.868681318681319e-05, + "loss": 1.4267, + "step": 956 + }, + { + "epoch": 2.629120879120879, + "grad_norm": 16.374561309814453, + "learning_rate": 4.868543956043956e-05, + "loss": 0.9514, + "step": 957 + }, + { + "epoch": 2.631868131868132, + "grad_norm": 19.701396942138672, + "learning_rate": 4.8684065934065936e-05, + "loss": 0.8056, + "step": 958 + }, + { + "epoch": 2.6346153846153846, + "grad_norm": 14.353686332702637, + "learning_rate": 4.868269230769231e-05, + "loss": 0.7461, + "step": 959 + }, + { + "epoch": 2.6373626373626373, + "grad_norm": 18.32234001159668, + "learning_rate": 4.868131868131868e-05, + "loss": 1.1045, + "step": 960 + }, + { + "epoch": 2.64010989010989, + "grad_norm": 18.57530403137207, + "learning_rate": 4.867994505494506e-05, + "loss": 1.3429, + "step": 961 + }, + { + "epoch": 2.642857142857143, + "grad_norm": 13.298498153686523, + "learning_rate": 4.867857142857143e-05, + "loss": 0.7081, + "step": 962 + }, + { + "epoch": 2.6456043956043955, + "grad_norm": 13.798978805541992, + "learning_rate": 4.867719780219781e-05, + "loss": 0.6321, + "step": 963 + }, + { + "epoch": 2.6483516483516483, + "grad_norm": 19.77607536315918, + "learning_rate": 4.8675824175824183e-05, + "loss": 1.1598, + "step": 964 + }, + { + "epoch": 2.651098901098901, + "grad_norm": 14.557024955749512, + "learning_rate": 4.8674450549450553e-05, + "loss": 1.2036, + "step": 965 + }, + { + "epoch": 2.6538461538461537, + "grad_norm": 15.480331420898438, + "learning_rate": 4.8673076923076923e-05, + "loss": 0.8445, + "step": 966 + }, + { + "epoch": 2.6565934065934065, + "grad_norm": 10.985025405883789, + "learning_rate": 4.86717032967033e-05, + "loss": 0.6221, + "step": 967 + }, + { + "epoch": 2.659340659340659, + "grad_norm": 11.912259101867676, + "learning_rate": 4.867032967032967e-05, + "loss": 0.886, + "step": 968 + }, + { + "epoch": 2.662087912087912, + "grad_norm": 13.69580364227295, + "learning_rate": 4.866895604395605e-05, + "loss": 0.8196, + "step": 969 + }, + { + "epoch": 2.6648351648351647, + "grad_norm": 15.000988960266113, + "learning_rate": 4.866758241758242e-05, + "loss": 0.7369, + "step": 970 + }, + { + "epoch": 2.6675824175824174, + "grad_norm": 10.479554176330566, + "learning_rate": 4.8666208791208794e-05, + "loss": 0.5142, + "step": 971 + }, + { + "epoch": 2.67032967032967, + "grad_norm": 13.528936386108398, + "learning_rate": 4.8664835164835164e-05, + "loss": 0.7575, + "step": 972 + }, + { + "epoch": 2.6730769230769234, + "grad_norm": 11.4408540725708, + "learning_rate": 4.866346153846154e-05, + "loss": 0.5715, + "step": 973 + }, + { + "epoch": 2.675824175824176, + "grad_norm": 13.204667091369629, + "learning_rate": 4.866208791208792e-05, + "loss": 0.7839, + "step": 974 + }, + { + "epoch": 2.678571428571429, + "grad_norm": 14.548672676086426, + "learning_rate": 4.866071428571429e-05, + "loss": 0.9564, + "step": 975 + }, + { + "epoch": 2.6813186813186816, + "grad_norm": 15.351937294006348, + "learning_rate": 4.8659340659340664e-05, + "loss": 1.0257, + "step": 976 + }, + { + "epoch": 2.6840659340659343, + "grad_norm": 12.302892684936523, + "learning_rate": 4.8657967032967034e-05, + "loss": 0.6954, + "step": 977 + }, + { + "epoch": 2.686813186813187, + "grad_norm": 13.949237823486328, + "learning_rate": 4.865659340659341e-05, + "loss": 0.7075, + "step": 978 + }, + { + "epoch": 2.6895604395604398, + "grad_norm": 12.472766876220703, + "learning_rate": 4.865521978021979e-05, + "loss": 0.652, + "step": 979 + }, + { + "epoch": 2.6923076923076925, + "grad_norm": 14.029370307922363, + "learning_rate": 4.865384615384616e-05, + "loss": 0.7856, + "step": 980 + }, + { + "epoch": 2.6950549450549453, + "grad_norm": 13.657215118408203, + "learning_rate": 4.865247252747253e-05, + "loss": 0.9041, + "step": 981 + }, + { + "epoch": 2.697802197802198, + "grad_norm": 15.17567253112793, + "learning_rate": 4.8651098901098905e-05, + "loss": 0.8105, + "step": 982 + }, + { + "epoch": 2.7005494505494507, + "grad_norm": 19.03754425048828, + "learning_rate": 4.8649725274725275e-05, + "loss": 1.0992, + "step": 983 + }, + { + "epoch": 2.7032967032967035, + "grad_norm": 18.938764572143555, + "learning_rate": 4.864835164835165e-05, + "loss": 0.9926, + "step": 984 + }, + { + "epoch": 2.706043956043956, + "grad_norm": 13.751965522766113, + "learning_rate": 4.864697802197802e-05, + "loss": 0.7219, + "step": 985 + }, + { + "epoch": 2.708791208791209, + "grad_norm": 14.994152069091797, + "learning_rate": 4.86456043956044e-05, + "loss": 1.0331, + "step": 986 + }, + { + "epoch": 2.7115384615384617, + "grad_norm": 19.881118774414062, + "learning_rate": 4.864423076923077e-05, + "loss": 1.1619, + "step": 987 + }, + { + "epoch": 2.7142857142857144, + "grad_norm": 12.325602531433105, + "learning_rate": 4.8642857142857145e-05, + "loss": 0.6344, + "step": 988 + }, + { + "epoch": 2.717032967032967, + "grad_norm": 17.29552459716797, + "learning_rate": 4.864148351648352e-05, + "loss": 0.9756, + "step": 989 + }, + { + "epoch": 2.71978021978022, + "grad_norm": 14.532641410827637, + "learning_rate": 4.864010989010989e-05, + "loss": 0.7731, + "step": 990 + }, + { + "epoch": 2.7225274725274726, + "grad_norm": 17.141576766967773, + "learning_rate": 4.863873626373627e-05, + "loss": 1.1554, + "step": 991 + }, + { + "epoch": 2.7252747252747254, + "grad_norm": 19.422685623168945, + "learning_rate": 4.863736263736264e-05, + "loss": 1.1999, + "step": 992 + }, + { + "epoch": 2.728021978021978, + "grad_norm": 16.098045349121094, + "learning_rate": 4.8635989010989016e-05, + "loss": 0.8092, + "step": 993 + }, + { + "epoch": 2.730769230769231, + "grad_norm": 15.430366516113281, + "learning_rate": 4.863461538461539e-05, + "loss": 0.8428, + "step": 994 + }, + { + "epoch": 2.7335164835164836, + "grad_norm": 12.821514129638672, + "learning_rate": 4.863324175824176e-05, + "loss": 0.592, + "step": 995 + }, + { + "epoch": 2.7362637362637363, + "grad_norm": 12.7743501663208, + "learning_rate": 4.863186813186813e-05, + "loss": 0.6157, + "step": 996 + }, + { + "epoch": 2.739010989010989, + "grad_norm": 10.349045753479004, + "learning_rate": 4.863049450549451e-05, + "loss": 0.524, + "step": 997 + }, + { + "epoch": 2.741758241758242, + "grad_norm": 11.299015045166016, + "learning_rate": 4.862912087912088e-05, + "loss": 0.5873, + "step": 998 + }, + { + "epoch": 2.7445054945054945, + "grad_norm": 14.360404014587402, + "learning_rate": 4.8627747252747256e-05, + "loss": 0.8997, + "step": 999 + }, + { + "epoch": 2.7472527472527473, + "grad_norm": 16.014493942260742, + "learning_rate": 4.8626373626373626e-05, + "loss": 0.7922, + "step": 1000 + }, + { + "epoch": 2.75, + "grad_norm": 16.882173538208008, + "learning_rate": 4.8625e-05, + "loss": 0.8624, + "step": 1001 + }, + { + "epoch": 2.7527472527472527, + "grad_norm": 16.81242561340332, + "learning_rate": 4.862362637362637e-05, + "loss": 0.943, + "step": 1002 + }, + { + "epoch": 2.7554945054945055, + "grad_norm": 21.08108901977539, + "learning_rate": 4.862225274725275e-05, + "loss": 0.8919, + "step": 1003 + }, + { + "epoch": 2.758241758241758, + "grad_norm": 16.30824089050293, + "learning_rate": 4.8620879120879126e-05, + "loss": 0.9976, + "step": 1004 + }, + { + "epoch": 2.760989010989011, + "grad_norm": 13.494091033935547, + "learning_rate": 4.8619505494505496e-05, + "loss": 0.7004, + "step": 1005 + }, + { + "epoch": 2.7637362637362637, + "grad_norm": 16.770038604736328, + "learning_rate": 4.861813186813187e-05, + "loss": 1.125, + "step": 1006 + }, + { + "epoch": 2.7664835164835164, + "grad_norm": 11.999794006347656, + "learning_rate": 4.861675824175824e-05, + "loss": 0.6158, + "step": 1007 + }, + { + "epoch": 2.769230769230769, + "grad_norm": 15.604997634887695, + "learning_rate": 4.861538461538462e-05, + "loss": 0.6823, + "step": 1008 + }, + { + "epoch": 2.771978021978022, + "grad_norm": 13.323881149291992, + "learning_rate": 4.8614010989011e-05, + "loss": 0.6107, + "step": 1009 + }, + { + "epoch": 2.7747252747252746, + "grad_norm": 15.792468070983887, + "learning_rate": 4.861263736263737e-05, + "loss": 0.8816, + "step": 1010 + }, + { + "epoch": 2.7774725274725274, + "grad_norm": 17.18852996826172, + "learning_rate": 4.861126373626374e-05, + "loss": 1.0481, + "step": 1011 + }, + { + "epoch": 2.78021978021978, + "grad_norm": 17.5505313873291, + "learning_rate": 4.8609890109890114e-05, + "loss": 0.957, + "step": 1012 + }, + { + "epoch": 2.782967032967033, + "grad_norm": 12.826671600341797, + "learning_rate": 4.8608516483516484e-05, + "loss": 0.6748, + "step": 1013 + }, + { + "epoch": 2.7857142857142856, + "grad_norm": 14.917492866516113, + "learning_rate": 4.860714285714286e-05, + "loss": 0.779, + "step": 1014 + }, + { + "epoch": 2.7884615384615383, + "grad_norm": 18.09163475036621, + "learning_rate": 4.860576923076923e-05, + "loss": 0.9201, + "step": 1015 + }, + { + "epoch": 2.791208791208791, + "grad_norm": 16.606626510620117, + "learning_rate": 4.860439560439561e-05, + "loss": 0.9688, + "step": 1016 + }, + { + "epoch": 2.793956043956044, + "grad_norm": 11.329010963439941, + "learning_rate": 4.860302197802198e-05, + "loss": 0.6743, + "step": 1017 + }, + { + "epoch": 2.7967032967032965, + "grad_norm": 13.6315279006958, + "learning_rate": 4.8601648351648354e-05, + "loss": 0.5164, + "step": 1018 + }, + { + "epoch": 2.7994505494505493, + "grad_norm": 18.687482833862305, + "learning_rate": 4.860027472527473e-05, + "loss": 1.1544, + "step": 1019 + }, + { + "epoch": 2.802197802197802, + "grad_norm": 10.510602951049805, + "learning_rate": 4.85989010989011e-05, + "loss": 0.4741, + "step": 1020 + }, + { + "epoch": 2.8049450549450547, + "grad_norm": 15.128782272338867, + "learning_rate": 4.859752747252748e-05, + "loss": 0.7715, + "step": 1021 + }, + { + "epoch": 2.8076923076923075, + "grad_norm": 14.728870391845703, + "learning_rate": 4.859615384615385e-05, + "loss": 0.8743, + "step": 1022 + }, + { + "epoch": 2.8104395604395602, + "grad_norm": 14.287874221801758, + "learning_rate": 4.8594780219780225e-05, + "loss": 0.9568, + "step": 1023 + }, + { + "epoch": 2.813186813186813, + "grad_norm": 15.867145538330078, + "learning_rate": 4.8593406593406595e-05, + "loss": 0.9582, + "step": 1024 + }, + { + "epoch": 2.8159340659340657, + "grad_norm": 14.950897216796875, + "learning_rate": 4.859203296703297e-05, + "loss": 0.8941, + "step": 1025 + }, + { + "epoch": 2.8186813186813184, + "grad_norm": 12.07934856414795, + "learning_rate": 4.859065934065934e-05, + "loss": 0.7387, + "step": 1026 + }, + { + "epoch": 2.821428571428571, + "grad_norm": 13.956378936767578, + "learning_rate": 4.858928571428572e-05, + "loss": 0.7, + "step": 1027 + }, + { + "epoch": 2.824175824175824, + "grad_norm": 12.46455192565918, + "learning_rate": 4.858791208791209e-05, + "loss": 0.6541, + "step": 1028 + }, + { + "epoch": 2.8269230769230766, + "grad_norm": 12.134684562683105, + "learning_rate": 4.858653846153846e-05, + "loss": 0.4795, + "step": 1029 + }, + { + "epoch": 2.82967032967033, + "grad_norm": 11.896688461303711, + "learning_rate": 4.8585164835164835e-05, + "loss": 0.5076, + "step": 1030 + }, + { + "epoch": 2.8324175824175826, + "grad_norm": 17.196182250976562, + "learning_rate": 4.858379120879121e-05, + "loss": 0.9253, + "step": 1031 + }, + { + "epoch": 2.8351648351648353, + "grad_norm": 15.434943199157715, + "learning_rate": 4.858241758241758e-05, + "loss": 0.7467, + "step": 1032 + }, + { + "epoch": 2.837912087912088, + "grad_norm": 15.364803314208984, + "learning_rate": 4.858104395604396e-05, + "loss": 1.0478, + "step": 1033 + }, + { + "epoch": 2.840659340659341, + "grad_norm": 15.175493240356445, + "learning_rate": 4.857967032967033e-05, + "loss": 0.8128, + "step": 1034 + }, + { + "epoch": 2.8434065934065935, + "grad_norm": 17.490291595458984, + "learning_rate": 4.8578296703296705e-05, + "loss": 0.7505, + "step": 1035 + }, + { + "epoch": 2.8461538461538463, + "grad_norm": 20.38970184326172, + "learning_rate": 4.857692307692308e-05, + "loss": 1.4133, + "step": 1036 + }, + { + "epoch": 2.848901098901099, + "grad_norm": 16.044097900390625, + "learning_rate": 4.857554945054945e-05, + "loss": 0.8502, + "step": 1037 + }, + { + "epoch": 2.8516483516483517, + "grad_norm": 17.743749618530273, + "learning_rate": 4.857417582417583e-05, + "loss": 1.1549, + "step": 1038 + }, + { + "epoch": 2.8543956043956045, + "grad_norm": 12.499144554138184, + "learning_rate": 4.85728021978022e-05, + "loss": 0.5377, + "step": 1039 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 14.010758399963379, + "learning_rate": 4.8571428571428576e-05, + "loss": 0.7684, + "step": 1040 + }, + { + "epoch": 2.85989010989011, + "grad_norm": 12.936921119689941, + "learning_rate": 4.8570054945054946e-05, + "loss": 0.5893, + "step": 1041 + }, + { + "epoch": 2.8626373626373627, + "grad_norm": 12.228337287902832, + "learning_rate": 4.856868131868132e-05, + "loss": 0.5257, + "step": 1042 + }, + { + "epoch": 2.8653846153846154, + "grad_norm": 19.685182571411133, + "learning_rate": 4.856730769230769e-05, + "loss": 1.1584, + "step": 1043 + }, + { + "epoch": 2.868131868131868, + "grad_norm": 11.31631851196289, + "learning_rate": 4.856593406593406e-05, + "loss": 0.4856, + "step": 1044 + }, + { + "epoch": 2.870879120879121, + "grad_norm": 12.619566917419434, + "learning_rate": 4.856456043956044e-05, + "loss": 0.7493, + "step": 1045 + }, + { + "epoch": 2.8736263736263736, + "grad_norm": 12.078021049499512, + "learning_rate": 4.8563186813186816e-05, + "loss": 0.5543, + "step": 1046 + }, + { + "epoch": 2.8763736263736264, + "grad_norm": 14.460659980773926, + "learning_rate": 4.8561813186813186e-05, + "loss": 0.7915, + "step": 1047 + }, + { + "epoch": 2.879120879120879, + "grad_norm": 16.111557006835938, + "learning_rate": 4.856043956043956e-05, + "loss": 0.9161, + "step": 1048 + }, + { + "epoch": 2.881868131868132, + "grad_norm": 14.116975784301758, + "learning_rate": 4.855906593406593e-05, + "loss": 0.8482, + "step": 1049 + }, + { + "epoch": 2.8846153846153846, + "grad_norm": 20.62559700012207, + "learning_rate": 4.855769230769231e-05, + "loss": 1.2965, + "step": 1050 + }, + { + "epoch": 2.8873626373626373, + "grad_norm": 16.105443954467773, + "learning_rate": 4.855631868131869e-05, + "loss": 0.7252, + "step": 1051 + }, + { + "epoch": 2.89010989010989, + "grad_norm": 15.534497261047363, + "learning_rate": 4.855494505494506e-05, + "loss": 0.8369, + "step": 1052 + }, + { + "epoch": 2.892857142857143, + "grad_norm": 15.814239501953125, + "learning_rate": 4.8553571428571434e-05, + "loss": 0.9219, + "step": 1053 + }, + { + "epoch": 2.8956043956043955, + "grad_norm": 17.264780044555664, + "learning_rate": 4.8552197802197804e-05, + "loss": 1.082, + "step": 1054 + }, + { + "epoch": 2.8983516483516483, + "grad_norm": 11.304997444152832, + "learning_rate": 4.855082417582418e-05, + "loss": 0.5689, + "step": 1055 + }, + { + "epoch": 2.901098901098901, + "grad_norm": 12.801589965820312, + "learning_rate": 4.854945054945055e-05, + "loss": 0.8306, + "step": 1056 + }, + { + "epoch": 2.9038461538461537, + "grad_norm": 17.945331573486328, + "learning_rate": 4.854807692307693e-05, + "loss": 0.7167, + "step": 1057 + }, + { + "epoch": 2.9065934065934065, + "grad_norm": 14.469118118286133, + "learning_rate": 4.85467032967033e-05, + "loss": 0.7043, + "step": 1058 + }, + { + "epoch": 2.909340659340659, + "grad_norm": 15.441131591796875, + "learning_rate": 4.854532967032967e-05, + "loss": 0.6776, + "step": 1059 + }, + { + "epoch": 2.912087912087912, + "grad_norm": 11.984737396240234, + "learning_rate": 4.8543956043956044e-05, + "loss": 0.6957, + "step": 1060 + }, + { + "epoch": 2.9148351648351647, + "grad_norm": 9.057570457458496, + "learning_rate": 4.854258241758242e-05, + "loss": 0.4156, + "step": 1061 + }, + { + "epoch": 2.9175824175824174, + "grad_norm": 16.287384033203125, + "learning_rate": 4.854120879120879e-05, + "loss": 1.015, + "step": 1062 + }, + { + "epoch": 2.92032967032967, + "grad_norm": 14.247267723083496, + "learning_rate": 4.853983516483517e-05, + "loss": 0.9315, + "step": 1063 + }, + { + "epoch": 2.9230769230769234, + "grad_norm": 20.816917419433594, + "learning_rate": 4.853846153846154e-05, + "loss": 1.3487, + "step": 1064 + }, + { + "epoch": 2.925824175824176, + "grad_norm": 16.545652389526367, + "learning_rate": 4.8537087912087914e-05, + "loss": 0.8553, + "step": 1065 + }, + { + "epoch": 2.928571428571429, + "grad_norm": 13.354628562927246, + "learning_rate": 4.853571428571429e-05, + "loss": 0.7846, + "step": 1066 + }, + { + "epoch": 2.9313186813186816, + "grad_norm": 13.603947639465332, + "learning_rate": 4.853434065934066e-05, + "loss": 0.6224, + "step": 1067 + }, + { + "epoch": 2.9340659340659343, + "grad_norm": 17.05893325805664, + "learning_rate": 4.853296703296704e-05, + "loss": 1.0595, + "step": 1068 + }, + { + "epoch": 2.936813186813187, + "grad_norm": 13.80978775024414, + "learning_rate": 4.853159340659341e-05, + "loss": 0.8261, + "step": 1069 + }, + { + "epoch": 2.9395604395604398, + "grad_norm": 15.800795555114746, + "learning_rate": 4.8530219780219785e-05, + "loss": 0.8855, + "step": 1070 + }, + { + "epoch": 2.9423076923076925, + "grad_norm": 13.09980583190918, + "learning_rate": 4.8528846153846155e-05, + "loss": 0.6312, + "step": 1071 + }, + { + "epoch": 2.9450549450549453, + "grad_norm": 10.892742156982422, + "learning_rate": 4.852747252747253e-05, + "loss": 0.5674, + "step": 1072 + }, + { + "epoch": 2.947802197802198, + "grad_norm": 15.549901008605957, + "learning_rate": 4.85260989010989e-05, + "loss": 0.8083, + "step": 1073 + }, + { + "epoch": 2.9505494505494507, + "grad_norm": 12.731832504272461, + "learning_rate": 4.852472527472527e-05, + "loss": 0.6248, + "step": 1074 + }, + { + "epoch": 2.9532967032967035, + "grad_norm": 17.700197219848633, + "learning_rate": 4.852335164835165e-05, + "loss": 0.9189, + "step": 1075 + }, + { + "epoch": 2.956043956043956, + "grad_norm": 15.86786937713623, + "learning_rate": 4.8521978021978025e-05, + "loss": 0.8117, + "step": 1076 + }, + { + "epoch": 2.958791208791209, + "grad_norm": 9.330792427062988, + "learning_rate": 4.8520604395604395e-05, + "loss": 0.462, + "step": 1077 + }, + { + "epoch": 2.9615384615384617, + "grad_norm": 18.814945220947266, + "learning_rate": 4.851923076923077e-05, + "loss": 1.0529, + "step": 1078 + }, + { + "epoch": 2.9642857142857144, + "grad_norm": 11.608325958251953, + "learning_rate": 4.851785714285714e-05, + "loss": 0.5202, + "step": 1079 + }, + { + "epoch": 2.967032967032967, + "grad_norm": 14.813562393188477, + "learning_rate": 4.851648351648352e-05, + "loss": 0.8153, + "step": 1080 + }, + { + "epoch": 2.96978021978022, + "grad_norm": 16.098466873168945, + "learning_rate": 4.8515109890109896e-05, + "loss": 1.0747, + "step": 1081 + }, + { + "epoch": 2.9725274725274726, + "grad_norm": 14.27912712097168, + "learning_rate": 4.8513736263736266e-05, + "loss": 0.7232, + "step": 1082 + }, + { + "epoch": 2.9752747252747254, + "grad_norm": 10.577662467956543, + "learning_rate": 4.851236263736264e-05, + "loss": 0.4757, + "step": 1083 + }, + { + "epoch": 2.978021978021978, + "grad_norm": 19.45563316345215, + "learning_rate": 4.851098901098901e-05, + "loss": 1.2618, + "step": 1084 + }, + { + "epoch": 2.980769230769231, + "grad_norm": 23.905094146728516, + "learning_rate": 4.850961538461539e-05, + "loss": 1.3215, + "step": 1085 + }, + { + "epoch": 2.9835164835164836, + "grad_norm": 13.220477104187012, + "learning_rate": 4.850824175824176e-05, + "loss": 0.6609, + "step": 1086 + }, + { + "epoch": 2.9862637362637363, + "grad_norm": 11.321064949035645, + "learning_rate": 4.8506868131868136e-05, + "loss": 0.4661, + "step": 1087 + }, + { + "epoch": 2.989010989010989, + "grad_norm": 9.831096649169922, + "learning_rate": 4.8505494505494506e-05, + "loss": 0.45, + "step": 1088 + }, + { + "epoch": 2.991758241758242, + "grad_norm": 18.78774070739746, + "learning_rate": 4.8504120879120876e-05, + "loss": 0.9873, + "step": 1089 + }, + { + "epoch": 2.9945054945054945, + "grad_norm": 12.764711380004883, + "learning_rate": 4.850274725274725e-05, + "loss": 0.7034, + "step": 1090 + }, + { + "epoch": 2.9972527472527473, + "grad_norm": 16.9917049407959, + "learning_rate": 4.850137362637363e-05, + "loss": 0.8733, + "step": 1091 + }, + { + "epoch": 3.0, + "grad_norm": 36.12929916381836, + "learning_rate": 4.85e-05, + "loss": 0.9358, + "step": 1092 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.7975206611570248, + "eval_f1": 0.7956857154762363, + "eval_f1_DuraRiadoRio_64x64": 0.7439024390243902, + "eval_f1_Mole_64x64": 0.7604562737642585, + "eval_f1_Quebrado_64x64": 0.9131832797427653, + "eval_f1_RiadoRio_64x64": 0.6544117647058824, + "eval_f1_RioFechado_64x64": 0.9064748201438849, + "eval_loss": 0.5862842202186584, + "eval_precision": 0.8043632537628433, + "eval_precision_DuraRiadoRio_64x64": 0.6630434782608695, + "eval_precision_Mole_64x64": 0.8403361344537815, + "eval_precision_Quebrado_64x64": 0.8502994011976048, + "eval_precision_RiadoRio_64x64": 0.7416666666666667, + "eval_precision_RioFechado_64x64": 0.9264705882352942, + "eval_recall": 0.8001256074458446, + "eval_recall_DuraRiadoRio_64x64": 0.8472222222222222, + "eval_recall_Mole_64x64": 0.6944444444444444, + "eval_recall_Quebrado_64x64": 0.9861111111111112, + "eval_recall_RiadoRio_64x64": 0.5855263157894737, + "eval_recall_RioFechado_64x64": 0.8873239436619719, + "eval_runtime": 1.7821, + "eval_samples_per_second": 407.396, + "eval_steps_per_second": 25.813, + "step": 1092 + }, + { + "epoch": 3.0027472527472527, + "grad_norm": 15.010889053344727, + "learning_rate": 4.849862637362638e-05, + "loss": 0.7484, + "step": 1093 + }, + { + "epoch": 3.0054945054945055, + "grad_norm": 12.706743240356445, + "learning_rate": 4.849725274725275e-05, + "loss": 0.6257, + "step": 1094 + }, + { + "epoch": 3.008241758241758, + "grad_norm": 11.010807991027832, + "learning_rate": 4.8495879120879123e-05, + "loss": 0.4269, + "step": 1095 + }, + { + "epoch": 3.010989010989011, + "grad_norm": 15.5350980758667, + "learning_rate": 4.84945054945055e-05, + "loss": 0.7243, + "step": 1096 + }, + { + "epoch": 3.0137362637362637, + "grad_norm": 14.777241706848145, + "learning_rate": 4.849313186813187e-05, + "loss": 0.8202, + "step": 1097 + }, + { + "epoch": 3.0164835164835164, + "grad_norm": 16.283203125, + "learning_rate": 4.849175824175825e-05, + "loss": 0.9017, + "step": 1098 + }, + { + "epoch": 3.019230769230769, + "grad_norm": 21.33920669555664, + "learning_rate": 4.849038461538462e-05, + "loss": 1.0989, + "step": 1099 + }, + { + "epoch": 3.021978021978022, + "grad_norm": 14.630582809448242, + "learning_rate": 4.8489010989010994e-05, + "loss": 0.8588, + "step": 1100 + }, + { + "epoch": 3.0247252747252746, + "grad_norm": 12.767298698425293, + "learning_rate": 4.8487637362637364e-05, + "loss": 0.6625, + "step": 1101 + }, + { + "epoch": 3.0274725274725274, + "grad_norm": 11.870111465454102, + "learning_rate": 4.848626373626374e-05, + "loss": 0.5312, + "step": 1102 + }, + { + "epoch": 3.03021978021978, + "grad_norm": 14.522883415222168, + "learning_rate": 4.848489010989011e-05, + "loss": 0.68, + "step": 1103 + }, + { + "epoch": 3.032967032967033, + "grad_norm": 12.352104187011719, + "learning_rate": 4.848351648351648e-05, + "loss": 0.5397, + "step": 1104 + }, + { + "epoch": 3.0357142857142856, + "grad_norm": 19.11823081970215, + "learning_rate": 4.848214285714286e-05, + "loss": 0.8695, + "step": 1105 + }, + { + "epoch": 3.0384615384615383, + "grad_norm": 14.850441932678223, + "learning_rate": 4.8480769230769234e-05, + "loss": 0.8679, + "step": 1106 + }, + { + "epoch": 3.041208791208791, + "grad_norm": 14.250921249389648, + "learning_rate": 4.8479395604395604e-05, + "loss": 0.7014, + "step": 1107 + }, + { + "epoch": 3.043956043956044, + "grad_norm": 12.789539337158203, + "learning_rate": 4.847802197802198e-05, + "loss": 0.5839, + "step": 1108 + }, + { + "epoch": 3.0467032967032965, + "grad_norm": 13.744973182678223, + "learning_rate": 4.847664835164835e-05, + "loss": 0.5884, + "step": 1109 + }, + { + "epoch": 3.0494505494505493, + "grad_norm": 14.342063903808594, + "learning_rate": 4.847527472527473e-05, + "loss": 0.7472, + "step": 1110 + }, + { + "epoch": 3.052197802197802, + "grad_norm": 12.842358589172363, + "learning_rate": 4.8473901098901105e-05, + "loss": 0.5298, + "step": 1111 + }, + { + "epoch": 3.0549450549450547, + "grad_norm": 14.297633171081543, + "learning_rate": 4.8472527472527475e-05, + "loss": 0.7374, + "step": 1112 + }, + { + "epoch": 3.0576923076923075, + "grad_norm": 13.88449478149414, + "learning_rate": 4.847115384615385e-05, + "loss": 0.7506, + "step": 1113 + }, + { + "epoch": 3.0604395604395602, + "grad_norm": 12.865939140319824, + "learning_rate": 4.846978021978022e-05, + "loss": 0.6739, + "step": 1114 + }, + { + "epoch": 3.063186813186813, + "grad_norm": 11.264561653137207, + "learning_rate": 4.84684065934066e-05, + "loss": 0.598, + "step": 1115 + }, + { + "epoch": 3.065934065934066, + "grad_norm": 14.844022750854492, + "learning_rate": 4.846703296703297e-05, + "loss": 0.7561, + "step": 1116 + }, + { + "epoch": 3.068681318681319, + "grad_norm": 20.95587921142578, + "learning_rate": 4.8465659340659345e-05, + "loss": 1.1286, + "step": 1117 + }, + { + "epoch": 3.0714285714285716, + "grad_norm": 11.233293533325195, + "learning_rate": 4.8464285714285715e-05, + "loss": 0.4731, + "step": 1118 + }, + { + "epoch": 3.0741758241758244, + "grad_norm": 11.63081169128418, + "learning_rate": 4.8462912087912085e-05, + "loss": 0.7062, + "step": 1119 + }, + { + "epoch": 3.076923076923077, + "grad_norm": 13.197134017944336, + "learning_rate": 4.846153846153846e-05, + "loss": 0.5957, + "step": 1120 + }, + { + "epoch": 3.07967032967033, + "grad_norm": 15.979177474975586, + "learning_rate": 4.846016483516484e-05, + "loss": 0.8949, + "step": 1121 + }, + { + "epoch": 3.0824175824175826, + "grad_norm": 14.179339408874512, + "learning_rate": 4.845879120879121e-05, + "loss": 0.4901, + "step": 1122 + }, + { + "epoch": 3.0851648351648353, + "grad_norm": 15.860198020935059, + "learning_rate": 4.8457417582417586e-05, + "loss": 0.8915, + "step": 1123 + }, + { + "epoch": 3.087912087912088, + "grad_norm": 16.412391662597656, + "learning_rate": 4.8456043956043956e-05, + "loss": 0.6629, + "step": 1124 + }, + { + "epoch": 3.090659340659341, + "grad_norm": 16.962562561035156, + "learning_rate": 4.845467032967033e-05, + "loss": 0.891, + "step": 1125 + }, + { + "epoch": 3.0934065934065935, + "grad_norm": 13.635594367980957, + "learning_rate": 4.845329670329671e-05, + "loss": 0.8088, + "step": 1126 + }, + { + "epoch": 3.0961538461538463, + "grad_norm": 16.356666564941406, + "learning_rate": 4.845192307692308e-05, + "loss": 0.9095, + "step": 1127 + }, + { + "epoch": 3.098901098901099, + "grad_norm": 16.690282821655273, + "learning_rate": 4.8450549450549456e-05, + "loss": 0.9599, + "step": 1128 + }, + { + "epoch": 3.1016483516483517, + "grad_norm": 13.47181224822998, + "learning_rate": 4.8449175824175826e-05, + "loss": 0.6898, + "step": 1129 + }, + { + "epoch": 3.1043956043956045, + "grad_norm": 13.659584999084473, + "learning_rate": 4.84478021978022e-05, + "loss": 0.7924, + "step": 1130 + }, + { + "epoch": 3.107142857142857, + "grad_norm": 18.11357879638672, + "learning_rate": 4.844642857142857e-05, + "loss": 1.1331, + "step": 1131 + }, + { + "epoch": 3.10989010989011, + "grad_norm": 13.369011878967285, + "learning_rate": 4.844505494505494e-05, + "loss": 0.6528, + "step": 1132 + }, + { + "epoch": 3.1126373626373627, + "grad_norm": 20.734712600708008, + "learning_rate": 4.844368131868132e-05, + "loss": 1.3372, + "step": 1133 + }, + { + "epoch": 3.1153846153846154, + "grad_norm": 13.224788665771484, + "learning_rate": 4.844230769230769e-05, + "loss": 0.5056, + "step": 1134 + }, + { + "epoch": 3.118131868131868, + "grad_norm": 18.789953231811523, + "learning_rate": 4.8440934065934066e-05, + "loss": 0.9407, + "step": 1135 + }, + { + "epoch": 3.120879120879121, + "grad_norm": 13.066003799438477, + "learning_rate": 4.843956043956044e-05, + "loss": 0.5379, + "step": 1136 + }, + { + "epoch": 3.1236263736263736, + "grad_norm": 11.557300567626953, + "learning_rate": 4.843818681318681e-05, + "loss": 0.5609, + "step": 1137 + }, + { + "epoch": 3.1263736263736264, + "grad_norm": 14.562403678894043, + "learning_rate": 4.843681318681319e-05, + "loss": 0.6971, + "step": 1138 + }, + { + "epoch": 3.129120879120879, + "grad_norm": 14.110827445983887, + "learning_rate": 4.843543956043956e-05, + "loss": 0.5105, + "step": 1139 + }, + { + "epoch": 3.131868131868132, + "grad_norm": 11.627459526062012, + "learning_rate": 4.843406593406594e-05, + "loss": 0.5458, + "step": 1140 + }, + { + "epoch": 3.1346153846153846, + "grad_norm": 18.142868041992188, + "learning_rate": 4.8432692307692314e-05, + "loss": 1.0437, + "step": 1141 + }, + { + "epoch": 3.1373626373626373, + "grad_norm": 14.790739059448242, + "learning_rate": 4.8431318681318684e-05, + "loss": 0.7796, + "step": 1142 + }, + { + "epoch": 3.14010989010989, + "grad_norm": 18.24419593811035, + "learning_rate": 4.842994505494506e-05, + "loss": 1.1091, + "step": 1143 + }, + { + "epoch": 3.142857142857143, + "grad_norm": 15.927098274230957, + "learning_rate": 4.842857142857143e-05, + "loss": 0.7822, + "step": 1144 + }, + { + "epoch": 3.1456043956043955, + "grad_norm": 17.13590431213379, + "learning_rate": 4.842719780219781e-05, + "loss": 0.955, + "step": 1145 + }, + { + "epoch": 3.1483516483516483, + "grad_norm": 20.163480758666992, + "learning_rate": 4.842582417582418e-05, + "loss": 0.9576, + "step": 1146 + }, + { + "epoch": 3.151098901098901, + "grad_norm": 17.353715896606445, + "learning_rate": 4.842445054945055e-05, + "loss": 1.1112, + "step": 1147 + }, + { + "epoch": 3.1538461538461537, + "grad_norm": 14.613665580749512, + "learning_rate": 4.8423076923076924e-05, + "loss": 0.5777, + "step": 1148 + }, + { + "epoch": 3.1565934065934065, + "grad_norm": 17.150671005249023, + "learning_rate": 4.8421703296703294e-05, + "loss": 0.9015, + "step": 1149 + }, + { + "epoch": 3.159340659340659, + "grad_norm": 17.842042922973633, + "learning_rate": 4.842032967032967e-05, + "loss": 0.7374, + "step": 1150 + }, + { + "epoch": 3.162087912087912, + "grad_norm": 16.112403869628906, + "learning_rate": 4.841895604395605e-05, + "loss": 0.8368, + "step": 1151 + }, + { + "epoch": 3.1648351648351647, + "grad_norm": 15.258038520812988, + "learning_rate": 4.841758241758242e-05, + "loss": 1.2326, + "step": 1152 + }, + { + "epoch": 3.1675824175824174, + "grad_norm": 12.874332427978516, + "learning_rate": 4.8416208791208795e-05, + "loss": 0.7939, + "step": 1153 + }, + { + "epoch": 3.17032967032967, + "grad_norm": 14.585524559020996, + "learning_rate": 4.8414835164835165e-05, + "loss": 0.7611, + "step": 1154 + }, + { + "epoch": 3.173076923076923, + "grad_norm": 18.91487693786621, + "learning_rate": 4.841346153846154e-05, + "loss": 1.1917, + "step": 1155 + }, + { + "epoch": 3.1758241758241756, + "grad_norm": 14.925509452819824, + "learning_rate": 4.841208791208792e-05, + "loss": 0.6964, + "step": 1156 + }, + { + "epoch": 3.1785714285714284, + "grad_norm": 12.63805103302002, + "learning_rate": 4.841071428571429e-05, + "loss": 0.5219, + "step": 1157 + }, + { + "epoch": 3.181318681318681, + "grad_norm": 11.998065948486328, + "learning_rate": 4.8409340659340665e-05, + "loss": 0.6325, + "step": 1158 + }, + { + "epoch": 3.1840659340659343, + "grad_norm": 17.680740356445312, + "learning_rate": 4.8407967032967035e-05, + "loss": 0.662, + "step": 1159 + }, + { + "epoch": 3.186813186813187, + "grad_norm": 13.299527168273926, + "learning_rate": 4.840659340659341e-05, + "loss": 0.5651, + "step": 1160 + }, + { + "epoch": 3.1895604395604398, + "grad_norm": 16.639663696289062, + "learning_rate": 4.840521978021978e-05, + "loss": 0.7992, + "step": 1161 + }, + { + "epoch": 3.1923076923076925, + "grad_norm": 11.060380935668945, + "learning_rate": 4.840384615384615e-05, + "loss": 0.5578, + "step": 1162 + }, + { + "epoch": 3.1950549450549453, + "grad_norm": 14.831205368041992, + "learning_rate": 4.840247252747253e-05, + "loss": 0.7869, + "step": 1163 + }, + { + "epoch": 3.197802197802198, + "grad_norm": 14.577446937561035, + "learning_rate": 4.84010989010989e-05, + "loss": 0.7544, + "step": 1164 + }, + { + "epoch": 3.2005494505494507, + "grad_norm": 20.907428741455078, + "learning_rate": 4.8399725274725275e-05, + "loss": 1.0425, + "step": 1165 + }, + { + "epoch": 3.2032967032967035, + "grad_norm": 12.149118423461914, + "learning_rate": 4.839835164835165e-05, + "loss": 0.5451, + "step": 1166 + }, + { + "epoch": 3.206043956043956, + "grad_norm": 20.95671844482422, + "learning_rate": 4.839697802197802e-05, + "loss": 0.9222, + "step": 1167 + }, + { + "epoch": 3.208791208791209, + "grad_norm": 17.353330612182617, + "learning_rate": 4.83956043956044e-05, + "loss": 0.9978, + "step": 1168 + }, + { + "epoch": 3.2115384615384617, + "grad_norm": 15.686883926391602, + "learning_rate": 4.839423076923077e-05, + "loss": 0.8216, + "step": 1169 + }, + { + "epoch": 3.2142857142857144, + "grad_norm": 14.507176399230957, + "learning_rate": 4.8392857142857146e-05, + "loss": 0.6155, + "step": 1170 + }, + { + "epoch": 3.217032967032967, + "grad_norm": 18.233150482177734, + "learning_rate": 4.839148351648352e-05, + "loss": 0.9614, + "step": 1171 + }, + { + "epoch": 3.21978021978022, + "grad_norm": 10.874823570251465, + "learning_rate": 4.839010989010989e-05, + "loss": 0.5511, + "step": 1172 + }, + { + "epoch": 3.2225274725274726, + "grad_norm": 13.081624984741211, + "learning_rate": 4.838873626373627e-05, + "loss": 0.8533, + "step": 1173 + }, + { + "epoch": 3.2252747252747254, + "grad_norm": 9.994791984558105, + "learning_rate": 4.838736263736264e-05, + "loss": 0.5188, + "step": 1174 + }, + { + "epoch": 3.228021978021978, + "grad_norm": 12.86158561706543, + "learning_rate": 4.8385989010989016e-05, + "loss": 0.5291, + "step": 1175 + }, + { + "epoch": 3.230769230769231, + "grad_norm": 15.190009117126465, + "learning_rate": 4.8384615384615386e-05, + "loss": 0.693, + "step": 1176 + }, + { + "epoch": 3.2335164835164836, + "grad_norm": 9.62110710144043, + "learning_rate": 4.8383241758241756e-05, + "loss": 0.3785, + "step": 1177 + }, + { + "epoch": 3.2362637362637363, + "grad_norm": 17.347665786743164, + "learning_rate": 4.838186813186813e-05, + "loss": 0.6584, + "step": 1178 + }, + { + "epoch": 3.239010989010989, + "grad_norm": 15.132720947265625, + "learning_rate": 4.83804945054945e-05, + "loss": 0.7674, + "step": 1179 + }, + { + "epoch": 3.241758241758242, + "grad_norm": 14.741386413574219, + "learning_rate": 4.837912087912088e-05, + "loss": 0.6745, + "step": 1180 + }, + { + "epoch": 3.2445054945054945, + "grad_norm": 15.378877639770508, + "learning_rate": 4.837774725274726e-05, + "loss": 0.999, + "step": 1181 + }, + { + "epoch": 3.2472527472527473, + "grad_norm": 13.05073356628418, + "learning_rate": 4.837637362637363e-05, + "loss": 0.6368, + "step": 1182 + }, + { + "epoch": 3.25, + "grad_norm": 14.593989372253418, + "learning_rate": 4.8375000000000004e-05, + "loss": 1.0311, + "step": 1183 + }, + { + "epoch": 3.2527472527472527, + "grad_norm": 19.294221878051758, + "learning_rate": 4.8373626373626374e-05, + "loss": 1.4575, + "step": 1184 + }, + { + "epoch": 3.2554945054945055, + "grad_norm": 13.958815574645996, + "learning_rate": 4.837225274725275e-05, + "loss": 0.6451, + "step": 1185 + }, + { + "epoch": 3.258241758241758, + "grad_norm": 18.844362258911133, + "learning_rate": 4.837087912087913e-05, + "loss": 0.6917, + "step": 1186 + }, + { + "epoch": 3.260989010989011, + "grad_norm": 13.644393920898438, + "learning_rate": 4.83695054945055e-05, + "loss": 0.6005, + "step": 1187 + }, + { + "epoch": 3.2637362637362637, + "grad_norm": 10.69964599609375, + "learning_rate": 4.8368131868131874e-05, + "loss": 0.4903, + "step": 1188 + }, + { + "epoch": 3.2664835164835164, + "grad_norm": 14.437705993652344, + "learning_rate": 4.8366758241758244e-05, + "loss": 0.8799, + "step": 1189 + }, + { + "epoch": 3.269230769230769, + "grad_norm": 15.603113174438477, + "learning_rate": 4.836538461538462e-05, + "loss": 0.7439, + "step": 1190 + }, + { + "epoch": 3.271978021978022, + "grad_norm": 18.857263565063477, + "learning_rate": 4.836401098901099e-05, + "loss": 1.0221, + "step": 1191 + }, + { + "epoch": 3.2747252747252746, + "grad_norm": 14.5534029006958, + "learning_rate": 4.836263736263736e-05, + "loss": 0.8057, + "step": 1192 + }, + { + "epoch": 3.2774725274725274, + "grad_norm": 12.960893630981445, + "learning_rate": 4.836126373626374e-05, + "loss": 0.6219, + "step": 1193 + }, + { + "epoch": 3.28021978021978, + "grad_norm": 16.071012496948242, + "learning_rate": 4.835989010989011e-05, + "loss": 0.8234, + "step": 1194 + }, + { + "epoch": 3.282967032967033, + "grad_norm": 13.790521621704102, + "learning_rate": 4.8358516483516484e-05, + "loss": 0.838, + "step": 1195 + }, + { + "epoch": 3.2857142857142856, + "grad_norm": 12.101720809936523, + "learning_rate": 4.835714285714286e-05, + "loss": 0.6482, + "step": 1196 + }, + { + "epoch": 3.2884615384615383, + "grad_norm": 13.256159782409668, + "learning_rate": 4.835576923076923e-05, + "loss": 0.6222, + "step": 1197 + }, + { + "epoch": 3.291208791208791, + "grad_norm": 24.244722366333008, + "learning_rate": 4.835439560439561e-05, + "loss": 1.4346, + "step": 1198 + }, + { + "epoch": 3.293956043956044, + "grad_norm": 15.2078857421875, + "learning_rate": 4.835302197802198e-05, + "loss": 0.6843, + "step": 1199 + }, + { + "epoch": 3.2967032967032965, + "grad_norm": 16.671506881713867, + "learning_rate": 4.8351648351648355e-05, + "loss": 0.7753, + "step": 1200 + }, + { + "epoch": 3.2994505494505493, + "grad_norm": 12.735738754272461, + "learning_rate": 4.835027472527473e-05, + "loss": 0.862, + "step": 1201 + }, + { + "epoch": 3.302197802197802, + "grad_norm": 18.082733154296875, + "learning_rate": 4.83489010989011e-05, + "loss": 0.9954, + "step": 1202 + }, + { + "epoch": 3.3049450549450547, + "grad_norm": 18.65884780883789, + "learning_rate": 4.834752747252748e-05, + "loss": 1.1206, + "step": 1203 + }, + { + "epoch": 3.3076923076923075, + "grad_norm": 14.449830055236816, + "learning_rate": 4.834615384615385e-05, + "loss": 0.738, + "step": 1204 + }, + { + "epoch": 3.3104395604395602, + "grad_norm": 13.801289558410645, + "learning_rate": 4.8344780219780225e-05, + "loss": 0.6833, + "step": 1205 + }, + { + "epoch": 3.313186813186813, + "grad_norm": 14.841197967529297, + "learning_rate": 4.8343406593406595e-05, + "loss": 0.8519, + "step": 1206 + }, + { + "epoch": 3.3159340659340657, + "grad_norm": 15.077213287353516, + "learning_rate": 4.8342032967032965e-05, + "loss": 0.9214, + "step": 1207 + }, + { + "epoch": 3.3186813186813184, + "grad_norm": 19.308147430419922, + "learning_rate": 4.834065934065934e-05, + "loss": 0.7892, + "step": 1208 + }, + { + "epoch": 3.3214285714285716, + "grad_norm": 14.178460121154785, + "learning_rate": 4.833928571428571e-05, + "loss": 0.6337, + "step": 1209 + }, + { + "epoch": 3.3241758241758244, + "grad_norm": 7.713986396789551, + "learning_rate": 4.833791208791209e-05, + "loss": 0.3276, + "step": 1210 + }, + { + "epoch": 3.326923076923077, + "grad_norm": 13.100558280944824, + "learning_rate": 4.8336538461538466e-05, + "loss": 0.7177, + "step": 1211 + }, + { + "epoch": 3.32967032967033, + "grad_norm": 13.76345443725586, + "learning_rate": 4.8335164835164836e-05, + "loss": 0.7918, + "step": 1212 + }, + { + "epoch": 3.3324175824175826, + "grad_norm": 12.137063026428223, + "learning_rate": 4.833379120879121e-05, + "loss": 0.7558, + "step": 1213 + }, + { + "epoch": 3.3351648351648353, + "grad_norm": 15.421720504760742, + "learning_rate": 4.833241758241758e-05, + "loss": 0.8239, + "step": 1214 + }, + { + "epoch": 3.337912087912088, + "grad_norm": 16.41057014465332, + "learning_rate": 4.833104395604396e-05, + "loss": 1.2024, + "step": 1215 + }, + { + "epoch": 3.340659340659341, + "grad_norm": 17.112152099609375, + "learning_rate": 4.8329670329670336e-05, + "loss": 1.0362, + "step": 1216 + }, + { + "epoch": 3.3434065934065935, + "grad_norm": 15.274739265441895, + "learning_rate": 4.8328296703296706e-05, + "loss": 0.903, + "step": 1217 + }, + { + "epoch": 3.3461538461538463, + "grad_norm": 11.826335906982422, + "learning_rate": 4.832692307692308e-05, + "loss": 0.5696, + "step": 1218 + }, + { + "epoch": 3.348901098901099, + "grad_norm": 14.447942733764648, + "learning_rate": 4.832554945054945e-05, + "loss": 0.9496, + "step": 1219 + }, + { + "epoch": 3.3516483516483517, + "grad_norm": 12.058658599853516, + "learning_rate": 4.832417582417583e-05, + "loss": 0.7159, + "step": 1220 + }, + { + "epoch": 3.3543956043956045, + "grad_norm": 11.827282905578613, + "learning_rate": 4.83228021978022e-05, + "loss": 0.8389, + "step": 1221 + }, + { + "epoch": 3.357142857142857, + "grad_norm": 13.370779037475586, + "learning_rate": 4.832142857142857e-05, + "loss": 0.5148, + "step": 1222 + }, + { + "epoch": 3.35989010989011, + "grad_norm": 16.603832244873047, + "learning_rate": 4.8320054945054947e-05, + "loss": 0.8726, + "step": 1223 + }, + { + "epoch": 3.3626373626373627, + "grad_norm": 13.112849235534668, + "learning_rate": 4.831868131868132e-05, + "loss": 0.5468, + "step": 1224 + }, + { + "epoch": 3.3653846153846154, + "grad_norm": 14.027828216552734, + "learning_rate": 4.8317307692307693e-05, + "loss": 1.0985, + "step": 1225 + }, + { + "epoch": 3.368131868131868, + "grad_norm": 12.093584060668945, + "learning_rate": 4.831593406593407e-05, + "loss": 0.5399, + "step": 1226 + }, + { + "epoch": 3.370879120879121, + "grad_norm": 18.217235565185547, + "learning_rate": 4.831456043956044e-05, + "loss": 1.1198, + "step": 1227 + }, + { + "epoch": 3.3736263736263736, + "grad_norm": 17.745540618896484, + "learning_rate": 4.831318681318682e-05, + "loss": 1.2896, + "step": 1228 + }, + { + "epoch": 3.3763736263736264, + "grad_norm": 16.913711547851562, + "learning_rate": 4.831181318681319e-05, + "loss": 0.9336, + "step": 1229 + }, + { + "epoch": 3.379120879120879, + "grad_norm": 14.753660202026367, + "learning_rate": 4.8310439560439564e-05, + "loss": 0.7375, + "step": 1230 + }, + { + "epoch": 3.381868131868132, + "grad_norm": 18.052886962890625, + "learning_rate": 4.830906593406594e-05, + "loss": 1.1606, + "step": 1231 + }, + { + "epoch": 3.3846153846153846, + "grad_norm": 11.979866027832031, + "learning_rate": 4.830769230769231e-05, + "loss": 0.5183, + "step": 1232 + }, + { + "epoch": 3.3873626373626373, + "grad_norm": 12.62882137298584, + "learning_rate": 4.830631868131869e-05, + "loss": 0.5449, + "step": 1233 + }, + { + "epoch": 3.39010989010989, + "grad_norm": 15.449196815490723, + "learning_rate": 4.830494505494506e-05, + "loss": 0.7079, + "step": 1234 + }, + { + "epoch": 3.392857142857143, + "grad_norm": 15.779882431030273, + "learning_rate": 4.8303571428571434e-05, + "loss": 0.8158, + "step": 1235 + }, + { + "epoch": 3.3956043956043955, + "grad_norm": 13.18478012084961, + "learning_rate": 4.8302197802197804e-05, + "loss": 0.4894, + "step": 1236 + }, + { + "epoch": 3.3983516483516483, + "grad_norm": 14.392644882202148, + "learning_rate": 4.8300824175824174e-05, + "loss": 0.7399, + "step": 1237 + }, + { + "epoch": 3.401098901098901, + "grad_norm": 15.341130256652832, + "learning_rate": 4.829945054945055e-05, + "loss": 0.966, + "step": 1238 + }, + { + "epoch": 3.4038461538461537, + "grad_norm": 18.89992904663086, + "learning_rate": 4.829807692307692e-05, + "loss": 0.9403, + "step": 1239 + }, + { + "epoch": 3.4065934065934065, + "grad_norm": 10.33676815032959, + "learning_rate": 4.82967032967033e-05, + "loss": 0.4479, + "step": 1240 + }, + { + "epoch": 3.409340659340659, + "grad_norm": 10.18825626373291, + "learning_rate": 4.8295329670329675e-05, + "loss": 0.4748, + "step": 1241 + }, + { + "epoch": 3.412087912087912, + "grad_norm": 12.433100700378418, + "learning_rate": 4.8293956043956045e-05, + "loss": 0.7188, + "step": 1242 + }, + { + "epoch": 3.4148351648351647, + "grad_norm": 14.682637214660645, + "learning_rate": 4.829258241758242e-05, + "loss": 0.6197, + "step": 1243 + }, + { + "epoch": 3.4175824175824174, + "grad_norm": 13.825569152832031, + "learning_rate": 4.829120879120879e-05, + "loss": 0.5908, + "step": 1244 + }, + { + "epoch": 3.42032967032967, + "grad_norm": 13.13470458984375, + "learning_rate": 4.828983516483517e-05, + "loss": 0.5036, + "step": 1245 + }, + { + "epoch": 3.423076923076923, + "grad_norm": 13.724502563476562, + "learning_rate": 4.8288461538461545e-05, + "loss": 0.7042, + "step": 1246 + }, + { + "epoch": 3.4258241758241756, + "grad_norm": 17.073484420776367, + "learning_rate": 4.8287087912087915e-05, + "loss": 0.6148, + "step": 1247 + }, + { + "epoch": 3.4285714285714284, + "grad_norm": 16.857357025146484, + "learning_rate": 4.828571428571429e-05, + "loss": 0.8577, + "step": 1248 + }, + { + "epoch": 3.4313186813186816, + "grad_norm": 12.831147193908691, + "learning_rate": 4.828434065934066e-05, + "loss": 0.6915, + "step": 1249 + }, + { + "epoch": 3.4340659340659343, + "grad_norm": 14.249072074890137, + "learning_rate": 4.828296703296704e-05, + "loss": 0.5921, + "step": 1250 + }, + { + "epoch": 3.436813186813187, + "grad_norm": 14.722885131835938, + "learning_rate": 4.828159340659341e-05, + "loss": 0.6785, + "step": 1251 + }, + { + "epoch": 3.4395604395604398, + "grad_norm": 12.26345157623291, + "learning_rate": 4.828021978021978e-05, + "loss": 0.578, + "step": 1252 + }, + { + "epoch": 3.4423076923076925, + "grad_norm": 20.84876823425293, + "learning_rate": 4.8278846153846156e-05, + "loss": 0.9767, + "step": 1253 + }, + { + "epoch": 3.4450549450549453, + "grad_norm": 15.157804489135742, + "learning_rate": 4.8277472527472526e-05, + "loss": 0.7849, + "step": 1254 + }, + { + "epoch": 3.447802197802198, + "grad_norm": 15.803071022033691, + "learning_rate": 4.82760989010989e-05, + "loss": 0.6102, + "step": 1255 + }, + { + "epoch": 3.4505494505494507, + "grad_norm": 13.338933944702148, + "learning_rate": 4.827472527472527e-05, + "loss": 0.6912, + "step": 1256 + }, + { + "epoch": 3.4532967032967035, + "grad_norm": 9.150359153747559, + "learning_rate": 4.827335164835165e-05, + "loss": 0.4655, + "step": 1257 + }, + { + "epoch": 3.456043956043956, + "grad_norm": 14.883206367492676, + "learning_rate": 4.8271978021978026e-05, + "loss": 0.6033, + "step": 1258 + }, + { + "epoch": 3.458791208791209, + "grad_norm": 15.49027156829834, + "learning_rate": 4.8270604395604396e-05, + "loss": 0.9499, + "step": 1259 + }, + { + "epoch": 3.4615384615384617, + "grad_norm": 18.53816032409668, + "learning_rate": 4.826923076923077e-05, + "loss": 0.8747, + "step": 1260 + }, + { + "epoch": 3.4642857142857144, + "grad_norm": 15.73111629486084, + "learning_rate": 4.826785714285714e-05, + "loss": 0.8619, + "step": 1261 + }, + { + "epoch": 3.467032967032967, + "grad_norm": 13.561824798583984, + "learning_rate": 4.826648351648352e-05, + "loss": 0.8519, + "step": 1262 + }, + { + "epoch": 3.46978021978022, + "grad_norm": 11.503890991210938, + "learning_rate": 4.8265109890109896e-05, + "loss": 0.606, + "step": 1263 + }, + { + "epoch": 3.4725274725274726, + "grad_norm": 18.755945205688477, + "learning_rate": 4.8263736263736266e-05, + "loss": 0.826, + "step": 1264 + }, + { + "epoch": 3.4752747252747254, + "grad_norm": 15.679579734802246, + "learning_rate": 4.826236263736264e-05, + "loss": 0.8377, + "step": 1265 + }, + { + "epoch": 3.478021978021978, + "grad_norm": 9.727232933044434, + "learning_rate": 4.826098901098901e-05, + "loss": 0.3805, + "step": 1266 + }, + { + "epoch": 3.480769230769231, + "grad_norm": 18.258968353271484, + "learning_rate": 4.825961538461538e-05, + "loss": 0.7181, + "step": 1267 + }, + { + "epoch": 3.4835164835164836, + "grad_norm": 16.888011932373047, + "learning_rate": 4.825824175824176e-05, + "loss": 0.8987, + "step": 1268 + }, + { + "epoch": 3.4862637362637363, + "grad_norm": 18.486923217773438, + "learning_rate": 4.825686813186813e-05, + "loss": 0.9493, + "step": 1269 + }, + { + "epoch": 3.489010989010989, + "grad_norm": 12.772636413574219, + "learning_rate": 4.825549450549451e-05, + "loss": 0.544, + "step": 1270 + }, + { + "epoch": 3.491758241758242, + "grad_norm": 15.15808391571045, + "learning_rate": 4.825412087912088e-05, + "loss": 0.8774, + "step": 1271 + }, + { + "epoch": 3.4945054945054945, + "grad_norm": 15.661169052124023, + "learning_rate": 4.8252747252747254e-05, + "loss": 0.9947, + "step": 1272 + }, + { + "epoch": 3.4972527472527473, + "grad_norm": 12.26929759979248, + "learning_rate": 4.825137362637363e-05, + "loss": 0.5273, + "step": 1273 + }, + { + "epoch": 3.5, + "grad_norm": 15.564992904663086, + "learning_rate": 4.825e-05, + "loss": 0.967, + "step": 1274 + }, + { + "epoch": 3.5027472527472527, + "grad_norm": 16.30723762512207, + "learning_rate": 4.824862637362638e-05, + "loss": 0.8084, + "step": 1275 + }, + { + "epoch": 3.5054945054945055, + "grad_norm": 19.143409729003906, + "learning_rate": 4.824725274725275e-05, + "loss": 0.9737, + "step": 1276 + }, + { + "epoch": 3.508241758241758, + "grad_norm": 15.151655197143555, + "learning_rate": 4.8245879120879124e-05, + "loss": 0.6225, + "step": 1277 + }, + { + "epoch": 3.510989010989011, + "grad_norm": 15.40898323059082, + "learning_rate": 4.82445054945055e-05, + "loss": 0.7012, + "step": 1278 + }, + { + "epoch": 3.5137362637362637, + "grad_norm": 14.926325798034668, + "learning_rate": 4.824313186813187e-05, + "loss": 0.6413, + "step": 1279 + }, + { + "epoch": 3.5164835164835164, + "grad_norm": 18.871315002441406, + "learning_rate": 4.824175824175825e-05, + "loss": 1.1711, + "step": 1280 + }, + { + "epoch": 3.519230769230769, + "grad_norm": 9.637480735778809, + "learning_rate": 4.824038461538462e-05, + "loss": 0.3556, + "step": 1281 + }, + { + "epoch": 3.521978021978022, + "grad_norm": 19.37165641784668, + "learning_rate": 4.823901098901099e-05, + "loss": 1.3895, + "step": 1282 + }, + { + "epoch": 3.5247252747252746, + "grad_norm": 14.188552856445312, + "learning_rate": 4.8237637362637365e-05, + "loss": 0.5623, + "step": 1283 + }, + { + "epoch": 3.5274725274725274, + "grad_norm": 12.379385948181152, + "learning_rate": 4.8236263736263735e-05, + "loss": 0.6798, + "step": 1284 + }, + { + "epoch": 3.53021978021978, + "grad_norm": 12.34742546081543, + "learning_rate": 4.823489010989011e-05, + "loss": 0.6339, + "step": 1285 + }, + { + "epoch": 3.532967032967033, + "grad_norm": 13.529550552368164, + "learning_rate": 4.823351648351648e-05, + "loss": 0.661, + "step": 1286 + }, + { + "epoch": 3.5357142857142856, + "grad_norm": 18.705839157104492, + "learning_rate": 4.823214285714286e-05, + "loss": 0.9208, + "step": 1287 + }, + { + "epoch": 3.5384615384615383, + "grad_norm": 15.39837646484375, + "learning_rate": 4.8230769230769235e-05, + "loss": 0.6853, + "step": 1288 + }, + { + "epoch": 3.541208791208791, + "grad_norm": 15.468221664428711, + "learning_rate": 4.8229395604395605e-05, + "loss": 0.7654, + "step": 1289 + }, + { + "epoch": 3.543956043956044, + "grad_norm": 19.485570907592773, + "learning_rate": 4.822802197802198e-05, + "loss": 1.0069, + "step": 1290 + }, + { + "epoch": 3.5467032967032965, + "grad_norm": 14.593085289001465, + "learning_rate": 4.822664835164835e-05, + "loss": 0.6282, + "step": 1291 + }, + { + "epoch": 3.5494505494505493, + "grad_norm": 21.657278060913086, + "learning_rate": 4.822527472527473e-05, + "loss": 0.9938, + "step": 1292 + }, + { + "epoch": 3.552197802197802, + "grad_norm": 12.108549118041992, + "learning_rate": 4.8223901098901105e-05, + "loss": 0.6531, + "step": 1293 + }, + { + "epoch": 3.5549450549450547, + "grad_norm": 17.616586685180664, + "learning_rate": 4.8222527472527475e-05, + "loss": 0.8706, + "step": 1294 + }, + { + "epoch": 3.5576923076923075, + "grad_norm": 12.579195022583008, + "learning_rate": 4.822115384615385e-05, + "loss": 0.5674, + "step": 1295 + }, + { + "epoch": 3.5604395604395602, + "grad_norm": 13.979180335998535, + "learning_rate": 4.821978021978022e-05, + "loss": 0.6573, + "step": 1296 + }, + { + "epoch": 3.563186813186813, + "grad_norm": 20.26894760131836, + "learning_rate": 4.821840659340659e-05, + "loss": 0.9795, + "step": 1297 + }, + { + "epoch": 3.5659340659340657, + "grad_norm": 18.317829132080078, + "learning_rate": 4.821703296703297e-05, + "loss": 0.9972, + "step": 1298 + }, + { + "epoch": 3.5686813186813184, + "grad_norm": 18.55682373046875, + "learning_rate": 4.821565934065934e-05, + "loss": 1.1594, + "step": 1299 + }, + { + "epoch": 3.571428571428571, + "grad_norm": 15.770116806030273, + "learning_rate": 4.8214285714285716e-05, + "loss": 0.7081, + "step": 1300 + }, + { + "epoch": 3.574175824175824, + "grad_norm": 15.487269401550293, + "learning_rate": 4.8212912087912086e-05, + "loss": 0.6542, + "step": 1301 + }, + { + "epoch": 3.5769230769230766, + "grad_norm": 14.441583633422852, + "learning_rate": 4.821153846153846e-05, + "loss": 0.7513, + "step": 1302 + }, + { + "epoch": 3.57967032967033, + "grad_norm": 17.108701705932617, + "learning_rate": 4.821016483516484e-05, + "loss": 1.0197, + "step": 1303 + }, + { + "epoch": 3.5824175824175826, + "grad_norm": 15.231244087219238, + "learning_rate": 4.820879120879121e-05, + "loss": 1.1074, + "step": 1304 + }, + { + "epoch": 3.5851648351648353, + "grad_norm": 13.391681671142578, + "learning_rate": 4.8207417582417586e-05, + "loss": 0.6853, + "step": 1305 + }, + { + "epoch": 3.587912087912088, + "grad_norm": 10.506162643432617, + "learning_rate": 4.8206043956043956e-05, + "loss": 0.4698, + "step": 1306 + }, + { + "epoch": 3.590659340659341, + "grad_norm": 15.66284465789795, + "learning_rate": 4.820467032967033e-05, + "loss": 0.9547, + "step": 1307 + }, + { + "epoch": 3.5934065934065935, + "grad_norm": 17.16551399230957, + "learning_rate": 4.820329670329671e-05, + "loss": 0.8495, + "step": 1308 + }, + { + "epoch": 3.5961538461538463, + "grad_norm": 13.512702941894531, + "learning_rate": 4.820192307692308e-05, + "loss": 0.6387, + "step": 1309 + }, + { + "epoch": 3.598901098901099, + "grad_norm": 14.282268524169922, + "learning_rate": 4.820054945054946e-05, + "loss": 0.8, + "step": 1310 + }, + { + "epoch": 3.6016483516483517, + "grad_norm": 13.751169204711914, + "learning_rate": 4.819917582417583e-05, + "loss": 0.5054, + "step": 1311 + }, + { + "epoch": 3.6043956043956045, + "grad_norm": 9.957740783691406, + "learning_rate": 4.81978021978022e-05, + "loss": 0.5194, + "step": 1312 + }, + { + "epoch": 3.607142857142857, + "grad_norm": 14.30151081085205, + "learning_rate": 4.8196428571428574e-05, + "loss": 0.7094, + "step": 1313 + }, + { + "epoch": 3.60989010989011, + "grad_norm": 11.625187873840332, + "learning_rate": 4.8195054945054944e-05, + "loss": 0.4798, + "step": 1314 + }, + { + "epoch": 3.6126373626373627, + "grad_norm": 17.948307037353516, + "learning_rate": 4.819368131868132e-05, + "loss": 0.9925, + "step": 1315 + }, + { + "epoch": 3.6153846153846154, + "grad_norm": 9.962382316589355, + "learning_rate": 4.819230769230769e-05, + "loss": 0.571, + "step": 1316 + }, + { + "epoch": 3.618131868131868, + "grad_norm": 20.917722702026367, + "learning_rate": 4.819093406593407e-05, + "loss": 1.0289, + "step": 1317 + }, + { + "epoch": 3.620879120879121, + "grad_norm": 16.51538848876953, + "learning_rate": 4.8189560439560444e-05, + "loss": 1.0589, + "step": 1318 + }, + { + "epoch": 3.6236263736263736, + "grad_norm": 11.519105911254883, + "learning_rate": 4.8188186813186814e-05, + "loss": 0.4925, + "step": 1319 + }, + { + "epoch": 3.6263736263736264, + "grad_norm": 13.783053398132324, + "learning_rate": 4.818681318681319e-05, + "loss": 0.5681, + "step": 1320 + }, + { + "epoch": 3.629120879120879, + "grad_norm": 15.365880012512207, + "learning_rate": 4.818543956043956e-05, + "loss": 1.0771, + "step": 1321 + }, + { + "epoch": 3.631868131868132, + "grad_norm": 15.969018936157227, + "learning_rate": 4.818406593406594e-05, + "loss": 0.8776, + "step": 1322 + }, + { + "epoch": 3.6346153846153846, + "grad_norm": 15.3104887008667, + "learning_rate": 4.8182692307692314e-05, + "loss": 0.7529, + "step": 1323 + }, + { + "epoch": 3.6373626373626373, + "grad_norm": 14.725351333618164, + "learning_rate": 4.8181318681318684e-05, + "loss": 0.8015, + "step": 1324 + }, + { + "epoch": 3.64010989010989, + "grad_norm": 10.966792106628418, + "learning_rate": 4.817994505494506e-05, + "loss": 0.4812, + "step": 1325 + }, + { + "epoch": 3.642857142857143, + "grad_norm": 16.721525192260742, + "learning_rate": 4.817857142857143e-05, + "loss": 0.7048, + "step": 1326 + }, + { + "epoch": 3.6456043956043955, + "grad_norm": 13.857767105102539, + "learning_rate": 4.81771978021978e-05, + "loss": 0.5983, + "step": 1327 + }, + { + "epoch": 3.6483516483516483, + "grad_norm": 16.979482650756836, + "learning_rate": 4.817582417582418e-05, + "loss": 0.9577, + "step": 1328 + }, + { + "epoch": 3.651098901098901, + "grad_norm": 15.473302841186523, + "learning_rate": 4.817445054945055e-05, + "loss": 0.6805, + "step": 1329 + }, + { + "epoch": 3.6538461538461537, + "grad_norm": 18.40723419189453, + "learning_rate": 4.8173076923076925e-05, + "loss": 1.0423, + "step": 1330 + }, + { + "epoch": 3.6565934065934065, + "grad_norm": 12.232446670532227, + "learning_rate": 4.8171703296703295e-05, + "loss": 0.5945, + "step": 1331 + }, + { + "epoch": 3.659340659340659, + "grad_norm": 15.811627388000488, + "learning_rate": 4.817032967032967e-05, + "loss": 0.7751, + "step": 1332 + }, + { + "epoch": 3.662087912087912, + "grad_norm": 13.699906349182129, + "learning_rate": 4.816895604395605e-05, + "loss": 0.6767, + "step": 1333 + }, + { + "epoch": 3.6648351648351647, + "grad_norm": 17.763263702392578, + "learning_rate": 4.816758241758242e-05, + "loss": 0.8074, + "step": 1334 + }, + { + "epoch": 3.6675824175824174, + "grad_norm": 15.15144157409668, + "learning_rate": 4.8166208791208795e-05, + "loss": 0.6931, + "step": 1335 + }, + { + "epoch": 3.67032967032967, + "grad_norm": 17.358375549316406, + "learning_rate": 4.8164835164835165e-05, + "loss": 0.9663, + "step": 1336 + }, + { + "epoch": 3.6730769230769234, + "grad_norm": 10.852611541748047, + "learning_rate": 4.816346153846154e-05, + "loss": 0.45, + "step": 1337 + }, + { + "epoch": 3.675824175824176, + "grad_norm": 13.556015014648438, + "learning_rate": 4.816208791208792e-05, + "loss": 0.5012, + "step": 1338 + }, + { + "epoch": 3.678571428571429, + "grad_norm": 14.034388542175293, + "learning_rate": 4.816071428571429e-05, + "loss": 0.8268, + "step": 1339 + }, + { + "epoch": 3.6813186813186816, + "grad_norm": 16.7539119720459, + "learning_rate": 4.8159340659340666e-05, + "loss": 0.6049, + "step": 1340 + }, + { + "epoch": 3.6840659340659343, + "grad_norm": 13.25749683380127, + "learning_rate": 4.8157967032967036e-05, + "loss": 0.6695, + "step": 1341 + }, + { + "epoch": 3.686813186813187, + "grad_norm": 13.178590774536133, + "learning_rate": 4.8156593406593406e-05, + "loss": 0.6456, + "step": 1342 + }, + { + "epoch": 3.6895604395604398, + "grad_norm": 13.628046989440918, + "learning_rate": 4.815521978021978e-05, + "loss": 0.8189, + "step": 1343 + }, + { + "epoch": 3.6923076923076925, + "grad_norm": 12.64569091796875, + "learning_rate": 4.815384615384615e-05, + "loss": 0.7395, + "step": 1344 + }, + { + "epoch": 3.6950549450549453, + "grad_norm": 12.664226531982422, + "learning_rate": 4.815247252747253e-05, + "loss": 0.8615, + "step": 1345 + }, + { + "epoch": 3.697802197802198, + "grad_norm": 11.553495407104492, + "learning_rate": 4.81510989010989e-05, + "loss": 0.4899, + "step": 1346 + }, + { + "epoch": 3.7005494505494507, + "grad_norm": 15.371725082397461, + "learning_rate": 4.8149725274725276e-05, + "loss": 0.5737, + "step": 1347 + }, + { + "epoch": 3.7032967032967035, + "grad_norm": 19.813180923461914, + "learning_rate": 4.814835164835165e-05, + "loss": 1.1468, + "step": 1348 + }, + { + "epoch": 3.706043956043956, + "grad_norm": 16.259992599487305, + "learning_rate": 4.814697802197802e-05, + "loss": 0.6923, + "step": 1349 + }, + { + "epoch": 3.708791208791209, + "grad_norm": 12.644952774047852, + "learning_rate": 4.81456043956044e-05, + "loss": 0.7223, + "step": 1350 + }, + { + "epoch": 3.7115384615384617, + "grad_norm": 20.978015899658203, + "learning_rate": 4.814423076923077e-05, + "loss": 0.9546, + "step": 1351 + }, + { + "epoch": 3.7142857142857144, + "grad_norm": 9.721485137939453, + "learning_rate": 4.8142857142857147e-05, + "loss": 0.4819, + "step": 1352 + }, + { + "epoch": 3.717032967032967, + "grad_norm": 12.163315773010254, + "learning_rate": 4.814148351648352e-05, + "loss": 0.5993, + "step": 1353 + }, + { + "epoch": 3.71978021978022, + "grad_norm": 15.205613136291504, + "learning_rate": 4.8140109890109893e-05, + "loss": 0.8229, + "step": 1354 + }, + { + "epoch": 3.7225274725274726, + "grad_norm": 8.823723793029785, + "learning_rate": 4.813873626373627e-05, + "loss": 0.4418, + "step": 1355 + }, + { + "epoch": 3.7252747252747254, + "grad_norm": 11.57104206085205, + "learning_rate": 4.813736263736264e-05, + "loss": 0.5762, + "step": 1356 + }, + { + "epoch": 3.728021978021978, + "grad_norm": 17.266786575317383, + "learning_rate": 4.813598901098901e-05, + "loss": 0.9032, + "step": 1357 + }, + { + "epoch": 3.730769230769231, + "grad_norm": 15.830473899841309, + "learning_rate": 4.813461538461539e-05, + "loss": 0.8728, + "step": 1358 + }, + { + "epoch": 3.7335164835164836, + "grad_norm": 16.171005249023438, + "learning_rate": 4.813324175824176e-05, + "loss": 0.7816, + "step": 1359 + }, + { + "epoch": 3.7362637362637363, + "grad_norm": 16.851762771606445, + "learning_rate": 4.8131868131868134e-05, + "loss": 0.9027, + "step": 1360 + }, + { + "epoch": 3.739010989010989, + "grad_norm": 16.940916061401367, + "learning_rate": 4.8130494505494504e-05, + "loss": 0.9008, + "step": 1361 + }, + { + "epoch": 3.741758241758242, + "grad_norm": 18.19738006591797, + "learning_rate": 4.812912087912088e-05, + "loss": 0.889, + "step": 1362 + }, + { + "epoch": 3.7445054945054945, + "grad_norm": 15.36471176147461, + "learning_rate": 4.812774725274726e-05, + "loss": 0.9799, + "step": 1363 + }, + { + "epoch": 3.7472527472527473, + "grad_norm": 14.422201156616211, + "learning_rate": 4.812637362637363e-05, + "loss": 0.5919, + "step": 1364 + }, + { + "epoch": 3.75, + "grad_norm": 15.361601829528809, + "learning_rate": 4.8125000000000004e-05, + "loss": 0.7473, + "step": 1365 + }, + { + "epoch": 3.7527472527472527, + "grad_norm": 12.962967872619629, + "learning_rate": 4.8123626373626374e-05, + "loss": 0.4848, + "step": 1366 + }, + { + "epoch": 3.7554945054945055, + "grad_norm": 20.38536262512207, + "learning_rate": 4.812225274725275e-05, + "loss": 0.9198, + "step": 1367 + }, + { + "epoch": 3.758241758241758, + "grad_norm": 7.851359844207764, + "learning_rate": 4.812087912087913e-05, + "loss": 0.2474, + "step": 1368 + }, + { + "epoch": 3.760989010989011, + "grad_norm": 16.12128257751465, + "learning_rate": 4.81195054945055e-05, + "loss": 1.0283, + "step": 1369 + }, + { + "epoch": 3.7637362637362637, + "grad_norm": 17.020816802978516, + "learning_rate": 4.8118131868131875e-05, + "loss": 0.737, + "step": 1370 + }, + { + "epoch": 3.7664835164835164, + "grad_norm": 11.809185028076172, + "learning_rate": 4.8116758241758245e-05, + "loss": 0.5735, + "step": 1371 + }, + { + "epoch": 3.769230769230769, + "grad_norm": 14.712368965148926, + "learning_rate": 4.8115384615384615e-05, + "loss": 0.6707, + "step": 1372 + }, + { + "epoch": 3.771978021978022, + "grad_norm": 16.102920532226562, + "learning_rate": 4.811401098901099e-05, + "loss": 0.7296, + "step": 1373 + }, + { + "epoch": 3.7747252747252746, + "grad_norm": 16.056015014648438, + "learning_rate": 4.811263736263736e-05, + "loss": 1.1894, + "step": 1374 + }, + { + "epoch": 3.7774725274725274, + "grad_norm": 16.204204559326172, + "learning_rate": 4.811126373626374e-05, + "loss": 1.1706, + "step": 1375 + }, + { + "epoch": 3.78021978021978, + "grad_norm": 11.363175392150879, + "learning_rate": 4.810989010989011e-05, + "loss": 0.5244, + "step": 1376 + }, + { + "epoch": 3.782967032967033, + "grad_norm": 16.46103858947754, + "learning_rate": 4.8108516483516485e-05, + "loss": 0.6192, + "step": 1377 + }, + { + "epoch": 3.7857142857142856, + "grad_norm": 9.789678573608398, + "learning_rate": 4.810714285714286e-05, + "loss": 0.4263, + "step": 1378 + }, + { + "epoch": 3.7884615384615383, + "grad_norm": 14.782917022705078, + "learning_rate": 4.810576923076923e-05, + "loss": 0.6265, + "step": 1379 + }, + { + "epoch": 3.791208791208791, + "grad_norm": 11.866228103637695, + "learning_rate": 4.810439560439561e-05, + "loss": 0.4974, + "step": 1380 + }, + { + "epoch": 3.793956043956044, + "grad_norm": 18.509510040283203, + "learning_rate": 4.810302197802198e-05, + "loss": 1.1931, + "step": 1381 + }, + { + "epoch": 3.7967032967032965, + "grad_norm": 17.953601837158203, + "learning_rate": 4.8101648351648356e-05, + "loss": 1.0674, + "step": 1382 + }, + { + "epoch": 3.7994505494505493, + "grad_norm": 15.519170761108398, + "learning_rate": 4.810027472527473e-05, + "loss": 0.7609, + "step": 1383 + }, + { + "epoch": 3.802197802197802, + "grad_norm": 13.6853666305542, + "learning_rate": 4.80989010989011e-05, + "loss": 0.7295, + "step": 1384 + }, + { + "epoch": 3.8049450549450547, + "grad_norm": 11.65126895904541, + "learning_rate": 4.809752747252748e-05, + "loss": 0.5644, + "step": 1385 + }, + { + "epoch": 3.8076923076923075, + "grad_norm": 14.902117729187012, + "learning_rate": 4.809615384615385e-05, + "loss": 0.6127, + "step": 1386 + }, + { + "epoch": 3.8104395604395602, + "grad_norm": 11.60632038116455, + "learning_rate": 4.809478021978022e-05, + "loss": 0.6338, + "step": 1387 + }, + { + "epoch": 3.813186813186813, + "grad_norm": 14.397490501403809, + "learning_rate": 4.8093406593406596e-05, + "loss": 0.8776, + "step": 1388 + }, + { + "epoch": 3.8159340659340657, + "grad_norm": 16.589527130126953, + "learning_rate": 4.8092032967032966e-05, + "loss": 0.844, + "step": 1389 + }, + { + "epoch": 3.8186813186813184, + "grad_norm": 12.329761505126953, + "learning_rate": 4.809065934065934e-05, + "loss": 0.8058, + "step": 1390 + }, + { + "epoch": 3.821428571428571, + "grad_norm": 11.197298049926758, + "learning_rate": 4.808928571428571e-05, + "loss": 0.6903, + "step": 1391 + }, + { + "epoch": 3.824175824175824, + "grad_norm": 9.710850715637207, + "learning_rate": 4.808791208791209e-05, + "loss": 0.4153, + "step": 1392 + }, + { + "epoch": 3.8269230769230766, + "grad_norm": 8.145963668823242, + "learning_rate": 4.8086538461538466e-05, + "loss": 0.3131, + "step": 1393 + }, + { + "epoch": 3.82967032967033, + "grad_norm": 14.110034942626953, + "learning_rate": 4.8085164835164836e-05, + "loss": 0.7374, + "step": 1394 + }, + { + "epoch": 3.8324175824175826, + "grad_norm": 10.351580619812012, + "learning_rate": 4.808379120879121e-05, + "loss": 0.4876, + "step": 1395 + }, + { + "epoch": 3.8351648351648353, + "grad_norm": 10.961955070495605, + "learning_rate": 4.808241758241758e-05, + "loss": 0.4862, + "step": 1396 + }, + { + "epoch": 3.837912087912088, + "grad_norm": 13.203309059143066, + "learning_rate": 4.808104395604396e-05, + "loss": 0.7487, + "step": 1397 + }, + { + "epoch": 3.840659340659341, + "grad_norm": 9.224257469177246, + "learning_rate": 4.807967032967034e-05, + "loss": 0.3991, + "step": 1398 + }, + { + "epoch": 3.8434065934065935, + "grad_norm": 9.745423316955566, + "learning_rate": 4.807829670329671e-05, + "loss": 0.4381, + "step": 1399 + }, + { + "epoch": 3.8461538461538463, + "grad_norm": 19.34194564819336, + "learning_rate": 4.8076923076923084e-05, + "loss": 1.0305, + "step": 1400 + }, + { + "epoch": 3.848901098901099, + "grad_norm": 22.608444213867188, + "learning_rate": 4.8075549450549454e-05, + "loss": 1.1821, + "step": 1401 + }, + { + "epoch": 3.8516483516483517, + "grad_norm": 17.924530029296875, + "learning_rate": 4.8074175824175824e-05, + "loss": 0.8564, + "step": 1402 + }, + { + "epoch": 3.8543956043956045, + "grad_norm": 17.772064208984375, + "learning_rate": 4.80728021978022e-05, + "loss": 0.9836, + "step": 1403 + }, + { + "epoch": 3.857142857142857, + "grad_norm": 9.778543472290039, + "learning_rate": 4.807142857142857e-05, + "loss": 0.3711, + "step": 1404 + }, + { + "epoch": 3.85989010989011, + "grad_norm": 16.235984802246094, + "learning_rate": 4.807005494505495e-05, + "loss": 0.8424, + "step": 1405 + }, + { + "epoch": 3.8626373626373627, + "grad_norm": 15.847322463989258, + "learning_rate": 4.806868131868132e-05, + "loss": 0.6637, + "step": 1406 + }, + { + "epoch": 3.8653846153846154, + "grad_norm": 15.300226211547852, + "learning_rate": 4.8067307692307694e-05, + "loss": 0.7799, + "step": 1407 + }, + { + "epoch": 3.868131868131868, + "grad_norm": 14.864086151123047, + "learning_rate": 4.806593406593407e-05, + "loss": 0.8034, + "step": 1408 + }, + { + "epoch": 3.870879120879121, + "grad_norm": 15.515146255493164, + "learning_rate": 4.806456043956044e-05, + "loss": 0.5731, + "step": 1409 + }, + { + "epoch": 3.8736263736263736, + "grad_norm": 12.493637084960938, + "learning_rate": 4.806318681318682e-05, + "loss": 0.5856, + "step": 1410 + }, + { + "epoch": 3.8763736263736264, + "grad_norm": 13.421157836914062, + "learning_rate": 4.806181318681319e-05, + "loss": 0.5196, + "step": 1411 + }, + { + "epoch": 3.879120879120879, + "grad_norm": 14.060749053955078, + "learning_rate": 4.8060439560439565e-05, + "loss": 0.7665, + "step": 1412 + }, + { + "epoch": 3.881868131868132, + "grad_norm": 16.031736373901367, + "learning_rate": 4.805906593406594e-05, + "loss": 0.8501, + "step": 1413 + }, + { + "epoch": 3.8846153846153846, + "grad_norm": 12.45368766784668, + "learning_rate": 4.805769230769231e-05, + "loss": 0.5853, + "step": 1414 + }, + { + "epoch": 3.8873626373626373, + "grad_norm": 14.566658020019531, + "learning_rate": 4.805631868131869e-05, + "loss": 0.7628, + "step": 1415 + }, + { + "epoch": 3.89010989010989, + "grad_norm": 11.574804306030273, + "learning_rate": 4.805494505494506e-05, + "loss": 0.4465, + "step": 1416 + }, + { + "epoch": 3.892857142857143, + "grad_norm": 17.25187110900879, + "learning_rate": 4.805357142857143e-05, + "loss": 0.6045, + "step": 1417 + }, + { + "epoch": 3.8956043956043955, + "grad_norm": 11.733257293701172, + "learning_rate": 4.8052197802197805e-05, + "loss": 0.3543, + "step": 1418 + }, + { + "epoch": 3.8983516483516483, + "grad_norm": 9.773578643798828, + "learning_rate": 4.8050824175824175e-05, + "loss": 0.4892, + "step": 1419 + }, + { + "epoch": 3.901098901098901, + "grad_norm": 15.930971145629883, + "learning_rate": 4.804945054945055e-05, + "loss": 0.7338, + "step": 1420 + }, + { + "epoch": 3.9038461538461537, + "grad_norm": 15.662665367126465, + "learning_rate": 4.804807692307692e-05, + "loss": 0.8603, + "step": 1421 + }, + { + "epoch": 3.9065934065934065, + "grad_norm": 18.781206130981445, + "learning_rate": 4.80467032967033e-05, + "loss": 1.2525, + "step": 1422 + }, + { + "epoch": 3.909340659340659, + "grad_norm": 17.226139068603516, + "learning_rate": 4.8045329670329675e-05, + "loss": 0.902, + "step": 1423 + }, + { + "epoch": 3.912087912087912, + "grad_norm": 10.728498458862305, + "learning_rate": 4.8043956043956045e-05, + "loss": 0.4983, + "step": 1424 + }, + { + "epoch": 3.9148351648351647, + "grad_norm": 13.884638786315918, + "learning_rate": 4.804258241758242e-05, + "loss": 0.5794, + "step": 1425 + }, + { + "epoch": 3.9175824175824174, + "grad_norm": 13.878242492675781, + "learning_rate": 4.804120879120879e-05, + "loss": 0.6467, + "step": 1426 + }, + { + "epoch": 3.92032967032967, + "grad_norm": 12.325819969177246, + "learning_rate": 4.803983516483517e-05, + "loss": 0.5583, + "step": 1427 + }, + { + "epoch": 3.9230769230769234, + "grad_norm": 12.896513938903809, + "learning_rate": 4.8038461538461546e-05, + "loss": 0.61, + "step": 1428 + }, + { + "epoch": 3.925824175824176, + "grad_norm": 12.677578926086426, + "learning_rate": 4.8037087912087916e-05, + "loss": 0.4543, + "step": 1429 + }, + { + "epoch": 3.928571428571429, + "grad_norm": 14.729846000671387, + "learning_rate": 4.803571428571429e-05, + "loss": 0.6935, + "step": 1430 + }, + { + "epoch": 3.9313186813186816, + "grad_norm": 11.240335464477539, + "learning_rate": 4.803434065934066e-05, + "loss": 0.5529, + "step": 1431 + }, + { + "epoch": 3.9340659340659343, + "grad_norm": 22.081485748291016, + "learning_rate": 4.803296703296703e-05, + "loss": 1.0576, + "step": 1432 + }, + { + "epoch": 3.936813186813187, + "grad_norm": 11.067687034606934, + "learning_rate": 4.803159340659341e-05, + "loss": 0.548, + "step": 1433 + }, + { + "epoch": 3.9395604395604398, + "grad_norm": 8.846476554870605, + "learning_rate": 4.803021978021978e-05, + "loss": 0.3473, + "step": 1434 + }, + { + "epoch": 3.9423076923076925, + "grad_norm": 15.148796081542969, + "learning_rate": 4.8028846153846156e-05, + "loss": 0.8459, + "step": 1435 + }, + { + "epoch": 3.9450549450549453, + "grad_norm": 13.67352294921875, + "learning_rate": 4.8027472527472526e-05, + "loss": 0.6612, + "step": 1436 + }, + { + "epoch": 3.947802197802198, + "grad_norm": 12.072436332702637, + "learning_rate": 4.80260989010989e-05, + "loss": 0.5468, + "step": 1437 + }, + { + "epoch": 3.9505494505494507, + "grad_norm": 11.241681098937988, + "learning_rate": 4.802472527472528e-05, + "loss": 0.4575, + "step": 1438 + }, + { + "epoch": 3.9532967032967035, + "grad_norm": 15.002403259277344, + "learning_rate": 4.802335164835165e-05, + "loss": 0.8726, + "step": 1439 + }, + { + "epoch": 3.956043956043956, + "grad_norm": 15.456604957580566, + "learning_rate": 4.802197802197803e-05, + "loss": 0.5883, + "step": 1440 + }, + { + "epoch": 3.958791208791209, + "grad_norm": 11.451822280883789, + "learning_rate": 4.80206043956044e-05, + "loss": 0.5925, + "step": 1441 + }, + { + "epoch": 3.9615384615384617, + "grad_norm": 12.624055862426758, + "learning_rate": 4.8019230769230774e-05, + "loss": 0.7813, + "step": 1442 + }, + { + "epoch": 3.9642857142857144, + "grad_norm": 12.070456504821777, + "learning_rate": 4.801785714285715e-05, + "loss": 0.3926, + "step": 1443 + }, + { + "epoch": 3.967032967032967, + "grad_norm": 10.996441841125488, + "learning_rate": 4.801648351648352e-05, + "loss": 0.553, + "step": 1444 + }, + { + "epoch": 3.96978021978022, + "grad_norm": 16.45053482055664, + "learning_rate": 4.80151098901099e-05, + "loss": 0.9972, + "step": 1445 + }, + { + "epoch": 3.9725274725274726, + "grad_norm": 20.370330810546875, + "learning_rate": 4.801373626373627e-05, + "loss": 1.0931, + "step": 1446 + }, + { + "epoch": 3.9752747252747254, + "grad_norm": 15.367652893066406, + "learning_rate": 4.801236263736264e-05, + "loss": 0.74, + "step": 1447 + }, + { + "epoch": 3.978021978021978, + "grad_norm": 12.924178123474121, + "learning_rate": 4.8010989010989014e-05, + "loss": 0.7692, + "step": 1448 + }, + { + "epoch": 3.980769230769231, + "grad_norm": 15.03148078918457, + "learning_rate": 4.8009615384615384e-05, + "loss": 0.7752, + "step": 1449 + }, + { + "epoch": 3.9835164835164836, + "grad_norm": 16.62321662902832, + "learning_rate": 4.800824175824176e-05, + "loss": 0.895, + "step": 1450 + }, + { + "epoch": 3.9862637362637363, + "grad_norm": 16.291126251220703, + "learning_rate": 4.800686813186813e-05, + "loss": 0.7619, + "step": 1451 + }, + { + "epoch": 3.989010989010989, + "grad_norm": 16.044641494750977, + "learning_rate": 4.800549450549451e-05, + "loss": 1.148, + "step": 1452 + }, + { + "epoch": 3.991758241758242, + "grad_norm": 20.257169723510742, + "learning_rate": 4.8004120879120884e-05, + "loss": 1.0163, + "step": 1453 + }, + { + "epoch": 3.9945054945054945, + "grad_norm": 18.118682861328125, + "learning_rate": 4.8002747252747254e-05, + "loss": 0.7408, + "step": 1454 + }, + { + "epoch": 3.9972527472527473, + "grad_norm": 20.35677719116211, + "learning_rate": 4.800137362637363e-05, + "loss": 0.9339, + "step": 1455 + }, + { + "epoch": 4.0, + "grad_norm": 23.296953201293945, + "learning_rate": 4.8e-05, + "loss": 0.5744, + "step": 1456 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8057851239669421, + "eval_f1": 0.8008713739893721, + "eval_f1_DuraRiadoRio_64x64": 0.7286245353159851, + "eval_f1_Mole_64x64": 0.8027681660899654, + "eval_f1_Quebrado_64x64": 0.919093851132686, + "eval_f1_RiadoRio_64x64": 0.6642599277978339, + "eval_f1_RioFechado_64x64": 0.8896103896103896, + "eval_loss": 0.5525858402252197, + "eval_precision": 0.8011814530850675, + "eval_precision_DuraRiadoRio_64x64": 0.784, + "eval_precision_Mole_64x64": 0.8, + "eval_precision_Quebrado_64x64": 0.8606060606060606, + "eval_precision_RiadoRio_64x64": 0.736, + "eval_precision_RioFechado_64x64": 0.8253012048192772, + "eval_recall": 0.8084548225022651, + "eval_recall_DuraRiadoRio_64x64": 0.6805555555555556, + "eval_recall_Mole_64x64": 0.8055555555555556, + "eval_recall_Quebrado_64x64": 0.9861111111111112, + "eval_recall_RiadoRio_64x64": 0.6052631578947368, + "eval_recall_RioFechado_64x64": 0.9647887323943662, + "eval_runtime": 1.7509, + "eval_samples_per_second": 414.636, + "eval_steps_per_second": 26.272, + "step": 1456 + }, + { + "epoch": 4.002747252747253, + "grad_norm": 12.207172393798828, + "learning_rate": 4.799862637362638e-05, + "loss": 0.5836, + "step": 1457 + }, + { + "epoch": 4.0054945054945055, + "grad_norm": 16.26470184326172, + "learning_rate": 4.7997252747252755e-05, + "loss": 0.6104, + "step": 1458 + }, + { + "epoch": 4.008241758241758, + "grad_norm": 16.887554168701172, + "learning_rate": 4.7995879120879125e-05, + "loss": 0.7264, + "step": 1459 + }, + { + "epoch": 4.010989010989011, + "grad_norm": 19.411880493164062, + "learning_rate": 4.79945054945055e-05, + "loss": 0.911, + "step": 1460 + }, + { + "epoch": 4.013736263736264, + "grad_norm": 15.931333541870117, + "learning_rate": 4.799313186813187e-05, + "loss": 1.2096, + "step": 1461 + }, + { + "epoch": 4.016483516483516, + "grad_norm": 13.022385597229004, + "learning_rate": 4.799175824175824e-05, + "loss": 0.4409, + "step": 1462 + }, + { + "epoch": 4.019230769230769, + "grad_norm": 21.148662567138672, + "learning_rate": 4.799038461538462e-05, + "loss": 1.322, + "step": 1463 + }, + { + "epoch": 4.021978021978022, + "grad_norm": 11.873523712158203, + "learning_rate": 4.798901098901099e-05, + "loss": 0.442, + "step": 1464 + }, + { + "epoch": 4.024725274725275, + "grad_norm": 11.754591941833496, + "learning_rate": 4.7987637362637365e-05, + "loss": 0.537, + "step": 1465 + }, + { + "epoch": 4.027472527472527, + "grad_norm": 10.553231239318848, + "learning_rate": 4.7986263736263735e-05, + "loss": 0.3468, + "step": 1466 + }, + { + "epoch": 4.03021978021978, + "grad_norm": 15.488229751586914, + "learning_rate": 4.798489010989011e-05, + "loss": 0.9382, + "step": 1467 + }, + { + "epoch": 4.032967032967033, + "grad_norm": 17.320404052734375, + "learning_rate": 4.798351648351649e-05, + "loss": 0.8393, + "step": 1468 + }, + { + "epoch": 4.035714285714286, + "grad_norm": 16.898935317993164, + "learning_rate": 4.798214285714286e-05, + "loss": 0.6826, + "step": 1469 + }, + { + "epoch": 4.038461538461538, + "grad_norm": 17.48408317565918, + "learning_rate": 4.7980769230769236e-05, + "loss": 0.9507, + "step": 1470 + }, + { + "epoch": 4.041208791208791, + "grad_norm": 10.495352745056152, + "learning_rate": 4.7979395604395606e-05, + "loss": 0.6405, + "step": 1471 + }, + { + "epoch": 4.043956043956044, + "grad_norm": 13.20077133178711, + "learning_rate": 4.797802197802198e-05, + "loss": 0.7805, + "step": 1472 + }, + { + "epoch": 4.0467032967032965, + "grad_norm": 17.857511520385742, + "learning_rate": 4.797664835164836e-05, + "loss": 0.9476, + "step": 1473 + }, + { + "epoch": 4.049450549450549, + "grad_norm": 13.744216918945312, + "learning_rate": 4.797527472527473e-05, + "loss": 0.7437, + "step": 1474 + }, + { + "epoch": 4.052197802197802, + "grad_norm": 20.19918441772461, + "learning_rate": 4.7973901098901106e-05, + "loss": 0.9482, + "step": 1475 + }, + { + "epoch": 4.054945054945055, + "grad_norm": 12.634825706481934, + "learning_rate": 4.7972527472527476e-05, + "loss": 0.7027, + "step": 1476 + }, + { + "epoch": 4.0576923076923075, + "grad_norm": 14.72909164428711, + "learning_rate": 4.7971153846153846e-05, + "loss": 0.7746, + "step": 1477 + }, + { + "epoch": 4.06043956043956, + "grad_norm": 16.16374969482422, + "learning_rate": 4.796978021978022e-05, + "loss": 0.7224, + "step": 1478 + }, + { + "epoch": 4.063186813186813, + "grad_norm": 12.672536849975586, + "learning_rate": 4.796840659340659e-05, + "loss": 0.4675, + "step": 1479 + }, + { + "epoch": 4.065934065934066, + "grad_norm": 10.313870429992676, + "learning_rate": 4.796703296703297e-05, + "loss": 0.5506, + "step": 1480 + }, + { + "epoch": 4.068681318681318, + "grad_norm": 10.079122543334961, + "learning_rate": 4.796565934065934e-05, + "loss": 0.3965, + "step": 1481 + }, + { + "epoch": 4.071428571428571, + "grad_norm": 14.797484397888184, + "learning_rate": 4.7964285714285717e-05, + "loss": 0.8399, + "step": 1482 + }, + { + "epoch": 4.074175824175824, + "grad_norm": 16.139402389526367, + "learning_rate": 4.7962912087912087e-05, + "loss": 0.7606, + "step": 1483 + }, + { + "epoch": 4.076923076923077, + "grad_norm": 10.578532218933105, + "learning_rate": 4.796153846153846e-05, + "loss": 0.4724, + "step": 1484 + }, + { + "epoch": 4.079670329670329, + "grad_norm": 14.45373249053955, + "learning_rate": 4.796016483516484e-05, + "loss": 0.7087, + "step": 1485 + }, + { + "epoch": 4.082417582417582, + "grad_norm": 13.058232307434082, + "learning_rate": 4.795879120879121e-05, + "loss": 0.5871, + "step": 1486 + }, + { + "epoch": 4.085164835164835, + "grad_norm": 14.81097412109375, + "learning_rate": 4.795741758241759e-05, + "loss": 0.5779, + "step": 1487 + }, + { + "epoch": 4.087912087912088, + "grad_norm": 14.18227767944336, + "learning_rate": 4.795604395604396e-05, + "loss": 0.5405, + "step": 1488 + }, + { + "epoch": 4.09065934065934, + "grad_norm": 9.971996307373047, + "learning_rate": 4.7954670329670334e-05, + "loss": 0.4566, + "step": 1489 + }, + { + "epoch": 4.093406593406593, + "grad_norm": 13.020177841186523, + "learning_rate": 4.795329670329671e-05, + "loss": 0.6412, + "step": 1490 + }, + { + "epoch": 4.096153846153846, + "grad_norm": 11.937134742736816, + "learning_rate": 4.795192307692308e-05, + "loss": 0.6565, + "step": 1491 + }, + { + "epoch": 4.0989010989010985, + "grad_norm": 14.005784034729004, + "learning_rate": 4.795054945054945e-05, + "loss": 0.6564, + "step": 1492 + }, + { + "epoch": 4.101648351648351, + "grad_norm": 15.5830078125, + "learning_rate": 4.794917582417582e-05, + "loss": 0.6315, + "step": 1493 + }, + { + "epoch": 4.104395604395604, + "grad_norm": 11.806196212768555, + "learning_rate": 4.79478021978022e-05, + "loss": 0.4774, + "step": 1494 + }, + { + "epoch": 4.107142857142857, + "grad_norm": 17.367666244506836, + "learning_rate": 4.7946428571428574e-05, + "loss": 0.8967, + "step": 1495 + }, + { + "epoch": 4.1098901098901095, + "grad_norm": 12.53258228302002, + "learning_rate": 4.7945054945054944e-05, + "loss": 0.6657, + "step": 1496 + }, + { + "epoch": 4.112637362637362, + "grad_norm": 13.19011402130127, + "learning_rate": 4.794368131868132e-05, + "loss": 0.6804, + "step": 1497 + }, + { + "epoch": 4.115384615384615, + "grad_norm": 14.995484352111816, + "learning_rate": 4.794230769230769e-05, + "loss": 1.0371, + "step": 1498 + }, + { + "epoch": 4.118131868131868, + "grad_norm": 16.954113006591797, + "learning_rate": 4.794093406593407e-05, + "loss": 0.8524, + "step": 1499 + }, + { + "epoch": 4.1208791208791204, + "grad_norm": 10.36644172668457, + "learning_rate": 4.7939560439560445e-05, + "loss": 0.5725, + "step": 1500 + }, + { + "epoch": 4.123626373626374, + "grad_norm": 13.114336013793945, + "learning_rate": 4.7938186813186815e-05, + "loss": 0.7822, + "step": 1501 + }, + { + "epoch": 4.126373626373626, + "grad_norm": 14.242609024047852, + "learning_rate": 4.793681318681319e-05, + "loss": 0.8463, + "step": 1502 + }, + { + "epoch": 4.1291208791208796, + "grad_norm": 14.204581260681152, + "learning_rate": 4.793543956043956e-05, + "loss": 0.6914, + "step": 1503 + }, + { + "epoch": 4.131868131868132, + "grad_norm": 13.390033721923828, + "learning_rate": 4.793406593406594e-05, + "loss": 0.7037, + "step": 1504 + }, + { + "epoch": 4.134615384615385, + "grad_norm": 15.229975700378418, + "learning_rate": 4.7932692307692315e-05, + "loss": 0.7915, + "step": 1505 + }, + { + "epoch": 4.137362637362638, + "grad_norm": 13.415787696838379, + "learning_rate": 4.7931318681318685e-05, + "loss": 0.6424, + "step": 1506 + }, + { + "epoch": 4.1401098901098905, + "grad_norm": 11.374828338623047, + "learning_rate": 4.7929945054945055e-05, + "loss": 0.5682, + "step": 1507 + }, + { + "epoch": 4.142857142857143, + "grad_norm": 11.860982894897461, + "learning_rate": 4.7928571428571425e-05, + "loss": 0.469, + "step": 1508 + }, + { + "epoch": 4.145604395604396, + "grad_norm": 12.885540008544922, + "learning_rate": 4.79271978021978e-05, + "loss": 0.5901, + "step": 1509 + }, + { + "epoch": 4.148351648351649, + "grad_norm": 9.735228538513184, + "learning_rate": 4.792582417582418e-05, + "loss": 0.4552, + "step": 1510 + }, + { + "epoch": 4.1510989010989015, + "grad_norm": 10.358702659606934, + "learning_rate": 4.792445054945055e-05, + "loss": 0.4191, + "step": 1511 + }, + { + "epoch": 4.153846153846154, + "grad_norm": 14.97957706451416, + "learning_rate": 4.7923076923076926e-05, + "loss": 0.896, + "step": 1512 + }, + { + "epoch": 4.156593406593407, + "grad_norm": 17.540925979614258, + "learning_rate": 4.7921703296703296e-05, + "loss": 1.1826, + "step": 1513 + }, + { + "epoch": 4.15934065934066, + "grad_norm": 17.589513778686523, + "learning_rate": 4.792032967032967e-05, + "loss": 0.9019, + "step": 1514 + }, + { + "epoch": 4.162087912087912, + "grad_norm": 15.887808799743652, + "learning_rate": 4.791895604395605e-05, + "loss": 0.7091, + "step": 1515 + }, + { + "epoch": 4.164835164835165, + "grad_norm": 11.978968620300293, + "learning_rate": 4.791758241758242e-05, + "loss": 0.3656, + "step": 1516 + }, + { + "epoch": 4.167582417582418, + "grad_norm": 17.704938888549805, + "learning_rate": 4.7916208791208796e-05, + "loss": 1.0864, + "step": 1517 + }, + { + "epoch": 4.170329670329671, + "grad_norm": 11.642274856567383, + "learning_rate": 4.7914835164835166e-05, + "loss": 0.5835, + "step": 1518 + }, + { + "epoch": 4.173076923076923, + "grad_norm": 14.242012977600098, + "learning_rate": 4.791346153846154e-05, + "loss": 0.5294, + "step": 1519 + }, + { + "epoch": 4.175824175824176, + "grad_norm": 8.681442260742188, + "learning_rate": 4.791208791208792e-05, + "loss": 0.2492, + "step": 1520 + }, + { + "epoch": 4.178571428571429, + "grad_norm": 14.716197967529297, + "learning_rate": 4.791071428571429e-05, + "loss": 0.5973, + "step": 1521 + }, + { + "epoch": 4.181318681318682, + "grad_norm": 12.996406555175781, + "learning_rate": 4.790934065934066e-05, + "loss": 0.7867, + "step": 1522 + }, + { + "epoch": 4.184065934065934, + "grad_norm": 16.853599548339844, + "learning_rate": 4.790796703296703e-05, + "loss": 0.839, + "step": 1523 + }, + { + "epoch": 4.186813186813187, + "grad_norm": 16.303361892700195, + "learning_rate": 4.7906593406593406e-05, + "loss": 1.2449, + "step": 1524 + }, + { + "epoch": 4.18956043956044, + "grad_norm": 15.335265159606934, + "learning_rate": 4.790521978021978e-05, + "loss": 0.7125, + "step": 1525 + }, + { + "epoch": 4.1923076923076925, + "grad_norm": 13.211369514465332, + "learning_rate": 4.790384615384615e-05, + "loss": 0.5473, + "step": 1526 + }, + { + "epoch": 4.195054945054945, + "grad_norm": 17.486299514770508, + "learning_rate": 4.790247252747253e-05, + "loss": 0.982, + "step": 1527 + }, + { + "epoch": 4.197802197802198, + "grad_norm": 16.874656677246094, + "learning_rate": 4.79010989010989e-05, + "loss": 1.0636, + "step": 1528 + }, + { + "epoch": 4.200549450549451, + "grad_norm": 7.049760818481445, + "learning_rate": 4.789972527472528e-05, + "loss": 0.2589, + "step": 1529 + }, + { + "epoch": 4.2032967032967035, + "grad_norm": 17.29155921936035, + "learning_rate": 4.7898351648351654e-05, + "loss": 0.8877, + "step": 1530 + }, + { + "epoch": 4.206043956043956, + "grad_norm": 19.049640655517578, + "learning_rate": 4.7896978021978024e-05, + "loss": 1.1263, + "step": 1531 + }, + { + "epoch": 4.208791208791209, + "grad_norm": 14.199668884277344, + "learning_rate": 4.78956043956044e-05, + "loss": 0.6311, + "step": 1532 + }, + { + "epoch": 4.211538461538462, + "grad_norm": 12.12460994720459, + "learning_rate": 4.789423076923077e-05, + "loss": 0.555, + "step": 1533 + }, + { + "epoch": 4.214285714285714, + "grad_norm": 17.597572326660156, + "learning_rate": 4.789285714285715e-05, + "loss": 0.9272, + "step": 1534 + }, + { + "epoch": 4.217032967032967, + "grad_norm": 13.27408218383789, + "learning_rate": 4.7891483516483524e-05, + "loss": 0.7788, + "step": 1535 + }, + { + "epoch": 4.21978021978022, + "grad_norm": 14.796609878540039, + "learning_rate": 4.7890109890109894e-05, + "loss": 0.9982, + "step": 1536 + }, + { + "epoch": 4.222527472527473, + "grad_norm": 14.767990112304688, + "learning_rate": 4.7888736263736264e-05, + "loss": 0.8125, + "step": 1537 + }, + { + "epoch": 4.225274725274725, + "grad_norm": 10.347270011901855, + "learning_rate": 4.7887362637362634e-05, + "loss": 0.4024, + "step": 1538 + }, + { + "epoch": 4.228021978021978, + "grad_norm": 13.783976554870605, + "learning_rate": 4.788598901098901e-05, + "loss": 0.6189, + "step": 1539 + }, + { + "epoch": 4.230769230769231, + "grad_norm": 12.17139720916748, + "learning_rate": 4.788461538461539e-05, + "loss": 0.5628, + "step": 1540 + }, + { + "epoch": 4.233516483516484, + "grad_norm": 12.495774269104004, + "learning_rate": 4.788324175824176e-05, + "loss": 0.7238, + "step": 1541 + }, + { + "epoch": 4.236263736263736, + "grad_norm": 16.424297332763672, + "learning_rate": 4.7881868131868135e-05, + "loss": 0.7465, + "step": 1542 + }, + { + "epoch": 4.239010989010989, + "grad_norm": 16.53338050842285, + "learning_rate": 4.7880494505494505e-05, + "loss": 0.7297, + "step": 1543 + }, + { + "epoch": 4.241758241758242, + "grad_norm": 11.308565139770508, + "learning_rate": 4.787912087912088e-05, + "loss": 0.4598, + "step": 1544 + }, + { + "epoch": 4.2445054945054945, + "grad_norm": 13.90260124206543, + "learning_rate": 4.787774725274726e-05, + "loss": 0.7339, + "step": 1545 + }, + { + "epoch": 4.247252747252747, + "grad_norm": 12.586994171142578, + "learning_rate": 4.787637362637363e-05, + "loss": 0.629, + "step": 1546 + }, + { + "epoch": 4.25, + "grad_norm": 15.581748962402344, + "learning_rate": 4.7875000000000005e-05, + "loss": 0.6399, + "step": 1547 + }, + { + "epoch": 4.252747252747253, + "grad_norm": 14.309340476989746, + "learning_rate": 4.7873626373626375e-05, + "loss": 0.9158, + "step": 1548 + }, + { + "epoch": 4.2554945054945055, + "grad_norm": 10.53359603881836, + "learning_rate": 4.787225274725275e-05, + "loss": 0.5395, + "step": 1549 + }, + { + "epoch": 4.258241758241758, + "grad_norm": 13.209564208984375, + "learning_rate": 4.787087912087913e-05, + "loss": 0.5463, + "step": 1550 + }, + { + "epoch": 4.260989010989011, + "grad_norm": 13.331165313720703, + "learning_rate": 4.78695054945055e-05, + "loss": 0.5815, + "step": 1551 + }, + { + "epoch": 4.263736263736264, + "grad_norm": 11.606983184814453, + "learning_rate": 4.786813186813187e-05, + "loss": 0.5276, + "step": 1552 + }, + { + "epoch": 4.266483516483516, + "grad_norm": 9.74377155303955, + "learning_rate": 4.786675824175824e-05, + "loss": 0.4025, + "step": 1553 + }, + { + "epoch": 4.269230769230769, + "grad_norm": 17.252092361450195, + "learning_rate": 4.7865384615384615e-05, + "loss": 1.0256, + "step": 1554 + }, + { + "epoch": 4.271978021978022, + "grad_norm": 15.42432975769043, + "learning_rate": 4.786401098901099e-05, + "loss": 0.8728, + "step": 1555 + }, + { + "epoch": 4.274725274725275, + "grad_norm": 12.841586112976074, + "learning_rate": 4.786263736263736e-05, + "loss": 0.6577, + "step": 1556 + }, + { + "epoch": 4.277472527472527, + "grad_norm": 14.858381271362305, + "learning_rate": 4.786126373626374e-05, + "loss": 0.6835, + "step": 1557 + }, + { + "epoch": 4.28021978021978, + "grad_norm": 16.791080474853516, + "learning_rate": 4.785989010989011e-05, + "loss": 0.9933, + "step": 1558 + }, + { + "epoch": 4.282967032967033, + "grad_norm": 12.621728897094727, + "learning_rate": 4.7858516483516486e-05, + "loss": 0.6334, + "step": 1559 + }, + { + "epoch": 4.285714285714286, + "grad_norm": 13.061128616333008, + "learning_rate": 4.785714285714286e-05, + "loss": 0.5574, + "step": 1560 + }, + { + "epoch": 4.288461538461538, + "grad_norm": 19.390947341918945, + "learning_rate": 4.785576923076923e-05, + "loss": 0.8774, + "step": 1561 + }, + { + "epoch": 4.291208791208791, + "grad_norm": 8.711089134216309, + "learning_rate": 4.785439560439561e-05, + "loss": 0.3115, + "step": 1562 + }, + { + "epoch": 4.293956043956044, + "grad_norm": 18.710769653320312, + "learning_rate": 4.785302197802198e-05, + "loss": 1.0377, + "step": 1563 + }, + { + "epoch": 4.2967032967032965, + "grad_norm": 14.210365295410156, + "learning_rate": 4.7851648351648356e-05, + "loss": 0.8484, + "step": 1564 + }, + { + "epoch": 4.299450549450549, + "grad_norm": 13.879621505737305, + "learning_rate": 4.7850274725274726e-05, + "loss": 0.6456, + "step": 1565 + }, + { + "epoch": 4.302197802197802, + "grad_norm": 11.70589542388916, + "learning_rate": 4.78489010989011e-05, + "loss": 0.5916, + "step": 1566 + }, + { + "epoch": 4.304945054945055, + "grad_norm": 16.74168586730957, + "learning_rate": 4.784752747252747e-05, + "loss": 0.9109, + "step": 1567 + }, + { + "epoch": 4.3076923076923075, + "grad_norm": 11.32386302947998, + "learning_rate": 4.784615384615384e-05, + "loss": 0.5289, + "step": 1568 + }, + { + "epoch": 4.31043956043956, + "grad_norm": 11.741448402404785, + "learning_rate": 4.784478021978022e-05, + "loss": 0.5492, + "step": 1569 + }, + { + "epoch": 4.313186813186813, + "grad_norm": 13.321894645690918, + "learning_rate": 4.78434065934066e-05, + "loss": 0.6504, + "step": 1570 + }, + { + "epoch": 4.315934065934066, + "grad_norm": 20.795774459838867, + "learning_rate": 4.784203296703297e-05, + "loss": 1.0634, + "step": 1571 + }, + { + "epoch": 4.318681318681318, + "grad_norm": 17.358861923217773, + "learning_rate": 4.7840659340659344e-05, + "loss": 0.785, + "step": 1572 + }, + { + "epoch": 4.321428571428571, + "grad_norm": 13.897663116455078, + "learning_rate": 4.7839285714285714e-05, + "loss": 0.4649, + "step": 1573 + }, + { + "epoch": 4.324175824175824, + "grad_norm": 24.628690719604492, + "learning_rate": 4.783791208791209e-05, + "loss": 1.7446, + "step": 1574 + }, + { + "epoch": 4.326923076923077, + "grad_norm": 10.183780670166016, + "learning_rate": 4.783653846153847e-05, + "loss": 0.4111, + "step": 1575 + }, + { + "epoch": 4.329670329670329, + "grad_norm": 14.820205688476562, + "learning_rate": 4.783516483516484e-05, + "loss": 0.6397, + "step": 1576 + }, + { + "epoch": 4.332417582417582, + "grad_norm": 13.989264488220215, + "learning_rate": 4.7833791208791214e-05, + "loss": 0.5843, + "step": 1577 + }, + { + "epoch": 4.335164835164835, + "grad_norm": 7.227046966552734, + "learning_rate": 4.7832417582417584e-05, + "loss": 0.2247, + "step": 1578 + }, + { + "epoch": 4.337912087912088, + "grad_norm": 17.095792770385742, + "learning_rate": 4.783104395604396e-05, + "loss": 0.9643, + "step": 1579 + }, + { + "epoch": 4.34065934065934, + "grad_norm": 15.964848518371582, + "learning_rate": 4.782967032967033e-05, + "loss": 0.8322, + "step": 1580 + }, + { + "epoch": 4.343406593406593, + "grad_norm": 19.28877830505371, + "learning_rate": 4.782829670329671e-05, + "loss": 1.3202, + "step": 1581 + }, + { + "epoch": 4.346153846153846, + "grad_norm": 11.375373840332031, + "learning_rate": 4.782692307692308e-05, + "loss": 0.6105, + "step": 1582 + }, + { + "epoch": 4.3489010989010985, + "grad_norm": 11.582159996032715, + "learning_rate": 4.782554945054945e-05, + "loss": 0.5973, + "step": 1583 + }, + { + "epoch": 4.351648351648351, + "grad_norm": 17.88197135925293, + "learning_rate": 4.7824175824175824e-05, + "loss": 0.7285, + "step": 1584 + }, + { + "epoch": 4.354395604395604, + "grad_norm": 12.66162395477295, + "learning_rate": 4.78228021978022e-05, + "loss": 0.6731, + "step": 1585 + }, + { + "epoch": 4.357142857142857, + "grad_norm": 12.248143196105957, + "learning_rate": 4.782142857142857e-05, + "loss": 0.5293, + "step": 1586 + }, + { + "epoch": 4.3598901098901095, + "grad_norm": 18.600622177124023, + "learning_rate": 4.782005494505495e-05, + "loss": 0.8548, + "step": 1587 + }, + { + "epoch": 4.362637362637362, + "grad_norm": 12.029911994934082, + "learning_rate": 4.781868131868132e-05, + "loss": 0.496, + "step": 1588 + }, + { + "epoch": 4.365384615384615, + "grad_norm": 16.09404754638672, + "learning_rate": 4.7817307692307695e-05, + "loss": 0.8714, + "step": 1589 + }, + { + "epoch": 4.368131868131869, + "grad_norm": 17.750591278076172, + "learning_rate": 4.781593406593407e-05, + "loss": 0.7749, + "step": 1590 + }, + { + "epoch": 4.3708791208791204, + "grad_norm": 13.898585319519043, + "learning_rate": 4.781456043956044e-05, + "loss": 0.4453, + "step": 1591 + }, + { + "epoch": 4.373626373626374, + "grad_norm": 14.643345832824707, + "learning_rate": 4.781318681318682e-05, + "loss": 0.62, + "step": 1592 + }, + { + "epoch": 4.376373626373626, + "grad_norm": 15.349994659423828, + "learning_rate": 4.781181318681319e-05, + "loss": 0.8467, + "step": 1593 + }, + { + "epoch": 4.3791208791208796, + "grad_norm": 15.728671073913574, + "learning_rate": 4.7810439560439565e-05, + "loss": 0.5556, + "step": 1594 + }, + { + "epoch": 4.381868131868131, + "grad_norm": 11.522177696228027, + "learning_rate": 4.7809065934065935e-05, + "loss": 0.648, + "step": 1595 + }, + { + "epoch": 4.384615384615385, + "grad_norm": 17.44355583190918, + "learning_rate": 4.780769230769231e-05, + "loss": 0.7997, + "step": 1596 + }, + { + "epoch": 4.387362637362638, + "grad_norm": 15.943095207214355, + "learning_rate": 4.780631868131868e-05, + "loss": 0.7738, + "step": 1597 + }, + { + "epoch": 4.3901098901098905, + "grad_norm": 12.729194641113281, + "learning_rate": 4.780494505494505e-05, + "loss": 0.619, + "step": 1598 + }, + { + "epoch": 4.392857142857143, + "grad_norm": 19.237289428710938, + "learning_rate": 4.780357142857143e-05, + "loss": 1.1043, + "step": 1599 + }, + { + "epoch": 4.395604395604396, + "grad_norm": 12.477239608764648, + "learning_rate": 4.7802197802197806e-05, + "loss": 0.5611, + "step": 1600 + }, + { + "epoch": 4.398351648351649, + "grad_norm": 23.706398010253906, + "learning_rate": 4.7800824175824176e-05, + "loss": 0.9987, + "step": 1601 + }, + { + "epoch": 4.4010989010989015, + "grad_norm": 12.12814998626709, + "learning_rate": 4.779945054945055e-05, + "loss": 0.3997, + "step": 1602 + }, + { + "epoch": 4.403846153846154, + "grad_norm": 13.654998779296875, + "learning_rate": 4.779807692307692e-05, + "loss": 0.5805, + "step": 1603 + }, + { + "epoch": 4.406593406593407, + "grad_norm": 15.858882904052734, + "learning_rate": 4.77967032967033e-05, + "loss": 0.8748, + "step": 1604 + }, + { + "epoch": 4.40934065934066, + "grad_norm": 11.364599227905273, + "learning_rate": 4.7795329670329676e-05, + "loss": 0.4244, + "step": 1605 + }, + { + "epoch": 4.412087912087912, + "grad_norm": 18.256359100341797, + "learning_rate": 4.7793956043956046e-05, + "loss": 0.8142, + "step": 1606 + }, + { + "epoch": 4.414835164835165, + "grad_norm": 11.197509765625, + "learning_rate": 4.779258241758242e-05, + "loss": 0.5066, + "step": 1607 + }, + { + "epoch": 4.417582417582418, + "grad_norm": 15.534639358520508, + "learning_rate": 4.779120879120879e-05, + "loss": 0.6796, + "step": 1608 + }, + { + "epoch": 4.420329670329671, + "grad_norm": 13.119133949279785, + "learning_rate": 4.778983516483517e-05, + "loss": 0.5923, + "step": 1609 + }, + { + "epoch": 4.423076923076923, + "grad_norm": 13.686419486999512, + "learning_rate": 4.778846153846154e-05, + "loss": 0.625, + "step": 1610 + }, + { + "epoch": 4.425824175824176, + "grad_norm": 12.672633171081543, + "learning_rate": 4.7787087912087917e-05, + "loss": 0.5523, + "step": 1611 + }, + { + "epoch": 4.428571428571429, + "grad_norm": 18.793819427490234, + "learning_rate": 4.7785714285714287e-05, + "loss": 0.8736, + "step": 1612 + }, + { + "epoch": 4.431318681318682, + "grad_norm": 14.988555908203125, + "learning_rate": 4.7784340659340657e-05, + "loss": 0.8269, + "step": 1613 + }, + { + "epoch": 4.434065934065934, + "grad_norm": 16.80515480041504, + "learning_rate": 4.778296703296703e-05, + "loss": 0.8498, + "step": 1614 + }, + { + "epoch": 4.436813186813187, + "grad_norm": 14.633030891418457, + "learning_rate": 4.778159340659341e-05, + "loss": 0.7703, + "step": 1615 + }, + { + "epoch": 4.43956043956044, + "grad_norm": 10.314766883850098, + "learning_rate": 4.778021978021978e-05, + "loss": 0.3883, + "step": 1616 + }, + { + "epoch": 4.4423076923076925, + "grad_norm": 14.680475234985352, + "learning_rate": 4.777884615384616e-05, + "loss": 0.632, + "step": 1617 + }, + { + "epoch": 4.445054945054945, + "grad_norm": 13.400086402893066, + "learning_rate": 4.777747252747253e-05, + "loss": 0.5654, + "step": 1618 + }, + { + "epoch": 4.447802197802198, + "grad_norm": 16.143692016601562, + "learning_rate": 4.7776098901098904e-05, + "loss": 0.7965, + "step": 1619 + }, + { + "epoch": 4.450549450549451, + "grad_norm": 12.135529518127441, + "learning_rate": 4.777472527472528e-05, + "loss": 0.4699, + "step": 1620 + }, + { + "epoch": 4.4532967032967035, + "grad_norm": 17.099624633789062, + "learning_rate": 4.777335164835165e-05, + "loss": 0.875, + "step": 1621 + }, + { + "epoch": 4.456043956043956, + "grad_norm": 19.229106903076172, + "learning_rate": 4.777197802197803e-05, + "loss": 1.1736, + "step": 1622 + }, + { + "epoch": 4.458791208791209, + "grad_norm": 10.722928047180176, + "learning_rate": 4.77706043956044e-05, + "loss": 0.3866, + "step": 1623 + }, + { + "epoch": 4.461538461538462, + "grad_norm": 22.119874954223633, + "learning_rate": 4.7769230769230774e-05, + "loss": 1.4934, + "step": 1624 + }, + { + "epoch": 4.464285714285714, + "grad_norm": 17.115236282348633, + "learning_rate": 4.7767857142857144e-05, + "loss": 0.7472, + "step": 1625 + }, + { + "epoch": 4.467032967032967, + "grad_norm": 21.090621948242188, + "learning_rate": 4.776648351648352e-05, + "loss": 1.0799, + "step": 1626 + }, + { + "epoch": 4.46978021978022, + "grad_norm": 16.93502426147461, + "learning_rate": 4.776510989010989e-05, + "loss": 0.9108, + "step": 1627 + }, + { + "epoch": 4.472527472527473, + "grad_norm": 16.86637306213379, + "learning_rate": 4.776373626373626e-05, + "loss": 1.0022, + "step": 1628 + }, + { + "epoch": 4.475274725274725, + "grad_norm": 14.087535858154297, + "learning_rate": 4.776236263736264e-05, + "loss": 0.6668, + "step": 1629 + }, + { + "epoch": 4.478021978021978, + "grad_norm": 8.331254005432129, + "learning_rate": 4.7760989010989015e-05, + "loss": 0.5152, + "step": 1630 + }, + { + "epoch": 4.480769230769231, + "grad_norm": 17.038532257080078, + "learning_rate": 4.7759615384615385e-05, + "loss": 1.0253, + "step": 1631 + }, + { + "epoch": 4.483516483516484, + "grad_norm": 19.56062126159668, + "learning_rate": 4.775824175824176e-05, + "loss": 0.915, + "step": 1632 + }, + { + "epoch": 4.486263736263736, + "grad_norm": 14.260760307312012, + "learning_rate": 4.775686813186813e-05, + "loss": 0.7404, + "step": 1633 + }, + { + "epoch": 4.489010989010989, + "grad_norm": 10.657249450683594, + "learning_rate": 4.775549450549451e-05, + "loss": 0.5092, + "step": 1634 + }, + { + "epoch": 4.491758241758242, + "grad_norm": 17.501956939697266, + "learning_rate": 4.7754120879120885e-05, + "loss": 0.8687, + "step": 1635 + }, + { + "epoch": 4.4945054945054945, + "grad_norm": 15.55941104888916, + "learning_rate": 4.7752747252747255e-05, + "loss": 0.6278, + "step": 1636 + }, + { + "epoch": 4.497252747252747, + "grad_norm": 13.368682861328125, + "learning_rate": 4.775137362637363e-05, + "loss": 0.5083, + "step": 1637 + }, + { + "epoch": 4.5, + "grad_norm": 15.053168296813965, + "learning_rate": 4.775e-05, + "loss": 0.7308, + "step": 1638 + }, + { + "epoch": 4.502747252747253, + "grad_norm": 11.295777320861816, + "learning_rate": 4.774862637362638e-05, + "loss": 0.5193, + "step": 1639 + }, + { + "epoch": 4.5054945054945055, + "grad_norm": 15.558849334716797, + "learning_rate": 4.774725274725275e-05, + "loss": 0.6197, + "step": 1640 + }, + { + "epoch": 4.508241758241758, + "grad_norm": 13.440225601196289, + "learning_rate": 4.7745879120879126e-05, + "loss": 0.5014, + "step": 1641 + }, + { + "epoch": 4.510989010989011, + "grad_norm": 10.635198593139648, + "learning_rate": 4.7744505494505496e-05, + "loss": 0.6082, + "step": 1642 + }, + { + "epoch": 4.513736263736264, + "grad_norm": 16.774782180786133, + "learning_rate": 4.7743131868131866e-05, + "loss": 0.653, + "step": 1643 + }, + { + "epoch": 4.516483516483516, + "grad_norm": 13.85551643371582, + "learning_rate": 4.774175824175824e-05, + "loss": 0.6528, + "step": 1644 + }, + { + "epoch": 4.519230769230769, + "grad_norm": 13.515114784240723, + "learning_rate": 4.774038461538462e-05, + "loss": 0.7502, + "step": 1645 + }, + { + "epoch": 4.521978021978022, + "grad_norm": 17.343523025512695, + "learning_rate": 4.773901098901099e-05, + "loss": 1.064, + "step": 1646 + }, + { + "epoch": 4.524725274725275, + "grad_norm": 15.381963729858398, + "learning_rate": 4.7737637362637366e-05, + "loss": 0.6733, + "step": 1647 + }, + { + "epoch": 4.527472527472527, + "grad_norm": 13.501811027526855, + "learning_rate": 4.7736263736263736e-05, + "loss": 0.491, + "step": 1648 + }, + { + "epoch": 4.53021978021978, + "grad_norm": 11.904844284057617, + "learning_rate": 4.773489010989011e-05, + "loss": 0.5057, + "step": 1649 + }, + { + "epoch": 4.532967032967033, + "grad_norm": 15.099312782287598, + "learning_rate": 4.773351648351649e-05, + "loss": 0.7232, + "step": 1650 + }, + { + "epoch": 4.535714285714286, + "grad_norm": 15.02596664428711, + "learning_rate": 4.773214285714286e-05, + "loss": 0.7363, + "step": 1651 + }, + { + "epoch": 4.538461538461538, + "grad_norm": 10.041150093078613, + "learning_rate": 4.7730769230769236e-05, + "loss": 0.4258, + "step": 1652 + }, + { + "epoch": 4.541208791208791, + "grad_norm": 13.47021484375, + "learning_rate": 4.7729395604395606e-05, + "loss": 0.718, + "step": 1653 + }, + { + "epoch": 4.543956043956044, + "grad_norm": 16.11989974975586, + "learning_rate": 4.772802197802198e-05, + "loss": 0.9296, + "step": 1654 + }, + { + "epoch": 4.5467032967032965, + "grad_norm": 13.76646614074707, + "learning_rate": 4.772664835164835e-05, + "loss": 0.6202, + "step": 1655 + }, + { + "epoch": 4.549450549450549, + "grad_norm": 15.081107139587402, + "learning_rate": 4.772527472527473e-05, + "loss": 0.6506, + "step": 1656 + }, + { + "epoch": 4.552197802197802, + "grad_norm": 13.306504249572754, + "learning_rate": 4.77239010989011e-05, + "loss": 0.4859, + "step": 1657 + }, + { + "epoch": 4.554945054945055, + "grad_norm": 16.038148880004883, + "learning_rate": 4.772252747252747e-05, + "loss": 0.6391, + "step": 1658 + }, + { + "epoch": 4.5576923076923075, + "grad_norm": 14.595527648925781, + "learning_rate": 4.772115384615385e-05, + "loss": 0.6812, + "step": 1659 + }, + { + "epoch": 4.56043956043956, + "grad_norm": 13.39775276184082, + "learning_rate": 4.7719780219780224e-05, + "loss": 0.7335, + "step": 1660 + }, + { + "epoch": 4.563186813186813, + "grad_norm": 16.839035034179688, + "learning_rate": 4.7718406593406594e-05, + "loss": 0.9629, + "step": 1661 + }, + { + "epoch": 4.565934065934066, + "grad_norm": 10.990760803222656, + "learning_rate": 4.771703296703297e-05, + "loss": 0.4455, + "step": 1662 + }, + { + "epoch": 4.568681318681318, + "grad_norm": 12.447546005249023, + "learning_rate": 4.771565934065934e-05, + "loss": 0.4839, + "step": 1663 + }, + { + "epoch": 4.571428571428571, + "grad_norm": 12.547819137573242, + "learning_rate": 4.771428571428572e-05, + "loss": 0.5178, + "step": 1664 + }, + { + "epoch": 4.574175824175824, + "grad_norm": 15.478802680969238, + "learning_rate": 4.7712912087912094e-05, + "loss": 0.7195, + "step": 1665 + }, + { + "epoch": 4.576923076923077, + "grad_norm": 22.12600326538086, + "learning_rate": 4.7711538461538464e-05, + "loss": 1.1139, + "step": 1666 + }, + { + "epoch": 4.579670329670329, + "grad_norm": 10.328317642211914, + "learning_rate": 4.771016483516484e-05, + "loss": 0.4741, + "step": 1667 + }, + { + "epoch": 4.582417582417582, + "grad_norm": 11.65616512298584, + "learning_rate": 4.770879120879121e-05, + "loss": 0.307, + "step": 1668 + }, + { + "epoch": 4.585164835164835, + "grad_norm": 26.297809600830078, + "learning_rate": 4.770741758241759e-05, + "loss": 1.6084, + "step": 1669 + }, + { + "epoch": 4.587912087912088, + "grad_norm": 13.233898162841797, + "learning_rate": 4.770604395604396e-05, + "loss": 0.5358, + "step": 1670 + }, + { + "epoch": 4.59065934065934, + "grad_norm": 15.406360626220703, + "learning_rate": 4.7704670329670335e-05, + "loss": 0.591, + "step": 1671 + }, + { + "epoch": 4.593406593406593, + "grad_norm": 20.109106063842773, + "learning_rate": 4.7703296703296705e-05, + "loss": 0.944, + "step": 1672 + }, + { + "epoch": 4.596153846153846, + "grad_norm": 9.432844161987305, + "learning_rate": 4.7701923076923075e-05, + "loss": 0.3577, + "step": 1673 + }, + { + "epoch": 4.5989010989010985, + "grad_norm": 17.663196563720703, + "learning_rate": 4.770054945054945e-05, + "loss": 0.9351, + "step": 1674 + }, + { + "epoch": 4.601648351648351, + "grad_norm": 14.657723426818848, + "learning_rate": 4.769917582417583e-05, + "loss": 0.7006, + "step": 1675 + }, + { + "epoch": 4.604395604395604, + "grad_norm": 16.408693313598633, + "learning_rate": 4.76978021978022e-05, + "loss": 0.7983, + "step": 1676 + }, + { + "epoch": 4.607142857142857, + "grad_norm": 13.786080360412598, + "learning_rate": 4.7696428571428575e-05, + "loss": 0.6169, + "step": 1677 + }, + { + "epoch": 4.6098901098901095, + "grad_norm": 11.060160636901855, + "learning_rate": 4.7695054945054945e-05, + "loss": 0.5967, + "step": 1678 + }, + { + "epoch": 4.612637362637363, + "grad_norm": 18.1876277923584, + "learning_rate": 4.769368131868132e-05, + "loss": 0.8736, + "step": 1679 + }, + { + "epoch": 4.615384615384615, + "grad_norm": 11.216095924377441, + "learning_rate": 4.76923076923077e-05, + "loss": 0.3885, + "step": 1680 + }, + { + "epoch": 4.618131868131869, + "grad_norm": 15.983573913574219, + "learning_rate": 4.769093406593407e-05, + "loss": 0.7624, + "step": 1681 + }, + { + "epoch": 4.6208791208791204, + "grad_norm": 12.656405448913574, + "learning_rate": 4.7689560439560445e-05, + "loss": 0.5858, + "step": 1682 + }, + { + "epoch": 4.623626373626374, + "grad_norm": 10.31066608428955, + "learning_rate": 4.7688186813186815e-05, + "loss": 0.3726, + "step": 1683 + }, + { + "epoch": 4.626373626373626, + "grad_norm": 16.160127639770508, + "learning_rate": 4.768681318681319e-05, + "loss": 0.9632, + "step": 1684 + }, + { + "epoch": 4.6291208791208796, + "grad_norm": 18.6495361328125, + "learning_rate": 4.768543956043956e-05, + "loss": 0.5939, + "step": 1685 + }, + { + "epoch": 4.631868131868131, + "grad_norm": 11.342283248901367, + "learning_rate": 4.768406593406594e-05, + "loss": 0.4956, + "step": 1686 + }, + { + "epoch": 4.634615384615385, + "grad_norm": 14.97378158569336, + "learning_rate": 4.768269230769231e-05, + "loss": 0.7246, + "step": 1687 + }, + { + "epoch": 4.637362637362637, + "grad_norm": 15.600935935974121, + "learning_rate": 4.768131868131868e-05, + "loss": 0.7959, + "step": 1688 + }, + { + "epoch": 4.6401098901098905, + "grad_norm": 15.52572250366211, + "learning_rate": 4.7679945054945056e-05, + "loss": 0.7018, + "step": 1689 + }, + { + "epoch": 4.642857142857143, + "grad_norm": 16.481061935424805, + "learning_rate": 4.767857142857143e-05, + "loss": 0.7548, + "step": 1690 + }, + { + "epoch": 4.645604395604396, + "grad_norm": 8.664950370788574, + "learning_rate": 4.76771978021978e-05, + "loss": 0.2557, + "step": 1691 + }, + { + "epoch": 4.648351648351649, + "grad_norm": 13.763487815856934, + "learning_rate": 4.767582417582418e-05, + "loss": 0.6552, + "step": 1692 + }, + { + "epoch": 4.6510989010989015, + "grad_norm": 17.775325775146484, + "learning_rate": 4.767445054945055e-05, + "loss": 0.9109, + "step": 1693 + }, + { + "epoch": 4.653846153846154, + "grad_norm": 9.95751953125, + "learning_rate": 4.7673076923076926e-05, + "loss": 0.3111, + "step": 1694 + }, + { + "epoch": 4.656593406593407, + "grad_norm": 16.74725914001465, + "learning_rate": 4.76717032967033e-05, + "loss": 0.8407, + "step": 1695 + }, + { + "epoch": 4.65934065934066, + "grad_norm": 17.812753677368164, + "learning_rate": 4.767032967032967e-05, + "loss": 0.8893, + "step": 1696 + }, + { + "epoch": 4.662087912087912, + "grad_norm": 16.40890121459961, + "learning_rate": 4.766895604395605e-05, + "loss": 0.7745, + "step": 1697 + }, + { + "epoch": 4.664835164835165, + "grad_norm": 12.961435317993164, + "learning_rate": 4.766758241758242e-05, + "loss": 0.6288, + "step": 1698 + }, + { + "epoch": 4.667582417582418, + "grad_norm": 11.700603485107422, + "learning_rate": 4.76662087912088e-05, + "loss": 0.3957, + "step": 1699 + }, + { + "epoch": 4.670329670329671, + "grad_norm": 11.386565208435059, + "learning_rate": 4.766483516483517e-05, + "loss": 0.4102, + "step": 1700 + }, + { + "epoch": 4.673076923076923, + "grad_norm": 11.183863639831543, + "learning_rate": 4.7663461538461543e-05, + "loss": 0.3842, + "step": 1701 + }, + { + "epoch": 4.675824175824176, + "grad_norm": 15.662540435791016, + "learning_rate": 4.7662087912087914e-05, + "loss": 0.5212, + "step": 1702 + }, + { + "epoch": 4.678571428571429, + "grad_norm": 11.035184860229492, + "learning_rate": 4.7660714285714284e-05, + "loss": 0.4889, + "step": 1703 + }, + { + "epoch": 4.681318681318682, + "grad_norm": 16.905887603759766, + "learning_rate": 4.765934065934066e-05, + "loss": 0.7472, + "step": 1704 + }, + { + "epoch": 4.684065934065934, + "grad_norm": 15.56137752532959, + "learning_rate": 4.765796703296704e-05, + "loss": 0.951, + "step": 1705 + }, + { + "epoch": 4.686813186813187, + "grad_norm": 13.406933784484863, + "learning_rate": 4.765659340659341e-05, + "loss": 0.7442, + "step": 1706 + }, + { + "epoch": 4.68956043956044, + "grad_norm": 14.60519790649414, + "learning_rate": 4.7655219780219784e-05, + "loss": 0.77, + "step": 1707 + }, + { + "epoch": 4.6923076923076925, + "grad_norm": 11.168946266174316, + "learning_rate": 4.7653846153846154e-05, + "loss": 0.5352, + "step": 1708 + }, + { + "epoch": 4.695054945054945, + "grad_norm": 10.660670280456543, + "learning_rate": 4.765247252747253e-05, + "loss": 0.3572, + "step": 1709 + }, + { + "epoch": 4.697802197802198, + "grad_norm": 19.066814422607422, + "learning_rate": 4.76510989010989e-05, + "loss": 0.8895, + "step": 1710 + }, + { + "epoch": 4.700549450549451, + "grad_norm": 14.175457954406738, + "learning_rate": 4.764972527472528e-05, + "loss": 0.8016, + "step": 1711 + }, + { + "epoch": 4.7032967032967035, + "grad_norm": 18.029224395751953, + "learning_rate": 4.7648351648351654e-05, + "loss": 0.9733, + "step": 1712 + }, + { + "epoch": 4.706043956043956, + "grad_norm": 9.820085525512695, + "learning_rate": 4.7646978021978024e-05, + "loss": 0.4073, + "step": 1713 + }, + { + "epoch": 4.708791208791209, + "grad_norm": 12.235942840576172, + "learning_rate": 4.76456043956044e-05, + "loss": 0.5413, + "step": 1714 + }, + { + "epoch": 4.711538461538462, + "grad_norm": 13.03597354888916, + "learning_rate": 4.764423076923077e-05, + "loss": 0.5973, + "step": 1715 + }, + { + "epoch": 4.714285714285714, + "grad_norm": 7.735022068023682, + "learning_rate": 4.764285714285715e-05, + "loss": 0.2939, + "step": 1716 + }, + { + "epoch": 4.717032967032967, + "grad_norm": 11.043665885925293, + "learning_rate": 4.764148351648352e-05, + "loss": 0.6657, + "step": 1717 + }, + { + "epoch": 4.71978021978022, + "grad_norm": 14.228971481323242, + "learning_rate": 4.764010989010989e-05, + "loss": 0.7547, + "step": 1718 + }, + { + "epoch": 4.722527472527473, + "grad_norm": 9.7803955078125, + "learning_rate": 4.7638736263736265e-05, + "loss": 0.3789, + "step": 1719 + }, + { + "epoch": 4.725274725274725, + "grad_norm": 18.814762115478516, + "learning_rate": 4.7637362637362635e-05, + "loss": 0.8854, + "step": 1720 + }, + { + "epoch": 4.728021978021978, + "grad_norm": 10.840907096862793, + "learning_rate": 4.763598901098901e-05, + "loss": 0.4571, + "step": 1721 + }, + { + "epoch": 4.730769230769231, + "grad_norm": 14.498320579528809, + "learning_rate": 4.763461538461539e-05, + "loss": 1.0057, + "step": 1722 + }, + { + "epoch": 4.733516483516484, + "grad_norm": 16.29479217529297, + "learning_rate": 4.763324175824176e-05, + "loss": 0.826, + "step": 1723 + }, + { + "epoch": 4.736263736263736, + "grad_norm": 13.416084289550781, + "learning_rate": 4.7631868131868135e-05, + "loss": 0.4974, + "step": 1724 + }, + { + "epoch": 4.739010989010989, + "grad_norm": 15.683382034301758, + "learning_rate": 4.7630494505494505e-05, + "loss": 0.6624, + "step": 1725 + }, + { + "epoch": 4.741758241758242, + "grad_norm": 13.649749755859375, + "learning_rate": 4.762912087912088e-05, + "loss": 0.5709, + "step": 1726 + }, + { + "epoch": 4.7445054945054945, + "grad_norm": 12.441527366638184, + "learning_rate": 4.762774725274726e-05, + "loss": 0.628, + "step": 1727 + }, + { + "epoch": 4.747252747252747, + "grad_norm": 13.8370361328125, + "learning_rate": 4.762637362637363e-05, + "loss": 0.5956, + "step": 1728 + }, + { + "epoch": 4.75, + "grad_norm": 16.219738006591797, + "learning_rate": 4.7625000000000006e-05, + "loss": 0.6961, + "step": 1729 + }, + { + "epoch": 4.752747252747253, + "grad_norm": 17.48995590209961, + "learning_rate": 4.7623626373626376e-05, + "loss": 0.7058, + "step": 1730 + }, + { + "epoch": 4.7554945054945055, + "grad_norm": 14.696541786193848, + "learning_rate": 4.762225274725275e-05, + "loss": 0.72, + "step": 1731 + }, + { + "epoch": 4.758241758241758, + "grad_norm": 16.29581642150879, + "learning_rate": 4.762087912087912e-05, + "loss": 0.8032, + "step": 1732 + }, + { + "epoch": 4.760989010989011, + "grad_norm": 14.35117244720459, + "learning_rate": 4.761950549450549e-05, + "loss": 0.494, + "step": 1733 + }, + { + "epoch": 4.763736263736264, + "grad_norm": 14.546198844909668, + "learning_rate": 4.761813186813187e-05, + "loss": 0.5816, + "step": 1734 + }, + { + "epoch": 4.766483516483516, + "grad_norm": 14.696683883666992, + "learning_rate": 4.761675824175824e-05, + "loss": 0.7382, + "step": 1735 + }, + { + "epoch": 4.769230769230769, + "grad_norm": 13.800020217895508, + "learning_rate": 4.7615384615384616e-05, + "loss": 0.7107, + "step": 1736 + }, + { + "epoch": 4.771978021978022, + "grad_norm": 22.33713722229004, + "learning_rate": 4.761401098901099e-05, + "loss": 1.0621, + "step": 1737 + }, + { + "epoch": 4.774725274725275, + "grad_norm": 15.423481941223145, + "learning_rate": 4.761263736263736e-05, + "loss": 0.6351, + "step": 1738 + }, + { + "epoch": 4.777472527472527, + "grad_norm": 18.677370071411133, + "learning_rate": 4.761126373626374e-05, + "loss": 0.9196, + "step": 1739 + }, + { + "epoch": 4.78021978021978, + "grad_norm": 14.268290519714355, + "learning_rate": 4.760989010989011e-05, + "loss": 0.9652, + "step": 1740 + }, + { + "epoch": 4.782967032967033, + "grad_norm": 15.4848051071167, + "learning_rate": 4.7608516483516487e-05, + "loss": 0.805, + "step": 1741 + }, + { + "epoch": 4.785714285714286, + "grad_norm": 9.508118629455566, + "learning_rate": 4.760714285714286e-05, + "loss": 0.3105, + "step": 1742 + }, + { + "epoch": 4.788461538461538, + "grad_norm": 13.370476722717285, + "learning_rate": 4.760576923076923e-05, + "loss": 0.4876, + "step": 1743 + }, + { + "epoch": 4.791208791208791, + "grad_norm": 15.492253303527832, + "learning_rate": 4.760439560439561e-05, + "loss": 0.7385, + "step": 1744 + }, + { + "epoch": 4.793956043956044, + "grad_norm": 15.981986999511719, + "learning_rate": 4.760302197802198e-05, + "loss": 0.6729, + "step": 1745 + }, + { + "epoch": 4.7967032967032965, + "grad_norm": 15.496569633483887, + "learning_rate": 4.760164835164835e-05, + "loss": 0.9168, + "step": 1746 + }, + { + "epoch": 4.799450549450549, + "grad_norm": 13.267698287963867, + "learning_rate": 4.760027472527473e-05, + "loss": 0.6448, + "step": 1747 + }, + { + "epoch": 4.802197802197802, + "grad_norm": 19.339256286621094, + "learning_rate": 4.75989010989011e-05, + "loss": 0.9265, + "step": 1748 + }, + { + "epoch": 4.804945054945055, + "grad_norm": 16.14868927001953, + "learning_rate": 4.7597527472527474e-05, + "loss": 0.9777, + "step": 1749 + }, + { + "epoch": 4.8076923076923075, + "grad_norm": 10.805839538574219, + "learning_rate": 4.7596153846153844e-05, + "loss": 0.4258, + "step": 1750 + }, + { + "epoch": 4.81043956043956, + "grad_norm": 11.172111511230469, + "learning_rate": 4.759478021978022e-05, + "loss": 0.3615, + "step": 1751 + }, + { + "epoch": 4.813186813186813, + "grad_norm": 12.729728698730469, + "learning_rate": 4.75934065934066e-05, + "loss": 0.5632, + "step": 1752 + }, + { + "epoch": 4.815934065934066, + "grad_norm": 14.494158744812012, + "learning_rate": 4.759203296703297e-05, + "loss": 0.6835, + "step": 1753 + }, + { + "epoch": 4.818681318681318, + "grad_norm": 11.553218841552734, + "learning_rate": 4.7590659340659344e-05, + "loss": 0.4527, + "step": 1754 + }, + { + "epoch": 4.821428571428571, + "grad_norm": 18.331832885742188, + "learning_rate": 4.7589285714285714e-05, + "loss": 0.7904, + "step": 1755 + }, + { + "epoch": 4.824175824175824, + "grad_norm": 15.745659828186035, + "learning_rate": 4.758791208791209e-05, + "loss": 0.6551, + "step": 1756 + }, + { + "epoch": 4.826923076923077, + "grad_norm": 12.54145622253418, + "learning_rate": 4.758653846153847e-05, + "loss": 0.4373, + "step": 1757 + }, + { + "epoch": 4.829670329670329, + "grad_norm": 14.699137687683105, + "learning_rate": 4.758516483516484e-05, + "loss": 0.4747, + "step": 1758 + }, + { + "epoch": 4.832417582417582, + "grad_norm": 15.90109920501709, + "learning_rate": 4.7583791208791215e-05, + "loss": 0.7897, + "step": 1759 + }, + { + "epoch": 4.835164835164835, + "grad_norm": 12.526437759399414, + "learning_rate": 4.7582417582417585e-05, + "loss": 0.5517, + "step": 1760 + }, + { + "epoch": 4.837912087912088, + "grad_norm": 15.410250663757324, + "learning_rate": 4.7581043956043955e-05, + "loss": 0.7535, + "step": 1761 + }, + { + "epoch": 4.84065934065934, + "grad_norm": 15.521352767944336, + "learning_rate": 4.757967032967033e-05, + "loss": 0.4947, + "step": 1762 + }, + { + "epoch": 4.843406593406593, + "grad_norm": 18.079730987548828, + "learning_rate": 4.75782967032967e-05, + "loss": 1.0558, + "step": 1763 + }, + { + "epoch": 4.846153846153846, + "grad_norm": 12.364667892456055, + "learning_rate": 4.757692307692308e-05, + "loss": 0.6228, + "step": 1764 + }, + { + "epoch": 4.8489010989010985, + "grad_norm": 12.860191345214844, + "learning_rate": 4.757554945054945e-05, + "loss": 0.3503, + "step": 1765 + }, + { + "epoch": 4.851648351648351, + "grad_norm": 16.773618698120117, + "learning_rate": 4.7574175824175825e-05, + "loss": 0.8191, + "step": 1766 + }, + { + "epoch": 4.854395604395604, + "grad_norm": 20.597265243530273, + "learning_rate": 4.75728021978022e-05, + "loss": 0.6925, + "step": 1767 + }, + { + "epoch": 4.857142857142857, + "grad_norm": 13.435858726501465, + "learning_rate": 4.757142857142857e-05, + "loss": 0.5655, + "step": 1768 + }, + { + "epoch": 4.8598901098901095, + "grad_norm": 17.0831241607666, + "learning_rate": 4.757005494505495e-05, + "loss": 0.9674, + "step": 1769 + }, + { + "epoch": 4.862637362637363, + "grad_norm": 12.810819625854492, + "learning_rate": 4.756868131868132e-05, + "loss": 0.5914, + "step": 1770 + }, + { + "epoch": 4.865384615384615, + "grad_norm": 11.367944717407227, + "learning_rate": 4.7567307692307696e-05, + "loss": 0.4851, + "step": 1771 + }, + { + "epoch": 4.868131868131869, + "grad_norm": 15.100316047668457, + "learning_rate": 4.756593406593407e-05, + "loss": 0.7324, + "step": 1772 + }, + { + "epoch": 4.8708791208791204, + "grad_norm": 12.82736587524414, + "learning_rate": 4.756456043956044e-05, + "loss": 0.5352, + "step": 1773 + }, + { + "epoch": 4.873626373626374, + "grad_norm": 11.641951560974121, + "learning_rate": 4.756318681318682e-05, + "loss": 0.4759, + "step": 1774 + }, + { + "epoch": 4.876373626373626, + "grad_norm": 16.15857696533203, + "learning_rate": 4.756181318681319e-05, + "loss": 0.9288, + "step": 1775 + }, + { + "epoch": 4.8791208791208796, + "grad_norm": 14.417330741882324, + "learning_rate": 4.756043956043956e-05, + "loss": 0.466, + "step": 1776 + }, + { + "epoch": 4.881868131868131, + "grad_norm": 15.491340637207031, + "learning_rate": 4.7559065934065936e-05, + "loss": 0.8262, + "step": 1777 + }, + { + "epoch": 4.884615384615385, + "grad_norm": 15.996907234191895, + "learning_rate": 4.7557692307692306e-05, + "loss": 0.7487, + "step": 1778 + }, + { + "epoch": 4.887362637362637, + "grad_norm": 12.055787086486816, + "learning_rate": 4.755631868131868e-05, + "loss": 0.5258, + "step": 1779 + }, + { + "epoch": 4.8901098901098905, + "grad_norm": 16.92466163635254, + "learning_rate": 4.755494505494505e-05, + "loss": 0.5361, + "step": 1780 + }, + { + "epoch": 4.892857142857143, + "grad_norm": 12.135360717773438, + "learning_rate": 4.755357142857143e-05, + "loss": 0.4926, + "step": 1781 + }, + { + "epoch": 4.895604395604396, + "grad_norm": 15.021578788757324, + "learning_rate": 4.7552197802197806e-05, + "loss": 0.7396, + "step": 1782 + }, + { + "epoch": 4.898351648351649, + "grad_norm": 12.84119701385498, + "learning_rate": 4.7550824175824176e-05, + "loss": 0.5125, + "step": 1783 + }, + { + "epoch": 4.9010989010989015, + "grad_norm": 18.289640426635742, + "learning_rate": 4.754945054945055e-05, + "loss": 1.1163, + "step": 1784 + }, + { + "epoch": 4.903846153846154, + "grad_norm": 17.344249725341797, + "learning_rate": 4.754807692307692e-05, + "loss": 0.9122, + "step": 1785 + }, + { + "epoch": 4.906593406593407, + "grad_norm": 13.925238609313965, + "learning_rate": 4.75467032967033e-05, + "loss": 0.533, + "step": 1786 + }, + { + "epoch": 4.90934065934066, + "grad_norm": 13.083922386169434, + "learning_rate": 4.754532967032968e-05, + "loss": 0.4653, + "step": 1787 + }, + { + "epoch": 4.912087912087912, + "grad_norm": 11.745787620544434, + "learning_rate": 4.754395604395605e-05, + "loss": 0.4747, + "step": 1788 + }, + { + "epoch": 4.914835164835165, + "grad_norm": 12.218159675598145, + "learning_rate": 4.7542582417582424e-05, + "loss": 0.5161, + "step": 1789 + }, + { + "epoch": 4.917582417582418, + "grad_norm": 14.870437622070312, + "learning_rate": 4.7541208791208794e-05, + "loss": 0.6644, + "step": 1790 + }, + { + "epoch": 4.920329670329671, + "grad_norm": 17.503854751586914, + "learning_rate": 4.7539835164835164e-05, + "loss": 0.6324, + "step": 1791 + }, + { + "epoch": 4.923076923076923, + "grad_norm": 10.778443336486816, + "learning_rate": 4.753846153846154e-05, + "loss": 0.3858, + "step": 1792 + }, + { + "epoch": 4.925824175824176, + "grad_norm": 19.164478302001953, + "learning_rate": 4.753708791208791e-05, + "loss": 1.1256, + "step": 1793 + }, + { + "epoch": 4.928571428571429, + "grad_norm": 16.176321029663086, + "learning_rate": 4.753571428571429e-05, + "loss": 0.6028, + "step": 1794 + }, + { + "epoch": 4.931318681318682, + "grad_norm": 14.1065034866333, + "learning_rate": 4.753434065934066e-05, + "loss": 0.7871, + "step": 1795 + }, + { + "epoch": 4.934065934065934, + "grad_norm": 18.450786590576172, + "learning_rate": 4.7532967032967034e-05, + "loss": 1.106, + "step": 1796 + }, + { + "epoch": 4.936813186813187, + "grad_norm": 17.895999908447266, + "learning_rate": 4.753159340659341e-05, + "loss": 1.2897, + "step": 1797 + }, + { + "epoch": 4.93956043956044, + "grad_norm": 14.6462984085083, + "learning_rate": 4.753021978021978e-05, + "loss": 0.7975, + "step": 1798 + }, + { + "epoch": 4.9423076923076925, + "grad_norm": 16.482473373413086, + "learning_rate": 4.752884615384616e-05, + "loss": 0.705, + "step": 1799 + }, + { + "epoch": 4.945054945054945, + "grad_norm": 16.758764266967773, + "learning_rate": 4.752747252747253e-05, + "loss": 0.9484, + "step": 1800 + }, + { + "epoch": 4.947802197802198, + "grad_norm": 11.192342758178711, + "learning_rate": 4.7526098901098904e-05, + "loss": 0.4449, + "step": 1801 + }, + { + "epoch": 4.950549450549451, + "grad_norm": 12.338055610656738, + "learning_rate": 4.752472527472528e-05, + "loss": 0.5068, + "step": 1802 + }, + { + "epoch": 4.9532967032967035, + "grad_norm": 18.38007164001465, + "learning_rate": 4.752335164835165e-05, + "loss": 0.9131, + "step": 1803 + }, + { + "epoch": 4.956043956043956, + "grad_norm": 7.937841415405273, + "learning_rate": 4.752197802197803e-05, + "loss": 0.2975, + "step": 1804 + }, + { + "epoch": 4.958791208791209, + "grad_norm": 12.752330780029297, + "learning_rate": 4.75206043956044e-05, + "loss": 0.7161, + "step": 1805 + }, + { + "epoch": 4.961538461538462, + "grad_norm": 12.064733505249023, + "learning_rate": 4.751923076923077e-05, + "loss": 0.4746, + "step": 1806 + }, + { + "epoch": 4.964285714285714, + "grad_norm": 16.58132553100586, + "learning_rate": 4.7517857142857145e-05, + "loss": 0.6455, + "step": 1807 + }, + { + "epoch": 4.967032967032967, + "grad_norm": 17.375524520874023, + "learning_rate": 4.7516483516483515e-05, + "loss": 1.1257, + "step": 1808 + }, + { + "epoch": 4.96978021978022, + "grad_norm": 12.431781768798828, + "learning_rate": 4.751510989010989e-05, + "loss": 0.535, + "step": 1809 + }, + { + "epoch": 4.972527472527473, + "grad_norm": 14.820554733276367, + "learning_rate": 4.751373626373626e-05, + "loss": 0.7832, + "step": 1810 + }, + { + "epoch": 4.975274725274725, + "grad_norm": 12.77883243560791, + "learning_rate": 4.751236263736264e-05, + "loss": 0.6168, + "step": 1811 + }, + { + "epoch": 4.978021978021978, + "grad_norm": 10.987071990966797, + "learning_rate": 4.7510989010989015e-05, + "loss": 0.3761, + "step": 1812 + }, + { + "epoch": 4.980769230769231, + "grad_norm": 13.883281707763672, + "learning_rate": 4.7509615384615385e-05, + "loss": 0.5975, + "step": 1813 + }, + { + "epoch": 4.983516483516484, + "grad_norm": 19.933130264282227, + "learning_rate": 4.750824175824176e-05, + "loss": 0.9062, + "step": 1814 + }, + { + "epoch": 4.986263736263736, + "grad_norm": 11.579184532165527, + "learning_rate": 4.750686813186813e-05, + "loss": 0.6854, + "step": 1815 + }, + { + "epoch": 4.989010989010989, + "grad_norm": 18.39486312866211, + "learning_rate": 4.750549450549451e-05, + "loss": 0.7586, + "step": 1816 + }, + { + "epoch": 4.991758241758242, + "grad_norm": 17.12263298034668, + "learning_rate": 4.7504120879120886e-05, + "loss": 0.8404, + "step": 1817 + }, + { + "epoch": 4.9945054945054945, + "grad_norm": 12.767090797424316, + "learning_rate": 4.7502747252747256e-05, + "loss": 0.4963, + "step": 1818 + }, + { + "epoch": 4.997252747252747, + "grad_norm": 12.91556167602539, + "learning_rate": 4.750137362637363e-05, + "loss": 0.5056, + "step": 1819 + }, + { + "epoch": 5.0, + "grad_norm": 44.361083984375, + "learning_rate": 4.75e-05, + "loss": 3.8423, + "step": 1820 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.45454545454545453, + "eval_f1": 0.41802672733000945, + "eval_f1_DuraRiadoRio_64x64": 0.3722943722943723, + "eval_f1_Mole_64x64": 0.5130890052356021, + "eval_f1_Quebrado_64x64": 0.38202247191011235, + "eval_f1_RiadoRio_64x64": 0.5431578947368421, + "eval_f1_RioFechado_64x64": 0.27956989247311825, + "eval_loss": 2.2944629192352295, + "eval_precision": 0.5792614950616446, + "eval_precision_DuraRiadoRio_64x64": 0.4942528735632184, + "eval_precision_Mole_64x64": 0.4117647058823529, + "eval_precision_Quebrado_64x64": 1.0, + "eval_precision_RiadoRio_64x64": 0.3993808049535604, + "eval_precision_RioFechado_64x64": 0.5909090909090909, + "eval_recall": 0.44941211597067793, + "eval_recall_DuraRiadoRio_64x64": 0.2986111111111111, + "eval_recall_Mole_64x64": 0.6805555555555556, + "eval_recall_Quebrado_64x64": 0.2361111111111111, + "eval_recall_RiadoRio_64x64": 0.8486842105263158, + "eval_recall_RioFechado_64x64": 0.18309859154929578, + "eval_runtime": 1.726, + "eval_samples_per_second": 420.626, + "eval_steps_per_second": 26.651, + "step": 1820 + }, + { + "epoch": 5.002747252747253, + "grad_norm": 13.547343254089355, + "learning_rate": 4.749862637362637e-05, + "loss": 0.6213, + "step": 1821 + }, + { + "epoch": 5.0054945054945055, + "grad_norm": 8.549741744995117, + "learning_rate": 4.749725274725275e-05, + "loss": 0.2859, + "step": 1822 + }, + { + "epoch": 5.008241758241758, + "grad_norm": 16.65281867980957, + "learning_rate": 4.749587912087912e-05, + "loss": 0.8265, + "step": 1823 + }, + { + "epoch": 5.010989010989011, + "grad_norm": 21.182462692260742, + "learning_rate": 4.7494505494505496e-05, + "loss": 0.9522, + "step": 1824 + }, + { + "epoch": 5.013736263736264, + "grad_norm": 24.494787216186523, + "learning_rate": 4.7493131868131866e-05, + "loss": 1.3646, + "step": 1825 + }, + { + "epoch": 5.016483516483516, + "grad_norm": 6.8992838859558105, + "learning_rate": 4.749175824175824e-05, + "loss": 0.23, + "step": 1826 + }, + { + "epoch": 5.019230769230769, + "grad_norm": 18.246261596679688, + "learning_rate": 4.749038461538462e-05, + "loss": 1.0587, + "step": 1827 + }, + { + "epoch": 5.021978021978022, + "grad_norm": 15.62402629852295, + "learning_rate": 4.748901098901099e-05, + "loss": 0.8344, + "step": 1828 + }, + { + "epoch": 5.024725274725275, + "grad_norm": 16.055225372314453, + "learning_rate": 4.748763736263737e-05, + "loss": 0.6934, + "step": 1829 + }, + { + "epoch": 5.027472527472527, + "grad_norm": 15.380712509155273, + "learning_rate": 4.748626373626374e-05, + "loss": 0.7613, + "step": 1830 + }, + { + "epoch": 5.03021978021978, + "grad_norm": 14.370957374572754, + "learning_rate": 4.7484890109890113e-05, + "loss": 0.6249, + "step": 1831 + }, + { + "epoch": 5.032967032967033, + "grad_norm": 13.028947830200195, + "learning_rate": 4.748351648351649e-05, + "loss": 0.6671, + "step": 1832 + }, + { + "epoch": 5.035714285714286, + "grad_norm": 18.210599899291992, + "learning_rate": 4.748214285714286e-05, + "loss": 0.8624, + "step": 1833 + }, + { + "epoch": 5.038461538461538, + "grad_norm": 8.966680526733398, + "learning_rate": 4.748076923076924e-05, + "loss": 0.3006, + "step": 1834 + }, + { + "epoch": 5.041208791208791, + "grad_norm": 14.595634460449219, + "learning_rate": 4.747939560439561e-05, + "loss": 0.7315, + "step": 1835 + }, + { + "epoch": 5.043956043956044, + "grad_norm": 18.4862060546875, + "learning_rate": 4.747802197802198e-05, + "loss": 0.8712, + "step": 1836 + }, + { + "epoch": 5.0467032967032965, + "grad_norm": 15.934798240661621, + "learning_rate": 4.7476648351648354e-05, + "loss": 0.5746, + "step": 1837 + }, + { + "epoch": 5.049450549450549, + "grad_norm": 13.755273818969727, + "learning_rate": 4.7475274725274724e-05, + "loss": 0.6515, + "step": 1838 + }, + { + "epoch": 5.052197802197802, + "grad_norm": 10.6652193069458, + "learning_rate": 4.74739010989011e-05, + "loss": 0.3339, + "step": 1839 + }, + { + "epoch": 5.054945054945055, + "grad_norm": 14.46150016784668, + "learning_rate": 4.747252747252747e-05, + "loss": 0.4115, + "step": 1840 + }, + { + "epoch": 5.0576923076923075, + "grad_norm": 16.156585693359375, + "learning_rate": 4.747115384615385e-05, + "loss": 0.8318, + "step": 1841 + }, + { + "epoch": 5.06043956043956, + "grad_norm": 12.252687454223633, + "learning_rate": 4.7469780219780224e-05, + "loss": 0.4323, + "step": 1842 + }, + { + "epoch": 5.063186813186813, + "grad_norm": 15.718852043151855, + "learning_rate": 4.7468406593406594e-05, + "loss": 0.6115, + "step": 1843 + }, + { + "epoch": 5.065934065934066, + "grad_norm": 16.92159080505371, + "learning_rate": 4.746703296703297e-05, + "loss": 0.7863, + "step": 1844 + }, + { + "epoch": 5.068681318681318, + "grad_norm": 10.878663063049316, + "learning_rate": 4.746565934065934e-05, + "loss": 0.3785, + "step": 1845 + }, + { + "epoch": 5.071428571428571, + "grad_norm": 6.399317264556885, + "learning_rate": 4.746428571428572e-05, + "loss": 0.1755, + "step": 1846 + }, + { + "epoch": 5.074175824175824, + "grad_norm": 13.645618438720703, + "learning_rate": 4.7462912087912095e-05, + "loss": 0.6068, + "step": 1847 + }, + { + "epoch": 5.076923076923077, + "grad_norm": 19.580612182617188, + "learning_rate": 4.7461538461538465e-05, + "loss": 0.886, + "step": 1848 + }, + { + "epoch": 5.079670329670329, + "grad_norm": 14.016632080078125, + "learning_rate": 4.746016483516484e-05, + "loss": 0.5274, + "step": 1849 + }, + { + "epoch": 5.082417582417582, + "grad_norm": 15.62269401550293, + "learning_rate": 4.745879120879121e-05, + "loss": 0.9426, + "step": 1850 + }, + { + "epoch": 5.085164835164835, + "grad_norm": 12.133212089538574, + "learning_rate": 4.745741758241758e-05, + "loss": 0.3807, + "step": 1851 + }, + { + "epoch": 5.087912087912088, + "grad_norm": 16.3833065032959, + "learning_rate": 4.745604395604396e-05, + "loss": 0.6342, + "step": 1852 + }, + { + "epoch": 5.09065934065934, + "grad_norm": 16.502792358398438, + "learning_rate": 4.745467032967033e-05, + "loss": 0.716, + "step": 1853 + }, + { + "epoch": 5.093406593406593, + "grad_norm": 14.188520431518555, + "learning_rate": 4.7453296703296705e-05, + "loss": 0.6881, + "step": 1854 + }, + { + "epoch": 5.096153846153846, + "grad_norm": 9.010828971862793, + "learning_rate": 4.7451923076923075e-05, + "loss": 0.4043, + "step": 1855 + }, + { + "epoch": 5.0989010989010985, + "grad_norm": 16.02014923095703, + "learning_rate": 4.745054945054945e-05, + "loss": 0.9456, + "step": 1856 + }, + { + "epoch": 5.101648351648351, + "grad_norm": 13.638360023498535, + "learning_rate": 4.744917582417583e-05, + "loss": 0.4475, + "step": 1857 + }, + { + "epoch": 5.104395604395604, + "grad_norm": 10.477652549743652, + "learning_rate": 4.74478021978022e-05, + "loss": 0.3557, + "step": 1858 + }, + { + "epoch": 5.107142857142857, + "grad_norm": 14.708148002624512, + "learning_rate": 4.7446428571428576e-05, + "loss": 0.7467, + "step": 1859 + }, + { + "epoch": 5.1098901098901095, + "grad_norm": 9.16574764251709, + "learning_rate": 4.7445054945054946e-05, + "loss": 0.3588, + "step": 1860 + }, + { + "epoch": 5.112637362637362, + "grad_norm": 15.072232246398926, + "learning_rate": 4.744368131868132e-05, + "loss": 0.6344, + "step": 1861 + }, + { + "epoch": 5.115384615384615, + "grad_norm": 10.16650390625, + "learning_rate": 4.74423076923077e-05, + "loss": 0.444, + "step": 1862 + }, + { + "epoch": 5.118131868131868, + "grad_norm": 14.190801620483398, + "learning_rate": 4.744093406593407e-05, + "loss": 0.6146, + "step": 1863 + }, + { + "epoch": 5.1208791208791204, + "grad_norm": 16.986875534057617, + "learning_rate": 4.7439560439560446e-05, + "loss": 0.8242, + "step": 1864 + }, + { + "epoch": 5.123626373626374, + "grad_norm": 15.45579719543457, + "learning_rate": 4.7438186813186816e-05, + "loss": 0.8027, + "step": 1865 + }, + { + "epoch": 5.126373626373626, + "grad_norm": 15.672195434570312, + "learning_rate": 4.7436813186813186e-05, + "loss": 0.6546, + "step": 1866 + }, + { + "epoch": 5.1291208791208796, + "grad_norm": 20.6445255279541, + "learning_rate": 4.743543956043956e-05, + "loss": 1.0531, + "step": 1867 + }, + { + "epoch": 5.131868131868132, + "grad_norm": 13.699498176574707, + "learning_rate": 4.743406593406593e-05, + "loss": 0.6334, + "step": 1868 + }, + { + "epoch": 5.134615384615385, + "grad_norm": 12.24120044708252, + "learning_rate": 4.743269230769231e-05, + "loss": 0.4886, + "step": 1869 + }, + { + "epoch": 5.137362637362638, + "grad_norm": 15.90270709991455, + "learning_rate": 4.743131868131868e-05, + "loss": 0.812, + "step": 1870 + }, + { + "epoch": 5.1401098901098905, + "grad_norm": 18.58860206604004, + "learning_rate": 4.7429945054945057e-05, + "loss": 0.9857, + "step": 1871 + }, + { + "epoch": 5.142857142857143, + "grad_norm": 13.102278709411621, + "learning_rate": 4.742857142857143e-05, + "loss": 0.622, + "step": 1872 + }, + { + "epoch": 5.145604395604396, + "grad_norm": 15.886469841003418, + "learning_rate": 4.74271978021978e-05, + "loss": 0.5174, + "step": 1873 + }, + { + "epoch": 5.148351648351649, + "grad_norm": 14.0157470703125, + "learning_rate": 4.742582417582418e-05, + "loss": 0.6695, + "step": 1874 + }, + { + "epoch": 5.1510989010989015, + "grad_norm": 20.992124557495117, + "learning_rate": 4.742445054945055e-05, + "loss": 1.0888, + "step": 1875 + }, + { + "epoch": 5.153846153846154, + "grad_norm": 21.356447219848633, + "learning_rate": 4.742307692307693e-05, + "loss": 1.0326, + "step": 1876 + }, + { + "epoch": 5.156593406593407, + "grad_norm": 12.597246170043945, + "learning_rate": 4.7421703296703304e-05, + "loss": 0.3449, + "step": 1877 + }, + { + "epoch": 5.15934065934066, + "grad_norm": 16.79010009765625, + "learning_rate": 4.7420329670329674e-05, + "loss": 0.6462, + "step": 1878 + }, + { + "epoch": 5.162087912087912, + "grad_norm": 12.670523643493652, + "learning_rate": 4.741895604395605e-05, + "loss": 0.6482, + "step": 1879 + }, + { + "epoch": 5.164835164835165, + "grad_norm": 14.338422775268555, + "learning_rate": 4.741758241758242e-05, + "loss": 0.8761, + "step": 1880 + }, + { + "epoch": 5.167582417582418, + "grad_norm": 11.170743942260742, + "learning_rate": 4.741620879120879e-05, + "loss": 0.5404, + "step": 1881 + }, + { + "epoch": 5.170329670329671, + "grad_norm": 12.334281921386719, + "learning_rate": 4.741483516483517e-05, + "loss": 0.4444, + "step": 1882 + }, + { + "epoch": 5.173076923076923, + "grad_norm": 14.423274993896484, + "learning_rate": 4.741346153846154e-05, + "loss": 0.7077, + "step": 1883 + }, + { + "epoch": 5.175824175824176, + "grad_norm": 12.347402572631836, + "learning_rate": 4.7412087912087914e-05, + "loss": 0.4374, + "step": 1884 + }, + { + "epoch": 5.178571428571429, + "grad_norm": 17.28289222717285, + "learning_rate": 4.7410714285714284e-05, + "loss": 0.7728, + "step": 1885 + }, + { + "epoch": 5.181318681318682, + "grad_norm": 12.90137004852295, + "learning_rate": 4.740934065934066e-05, + "loss": 0.5156, + "step": 1886 + }, + { + "epoch": 5.184065934065934, + "grad_norm": 13.196690559387207, + "learning_rate": 4.740796703296704e-05, + "loss": 0.6136, + "step": 1887 + }, + { + "epoch": 5.186813186813187, + "grad_norm": 19.37807846069336, + "learning_rate": 4.740659340659341e-05, + "loss": 0.811, + "step": 1888 + }, + { + "epoch": 5.18956043956044, + "grad_norm": 13.80131721496582, + "learning_rate": 4.7405219780219785e-05, + "loss": 0.5867, + "step": 1889 + }, + { + "epoch": 5.1923076923076925, + "grad_norm": 9.918821334838867, + "learning_rate": 4.7403846153846155e-05, + "loss": 0.4889, + "step": 1890 + }, + { + "epoch": 5.195054945054945, + "grad_norm": 12.327982902526855, + "learning_rate": 4.740247252747253e-05, + "loss": 0.5176, + "step": 1891 + }, + { + "epoch": 5.197802197802198, + "grad_norm": 12.47840404510498, + "learning_rate": 4.740109890109891e-05, + "loss": 0.572, + "step": 1892 + }, + { + "epoch": 5.200549450549451, + "grad_norm": 16.40585708618164, + "learning_rate": 4.739972527472528e-05, + "loss": 0.8399, + "step": 1893 + }, + { + "epoch": 5.2032967032967035, + "grad_norm": 16.554786682128906, + "learning_rate": 4.7398351648351655e-05, + "loss": 0.7656, + "step": 1894 + }, + { + "epoch": 5.206043956043956, + "grad_norm": 16.224246978759766, + "learning_rate": 4.7396978021978025e-05, + "loss": 0.8904, + "step": 1895 + }, + { + "epoch": 5.208791208791209, + "grad_norm": 14.170382499694824, + "learning_rate": 4.7395604395604395e-05, + "loss": 0.5822, + "step": 1896 + }, + { + "epoch": 5.211538461538462, + "grad_norm": 23.537458419799805, + "learning_rate": 4.739423076923077e-05, + "loss": 1.1968, + "step": 1897 + }, + { + "epoch": 5.214285714285714, + "grad_norm": 10.351757049560547, + "learning_rate": 4.739285714285714e-05, + "loss": 0.4462, + "step": 1898 + }, + { + "epoch": 5.217032967032967, + "grad_norm": 16.574186325073242, + "learning_rate": 4.739148351648352e-05, + "loss": 0.9182, + "step": 1899 + }, + { + "epoch": 5.21978021978022, + "grad_norm": 9.47105884552002, + "learning_rate": 4.739010989010989e-05, + "loss": 0.545, + "step": 1900 + }, + { + "epoch": 5.222527472527473, + "grad_norm": 13.378235816955566, + "learning_rate": 4.7388736263736266e-05, + "loss": 0.7248, + "step": 1901 + }, + { + "epoch": 5.225274725274725, + "grad_norm": 20.081897735595703, + "learning_rate": 4.738736263736264e-05, + "loss": 1.1545, + "step": 1902 + }, + { + "epoch": 5.228021978021978, + "grad_norm": 15.141491889953613, + "learning_rate": 4.738598901098901e-05, + "loss": 0.7634, + "step": 1903 + }, + { + "epoch": 5.230769230769231, + "grad_norm": 15.783458709716797, + "learning_rate": 4.738461538461539e-05, + "loss": 0.6616, + "step": 1904 + }, + { + "epoch": 5.233516483516484, + "grad_norm": 14.41890811920166, + "learning_rate": 4.738324175824176e-05, + "loss": 0.6208, + "step": 1905 + }, + { + "epoch": 5.236263736263736, + "grad_norm": 12.550251960754395, + "learning_rate": 4.7381868131868136e-05, + "loss": 0.6673, + "step": 1906 + }, + { + "epoch": 5.239010989010989, + "grad_norm": 15.834924697875977, + "learning_rate": 4.738049450549451e-05, + "loss": 1.0323, + "step": 1907 + }, + { + "epoch": 5.241758241758242, + "grad_norm": 9.989380836486816, + "learning_rate": 4.737912087912088e-05, + "loss": 0.438, + "step": 1908 + }, + { + "epoch": 5.2445054945054945, + "grad_norm": 11.198132514953613, + "learning_rate": 4.737774725274726e-05, + "loss": 0.3602, + "step": 1909 + }, + { + "epoch": 5.247252747252747, + "grad_norm": 17.97982406616211, + "learning_rate": 4.737637362637363e-05, + "loss": 0.9239, + "step": 1910 + }, + { + "epoch": 5.25, + "grad_norm": 10.676671028137207, + "learning_rate": 4.7375e-05, + "loss": 0.3724, + "step": 1911 + }, + { + "epoch": 5.252747252747253, + "grad_norm": 15.101442337036133, + "learning_rate": 4.7373626373626376e-05, + "loss": 0.8088, + "step": 1912 + }, + { + "epoch": 5.2554945054945055, + "grad_norm": 14.17904281616211, + "learning_rate": 4.7372252747252746e-05, + "loss": 0.5632, + "step": 1913 + }, + { + "epoch": 5.258241758241758, + "grad_norm": 18.29233169555664, + "learning_rate": 4.737087912087912e-05, + "loss": 0.8733, + "step": 1914 + }, + { + "epoch": 5.260989010989011, + "grad_norm": 9.339506149291992, + "learning_rate": 4.736950549450549e-05, + "loss": 0.4044, + "step": 1915 + }, + { + "epoch": 5.263736263736264, + "grad_norm": 14.090872764587402, + "learning_rate": 4.736813186813187e-05, + "loss": 0.8351, + "step": 1916 + }, + { + "epoch": 5.266483516483516, + "grad_norm": 11.024155616760254, + "learning_rate": 4.736675824175825e-05, + "loss": 0.499, + "step": 1917 + }, + { + "epoch": 5.269230769230769, + "grad_norm": 10.545353889465332, + "learning_rate": 4.736538461538462e-05, + "loss": 0.4469, + "step": 1918 + }, + { + "epoch": 5.271978021978022, + "grad_norm": 12.50690746307373, + "learning_rate": 4.7364010989010994e-05, + "loss": 0.4179, + "step": 1919 + }, + { + "epoch": 5.274725274725275, + "grad_norm": 14.339223861694336, + "learning_rate": 4.7362637362637364e-05, + "loss": 0.7727, + "step": 1920 + }, + { + "epoch": 5.277472527472527, + "grad_norm": 12.633584022521973, + "learning_rate": 4.736126373626374e-05, + "loss": 0.5219, + "step": 1921 + }, + { + "epoch": 5.28021978021978, + "grad_norm": 17.172700881958008, + "learning_rate": 4.735989010989012e-05, + "loss": 0.8612, + "step": 1922 + }, + { + "epoch": 5.282967032967033, + "grad_norm": 13.00218677520752, + "learning_rate": 4.735851648351649e-05, + "loss": 0.5614, + "step": 1923 + }, + { + "epoch": 5.285714285714286, + "grad_norm": 14.776264190673828, + "learning_rate": 4.7357142857142864e-05, + "loss": 0.7566, + "step": 1924 + }, + { + "epoch": 5.288461538461538, + "grad_norm": 18.205591201782227, + "learning_rate": 4.7355769230769234e-05, + "loss": 0.8385, + "step": 1925 + }, + { + "epoch": 5.291208791208791, + "grad_norm": 9.609884262084961, + "learning_rate": 4.7354395604395604e-05, + "loss": 0.3464, + "step": 1926 + }, + { + "epoch": 5.293956043956044, + "grad_norm": 12.175902366638184, + "learning_rate": 4.735302197802198e-05, + "loss": 0.5152, + "step": 1927 + }, + { + "epoch": 5.2967032967032965, + "grad_norm": 17.902870178222656, + "learning_rate": 4.735164835164835e-05, + "loss": 0.7936, + "step": 1928 + }, + { + "epoch": 5.299450549450549, + "grad_norm": 18.79924964904785, + "learning_rate": 4.735027472527473e-05, + "loss": 1.2151, + "step": 1929 + }, + { + "epoch": 5.302197802197802, + "grad_norm": 14.083861351013184, + "learning_rate": 4.73489010989011e-05, + "loss": 0.6412, + "step": 1930 + }, + { + "epoch": 5.304945054945055, + "grad_norm": 13.758096694946289, + "learning_rate": 4.7347527472527474e-05, + "loss": 0.6364, + "step": 1931 + }, + { + "epoch": 5.3076923076923075, + "grad_norm": 13.445198059082031, + "learning_rate": 4.734615384615385e-05, + "loss": 0.5529, + "step": 1932 + }, + { + "epoch": 5.31043956043956, + "grad_norm": 17.69482421875, + "learning_rate": 4.734478021978022e-05, + "loss": 0.8301, + "step": 1933 + }, + { + "epoch": 5.313186813186813, + "grad_norm": 10.669268608093262, + "learning_rate": 4.73434065934066e-05, + "loss": 0.5271, + "step": 1934 + }, + { + "epoch": 5.315934065934066, + "grad_norm": 13.547107696533203, + "learning_rate": 4.734203296703297e-05, + "loss": 0.5163, + "step": 1935 + }, + { + "epoch": 5.318681318681318, + "grad_norm": 14.693496704101562, + "learning_rate": 4.7340659340659345e-05, + "loss": 0.6817, + "step": 1936 + }, + { + "epoch": 5.321428571428571, + "grad_norm": 15.5437593460083, + "learning_rate": 4.7339285714285715e-05, + "loss": 0.7194, + "step": 1937 + }, + { + "epoch": 5.324175824175824, + "grad_norm": 12.043641090393066, + "learning_rate": 4.733791208791209e-05, + "loss": 0.6527, + "step": 1938 + }, + { + "epoch": 5.326923076923077, + "grad_norm": 14.692059516906738, + "learning_rate": 4.733653846153847e-05, + "loss": 0.7341, + "step": 1939 + }, + { + "epoch": 5.329670329670329, + "grad_norm": 15.909791946411133, + "learning_rate": 4.733516483516484e-05, + "loss": 0.7962, + "step": 1940 + }, + { + "epoch": 5.332417582417582, + "grad_norm": 13.411978721618652, + "learning_rate": 4.733379120879121e-05, + "loss": 0.5234, + "step": 1941 + }, + { + "epoch": 5.335164835164835, + "grad_norm": 15.69485855102539, + "learning_rate": 4.733241758241758e-05, + "loss": 0.8659, + "step": 1942 + }, + { + "epoch": 5.337912087912088, + "grad_norm": 12.800731658935547, + "learning_rate": 4.7331043956043955e-05, + "loss": 0.6585, + "step": 1943 + }, + { + "epoch": 5.34065934065934, + "grad_norm": 19.89139747619629, + "learning_rate": 4.732967032967033e-05, + "loss": 0.8654, + "step": 1944 + }, + { + "epoch": 5.343406593406593, + "grad_norm": 14.788527488708496, + "learning_rate": 4.73282967032967e-05, + "loss": 0.5118, + "step": 1945 + }, + { + "epoch": 5.346153846153846, + "grad_norm": 17.252662658691406, + "learning_rate": 4.732692307692308e-05, + "loss": 1.0544, + "step": 1946 + }, + { + "epoch": 5.3489010989010985, + "grad_norm": 18.148252487182617, + "learning_rate": 4.732554945054945e-05, + "loss": 0.7292, + "step": 1947 + }, + { + "epoch": 5.351648351648351, + "grad_norm": 14.559661865234375, + "learning_rate": 4.7324175824175826e-05, + "loss": 0.5868, + "step": 1948 + }, + { + "epoch": 5.354395604395604, + "grad_norm": 16.720794677734375, + "learning_rate": 4.73228021978022e-05, + "loss": 0.6564, + "step": 1949 + }, + { + "epoch": 5.357142857142857, + "grad_norm": 17.433399200439453, + "learning_rate": 4.732142857142857e-05, + "loss": 1.0505, + "step": 1950 + }, + { + "epoch": 5.3598901098901095, + "grad_norm": 13.025655746459961, + "learning_rate": 4.732005494505495e-05, + "loss": 0.9605, + "step": 1951 + }, + { + "epoch": 5.362637362637362, + "grad_norm": 9.960747718811035, + "learning_rate": 4.731868131868132e-05, + "loss": 0.4254, + "step": 1952 + }, + { + "epoch": 5.365384615384615, + "grad_norm": 11.176366806030273, + "learning_rate": 4.7317307692307696e-05, + "loss": 0.6108, + "step": 1953 + }, + { + "epoch": 5.368131868131869, + "grad_norm": 13.595830917358398, + "learning_rate": 4.731593406593407e-05, + "loss": 0.6377, + "step": 1954 + }, + { + "epoch": 5.3708791208791204, + "grad_norm": 11.704704284667969, + "learning_rate": 4.731456043956044e-05, + "loss": 0.5486, + "step": 1955 + }, + { + "epoch": 5.373626373626374, + "grad_norm": 18.170717239379883, + "learning_rate": 4.731318681318681e-05, + "loss": 0.7942, + "step": 1956 + }, + { + "epoch": 5.376373626373626, + "grad_norm": 16.08589744567871, + "learning_rate": 4.731181318681318e-05, + "loss": 0.6163, + "step": 1957 + }, + { + "epoch": 5.3791208791208796, + "grad_norm": 10.337196350097656, + "learning_rate": 4.731043956043956e-05, + "loss": 0.3817, + "step": 1958 + }, + { + "epoch": 5.381868131868131, + "grad_norm": 13.924397468566895, + "learning_rate": 4.730906593406594e-05, + "loss": 0.4491, + "step": 1959 + }, + { + "epoch": 5.384615384615385, + "grad_norm": 14.787561416625977, + "learning_rate": 4.730769230769231e-05, + "loss": 0.8351, + "step": 1960 + }, + { + "epoch": 5.387362637362638, + "grad_norm": 14.429831504821777, + "learning_rate": 4.7306318681318683e-05, + "loss": 0.483, + "step": 1961 + }, + { + "epoch": 5.3901098901098905, + "grad_norm": 14.29183292388916, + "learning_rate": 4.7304945054945053e-05, + "loss": 0.6641, + "step": 1962 + }, + { + "epoch": 5.392857142857143, + "grad_norm": 7.880934238433838, + "learning_rate": 4.730357142857143e-05, + "loss": 0.2575, + "step": 1963 + }, + { + "epoch": 5.395604395604396, + "grad_norm": 13.2633638381958, + "learning_rate": 4.730219780219781e-05, + "loss": 0.4805, + "step": 1964 + }, + { + "epoch": 5.398351648351649, + "grad_norm": 11.936156272888184, + "learning_rate": 4.730082417582418e-05, + "loss": 0.7065, + "step": 1965 + }, + { + "epoch": 5.4010989010989015, + "grad_norm": 14.325133323669434, + "learning_rate": 4.7299450549450554e-05, + "loss": 0.5028, + "step": 1966 + }, + { + "epoch": 5.403846153846154, + "grad_norm": 8.662582397460938, + "learning_rate": 4.7298076923076924e-05, + "loss": 0.4337, + "step": 1967 + }, + { + "epoch": 5.406593406593407, + "grad_norm": 20.307294845581055, + "learning_rate": 4.72967032967033e-05, + "loss": 1.1528, + "step": 1968 + }, + { + "epoch": 5.40934065934066, + "grad_norm": 11.65078353881836, + "learning_rate": 4.729532967032968e-05, + "loss": 0.4561, + "step": 1969 + }, + { + "epoch": 5.412087912087912, + "grad_norm": 13.453124046325684, + "learning_rate": 4.729395604395605e-05, + "loss": 0.5679, + "step": 1970 + }, + { + "epoch": 5.414835164835165, + "grad_norm": 14.381767272949219, + "learning_rate": 4.729258241758242e-05, + "loss": 0.6404, + "step": 1971 + }, + { + "epoch": 5.417582417582418, + "grad_norm": 14.13640022277832, + "learning_rate": 4.729120879120879e-05, + "loss": 0.587, + "step": 1972 + }, + { + "epoch": 5.420329670329671, + "grad_norm": 13.978923797607422, + "learning_rate": 4.7289835164835164e-05, + "loss": 0.4949, + "step": 1973 + }, + { + "epoch": 5.423076923076923, + "grad_norm": 17.997304916381836, + "learning_rate": 4.728846153846154e-05, + "loss": 0.7759, + "step": 1974 + }, + { + "epoch": 5.425824175824176, + "grad_norm": 19.698036193847656, + "learning_rate": 4.728708791208791e-05, + "loss": 0.7017, + "step": 1975 + }, + { + "epoch": 5.428571428571429, + "grad_norm": 12.830765724182129, + "learning_rate": 4.728571428571429e-05, + "loss": 0.4609, + "step": 1976 + }, + { + "epoch": 5.431318681318682, + "grad_norm": 13.365849494934082, + "learning_rate": 4.728434065934066e-05, + "loss": 0.5262, + "step": 1977 + }, + { + "epoch": 5.434065934065934, + "grad_norm": 11.204157829284668, + "learning_rate": 4.7282967032967035e-05, + "loss": 0.4831, + "step": 1978 + }, + { + "epoch": 5.436813186813187, + "grad_norm": 10.846827507019043, + "learning_rate": 4.728159340659341e-05, + "loss": 0.5153, + "step": 1979 + }, + { + "epoch": 5.43956043956044, + "grad_norm": 10.15371036529541, + "learning_rate": 4.728021978021978e-05, + "loss": 0.4697, + "step": 1980 + }, + { + "epoch": 5.4423076923076925, + "grad_norm": 16.315555572509766, + "learning_rate": 4.727884615384616e-05, + "loss": 0.7386, + "step": 1981 + }, + { + "epoch": 5.445054945054945, + "grad_norm": 12.364234924316406, + "learning_rate": 4.727747252747253e-05, + "loss": 0.6476, + "step": 1982 + }, + { + "epoch": 5.447802197802198, + "grad_norm": 12.115828514099121, + "learning_rate": 4.7276098901098905e-05, + "loss": 0.4607, + "step": 1983 + }, + { + "epoch": 5.450549450549451, + "grad_norm": 17.083908081054688, + "learning_rate": 4.727472527472528e-05, + "loss": 0.7977, + "step": 1984 + }, + { + "epoch": 5.4532967032967035, + "grad_norm": 16.09782600402832, + "learning_rate": 4.727335164835165e-05, + "loss": 0.9858, + "step": 1985 + }, + { + "epoch": 5.456043956043956, + "grad_norm": 15.21256160736084, + "learning_rate": 4.727197802197802e-05, + "loss": 0.7485, + "step": 1986 + }, + { + "epoch": 5.458791208791209, + "grad_norm": 14.70250415802002, + "learning_rate": 4.727060439560439e-05, + "loss": 0.6236, + "step": 1987 + }, + { + "epoch": 5.461538461538462, + "grad_norm": 11.262351036071777, + "learning_rate": 4.726923076923077e-05, + "loss": 0.4523, + "step": 1988 + }, + { + "epoch": 5.464285714285714, + "grad_norm": 9.618062973022461, + "learning_rate": 4.7267857142857146e-05, + "loss": 0.2766, + "step": 1989 + }, + { + "epoch": 5.467032967032967, + "grad_norm": 9.827611923217773, + "learning_rate": 4.7266483516483516e-05, + "loss": 0.4359, + "step": 1990 + }, + { + "epoch": 5.46978021978022, + "grad_norm": 12.270039558410645, + "learning_rate": 4.726510989010989e-05, + "loss": 0.4413, + "step": 1991 + }, + { + "epoch": 5.472527472527473, + "grad_norm": 15.11429214477539, + "learning_rate": 4.726373626373626e-05, + "loss": 0.5994, + "step": 1992 + }, + { + "epoch": 5.475274725274725, + "grad_norm": 13.73464298248291, + "learning_rate": 4.726236263736264e-05, + "loss": 0.6973, + "step": 1993 + }, + { + "epoch": 5.478021978021978, + "grad_norm": 10.135263442993164, + "learning_rate": 4.7260989010989016e-05, + "loss": 0.4117, + "step": 1994 + }, + { + "epoch": 5.480769230769231, + "grad_norm": 15.827424049377441, + "learning_rate": 4.7259615384615386e-05, + "loss": 0.5824, + "step": 1995 + }, + { + "epoch": 5.483516483516484, + "grad_norm": 15.405771255493164, + "learning_rate": 4.725824175824176e-05, + "loss": 0.6545, + "step": 1996 + }, + { + "epoch": 5.486263736263736, + "grad_norm": 19.664287567138672, + "learning_rate": 4.725686813186813e-05, + "loss": 0.7742, + "step": 1997 + }, + { + "epoch": 5.489010989010989, + "grad_norm": 12.67995548248291, + "learning_rate": 4.725549450549451e-05, + "loss": 0.5862, + "step": 1998 + }, + { + "epoch": 5.491758241758242, + "grad_norm": 13.113446235656738, + "learning_rate": 4.7254120879120886e-05, + "loss": 0.6004, + "step": 1999 + }, + { + "epoch": 5.4945054945054945, + "grad_norm": 15.40587043762207, + "learning_rate": 4.7252747252747257e-05, + "loss": 0.6528, + "step": 2000 + }, + { + "epoch": 5.497252747252747, + "grad_norm": 16.927024841308594, + "learning_rate": 4.7251373626373627e-05, + "loss": 0.7428, + "step": 2001 + }, + { + "epoch": 5.5, + "grad_norm": 10.483575820922852, + "learning_rate": 4.7249999999999997e-05, + "loss": 0.3997, + "step": 2002 + }, + { + "epoch": 5.502747252747253, + "grad_norm": 11.602401733398438, + "learning_rate": 4.724862637362637e-05, + "loss": 0.4315, + "step": 2003 + }, + { + "epoch": 5.5054945054945055, + "grad_norm": 12.380098342895508, + "learning_rate": 4.724725274725275e-05, + "loss": 0.4749, + "step": 2004 + }, + { + "epoch": 5.508241758241758, + "grad_norm": 10.558863639831543, + "learning_rate": 4.724587912087912e-05, + "loss": 0.3744, + "step": 2005 + }, + { + "epoch": 5.510989010989011, + "grad_norm": 15.129894256591797, + "learning_rate": 4.72445054945055e-05, + "loss": 0.6707, + "step": 2006 + }, + { + "epoch": 5.513736263736264, + "grad_norm": 12.837348937988281, + "learning_rate": 4.724313186813187e-05, + "loss": 0.7279, + "step": 2007 + }, + { + "epoch": 5.516483516483516, + "grad_norm": 14.38437557220459, + "learning_rate": 4.7241758241758244e-05, + "loss": 0.7334, + "step": 2008 + }, + { + "epoch": 5.519230769230769, + "grad_norm": 11.71286678314209, + "learning_rate": 4.724038461538462e-05, + "loss": 0.5918, + "step": 2009 + }, + { + "epoch": 5.521978021978022, + "grad_norm": 11.375632286071777, + "learning_rate": 4.723901098901099e-05, + "loss": 0.356, + "step": 2010 + }, + { + "epoch": 5.524725274725275, + "grad_norm": 12.935187339782715, + "learning_rate": 4.723763736263737e-05, + "loss": 0.3966, + "step": 2011 + }, + { + "epoch": 5.527472527472527, + "grad_norm": 19.86518669128418, + "learning_rate": 4.723626373626374e-05, + "loss": 0.7776, + "step": 2012 + }, + { + "epoch": 5.53021978021978, + "grad_norm": 10.465386390686035, + "learning_rate": 4.7234890109890114e-05, + "loss": 0.4383, + "step": 2013 + }, + { + "epoch": 5.532967032967033, + "grad_norm": 19.962360382080078, + "learning_rate": 4.723351648351649e-05, + "loss": 1.2451, + "step": 2014 + }, + { + "epoch": 5.535714285714286, + "grad_norm": 16.02103614807129, + "learning_rate": 4.723214285714286e-05, + "loss": 0.9673, + "step": 2015 + }, + { + "epoch": 5.538461538461538, + "grad_norm": 15.954838752746582, + "learning_rate": 4.723076923076923e-05, + "loss": 0.7928, + "step": 2016 + }, + { + "epoch": 5.541208791208791, + "grad_norm": 13.766757011413574, + "learning_rate": 4.72293956043956e-05, + "loss": 0.5849, + "step": 2017 + }, + { + "epoch": 5.543956043956044, + "grad_norm": 9.042097091674805, + "learning_rate": 4.722802197802198e-05, + "loss": 0.2865, + "step": 2018 + }, + { + "epoch": 5.5467032967032965, + "grad_norm": 12.429973602294922, + "learning_rate": 4.7226648351648355e-05, + "loss": 0.5629, + "step": 2019 + }, + { + "epoch": 5.549450549450549, + "grad_norm": 10.871721267700195, + "learning_rate": 4.7225274725274725e-05, + "loss": 0.4334, + "step": 2020 + }, + { + "epoch": 5.552197802197802, + "grad_norm": 17.284408569335938, + "learning_rate": 4.72239010989011e-05, + "loss": 0.7058, + "step": 2021 + }, + { + "epoch": 5.554945054945055, + "grad_norm": 14.177847862243652, + "learning_rate": 4.722252747252747e-05, + "loss": 0.5576, + "step": 2022 + }, + { + "epoch": 5.5576923076923075, + "grad_norm": 14.038324356079102, + "learning_rate": 4.722115384615385e-05, + "loss": 0.5761, + "step": 2023 + }, + { + "epoch": 5.56043956043956, + "grad_norm": 14.945708274841309, + "learning_rate": 4.7219780219780225e-05, + "loss": 0.909, + "step": 2024 + }, + { + "epoch": 5.563186813186813, + "grad_norm": 16.81757354736328, + "learning_rate": 4.7218406593406595e-05, + "loss": 0.9367, + "step": 2025 + }, + { + "epoch": 5.565934065934066, + "grad_norm": 11.174369812011719, + "learning_rate": 4.721703296703297e-05, + "loss": 0.4358, + "step": 2026 + }, + { + "epoch": 5.568681318681318, + "grad_norm": 22.20759391784668, + "learning_rate": 4.721565934065934e-05, + "loss": 0.9105, + "step": 2027 + }, + { + "epoch": 5.571428571428571, + "grad_norm": 14.637234687805176, + "learning_rate": 4.721428571428572e-05, + "loss": 0.8033, + "step": 2028 + }, + { + "epoch": 5.574175824175824, + "grad_norm": 13.739927291870117, + "learning_rate": 4.7212912087912095e-05, + "loss": 0.5406, + "step": 2029 + }, + { + "epoch": 5.576923076923077, + "grad_norm": 12.898688316345215, + "learning_rate": 4.7211538461538465e-05, + "loss": 0.4354, + "step": 2030 + }, + { + "epoch": 5.579670329670329, + "grad_norm": 13.617535591125488, + "learning_rate": 4.7210164835164836e-05, + "loss": 0.8626, + "step": 2031 + }, + { + "epoch": 5.582417582417582, + "grad_norm": 13.865701675415039, + "learning_rate": 4.7208791208791206e-05, + "loss": 0.5841, + "step": 2032 + }, + { + "epoch": 5.585164835164835, + "grad_norm": 15.330277442932129, + "learning_rate": 4.720741758241758e-05, + "loss": 0.6825, + "step": 2033 + }, + { + "epoch": 5.587912087912088, + "grad_norm": 13.336155891418457, + "learning_rate": 4.720604395604396e-05, + "loss": 0.5984, + "step": 2034 + }, + { + "epoch": 5.59065934065934, + "grad_norm": 20.973318099975586, + "learning_rate": 4.720467032967033e-05, + "loss": 1.2264, + "step": 2035 + }, + { + "epoch": 5.593406593406593, + "grad_norm": 14.172182083129883, + "learning_rate": 4.7203296703296706e-05, + "loss": 0.5818, + "step": 2036 + }, + { + "epoch": 5.596153846153846, + "grad_norm": 18.661771774291992, + "learning_rate": 4.7201923076923076e-05, + "loss": 1.0051, + "step": 2037 + }, + { + "epoch": 5.5989010989010985, + "grad_norm": 18.316923141479492, + "learning_rate": 4.720054945054945e-05, + "loss": 0.6982, + "step": 2038 + }, + { + "epoch": 5.601648351648351, + "grad_norm": 13.271209716796875, + "learning_rate": 4.719917582417583e-05, + "loss": 0.4731, + "step": 2039 + }, + { + "epoch": 5.604395604395604, + "grad_norm": 16.830787658691406, + "learning_rate": 4.71978021978022e-05, + "loss": 0.6762, + "step": 2040 + }, + { + "epoch": 5.607142857142857, + "grad_norm": 9.857969284057617, + "learning_rate": 4.7196428571428576e-05, + "loss": 0.3806, + "step": 2041 + }, + { + "epoch": 5.6098901098901095, + "grad_norm": 10.726420402526855, + "learning_rate": 4.7195054945054946e-05, + "loss": 0.4697, + "step": 2042 + }, + { + "epoch": 5.612637362637363, + "grad_norm": 14.653743743896484, + "learning_rate": 4.719368131868132e-05, + "loss": 0.6923, + "step": 2043 + }, + { + "epoch": 5.615384615384615, + "grad_norm": 15.116546630859375, + "learning_rate": 4.71923076923077e-05, + "loss": 0.659, + "step": 2044 + }, + { + "epoch": 5.618131868131869, + "grad_norm": 18.170833587646484, + "learning_rate": 4.719093406593407e-05, + "loss": 0.9635, + "step": 2045 + }, + { + "epoch": 5.6208791208791204, + "grad_norm": 10.249771118164062, + "learning_rate": 4.718956043956044e-05, + "loss": 0.4267, + "step": 2046 + }, + { + "epoch": 5.623626373626374, + "grad_norm": 11.502979278564453, + "learning_rate": 4.718818681318681e-05, + "loss": 0.6037, + "step": 2047 + }, + { + "epoch": 5.626373626373626, + "grad_norm": 12.483367919921875, + "learning_rate": 4.718681318681319e-05, + "loss": 0.5635, + "step": 2048 + }, + { + "epoch": 5.6291208791208796, + "grad_norm": 14.869614601135254, + "learning_rate": 4.7185439560439564e-05, + "loss": 0.926, + "step": 2049 + }, + { + "epoch": 5.631868131868131, + "grad_norm": 14.385876655578613, + "learning_rate": 4.7184065934065934e-05, + "loss": 0.5636, + "step": 2050 + }, + { + "epoch": 5.634615384615385, + "grad_norm": 16.82333755493164, + "learning_rate": 4.718269230769231e-05, + "loss": 0.8519, + "step": 2051 + }, + { + "epoch": 5.637362637362637, + "grad_norm": 13.059895515441895, + "learning_rate": 4.718131868131868e-05, + "loss": 0.7378, + "step": 2052 + }, + { + "epoch": 5.6401098901098905, + "grad_norm": 15.020467758178711, + "learning_rate": 4.717994505494506e-05, + "loss": 0.8437, + "step": 2053 + }, + { + "epoch": 5.642857142857143, + "grad_norm": 14.388976097106934, + "learning_rate": 4.7178571428571434e-05, + "loss": 0.5695, + "step": 2054 + }, + { + "epoch": 5.645604395604396, + "grad_norm": 17.32850456237793, + "learning_rate": 4.7177197802197804e-05, + "loss": 0.8318, + "step": 2055 + }, + { + "epoch": 5.648351648351649, + "grad_norm": 15.12865161895752, + "learning_rate": 4.717582417582418e-05, + "loss": 0.6662, + "step": 2056 + }, + { + "epoch": 5.6510989010989015, + "grad_norm": 17.231670379638672, + "learning_rate": 4.717445054945055e-05, + "loss": 0.8779, + "step": 2057 + }, + { + "epoch": 5.653846153846154, + "grad_norm": 9.482114791870117, + "learning_rate": 4.717307692307693e-05, + "loss": 0.4161, + "step": 2058 + }, + { + "epoch": 5.656593406593407, + "grad_norm": 12.783285140991211, + "learning_rate": 4.7171703296703304e-05, + "loss": 0.4488, + "step": 2059 + }, + { + "epoch": 5.65934065934066, + "grad_norm": 13.312654495239258, + "learning_rate": 4.7170329670329674e-05, + "loss": 0.41, + "step": 2060 + }, + { + "epoch": 5.662087912087912, + "grad_norm": 16.302865982055664, + "learning_rate": 4.7168956043956044e-05, + "loss": 0.7052, + "step": 2061 + }, + { + "epoch": 5.664835164835165, + "grad_norm": 15.689266204833984, + "learning_rate": 4.7167582417582415e-05, + "loss": 0.6182, + "step": 2062 + }, + { + "epoch": 5.667582417582418, + "grad_norm": 10.985533714294434, + "learning_rate": 4.716620879120879e-05, + "loss": 0.5038, + "step": 2063 + }, + { + "epoch": 5.670329670329671, + "grad_norm": 9.465571403503418, + "learning_rate": 4.716483516483517e-05, + "loss": 0.4434, + "step": 2064 + }, + { + "epoch": 5.673076923076923, + "grad_norm": 16.281536102294922, + "learning_rate": 4.716346153846154e-05, + "loss": 0.7711, + "step": 2065 + }, + { + "epoch": 5.675824175824176, + "grad_norm": 11.196747779846191, + "learning_rate": 4.7162087912087915e-05, + "loss": 0.6492, + "step": 2066 + }, + { + "epoch": 5.678571428571429, + "grad_norm": 15.5492525100708, + "learning_rate": 4.7160714285714285e-05, + "loss": 0.671, + "step": 2067 + }, + { + "epoch": 5.681318681318682, + "grad_norm": 17.418577194213867, + "learning_rate": 4.715934065934066e-05, + "loss": 1.0666, + "step": 2068 + }, + { + "epoch": 5.684065934065934, + "grad_norm": 12.908575057983398, + "learning_rate": 4.715796703296704e-05, + "loss": 0.5881, + "step": 2069 + }, + { + "epoch": 5.686813186813187, + "grad_norm": 12.775712966918945, + "learning_rate": 4.715659340659341e-05, + "loss": 0.4243, + "step": 2070 + }, + { + "epoch": 5.68956043956044, + "grad_norm": 20.64731216430664, + "learning_rate": 4.7155219780219785e-05, + "loss": 0.9695, + "step": 2071 + }, + { + "epoch": 5.6923076923076925, + "grad_norm": 7.94806432723999, + "learning_rate": 4.7153846153846155e-05, + "loss": 0.2204, + "step": 2072 + }, + { + "epoch": 5.695054945054945, + "grad_norm": 15.974282264709473, + "learning_rate": 4.715247252747253e-05, + "loss": 0.6423, + "step": 2073 + }, + { + "epoch": 5.697802197802198, + "grad_norm": 9.187612533569336, + "learning_rate": 4.715109890109891e-05, + "loss": 0.3729, + "step": 2074 + }, + { + "epoch": 5.700549450549451, + "grad_norm": 9.330967903137207, + "learning_rate": 4.714972527472528e-05, + "loss": 0.3272, + "step": 2075 + }, + { + "epoch": 5.7032967032967035, + "grad_norm": 9.380051612854004, + "learning_rate": 4.714835164835165e-05, + "loss": 0.294, + "step": 2076 + }, + { + "epoch": 5.706043956043956, + "grad_norm": 10.432364463806152, + "learning_rate": 4.714697802197802e-05, + "loss": 0.4949, + "step": 2077 + }, + { + "epoch": 5.708791208791209, + "grad_norm": 13.136824607849121, + "learning_rate": 4.7145604395604396e-05, + "loss": 0.5611, + "step": 2078 + }, + { + "epoch": 5.711538461538462, + "grad_norm": 16.020967483520508, + "learning_rate": 4.714423076923077e-05, + "loss": 0.7482, + "step": 2079 + }, + { + "epoch": 5.714285714285714, + "grad_norm": 17.780750274658203, + "learning_rate": 4.714285714285714e-05, + "loss": 0.9261, + "step": 2080 + }, + { + "epoch": 5.717032967032967, + "grad_norm": 15.440974235534668, + "learning_rate": 4.714148351648352e-05, + "loss": 0.7948, + "step": 2081 + }, + { + "epoch": 5.71978021978022, + "grad_norm": 20.742042541503906, + "learning_rate": 4.714010989010989e-05, + "loss": 0.8058, + "step": 2082 + }, + { + "epoch": 5.722527472527473, + "grad_norm": 15.803894996643066, + "learning_rate": 4.7138736263736266e-05, + "loss": 1.0255, + "step": 2083 + }, + { + "epoch": 5.725274725274725, + "grad_norm": 13.849042892456055, + "learning_rate": 4.713736263736264e-05, + "loss": 0.5169, + "step": 2084 + }, + { + "epoch": 5.728021978021978, + "grad_norm": 19.908233642578125, + "learning_rate": 4.713598901098901e-05, + "loss": 1.0433, + "step": 2085 + }, + { + "epoch": 5.730769230769231, + "grad_norm": 17.26911163330078, + "learning_rate": 4.713461538461539e-05, + "loss": 0.5072, + "step": 2086 + }, + { + "epoch": 5.733516483516484, + "grad_norm": 16.980554580688477, + "learning_rate": 4.713324175824176e-05, + "loss": 0.8616, + "step": 2087 + }, + { + "epoch": 5.736263736263736, + "grad_norm": 15.31624698638916, + "learning_rate": 4.713186813186814e-05, + "loss": 0.6291, + "step": 2088 + }, + { + "epoch": 5.739010989010989, + "grad_norm": 11.574971199035645, + "learning_rate": 4.7130494505494513e-05, + "loss": 0.5203, + "step": 2089 + }, + { + "epoch": 5.741758241758242, + "grad_norm": 15.287267684936523, + "learning_rate": 4.7129120879120883e-05, + "loss": 1.0567, + "step": 2090 + }, + { + "epoch": 5.7445054945054945, + "grad_norm": 14.812471389770508, + "learning_rate": 4.7127747252747253e-05, + "loss": 0.6647, + "step": 2091 + }, + { + "epoch": 5.747252747252747, + "grad_norm": 12.864046096801758, + "learning_rate": 4.7126373626373623e-05, + "loss": 0.4652, + "step": 2092 + }, + { + "epoch": 5.75, + "grad_norm": 11.778188705444336, + "learning_rate": 4.7125e-05, + "loss": 0.4991, + "step": 2093 + }, + { + "epoch": 5.752747252747253, + "grad_norm": 13.065302848815918, + "learning_rate": 4.712362637362638e-05, + "loss": 0.4219, + "step": 2094 + }, + { + "epoch": 5.7554945054945055, + "grad_norm": 11.483388900756836, + "learning_rate": 4.712225274725275e-05, + "loss": 0.5728, + "step": 2095 + }, + { + "epoch": 5.758241758241758, + "grad_norm": 19.91908836364746, + "learning_rate": 4.7120879120879124e-05, + "loss": 0.7959, + "step": 2096 + }, + { + "epoch": 5.760989010989011, + "grad_norm": 16.470081329345703, + "learning_rate": 4.7119505494505494e-05, + "loss": 0.6538, + "step": 2097 + }, + { + "epoch": 5.763736263736264, + "grad_norm": 11.228363990783691, + "learning_rate": 4.711813186813187e-05, + "loss": 0.5057, + "step": 2098 + }, + { + "epoch": 5.766483516483516, + "grad_norm": 16.106115341186523, + "learning_rate": 4.711675824175825e-05, + "loss": 0.8286, + "step": 2099 + }, + { + "epoch": 5.769230769230769, + "grad_norm": 12.270888328552246, + "learning_rate": 4.711538461538462e-05, + "loss": 0.5556, + "step": 2100 + }, + { + "epoch": 5.771978021978022, + "grad_norm": 10.757462501525879, + "learning_rate": 4.7114010989010994e-05, + "loss": 0.5149, + "step": 2101 + }, + { + "epoch": 5.774725274725275, + "grad_norm": 9.302736282348633, + "learning_rate": 4.7112637362637364e-05, + "loss": 0.3888, + "step": 2102 + }, + { + "epoch": 5.777472527472527, + "grad_norm": 12.008350372314453, + "learning_rate": 4.711126373626374e-05, + "loss": 0.4653, + "step": 2103 + }, + { + "epoch": 5.78021978021978, + "grad_norm": 15.685662269592285, + "learning_rate": 4.710989010989012e-05, + "loss": 0.7015, + "step": 2104 + }, + { + "epoch": 5.782967032967033, + "grad_norm": 14.679983139038086, + "learning_rate": 4.710851648351649e-05, + "loss": 0.5517, + "step": 2105 + }, + { + "epoch": 5.785714285714286, + "grad_norm": 13.686928749084473, + "learning_rate": 4.710714285714286e-05, + "loss": 0.6369, + "step": 2106 + }, + { + "epoch": 5.788461538461538, + "grad_norm": 13.881559371948242, + "learning_rate": 4.710576923076923e-05, + "loss": 0.4415, + "step": 2107 + }, + { + "epoch": 5.791208791208791, + "grad_norm": 11.362165451049805, + "learning_rate": 4.7104395604395605e-05, + "loss": 0.4249, + "step": 2108 + }, + { + "epoch": 5.793956043956044, + "grad_norm": 9.532537460327148, + "learning_rate": 4.710302197802198e-05, + "loss": 0.3197, + "step": 2109 + }, + { + "epoch": 5.7967032967032965, + "grad_norm": 18.57332420349121, + "learning_rate": 4.710164835164835e-05, + "loss": 0.9755, + "step": 2110 + }, + { + "epoch": 5.799450549450549, + "grad_norm": 20.970230102539062, + "learning_rate": 4.710027472527473e-05, + "loss": 0.8971, + "step": 2111 + }, + { + "epoch": 5.802197802197802, + "grad_norm": 20.287349700927734, + "learning_rate": 4.70989010989011e-05, + "loss": 0.9717, + "step": 2112 + }, + { + "epoch": 5.804945054945055, + "grad_norm": 12.952693939208984, + "learning_rate": 4.7097527472527475e-05, + "loss": 0.5151, + "step": 2113 + }, + { + "epoch": 5.8076923076923075, + "grad_norm": 12.92483139038086, + "learning_rate": 4.709615384615385e-05, + "loss": 0.8405, + "step": 2114 + }, + { + "epoch": 5.81043956043956, + "grad_norm": 8.815301895141602, + "learning_rate": 4.709478021978022e-05, + "loss": 0.3673, + "step": 2115 + }, + { + "epoch": 5.813186813186813, + "grad_norm": 19.66790008544922, + "learning_rate": 4.70934065934066e-05, + "loss": 1.3094, + "step": 2116 + }, + { + "epoch": 5.815934065934066, + "grad_norm": 10.141153335571289, + "learning_rate": 4.709203296703297e-05, + "loss": 0.4364, + "step": 2117 + }, + { + "epoch": 5.818681318681318, + "grad_norm": 15.109489440917969, + "learning_rate": 4.7090659340659346e-05, + "loss": 0.7035, + "step": 2118 + }, + { + "epoch": 5.821428571428571, + "grad_norm": 7.477375030517578, + "learning_rate": 4.708928571428572e-05, + "loss": 0.2455, + "step": 2119 + }, + { + "epoch": 5.824175824175824, + "grad_norm": 14.981712341308594, + "learning_rate": 4.708791208791209e-05, + "loss": 0.6311, + "step": 2120 + }, + { + "epoch": 5.826923076923077, + "grad_norm": 14.290719032287598, + "learning_rate": 4.708653846153846e-05, + "loss": 0.3503, + "step": 2121 + }, + { + "epoch": 5.829670329670329, + "grad_norm": 8.146201133728027, + "learning_rate": 4.708516483516483e-05, + "loss": 0.424, + "step": 2122 + }, + { + "epoch": 5.832417582417582, + "grad_norm": 13.91606616973877, + "learning_rate": 4.708379120879121e-05, + "loss": 0.6891, + "step": 2123 + }, + { + "epoch": 5.835164835164835, + "grad_norm": 13.407280921936035, + "learning_rate": 4.7082417582417586e-05, + "loss": 0.8669, + "step": 2124 + }, + { + "epoch": 5.837912087912088, + "grad_norm": 18.67896270751953, + "learning_rate": 4.7081043956043956e-05, + "loss": 1.0017, + "step": 2125 + }, + { + "epoch": 5.84065934065934, + "grad_norm": 11.273113250732422, + "learning_rate": 4.707967032967033e-05, + "loss": 0.2787, + "step": 2126 + }, + { + "epoch": 5.843406593406593, + "grad_norm": 13.385401725769043, + "learning_rate": 4.70782967032967e-05, + "loss": 0.585, + "step": 2127 + }, + { + "epoch": 5.846153846153846, + "grad_norm": 14.741960525512695, + "learning_rate": 4.707692307692308e-05, + "loss": 0.6813, + "step": 2128 + }, + { + "epoch": 5.8489010989010985, + "grad_norm": 16.22576332092285, + "learning_rate": 4.7075549450549456e-05, + "loss": 0.7698, + "step": 2129 + }, + { + "epoch": 5.851648351648351, + "grad_norm": 15.17618179321289, + "learning_rate": 4.7074175824175826e-05, + "loss": 0.8762, + "step": 2130 + }, + { + "epoch": 5.854395604395604, + "grad_norm": 9.875465393066406, + "learning_rate": 4.70728021978022e-05, + "loss": 0.504, + "step": 2131 + }, + { + "epoch": 5.857142857142857, + "grad_norm": 19.60595703125, + "learning_rate": 4.707142857142857e-05, + "loss": 1.0994, + "step": 2132 + }, + { + "epoch": 5.8598901098901095, + "grad_norm": 11.798335075378418, + "learning_rate": 4.707005494505495e-05, + "loss": 0.5387, + "step": 2133 + }, + { + "epoch": 5.862637362637363, + "grad_norm": 19.021608352661133, + "learning_rate": 4.706868131868133e-05, + "loss": 0.8193, + "step": 2134 + }, + { + "epoch": 5.865384615384615, + "grad_norm": 15.53034782409668, + "learning_rate": 4.70673076923077e-05, + "loss": 0.7908, + "step": 2135 + }, + { + "epoch": 5.868131868131869, + "grad_norm": 16.84150505065918, + "learning_rate": 4.706593406593407e-05, + "loss": 0.843, + "step": 2136 + }, + { + "epoch": 5.8708791208791204, + "grad_norm": 13.120985984802246, + "learning_rate": 4.706456043956044e-05, + "loss": 0.5038, + "step": 2137 + }, + { + "epoch": 5.873626373626374, + "grad_norm": 10.651968002319336, + "learning_rate": 4.7063186813186814e-05, + "loss": 0.4586, + "step": 2138 + }, + { + "epoch": 5.876373626373626, + "grad_norm": 14.110017776489258, + "learning_rate": 4.706181318681319e-05, + "loss": 0.7357, + "step": 2139 + }, + { + "epoch": 5.8791208791208796, + "grad_norm": 9.272910118103027, + "learning_rate": 4.706043956043956e-05, + "loss": 0.4264, + "step": 2140 + }, + { + "epoch": 5.881868131868131, + "grad_norm": 15.338903427124023, + "learning_rate": 4.705906593406594e-05, + "loss": 0.6849, + "step": 2141 + }, + { + "epoch": 5.884615384615385, + "grad_norm": 7.644251346588135, + "learning_rate": 4.705769230769231e-05, + "loss": 0.3148, + "step": 2142 + }, + { + "epoch": 5.887362637362637, + "grad_norm": 9.333579063415527, + "learning_rate": 4.7056318681318684e-05, + "loss": 0.3394, + "step": 2143 + }, + { + "epoch": 5.8901098901098905, + "grad_norm": 11.621562957763672, + "learning_rate": 4.705494505494506e-05, + "loss": 0.4386, + "step": 2144 + }, + { + "epoch": 5.892857142857143, + "grad_norm": 11.134092330932617, + "learning_rate": 4.705357142857143e-05, + "loss": 0.3891, + "step": 2145 + }, + { + "epoch": 5.895604395604396, + "grad_norm": 20.863874435424805, + "learning_rate": 4.705219780219781e-05, + "loss": 1.0775, + "step": 2146 + }, + { + "epoch": 5.898351648351649, + "grad_norm": 14.336661338806152, + "learning_rate": 4.705082417582418e-05, + "loss": 0.6599, + "step": 2147 + }, + { + "epoch": 5.9010989010989015, + "grad_norm": 12.835058212280273, + "learning_rate": 4.7049450549450555e-05, + "loss": 0.4512, + "step": 2148 + }, + { + "epoch": 5.903846153846154, + "grad_norm": 10.564923286437988, + "learning_rate": 4.704807692307693e-05, + "loss": 0.3963, + "step": 2149 + }, + { + "epoch": 5.906593406593407, + "grad_norm": 9.727378845214844, + "learning_rate": 4.70467032967033e-05, + "loss": 0.2996, + "step": 2150 + }, + { + "epoch": 5.90934065934066, + "grad_norm": 11.756525039672852, + "learning_rate": 4.704532967032967e-05, + "loss": 0.3986, + "step": 2151 + }, + { + "epoch": 5.912087912087912, + "grad_norm": 19.36644744873047, + "learning_rate": 4.704395604395604e-05, + "loss": 0.9494, + "step": 2152 + }, + { + "epoch": 5.914835164835165, + "grad_norm": 18.096834182739258, + "learning_rate": 4.704258241758242e-05, + "loss": 0.7302, + "step": 2153 + }, + { + "epoch": 5.917582417582418, + "grad_norm": 8.429322242736816, + "learning_rate": 4.7041208791208795e-05, + "loss": 0.3659, + "step": 2154 + }, + { + "epoch": 5.920329670329671, + "grad_norm": 23.4072322845459, + "learning_rate": 4.7039835164835165e-05, + "loss": 1.0948, + "step": 2155 + }, + { + "epoch": 5.923076923076923, + "grad_norm": 10.770936965942383, + "learning_rate": 4.703846153846154e-05, + "loss": 0.4613, + "step": 2156 + }, + { + "epoch": 5.925824175824176, + "grad_norm": 12.41324520111084, + "learning_rate": 4.703708791208791e-05, + "loss": 0.5101, + "step": 2157 + }, + { + "epoch": 5.928571428571429, + "grad_norm": 13.182705879211426, + "learning_rate": 4.703571428571429e-05, + "loss": 0.5521, + "step": 2158 + }, + { + "epoch": 5.931318681318682, + "grad_norm": 16.865562438964844, + "learning_rate": 4.7034340659340665e-05, + "loss": 0.7876, + "step": 2159 + }, + { + "epoch": 5.934065934065934, + "grad_norm": 15.836922645568848, + "learning_rate": 4.7032967032967035e-05, + "loss": 0.6424, + "step": 2160 + }, + { + "epoch": 5.936813186813187, + "grad_norm": 19.158721923828125, + "learning_rate": 4.703159340659341e-05, + "loss": 0.8675, + "step": 2161 + }, + { + "epoch": 5.93956043956044, + "grad_norm": 10.240264892578125, + "learning_rate": 4.703021978021978e-05, + "loss": 0.2743, + "step": 2162 + }, + { + "epoch": 5.9423076923076925, + "grad_norm": 14.853920936584473, + "learning_rate": 4.702884615384616e-05, + "loss": 0.7675, + "step": 2163 + }, + { + "epoch": 5.945054945054945, + "grad_norm": 14.747917175292969, + "learning_rate": 4.702747252747253e-05, + "loss": 0.5473, + "step": 2164 + }, + { + "epoch": 5.947802197802198, + "grad_norm": 10.230646133422852, + "learning_rate": 4.7026098901098906e-05, + "loss": 0.3138, + "step": 2165 + }, + { + "epoch": 5.950549450549451, + "grad_norm": 15.89743423461914, + "learning_rate": 4.7024725274725276e-05, + "loss": 0.678, + "step": 2166 + }, + { + "epoch": 5.9532967032967035, + "grad_norm": 10.111494064331055, + "learning_rate": 4.7023351648351646e-05, + "loss": 0.5239, + "step": 2167 + }, + { + "epoch": 5.956043956043956, + "grad_norm": 14.640373229980469, + "learning_rate": 4.702197802197802e-05, + "loss": 0.6666, + "step": 2168 + }, + { + "epoch": 5.958791208791209, + "grad_norm": 11.260517120361328, + "learning_rate": 4.702060439560439e-05, + "loss": 0.4394, + "step": 2169 + }, + { + "epoch": 5.961538461538462, + "grad_norm": 16.462018966674805, + "learning_rate": 4.701923076923077e-05, + "loss": 0.6433, + "step": 2170 + }, + { + "epoch": 5.964285714285714, + "grad_norm": 13.597944259643555, + "learning_rate": 4.7017857142857146e-05, + "loss": 0.5142, + "step": 2171 + }, + { + "epoch": 5.967032967032967, + "grad_norm": 14.49205493927002, + "learning_rate": 4.7016483516483516e-05, + "loss": 0.8046, + "step": 2172 + }, + { + "epoch": 5.96978021978022, + "grad_norm": 19.60105323791504, + "learning_rate": 4.701510989010989e-05, + "loss": 0.5666, + "step": 2173 + }, + { + "epoch": 5.972527472527473, + "grad_norm": 17.401283264160156, + "learning_rate": 4.701373626373626e-05, + "loss": 0.6667, + "step": 2174 + }, + { + "epoch": 5.975274725274725, + "grad_norm": 12.629371643066406, + "learning_rate": 4.701236263736264e-05, + "loss": 0.6426, + "step": 2175 + }, + { + "epoch": 5.978021978021978, + "grad_norm": 7.627819538116455, + "learning_rate": 4.701098901098902e-05, + "loss": 0.3103, + "step": 2176 + }, + { + "epoch": 5.980769230769231, + "grad_norm": 12.1553955078125, + "learning_rate": 4.700961538461539e-05, + "loss": 0.6938, + "step": 2177 + }, + { + "epoch": 5.983516483516484, + "grad_norm": 17.47783851623535, + "learning_rate": 4.7008241758241764e-05, + "loss": 0.7158, + "step": 2178 + }, + { + "epoch": 5.986263736263736, + "grad_norm": 10.437960624694824, + "learning_rate": 4.7006868131868134e-05, + "loss": 0.391, + "step": 2179 + }, + { + "epoch": 5.989010989010989, + "grad_norm": 12.776809692382812, + "learning_rate": 4.700549450549451e-05, + "loss": 0.508, + "step": 2180 + }, + { + "epoch": 5.991758241758242, + "grad_norm": 12.239479064941406, + "learning_rate": 4.700412087912088e-05, + "loss": 0.5434, + "step": 2181 + }, + { + "epoch": 5.9945054945054945, + "grad_norm": 14.273141860961914, + "learning_rate": 4.700274725274725e-05, + "loss": 0.5012, + "step": 2182 + }, + { + "epoch": 5.997252747252747, + "grad_norm": 9.872082710266113, + "learning_rate": 4.700137362637363e-05, + "loss": 0.416, + "step": 2183 + }, + { + "epoch": 6.0, + "grad_norm": 44.33592987060547, + "learning_rate": 4.7e-05, + "loss": 0.9289, + "step": 2184 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6735537190082644, + "eval_f1": 0.642923697594951, + "eval_f1_DuraRiadoRio_64x64": 0.3058823529411765, + "eval_f1_Mole_64x64": 0.7300380228136882, + "eval_f1_Quebrado_64x64": 0.7955801104972375, + "eval_f1_RiadoRio_64x64": 0.6201550387596899, + "eval_f1_RioFechado_64x64": 0.762962962962963, + "eval_loss": 1.0301247835159302, + "eval_precision": 0.7565197891327134, + "eval_precision_DuraRiadoRio_64x64": 1.0, + "eval_precision_Mole_64x64": 0.8067226890756303, + "eval_precision_Quebrado_64x64": 0.6605504587155964, + "eval_precision_RiadoRio_64x64": 0.5106382978723404, + "eval_precision_RioFechado_64x64": 0.8046875, + "eval_recall": 0.6724096038217611, + "eval_recall_DuraRiadoRio_64x64": 0.18055555555555555, + "eval_recall_Mole_64x64": 0.6666666666666666, + "eval_recall_Quebrado_64x64": 1.0, + "eval_recall_RiadoRio_64x64": 0.7894736842105263, + "eval_recall_RioFechado_64x64": 0.7253521126760564, + "eval_runtime": 1.7145, + "eval_samples_per_second": 423.441, + "eval_steps_per_second": 26.83, + "step": 2184 + }, + { + "epoch": 6.002747252747253, + "grad_norm": 18.51569938659668, + "learning_rate": 4.6998626373626374e-05, + "loss": 0.7353, + "step": 2185 + }, + { + "epoch": 6.0054945054945055, + "grad_norm": 15.404217720031738, + "learning_rate": 4.699725274725275e-05, + "loss": 0.5313, + "step": 2186 + }, + { + "epoch": 6.008241758241758, + "grad_norm": 15.100975036621094, + "learning_rate": 4.699587912087912e-05, + "loss": 0.5521, + "step": 2187 + }, + { + "epoch": 6.010989010989011, + "grad_norm": 13.067594528198242, + "learning_rate": 4.69945054945055e-05, + "loss": 0.4976, + "step": 2188 + }, + { + "epoch": 6.013736263736264, + "grad_norm": 12.916304588317871, + "learning_rate": 4.699313186813187e-05, + "loss": 0.5976, + "step": 2189 + }, + { + "epoch": 6.016483516483516, + "grad_norm": 14.819759368896484, + "learning_rate": 4.6991758241758244e-05, + "loss": 0.6325, + "step": 2190 + }, + { + "epoch": 6.019230769230769, + "grad_norm": 11.913764953613281, + "learning_rate": 4.699038461538462e-05, + "loss": 0.5113, + "step": 2191 + }, + { + "epoch": 6.021978021978022, + "grad_norm": 16.300174713134766, + "learning_rate": 4.698901098901099e-05, + "loss": 0.5796, + "step": 2192 + }, + { + "epoch": 6.024725274725275, + "grad_norm": 13.238667488098145, + "learning_rate": 4.698763736263737e-05, + "loss": 0.5303, + "step": 2193 + }, + { + "epoch": 6.027472527472527, + "grad_norm": 13.317375183105469, + "learning_rate": 4.698626373626374e-05, + "loss": 0.5833, + "step": 2194 + }, + { + "epoch": 6.03021978021978, + "grad_norm": 13.245109558105469, + "learning_rate": 4.6984890109890115e-05, + "loss": 0.7694, + "step": 2195 + }, + { + "epoch": 6.032967032967033, + "grad_norm": 19.286109924316406, + "learning_rate": 4.6983516483516485e-05, + "loss": 1.2002, + "step": 2196 + }, + { + "epoch": 6.035714285714286, + "grad_norm": 24.76116943359375, + "learning_rate": 4.6982142857142855e-05, + "loss": 0.8841, + "step": 2197 + }, + { + "epoch": 6.038461538461538, + "grad_norm": 13.914743423461914, + "learning_rate": 4.698076923076923e-05, + "loss": 0.8282, + "step": 2198 + }, + { + "epoch": 6.041208791208791, + "grad_norm": 16.513822555541992, + "learning_rate": 4.69793956043956e-05, + "loss": 0.8101, + "step": 2199 + }, + { + "epoch": 6.043956043956044, + "grad_norm": 14.288755416870117, + "learning_rate": 4.697802197802198e-05, + "loss": 0.7642, + "step": 2200 + }, + { + "epoch": 6.0467032967032965, + "grad_norm": 19.81776237487793, + "learning_rate": 4.6976648351648355e-05, + "loss": 1.0588, + "step": 2201 + }, + { + "epoch": 6.049450549450549, + "grad_norm": 14.044941902160645, + "learning_rate": 4.6975274725274725e-05, + "loss": 0.7099, + "step": 2202 + }, + { + "epoch": 6.052197802197802, + "grad_norm": 9.365203857421875, + "learning_rate": 4.69739010989011e-05, + "loss": 0.3544, + "step": 2203 + }, + { + "epoch": 6.054945054945055, + "grad_norm": 14.163168907165527, + "learning_rate": 4.697252747252747e-05, + "loss": 0.6205, + "step": 2204 + }, + { + "epoch": 6.0576923076923075, + "grad_norm": 9.908754348754883, + "learning_rate": 4.697115384615385e-05, + "loss": 0.4579, + "step": 2205 + }, + { + "epoch": 6.06043956043956, + "grad_norm": 14.702707290649414, + "learning_rate": 4.6969780219780226e-05, + "loss": 0.8653, + "step": 2206 + }, + { + "epoch": 6.063186813186813, + "grad_norm": 13.379210472106934, + "learning_rate": 4.6968406593406596e-05, + "loss": 0.4547, + "step": 2207 + }, + { + "epoch": 6.065934065934066, + "grad_norm": 13.857527732849121, + "learning_rate": 4.696703296703297e-05, + "loss": 0.4968, + "step": 2208 + }, + { + "epoch": 6.068681318681318, + "grad_norm": 11.430426597595215, + "learning_rate": 4.696565934065934e-05, + "loss": 0.4245, + "step": 2209 + }, + { + "epoch": 6.071428571428571, + "grad_norm": 15.747259140014648, + "learning_rate": 4.696428571428572e-05, + "loss": 0.7, + "step": 2210 + }, + { + "epoch": 6.074175824175824, + "grad_norm": 20.377979278564453, + "learning_rate": 4.696291208791209e-05, + "loss": 0.7407, + "step": 2211 + }, + { + "epoch": 6.076923076923077, + "grad_norm": 7.626149654388428, + "learning_rate": 4.696153846153846e-05, + "loss": 0.2911, + "step": 2212 + }, + { + "epoch": 6.079670329670329, + "grad_norm": 13.842339515686035, + "learning_rate": 4.6960164835164836e-05, + "loss": 0.6987, + "step": 2213 + }, + { + "epoch": 6.082417582417582, + "grad_norm": 14.375617027282715, + "learning_rate": 4.6958791208791206e-05, + "loss": 0.7197, + "step": 2214 + }, + { + "epoch": 6.085164835164835, + "grad_norm": 14.03903865814209, + "learning_rate": 4.695741758241758e-05, + "loss": 0.7389, + "step": 2215 + }, + { + "epoch": 6.087912087912088, + "grad_norm": 16.715496063232422, + "learning_rate": 4.695604395604396e-05, + "loss": 0.6983, + "step": 2216 + }, + { + "epoch": 6.09065934065934, + "grad_norm": 9.460906028747559, + "learning_rate": 4.695467032967033e-05, + "loss": 0.2217, + "step": 2217 + }, + { + "epoch": 6.093406593406593, + "grad_norm": 9.98160457611084, + "learning_rate": 4.695329670329671e-05, + "loss": 0.3938, + "step": 2218 + }, + { + "epoch": 6.096153846153846, + "grad_norm": 10.793807029724121, + "learning_rate": 4.695192307692308e-05, + "loss": 0.4737, + "step": 2219 + }, + { + "epoch": 6.0989010989010985, + "grad_norm": 18.213237762451172, + "learning_rate": 4.6950549450549453e-05, + "loss": 0.8806, + "step": 2220 + }, + { + "epoch": 6.101648351648351, + "grad_norm": 14.156662940979004, + "learning_rate": 4.694917582417583e-05, + "loss": 0.6007, + "step": 2221 + }, + { + "epoch": 6.104395604395604, + "grad_norm": 9.46948528289795, + "learning_rate": 4.69478021978022e-05, + "loss": 0.416, + "step": 2222 + }, + { + "epoch": 6.107142857142857, + "grad_norm": 20.051025390625, + "learning_rate": 4.694642857142858e-05, + "loss": 1.3106, + "step": 2223 + }, + { + "epoch": 6.1098901098901095, + "grad_norm": 11.156311988830566, + "learning_rate": 4.694505494505495e-05, + "loss": 0.4988, + "step": 2224 + }, + { + "epoch": 6.112637362637362, + "grad_norm": 19.599536895751953, + "learning_rate": 4.6943681318681324e-05, + "loss": 0.7648, + "step": 2225 + }, + { + "epoch": 6.115384615384615, + "grad_norm": 11.35228157043457, + "learning_rate": 4.6942307692307694e-05, + "loss": 0.3256, + "step": 2226 + }, + { + "epoch": 6.118131868131868, + "grad_norm": 12.929259300231934, + "learning_rate": 4.6940934065934064e-05, + "loss": 0.7534, + "step": 2227 + }, + { + "epoch": 6.1208791208791204, + "grad_norm": 11.816793441772461, + "learning_rate": 4.693956043956044e-05, + "loss": 0.3875, + "step": 2228 + }, + { + "epoch": 6.123626373626374, + "grad_norm": 15.609375953674316, + "learning_rate": 4.693818681318681e-05, + "loss": 0.8488, + "step": 2229 + }, + { + "epoch": 6.126373626373626, + "grad_norm": 14.087238311767578, + "learning_rate": 4.693681318681319e-05, + "loss": 0.5618, + "step": 2230 + }, + { + "epoch": 6.1291208791208796, + "grad_norm": 8.556110382080078, + "learning_rate": 4.6935439560439564e-05, + "loss": 0.312, + "step": 2231 + }, + { + "epoch": 6.131868131868132, + "grad_norm": 14.686616897583008, + "learning_rate": 4.6934065934065934e-05, + "loss": 0.5902, + "step": 2232 + }, + { + "epoch": 6.134615384615385, + "grad_norm": 11.931970596313477, + "learning_rate": 4.693269230769231e-05, + "loss": 0.6335, + "step": 2233 + }, + { + "epoch": 6.137362637362638, + "grad_norm": 15.957256317138672, + "learning_rate": 4.693131868131868e-05, + "loss": 0.9465, + "step": 2234 + }, + { + "epoch": 6.1401098901098905, + "grad_norm": 19.929889678955078, + "learning_rate": 4.692994505494506e-05, + "loss": 0.9779, + "step": 2235 + }, + { + "epoch": 6.142857142857143, + "grad_norm": 11.31843090057373, + "learning_rate": 4.6928571428571435e-05, + "loss": 0.4785, + "step": 2236 + }, + { + "epoch": 6.145604395604396, + "grad_norm": 15.097321510314941, + "learning_rate": 4.6927197802197805e-05, + "loss": 0.7439, + "step": 2237 + }, + { + "epoch": 6.148351648351649, + "grad_norm": 18.11370277404785, + "learning_rate": 4.692582417582418e-05, + "loss": 0.8426, + "step": 2238 + }, + { + "epoch": 6.1510989010989015, + "grad_norm": 13.214000701904297, + "learning_rate": 4.692445054945055e-05, + "loss": 0.4257, + "step": 2239 + }, + { + "epoch": 6.153846153846154, + "grad_norm": 17.274568557739258, + "learning_rate": 4.692307692307693e-05, + "loss": 0.8703, + "step": 2240 + }, + { + "epoch": 6.156593406593407, + "grad_norm": 17.23959732055664, + "learning_rate": 4.69217032967033e-05, + "loss": 0.8214, + "step": 2241 + }, + { + "epoch": 6.15934065934066, + "grad_norm": 13.473724365234375, + "learning_rate": 4.692032967032967e-05, + "loss": 0.5509, + "step": 2242 + }, + { + "epoch": 6.162087912087912, + "grad_norm": 12.413822174072266, + "learning_rate": 4.6918956043956045e-05, + "loss": 0.5043, + "step": 2243 + }, + { + "epoch": 6.164835164835165, + "grad_norm": 11.70253849029541, + "learning_rate": 4.6917582417582415e-05, + "loss": 0.4176, + "step": 2244 + }, + { + "epoch": 6.167582417582418, + "grad_norm": 11.479327201843262, + "learning_rate": 4.691620879120879e-05, + "loss": 0.4915, + "step": 2245 + }, + { + "epoch": 6.170329670329671, + "grad_norm": 12.186172485351562, + "learning_rate": 4.691483516483517e-05, + "loss": 0.4366, + "step": 2246 + }, + { + "epoch": 6.173076923076923, + "grad_norm": 14.803732872009277, + "learning_rate": 4.691346153846154e-05, + "loss": 0.8639, + "step": 2247 + }, + { + "epoch": 6.175824175824176, + "grad_norm": 8.981201171875, + "learning_rate": 4.6912087912087916e-05, + "loss": 0.3563, + "step": 2248 + }, + { + "epoch": 6.178571428571429, + "grad_norm": 19.856475830078125, + "learning_rate": 4.6910714285714286e-05, + "loss": 0.7714, + "step": 2249 + }, + { + "epoch": 6.181318681318682, + "grad_norm": 10.471216201782227, + "learning_rate": 4.690934065934066e-05, + "loss": 0.4479, + "step": 2250 + }, + { + "epoch": 6.184065934065934, + "grad_norm": 17.03852653503418, + "learning_rate": 4.690796703296704e-05, + "loss": 0.8711, + "step": 2251 + }, + { + "epoch": 6.186813186813187, + "grad_norm": 18.140262603759766, + "learning_rate": 4.690659340659341e-05, + "loss": 0.8385, + "step": 2252 + }, + { + "epoch": 6.18956043956044, + "grad_norm": 11.933829307556152, + "learning_rate": 4.6905219780219786e-05, + "loss": 0.4738, + "step": 2253 + }, + { + "epoch": 6.1923076923076925, + "grad_norm": 18.422073364257812, + "learning_rate": 4.6903846153846156e-05, + "loss": 0.7634, + "step": 2254 + }, + { + "epoch": 6.195054945054945, + "grad_norm": 14.401677131652832, + "learning_rate": 4.690247252747253e-05, + "loss": 0.8675, + "step": 2255 + }, + { + "epoch": 6.197802197802198, + "grad_norm": 16.687564849853516, + "learning_rate": 4.69010989010989e-05, + "loss": 0.6662, + "step": 2256 + }, + { + "epoch": 6.200549450549451, + "grad_norm": 16.274106979370117, + "learning_rate": 4.689972527472527e-05, + "loss": 0.988, + "step": 2257 + }, + { + "epoch": 6.2032967032967035, + "grad_norm": 17.644100189208984, + "learning_rate": 4.689835164835165e-05, + "loss": 0.7905, + "step": 2258 + }, + { + "epoch": 6.206043956043956, + "grad_norm": 14.640959739685059, + "learning_rate": 4.689697802197802e-05, + "loss": 0.4988, + "step": 2259 + }, + { + "epoch": 6.208791208791209, + "grad_norm": 16.25084686279297, + "learning_rate": 4.6895604395604396e-05, + "loss": 0.8287, + "step": 2260 + }, + { + "epoch": 6.211538461538462, + "grad_norm": 15.347614288330078, + "learning_rate": 4.689423076923077e-05, + "loss": 0.7478, + "step": 2261 + }, + { + "epoch": 6.214285714285714, + "grad_norm": 12.095968246459961, + "learning_rate": 4.689285714285714e-05, + "loss": 0.4357, + "step": 2262 + }, + { + "epoch": 6.217032967032967, + "grad_norm": 11.835132598876953, + "learning_rate": 4.689148351648352e-05, + "loss": 0.4571, + "step": 2263 + }, + { + "epoch": 6.21978021978022, + "grad_norm": 15.757242202758789, + "learning_rate": 4.689010989010989e-05, + "loss": 0.6437, + "step": 2264 + }, + { + "epoch": 6.222527472527473, + "grad_norm": 11.120218276977539, + "learning_rate": 4.688873626373627e-05, + "loss": 0.6, + "step": 2265 + }, + { + "epoch": 6.225274725274725, + "grad_norm": 15.476195335388184, + "learning_rate": 4.6887362637362644e-05, + "loss": 0.601, + "step": 2266 + }, + { + "epoch": 6.228021978021978, + "grad_norm": 16.29433250427246, + "learning_rate": 4.6885989010989014e-05, + "loss": 0.6271, + "step": 2267 + }, + { + "epoch": 6.230769230769231, + "grad_norm": 14.326465606689453, + "learning_rate": 4.688461538461539e-05, + "loss": 0.7989, + "step": 2268 + }, + { + "epoch": 6.233516483516484, + "grad_norm": 14.071785926818848, + "learning_rate": 4.688324175824176e-05, + "loss": 0.9885, + "step": 2269 + }, + { + "epoch": 6.236263736263736, + "grad_norm": 10.52000904083252, + "learning_rate": 4.688186813186814e-05, + "loss": 0.3338, + "step": 2270 + }, + { + "epoch": 6.239010989010989, + "grad_norm": 12.582571983337402, + "learning_rate": 4.688049450549451e-05, + "loss": 0.598, + "step": 2271 + }, + { + "epoch": 6.241758241758242, + "grad_norm": 10.380338668823242, + "learning_rate": 4.687912087912088e-05, + "loss": 0.4541, + "step": 2272 + }, + { + "epoch": 6.2445054945054945, + "grad_norm": 13.546777725219727, + "learning_rate": 4.6877747252747254e-05, + "loss": 0.7856, + "step": 2273 + }, + { + "epoch": 6.247252747252747, + "grad_norm": 18.35124397277832, + "learning_rate": 4.6876373626373624e-05, + "loss": 0.8136, + "step": 2274 + }, + { + "epoch": 6.25, + "grad_norm": 17.608205795288086, + "learning_rate": 4.6875e-05, + "loss": 0.7591, + "step": 2275 + }, + { + "epoch": 6.252747252747253, + "grad_norm": 13.436078071594238, + "learning_rate": 4.687362637362638e-05, + "loss": 0.5732, + "step": 2276 + }, + { + "epoch": 6.2554945054945055, + "grad_norm": 15.65339469909668, + "learning_rate": 4.687225274725275e-05, + "loss": 0.935, + "step": 2277 + }, + { + "epoch": 6.258241758241758, + "grad_norm": 14.930228233337402, + "learning_rate": 4.6870879120879125e-05, + "loss": 0.5734, + "step": 2278 + }, + { + "epoch": 6.260989010989011, + "grad_norm": 17.92757225036621, + "learning_rate": 4.6869505494505495e-05, + "loss": 0.7039, + "step": 2279 + }, + { + "epoch": 6.263736263736264, + "grad_norm": 12.544900894165039, + "learning_rate": 4.686813186813187e-05, + "loss": 0.5596, + "step": 2280 + }, + { + "epoch": 6.266483516483516, + "grad_norm": 15.342753410339355, + "learning_rate": 4.686675824175825e-05, + "loss": 0.8481, + "step": 2281 + }, + { + "epoch": 6.269230769230769, + "grad_norm": 17.30314826965332, + "learning_rate": 4.686538461538462e-05, + "loss": 0.7489, + "step": 2282 + }, + { + "epoch": 6.271978021978022, + "grad_norm": 16.08698272705078, + "learning_rate": 4.6864010989010995e-05, + "loss": 0.8775, + "step": 2283 + }, + { + "epoch": 6.274725274725275, + "grad_norm": 13.51001262664795, + "learning_rate": 4.6862637362637365e-05, + "loss": 0.4358, + "step": 2284 + }, + { + "epoch": 6.277472527472527, + "grad_norm": 15.779351234436035, + "learning_rate": 4.686126373626374e-05, + "loss": 0.6888, + "step": 2285 + }, + { + "epoch": 6.28021978021978, + "grad_norm": 11.02768325805664, + "learning_rate": 4.685989010989011e-05, + "loss": 0.4968, + "step": 2286 + }, + { + "epoch": 6.282967032967033, + "grad_norm": 17.909011840820312, + "learning_rate": 4.685851648351648e-05, + "loss": 0.784, + "step": 2287 + }, + { + "epoch": 6.285714285714286, + "grad_norm": 15.627165794372559, + "learning_rate": 4.685714285714286e-05, + "loss": 0.717, + "step": 2288 + }, + { + "epoch": 6.288461538461538, + "grad_norm": 9.677651405334473, + "learning_rate": 4.685576923076923e-05, + "loss": 0.3161, + "step": 2289 + }, + { + "epoch": 6.291208791208791, + "grad_norm": 12.038764953613281, + "learning_rate": 4.6854395604395605e-05, + "loss": 0.5192, + "step": 2290 + }, + { + "epoch": 6.293956043956044, + "grad_norm": 8.270084381103516, + "learning_rate": 4.685302197802198e-05, + "loss": 0.2444, + "step": 2291 + }, + { + "epoch": 6.2967032967032965, + "grad_norm": 13.751866340637207, + "learning_rate": 4.685164835164835e-05, + "loss": 0.5895, + "step": 2292 + }, + { + "epoch": 6.299450549450549, + "grad_norm": 11.903210639953613, + "learning_rate": 4.685027472527473e-05, + "loss": 0.4422, + "step": 2293 + }, + { + "epoch": 6.302197802197802, + "grad_norm": 14.927972793579102, + "learning_rate": 4.68489010989011e-05, + "loss": 0.6202, + "step": 2294 + }, + { + "epoch": 6.304945054945055, + "grad_norm": 16.265613555908203, + "learning_rate": 4.6847527472527476e-05, + "loss": 1.0835, + "step": 2295 + }, + { + "epoch": 6.3076923076923075, + "grad_norm": 9.657478332519531, + "learning_rate": 4.684615384615385e-05, + "loss": 0.3095, + "step": 2296 + }, + { + "epoch": 6.31043956043956, + "grad_norm": 17.30778694152832, + "learning_rate": 4.684478021978022e-05, + "loss": 0.5207, + "step": 2297 + }, + { + "epoch": 6.313186813186813, + "grad_norm": 19.033693313598633, + "learning_rate": 4.68434065934066e-05, + "loss": 0.6801, + "step": 2298 + }, + { + "epoch": 6.315934065934066, + "grad_norm": 14.312005996704102, + "learning_rate": 4.684203296703297e-05, + "loss": 0.6314, + "step": 2299 + }, + { + "epoch": 6.318681318681318, + "grad_norm": 10.123703002929688, + "learning_rate": 4.6840659340659346e-05, + "loss": 0.5634, + "step": 2300 + }, + { + "epoch": 6.321428571428571, + "grad_norm": 4.979803562164307, + "learning_rate": 4.6839285714285716e-05, + "loss": 0.1843, + "step": 2301 + }, + { + "epoch": 6.324175824175824, + "grad_norm": 9.421921730041504, + "learning_rate": 4.6837912087912086e-05, + "loss": 0.5407, + "step": 2302 + }, + { + "epoch": 6.326923076923077, + "grad_norm": 11.108701705932617, + "learning_rate": 4.683653846153846e-05, + "loss": 0.743, + "step": 2303 + }, + { + "epoch": 6.329670329670329, + "grad_norm": 15.504948616027832, + "learning_rate": 4.683516483516483e-05, + "loss": 0.7785, + "step": 2304 + }, + { + "epoch": 6.332417582417582, + "grad_norm": 12.400391578674316, + "learning_rate": 4.683379120879121e-05, + "loss": 0.5653, + "step": 2305 + }, + { + "epoch": 6.335164835164835, + "grad_norm": 15.230780601501465, + "learning_rate": 4.683241758241759e-05, + "loss": 0.6178, + "step": 2306 + }, + { + "epoch": 6.337912087912088, + "grad_norm": 14.962453842163086, + "learning_rate": 4.683104395604396e-05, + "loss": 0.8592, + "step": 2307 + }, + { + "epoch": 6.34065934065934, + "grad_norm": 17.34432029724121, + "learning_rate": 4.6829670329670334e-05, + "loss": 0.9638, + "step": 2308 + }, + { + "epoch": 6.343406593406593, + "grad_norm": 12.164788246154785, + "learning_rate": 4.6828296703296704e-05, + "loss": 0.485, + "step": 2309 + }, + { + "epoch": 6.346153846153846, + "grad_norm": 12.75887680053711, + "learning_rate": 4.682692307692308e-05, + "loss": 0.596, + "step": 2310 + }, + { + "epoch": 6.3489010989010985, + "grad_norm": 14.229572296142578, + "learning_rate": 4.682554945054946e-05, + "loss": 0.6031, + "step": 2311 + }, + { + "epoch": 6.351648351648351, + "grad_norm": 11.956195831298828, + "learning_rate": 4.682417582417583e-05, + "loss": 0.4862, + "step": 2312 + }, + { + "epoch": 6.354395604395604, + "grad_norm": 14.426011085510254, + "learning_rate": 4.6822802197802204e-05, + "loss": 0.8511, + "step": 2313 + }, + { + "epoch": 6.357142857142857, + "grad_norm": 16.786226272583008, + "learning_rate": 4.6821428571428574e-05, + "loss": 0.8178, + "step": 2314 + }, + { + "epoch": 6.3598901098901095, + "grad_norm": 13.809663772583008, + "learning_rate": 4.682005494505495e-05, + "loss": 0.5454, + "step": 2315 + }, + { + "epoch": 6.362637362637362, + "grad_norm": 14.07949161529541, + "learning_rate": 4.681868131868132e-05, + "loss": 0.7842, + "step": 2316 + }, + { + "epoch": 6.365384615384615, + "grad_norm": 13.350825309753418, + "learning_rate": 4.681730769230769e-05, + "loss": 0.5723, + "step": 2317 + }, + { + "epoch": 6.368131868131869, + "grad_norm": 11.734549522399902, + "learning_rate": 4.681593406593407e-05, + "loss": 0.5667, + "step": 2318 + }, + { + "epoch": 6.3708791208791204, + "grad_norm": 16.76517677307129, + "learning_rate": 4.681456043956044e-05, + "loss": 0.7355, + "step": 2319 + }, + { + "epoch": 6.373626373626374, + "grad_norm": 12.322579383850098, + "learning_rate": 4.6813186813186814e-05, + "loss": 0.369, + "step": 2320 + }, + { + "epoch": 6.376373626373626, + "grad_norm": 18.99522590637207, + "learning_rate": 4.681181318681319e-05, + "loss": 0.5524, + "step": 2321 + }, + { + "epoch": 6.3791208791208796, + "grad_norm": 14.340660095214844, + "learning_rate": 4.681043956043956e-05, + "loss": 0.5527, + "step": 2322 + }, + { + "epoch": 6.381868131868131, + "grad_norm": 9.219655990600586, + "learning_rate": 4.680906593406594e-05, + "loss": 0.4061, + "step": 2323 + }, + { + "epoch": 6.384615384615385, + "grad_norm": 16.015522003173828, + "learning_rate": 4.680769230769231e-05, + "loss": 0.7363, + "step": 2324 + }, + { + "epoch": 6.387362637362638, + "grad_norm": 11.291810989379883, + "learning_rate": 4.6806318681318685e-05, + "loss": 0.4292, + "step": 2325 + }, + { + "epoch": 6.3901098901098905, + "grad_norm": 12.930643081665039, + "learning_rate": 4.680494505494506e-05, + "loss": 0.5496, + "step": 2326 + }, + { + "epoch": 6.392857142857143, + "grad_norm": 9.427420616149902, + "learning_rate": 4.680357142857143e-05, + "loss": 0.4068, + "step": 2327 + }, + { + "epoch": 6.395604395604396, + "grad_norm": 10.012459754943848, + "learning_rate": 4.680219780219781e-05, + "loss": 0.3648, + "step": 2328 + }, + { + "epoch": 6.398351648351649, + "grad_norm": 13.49395751953125, + "learning_rate": 4.680082417582418e-05, + "loss": 0.6441, + "step": 2329 + }, + { + "epoch": 6.4010989010989015, + "grad_norm": 14.715170860290527, + "learning_rate": 4.6799450549450555e-05, + "loss": 0.6276, + "step": 2330 + }, + { + "epoch": 6.403846153846154, + "grad_norm": 14.777511596679688, + "learning_rate": 4.6798076923076925e-05, + "loss": 0.7674, + "step": 2331 + }, + { + "epoch": 6.406593406593407, + "grad_norm": 16.639686584472656, + "learning_rate": 4.6796703296703295e-05, + "loss": 0.7584, + "step": 2332 + }, + { + "epoch": 6.40934065934066, + "grad_norm": 15.411419868469238, + "learning_rate": 4.679532967032967e-05, + "loss": 0.7365, + "step": 2333 + }, + { + "epoch": 6.412087912087912, + "grad_norm": 15.365646362304688, + "learning_rate": 4.679395604395604e-05, + "loss": 0.6606, + "step": 2334 + }, + { + "epoch": 6.414835164835165, + "grad_norm": 17.215612411499023, + "learning_rate": 4.679258241758242e-05, + "loss": 0.9618, + "step": 2335 + }, + { + "epoch": 6.417582417582418, + "grad_norm": 9.933335304260254, + "learning_rate": 4.6791208791208796e-05, + "loss": 0.3699, + "step": 2336 + }, + { + "epoch": 6.420329670329671, + "grad_norm": 16.29693031311035, + "learning_rate": 4.6789835164835166e-05, + "loss": 0.7668, + "step": 2337 + }, + { + "epoch": 6.423076923076923, + "grad_norm": 20.0748348236084, + "learning_rate": 4.678846153846154e-05, + "loss": 1.0558, + "step": 2338 + }, + { + "epoch": 6.425824175824176, + "grad_norm": 14.11687183380127, + "learning_rate": 4.678708791208791e-05, + "loss": 0.4521, + "step": 2339 + }, + { + "epoch": 6.428571428571429, + "grad_norm": 24.12006950378418, + "learning_rate": 4.678571428571429e-05, + "loss": 1.0496, + "step": 2340 + }, + { + "epoch": 6.431318681318682, + "grad_norm": 13.635631561279297, + "learning_rate": 4.6784340659340666e-05, + "loss": 0.7497, + "step": 2341 + }, + { + "epoch": 6.434065934065934, + "grad_norm": 14.363274574279785, + "learning_rate": 4.6782967032967036e-05, + "loss": 0.4615, + "step": 2342 + }, + { + "epoch": 6.436813186813187, + "grad_norm": 9.768007278442383, + "learning_rate": 4.678159340659341e-05, + "loss": 0.5203, + "step": 2343 + }, + { + "epoch": 6.43956043956044, + "grad_norm": 16.75444221496582, + "learning_rate": 4.678021978021978e-05, + "loss": 0.9069, + "step": 2344 + }, + { + "epoch": 6.4423076923076925, + "grad_norm": 13.357499122619629, + "learning_rate": 4.677884615384616e-05, + "loss": 0.6013, + "step": 2345 + }, + { + "epoch": 6.445054945054945, + "grad_norm": 12.067432403564453, + "learning_rate": 4.677747252747253e-05, + "loss": 0.5344, + "step": 2346 + }, + { + "epoch": 6.447802197802198, + "grad_norm": 11.172935485839844, + "learning_rate": 4.67760989010989e-05, + "loss": 0.4754, + "step": 2347 + }, + { + "epoch": 6.450549450549451, + "grad_norm": 15.427852630615234, + "learning_rate": 4.677472527472528e-05, + "loss": 0.4851, + "step": 2348 + }, + { + "epoch": 6.4532967032967035, + "grad_norm": 14.009298324584961, + "learning_rate": 4.677335164835165e-05, + "loss": 0.6263, + "step": 2349 + }, + { + "epoch": 6.456043956043956, + "grad_norm": 17.159286499023438, + "learning_rate": 4.6771978021978023e-05, + "loss": 0.8585, + "step": 2350 + }, + { + "epoch": 6.458791208791209, + "grad_norm": 10.380244255065918, + "learning_rate": 4.67706043956044e-05, + "loss": 0.4244, + "step": 2351 + }, + { + "epoch": 6.461538461538462, + "grad_norm": 12.986699104309082, + "learning_rate": 4.676923076923077e-05, + "loss": 0.9114, + "step": 2352 + }, + { + "epoch": 6.464285714285714, + "grad_norm": 12.824471473693848, + "learning_rate": 4.676785714285715e-05, + "loss": 0.5812, + "step": 2353 + }, + { + "epoch": 6.467032967032967, + "grad_norm": 13.121755599975586, + "learning_rate": 4.676648351648352e-05, + "loss": 0.4833, + "step": 2354 + }, + { + "epoch": 6.46978021978022, + "grad_norm": 16.178560256958008, + "learning_rate": 4.6765109890109894e-05, + "loss": 0.9119, + "step": 2355 + }, + { + "epoch": 6.472527472527473, + "grad_norm": 12.207609176635742, + "learning_rate": 4.676373626373627e-05, + "loss": 0.5354, + "step": 2356 + }, + { + "epoch": 6.475274725274725, + "grad_norm": 14.060015678405762, + "learning_rate": 4.676236263736264e-05, + "loss": 0.6205, + "step": 2357 + }, + { + "epoch": 6.478021978021978, + "grad_norm": 13.485978126525879, + "learning_rate": 4.676098901098902e-05, + "loss": 0.6425, + "step": 2358 + }, + { + "epoch": 6.480769230769231, + "grad_norm": 10.489693641662598, + "learning_rate": 4.675961538461539e-05, + "loss": 0.3571, + "step": 2359 + }, + { + "epoch": 6.483516483516484, + "grad_norm": 13.550048828125, + "learning_rate": 4.6758241758241764e-05, + "loss": 0.6197, + "step": 2360 + }, + { + "epoch": 6.486263736263736, + "grad_norm": 9.728740692138672, + "learning_rate": 4.6756868131868134e-05, + "loss": 0.4023, + "step": 2361 + }, + { + "epoch": 6.489010989010989, + "grad_norm": 11.863856315612793, + "learning_rate": 4.6755494505494504e-05, + "loss": 0.436, + "step": 2362 + }, + { + "epoch": 6.491758241758242, + "grad_norm": 13.725719451904297, + "learning_rate": 4.675412087912088e-05, + "loss": 0.6243, + "step": 2363 + }, + { + "epoch": 6.4945054945054945, + "grad_norm": 14.534283638000488, + "learning_rate": 4.675274725274725e-05, + "loss": 0.6094, + "step": 2364 + }, + { + "epoch": 6.497252747252747, + "grad_norm": 7.470677375793457, + "learning_rate": 4.675137362637363e-05, + "loss": 0.3581, + "step": 2365 + }, + { + "epoch": 6.5, + "grad_norm": 12.74001407623291, + "learning_rate": 4.6750000000000005e-05, + "loss": 0.4936, + "step": 2366 + }, + { + "epoch": 6.502747252747253, + "grad_norm": 13.365567207336426, + "learning_rate": 4.6748626373626375e-05, + "loss": 0.5536, + "step": 2367 + }, + { + "epoch": 6.5054945054945055, + "grad_norm": 16.269746780395508, + "learning_rate": 4.674725274725275e-05, + "loss": 0.7934, + "step": 2368 + }, + { + "epoch": 6.508241758241758, + "grad_norm": 12.743583679199219, + "learning_rate": 4.674587912087912e-05, + "loss": 0.4364, + "step": 2369 + }, + { + "epoch": 6.510989010989011, + "grad_norm": 18.34634780883789, + "learning_rate": 4.67445054945055e-05, + "loss": 0.8652, + "step": 2370 + }, + { + "epoch": 6.513736263736264, + "grad_norm": 11.941476821899414, + "learning_rate": 4.6743131868131875e-05, + "loss": 0.5108, + "step": 2371 + }, + { + "epoch": 6.516483516483516, + "grad_norm": 15.463470458984375, + "learning_rate": 4.6741758241758245e-05, + "loss": 0.8487, + "step": 2372 + }, + { + "epoch": 6.519230769230769, + "grad_norm": 12.189726829528809, + "learning_rate": 4.674038461538462e-05, + "loss": 0.6129, + "step": 2373 + }, + { + "epoch": 6.521978021978022, + "grad_norm": 14.609180450439453, + "learning_rate": 4.673901098901099e-05, + "loss": 0.4163, + "step": 2374 + }, + { + "epoch": 6.524725274725275, + "grad_norm": 12.141524314880371, + "learning_rate": 4.673763736263736e-05, + "loss": 0.4817, + "step": 2375 + }, + { + "epoch": 6.527472527472527, + "grad_norm": 18.974166870117188, + "learning_rate": 4.673626373626374e-05, + "loss": 0.7715, + "step": 2376 + }, + { + "epoch": 6.53021978021978, + "grad_norm": 17.079025268554688, + "learning_rate": 4.673489010989011e-05, + "loss": 0.7756, + "step": 2377 + }, + { + "epoch": 6.532967032967033, + "grad_norm": 14.088582992553711, + "learning_rate": 4.6733516483516486e-05, + "loss": 0.7235, + "step": 2378 + }, + { + "epoch": 6.535714285714286, + "grad_norm": 11.060881614685059, + "learning_rate": 4.6732142857142856e-05, + "loss": 0.3991, + "step": 2379 + }, + { + "epoch": 6.538461538461538, + "grad_norm": 12.765702247619629, + "learning_rate": 4.673076923076923e-05, + "loss": 0.6472, + "step": 2380 + }, + { + "epoch": 6.541208791208791, + "grad_norm": 14.658965110778809, + "learning_rate": 4.672939560439561e-05, + "loss": 0.8383, + "step": 2381 + }, + { + "epoch": 6.543956043956044, + "grad_norm": 12.634927749633789, + "learning_rate": 4.672802197802198e-05, + "loss": 0.4321, + "step": 2382 + }, + { + "epoch": 6.5467032967032965, + "grad_norm": 14.724032402038574, + "learning_rate": 4.6726648351648356e-05, + "loss": 0.6737, + "step": 2383 + }, + { + "epoch": 6.549450549450549, + "grad_norm": 18.570709228515625, + "learning_rate": 4.6725274725274726e-05, + "loss": 0.8784, + "step": 2384 + }, + { + "epoch": 6.552197802197802, + "grad_norm": 13.751541137695312, + "learning_rate": 4.67239010989011e-05, + "loss": 0.5984, + "step": 2385 + }, + { + "epoch": 6.554945054945055, + "grad_norm": 12.875673294067383, + "learning_rate": 4.672252747252748e-05, + "loss": 0.3972, + "step": 2386 + }, + { + "epoch": 6.5576923076923075, + "grad_norm": 13.97119426727295, + "learning_rate": 4.672115384615385e-05, + "loss": 0.4918, + "step": 2387 + }, + { + "epoch": 6.56043956043956, + "grad_norm": 18.982505798339844, + "learning_rate": 4.6719780219780226e-05, + "loss": 0.7555, + "step": 2388 + }, + { + "epoch": 6.563186813186813, + "grad_norm": 10.166035652160645, + "learning_rate": 4.6718406593406596e-05, + "loss": 0.4692, + "step": 2389 + }, + { + "epoch": 6.565934065934066, + "grad_norm": 13.227474212646484, + "learning_rate": 4.6717032967032966e-05, + "loss": 0.6399, + "step": 2390 + }, + { + "epoch": 6.568681318681318, + "grad_norm": 13.607721328735352, + "learning_rate": 4.671565934065934e-05, + "loss": 0.5779, + "step": 2391 + }, + { + "epoch": 6.571428571428571, + "grad_norm": 21.525835037231445, + "learning_rate": 4.671428571428571e-05, + "loss": 1.3048, + "step": 2392 + }, + { + "epoch": 6.574175824175824, + "grad_norm": 10.13453197479248, + "learning_rate": 4.671291208791209e-05, + "loss": 0.5098, + "step": 2393 + }, + { + "epoch": 6.576923076923077, + "grad_norm": 11.93448257446289, + "learning_rate": 4.671153846153846e-05, + "loss": 0.4856, + "step": 2394 + }, + { + "epoch": 6.579670329670329, + "grad_norm": 14.226972579956055, + "learning_rate": 4.671016483516484e-05, + "loss": 0.4713, + "step": 2395 + }, + { + "epoch": 6.582417582417582, + "grad_norm": 13.774019241333008, + "learning_rate": 4.670879120879121e-05, + "loss": 0.5821, + "step": 2396 + }, + { + "epoch": 6.585164835164835, + "grad_norm": 11.967432975769043, + "learning_rate": 4.6707417582417584e-05, + "loss": 0.3338, + "step": 2397 + }, + { + "epoch": 6.587912087912088, + "grad_norm": 7.145039081573486, + "learning_rate": 4.670604395604396e-05, + "loss": 0.2284, + "step": 2398 + }, + { + "epoch": 6.59065934065934, + "grad_norm": 14.542641639709473, + "learning_rate": 4.670467032967033e-05, + "loss": 0.7093, + "step": 2399 + }, + { + "epoch": 6.593406593406593, + "grad_norm": 17.92657470703125, + "learning_rate": 4.670329670329671e-05, + "loss": 0.9485, + "step": 2400 + }, + { + "epoch": 6.596153846153846, + "grad_norm": 7.546200275421143, + "learning_rate": 4.670192307692308e-05, + "loss": 0.214, + "step": 2401 + }, + { + "epoch": 6.5989010989010985, + "grad_norm": 10.040294647216797, + "learning_rate": 4.6700549450549454e-05, + "loss": 0.475, + "step": 2402 + }, + { + "epoch": 6.601648351648351, + "grad_norm": 10.366243362426758, + "learning_rate": 4.669917582417583e-05, + "loss": 0.4778, + "step": 2403 + }, + { + "epoch": 6.604395604395604, + "grad_norm": 12.771906852722168, + "learning_rate": 4.66978021978022e-05, + "loss": 0.5713, + "step": 2404 + }, + { + "epoch": 6.607142857142857, + "grad_norm": 21.322072982788086, + "learning_rate": 4.669642857142857e-05, + "loss": 1.235, + "step": 2405 + }, + { + "epoch": 6.6098901098901095, + "grad_norm": 11.570098876953125, + "learning_rate": 4.669505494505495e-05, + "loss": 0.4898, + "step": 2406 + }, + { + "epoch": 6.612637362637363, + "grad_norm": 17.28434944152832, + "learning_rate": 4.669368131868132e-05, + "loss": 0.91, + "step": 2407 + }, + { + "epoch": 6.615384615384615, + "grad_norm": 18.97443389892578, + "learning_rate": 4.6692307692307695e-05, + "loss": 0.848, + "step": 2408 + }, + { + "epoch": 6.618131868131869, + "grad_norm": 14.04698371887207, + "learning_rate": 4.6690934065934065e-05, + "loss": 0.675, + "step": 2409 + }, + { + "epoch": 6.6208791208791204, + "grad_norm": 10.520537376403809, + "learning_rate": 4.668956043956044e-05, + "loss": 0.3934, + "step": 2410 + }, + { + "epoch": 6.623626373626374, + "grad_norm": 12.728436470031738, + "learning_rate": 4.668818681318681e-05, + "loss": 0.3782, + "step": 2411 + }, + { + "epoch": 6.626373626373626, + "grad_norm": 10.378328323364258, + "learning_rate": 4.668681318681319e-05, + "loss": 0.4839, + "step": 2412 + }, + { + "epoch": 6.6291208791208796, + "grad_norm": 14.158259391784668, + "learning_rate": 4.6685439560439565e-05, + "loss": 0.5919, + "step": 2413 + }, + { + "epoch": 6.631868131868131, + "grad_norm": 17.514020919799805, + "learning_rate": 4.6684065934065935e-05, + "loss": 0.6039, + "step": 2414 + }, + { + "epoch": 6.634615384615385, + "grad_norm": 18.753854751586914, + "learning_rate": 4.668269230769231e-05, + "loss": 0.8571, + "step": 2415 + }, + { + "epoch": 6.637362637362637, + "grad_norm": 13.795825004577637, + "learning_rate": 4.668131868131868e-05, + "loss": 0.5852, + "step": 2416 + }, + { + "epoch": 6.6401098901098905, + "grad_norm": 9.491530418395996, + "learning_rate": 4.667994505494506e-05, + "loss": 0.4244, + "step": 2417 + }, + { + "epoch": 6.642857142857143, + "grad_norm": 13.326532363891602, + "learning_rate": 4.6678571428571435e-05, + "loss": 0.5168, + "step": 2418 + }, + { + "epoch": 6.645604395604396, + "grad_norm": 21.691734313964844, + "learning_rate": 4.6677197802197805e-05, + "loss": 0.9304, + "step": 2419 + }, + { + "epoch": 6.648351648351649, + "grad_norm": 17.083890914916992, + "learning_rate": 4.6675824175824175e-05, + "loss": 0.8994, + "step": 2420 + }, + { + "epoch": 6.6510989010989015, + "grad_norm": 16.803346633911133, + "learning_rate": 4.667445054945055e-05, + "loss": 0.9351, + "step": 2421 + }, + { + "epoch": 6.653846153846154, + "grad_norm": 7.205305576324463, + "learning_rate": 4.667307692307692e-05, + "loss": 0.2951, + "step": 2422 + }, + { + "epoch": 6.656593406593407, + "grad_norm": 14.319421768188477, + "learning_rate": 4.66717032967033e-05, + "loss": 0.5764, + "step": 2423 + }, + { + "epoch": 6.65934065934066, + "grad_norm": 10.955920219421387, + "learning_rate": 4.667032967032967e-05, + "loss": 0.2935, + "step": 2424 + }, + { + "epoch": 6.662087912087912, + "grad_norm": 9.965885162353516, + "learning_rate": 4.6668956043956046e-05, + "loss": 0.4758, + "step": 2425 + }, + { + "epoch": 6.664835164835165, + "grad_norm": 5.121649742126465, + "learning_rate": 4.6667582417582416e-05, + "loss": 0.2332, + "step": 2426 + }, + { + "epoch": 6.667582417582418, + "grad_norm": 13.121455192565918, + "learning_rate": 4.666620879120879e-05, + "loss": 0.5536, + "step": 2427 + }, + { + "epoch": 6.670329670329671, + "grad_norm": 7.625482082366943, + "learning_rate": 4.666483516483517e-05, + "loss": 0.3304, + "step": 2428 + }, + { + "epoch": 6.673076923076923, + "grad_norm": 13.785929679870605, + "learning_rate": 4.666346153846154e-05, + "loss": 0.5691, + "step": 2429 + }, + { + "epoch": 6.675824175824176, + "grad_norm": 15.13182544708252, + "learning_rate": 4.6662087912087916e-05, + "loss": 0.6314, + "step": 2430 + }, + { + "epoch": 6.678571428571429, + "grad_norm": 16.148109436035156, + "learning_rate": 4.6660714285714286e-05, + "loss": 0.713, + "step": 2431 + }, + { + "epoch": 6.681318681318682, + "grad_norm": 8.816008567810059, + "learning_rate": 4.665934065934066e-05, + "loss": 0.2975, + "step": 2432 + }, + { + "epoch": 6.684065934065934, + "grad_norm": 21.816198348999023, + "learning_rate": 4.665796703296704e-05, + "loss": 1.0609, + "step": 2433 + }, + { + "epoch": 6.686813186813187, + "grad_norm": 16.9818058013916, + "learning_rate": 4.665659340659341e-05, + "loss": 1.137, + "step": 2434 + }, + { + "epoch": 6.68956043956044, + "grad_norm": 13.912055969238281, + "learning_rate": 4.665521978021978e-05, + "loss": 0.7172, + "step": 2435 + }, + { + "epoch": 6.6923076923076925, + "grad_norm": 16.28388786315918, + "learning_rate": 4.665384615384616e-05, + "loss": 0.5264, + "step": 2436 + }, + { + "epoch": 6.695054945054945, + "grad_norm": 11.391287803649902, + "learning_rate": 4.665247252747253e-05, + "loss": 0.4562, + "step": 2437 + }, + { + "epoch": 6.697802197802198, + "grad_norm": 16.266849517822266, + "learning_rate": 4.6651098901098904e-05, + "loss": 1.0152, + "step": 2438 + }, + { + "epoch": 6.700549450549451, + "grad_norm": 13.441425323486328, + "learning_rate": 4.6649725274725274e-05, + "loss": 0.6515, + "step": 2439 + }, + { + "epoch": 6.7032967032967035, + "grad_norm": 18.316492080688477, + "learning_rate": 4.664835164835165e-05, + "loss": 1.091, + "step": 2440 + }, + { + "epoch": 6.706043956043956, + "grad_norm": 10.283857345581055, + "learning_rate": 4.664697802197802e-05, + "loss": 0.4794, + "step": 2441 + }, + { + "epoch": 6.708791208791209, + "grad_norm": 14.449700355529785, + "learning_rate": 4.66456043956044e-05, + "loss": 0.6394, + "step": 2442 + }, + { + "epoch": 6.711538461538462, + "grad_norm": 16.043357849121094, + "learning_rate": 4.6644230769230774e-05, + "loss": 0.8052, + "step": 2443 + }, + { + "epoch": 6.714285714285714, + "grad_norm": 10.8335599899292, + "learning_rate": 4.6642857142857144e-05, + "loss": 0.3693, + "step": 2444 + }, + { + "epoch": 6.717032967032967, + "grad_norm": 11.461417198181152, + "learning_rate": 4.664148351648352e-05, + "loss": 0.3221, + "step": 2445 + }, + { + "epoch": 6.71978021978022, + "grad_norm": 11.948433876037598, + "learning_rate": 4.664010989010989e-05, + "loss": 0.5675, + "step": 2446 + }, + { + "epoch": 6.722527472527473, + "grad_norm": 12.934981346130371, + "learning_rate": 4.663873626373627e-05, + "loss": 0.5502, + "step": 2447 + }, + { + "epoch": 6.725274725274725, + "grad_norm": 14.970763206481934, + "learning_rate": 4.6637362637362644e-05, + "loss": 0.8104, + "step": 2448 + }, + { + "epoch": 6.728021978021978, + "grad_norm": 10.29699993133545, + "learning_rate": 4.6635989010989014e-05, + "loss": 0.3949, + "step": 2449 + }, + { + "epoch": 6.730769230769231, + "grad_norm": 9.992168426513672, + "learning_rate": 4.6634615384615384e-05, + "loss": 0.4447, + "step": 2450 + }, + { + "epoch": 6.733516483516484, + "grad_norm": 16.1412296295166, + "learning_rate": 4.663324175824176e-05, + "loss": 0.7719, + "step": 2451 + }, + { + "epoch": 6.736263736263736, + "grad_norm": 14.700328826904297, + "learning_rate": 4.663186813186813e-05, + "loss": 0.5577, + "step": 2452 + }, + { + "epoch": 6.739010989010989, + "grad_norm": 12.19515609741211, + "learning_rate": 4.663049450549451e-05, + "loss": 0.5164, + "step": 2453 + }, + { + "epoch": 6.741758241758242, + "grad_norm": 16.841838836669922, + "learning_rate": 4.662912087912088e-05, + "loss": 0.6965, + "step": 2454 + }, + { + "epoch": 6.7445054945054945, + "grad_norm": 14.807512283325195, + "learning_rate": 4.6627747252747255e-05, + "loss": 0.6572, + "step": 2455 + }, + { + "epoch": 6.747252747252747, + "grad_norm": 11.480608940124512, + "learning_rate": 4.6626373626373625e-05, + "loss": 0.4273, + "step": 2456 + }, + { + "epoch": 6.75, + "grad_norm": 20.449804306030273, + "learning_rate": 4.6625e-05, + "loss": 0.7673, + "step": 2457 + }, + { + "epoch": 6.752747252747253, + "grad_norm": 9.868199348449707, + "learning_rate": 4.662362637362638e-05, + "loss": 0.3516, + "step": 2458 + }, + { + "epoch": 6.7554945054945055, + "grad_norm": 15.350285530090332, + "learning_rate": 4.662225274725275e-05, + "loss": 0.6842, + "step": 2459 + }, + { + "epoch": 6.758241758241758, + "grad_norm": 10.308221817016602, + "learning_rate": 4.6620879120879125e-05, + "loss": 0.5743, + "step": 2460 + }, + { + "epoch": 6.760989010989011, + "grad_norm": 17.472503662109375, + "learning_rate": 4.6619505494505495e-05, + "loss": 0.5803, + "step": 2461 + }, + { + "epoch": 6.763736263736264, + "grad_norm": 13.971575736999512, + "learning_rate": 4.661813186813187e-05, + "loss": 0.8752, + "step": 2462 + }, + { + "epoch": 6.766483516483516, + "grad_norm": 20.2631893157959, + "learning_rate": 4.661675824175825e-05, + "loss": 1.0785, + "step": 2463 + }, + { + "epoch": 6.769230769230769, + "grad_norm": 11.814764022827148, + "learning_rate": 4.661538461538462e-05, + "loss": 0.4224, + "step": 2464 + }, + { + "epoch": 6.771978021978022, + "grad_norm": 16.288957595825195, + "learning_rate": 4.661401098901099e-05, + "loss": 0.7504, + "step": 2465 + }, + { + "epoch": 6.774725274725275, + "grad_norm": 8.73544979095459, + "learning_rate": 4.6612637362637366e-05, + "loss": 0.3689, + "step": 2466 + }, + { + "epoch": 6.777472527472527, + "grad_norm": 13.55180549621582, + "learning_rate": 4.6611263736263736e-05, + "loss": 0.5192, + "step": 2467 + }, + { + "epoch": 6.78021978021978, + "grad_norm": 19.20780372619629, + "learning_rate": 4.660989010989011e-05, + "loss": 0.7927, + "step": 2468 + }, + { + "epoch": 6.782967032967033, + "grad_norm": 11.185770034790039, + "learning_rate": 4.660851648351648e-05, + "loss": 0.5302, + "step": 2469 + }, + { + "epoch": 6.785714285714286, + "grad_norm": 14.557459831237793, + "learning_rate": 4.660714285714286e-05, + "loss": 0.4667, + "step": 2470 + }, + { + "epoch": 6.788461538461538, + "grad_norm": 15.794300079345703, + "learning_rate": 4.660576923076923e-05, + "loss": 0.9108, + "step": 2471 + }, + { + "epoch": 6.791208791208791, + "grad_norm": 19.30437469482422, + "learning_rate": 4.6604395604395606e-05, + "loss": 0.9463, + "step": 2472 + }, + { + "epoch": 6.793956043956044, + "grad_norm": 16.634714126586914, + "learning_rate": 4.660302197802198e-05, + "loss": 0.8319, + "step": 2473 + }, + { + "epoch": 6.7967032967032965, + "grad_norm": 8.489397048950195, + "learning_rate": 4.660164835164835e-05, + "loss": 0.3814, + "step": 2474 + }, + { + "epoch": 6.799450549450549, + "grad_norm": 14.383519172668457, + "learning_rate": 4.660027472527473e-05, + "loss": 0.4762, + "step": 2475 + }, + { + "epoch": 6.802197802197802, + "grad_norm": 14.926796913146973, + "learning_rate": 4.65989010989011e-05, + "loss": 0.5052, + "step": 2476 + }, + { + "epoch": 6.804945054945055, + "grad_norm": 13.118508338928223, + "learning_rate": 4.6597527472527477e-05, + "loss": 0.5177, + "step": 2477 + }, + { + "epoch": 6.8076923076923075, + "grad_norm": 16.402578353881836, + "learning_rate": 4.659615384615385e-05, + "loss": 0.7363, + "step": 2478 + }, + { + "epoch": 6.81043956043956, + "grad_norm": 12.262394905090332, + "learning_rate": 4.6594780219780223e-05, + "loss": 0.5857, + "step": 2479 + }, + { + "epoch": 6.813186813186813, + "grad_norm": 11.575726509094238, + "learning_rate": 4.6593406593406593e-05, + "loss": 0.4611, + "step": 2480 + }, + { + "epoch": 6.815934065934066, + "grad_norm": 13.797388076782227, + "learning_rate": 4.659203296703297e-05, + "loss": 0.5995, + "step": 2481 + }, + { + "epoch": 6.818681318681318, + "grad_norm": 15.53144645690918, + "learning_rate": 4.659065934065934e-05, + "loss": 0.6512, + "step": 2482 + }, + { + "epoch": 6.821428571428571, + "grad_norm": 17.69875717163086, + "learning_rate": 4.658928571428572e-05, + "loss": 0.939, + "step": 2483 + }, + { + "epoch": 6.824175824175824, + "grad_norm": 15.425260543823242, + "learning_rate": 4.658791208791209e-05, + "loss": 0.3355, + "step": 2484 + }, + { + "epoch": 6.826923076923077, + "grad_norm": 12.161690711975098, + "learning_rate": 4.6586538461538464e-05, + "loss": 0.6249, + "step": 2485 + }, + { + "epoch": 6.829670329670329, + "grad_norm": 12.977633476257324, + "learning_rate": 4.6585164835164834e-05, + "loss": 0.5755, + "step": 2486 + }, + { + "epoch": 6.832417582417582, + "grad_norm": 17.290708541870117, + "learning_rate": 4.658379120879121e-05, + "loss": 0.5729, + "step": 2487 + }, + { + "epoch": 6.835164835164835, + "grad_norm": 14.2727632522583, + "learning_rate": 4.658241758241759e-05, + "loss": 0.6425, + "step": 2488 + }, + { + "epoch": 6.837912087912088, + "grad_norm": 15.523407936096191, + "learning_rate": 4.658104395604396e-05, + "loss": 0.7378, + "step": 2489 + }, + { + "epoch": 6.84065934065934, + "grad_norm": 10.262336730957031, + "learning_rate": 4.6579670329670334e-05, + "loss": 0.4698, + "step": 2490 + }, + { + "epoch": 6.843406593406593, + "grad_norm": 14.60869026184082, + "learning_rate": 4.6578296703296704e-05, + "loss": 0.5255, + "step": 2491 + }, + { + "epoch": 6.846153846153846, + "grad_norm": 22.157718658447266, + "learning_rate": 4.657692307692308e-05, + "loss": 1.2932, + "step": 2492 + }, + { + "epoch": 6.8489010989010985, + "grad_norm": 10.750992774963379, + "learning_rate": 4.657554945054946e-05, + "loss": 0.4108, + "step": 2493 + }, + { + "epoch": 6.851648351648351, + "grad_norm": 14.004359245300293, + "learning_rate": 4.657417582417583e-05, + "loss": 0.8337, + "step": 2494 + }, + { + "epoch": 6.854395604395604, + "grad_norm": 14.636526107788086, + "learning_rate": 4.65728021978022e-05, + "loss": 0.5445, + "step": 2495 + }, + { + "epoch": 6.857142857142857, + "grad_norm": 17.349637985229492, + "learning_rate": 4.6571428571428575e-05, + "loss": 0.6231, + "step": 2496 + }, + { + "epoch": 6.8598901098901095, + "grad_norm": 13.303594589233398, + "learning_rate": 4.6570054945054945e-05, + "loss": 0.4921, + "step": 2497 + }, + { + "epoch": 6.862637362637363, + "grad_norm": 13.415660858154297, + "learning_rate": 4.656868131868132e-05, + "loss": 0.5247, + "step": 2498 + }, + { + "epoch": 6.865384615384615, + "grad_norm": 15.24579906463623, + "learning_rate": 4.656730769230769e-05, + "loss": 0.6681, + "step": 2499 + }, + { + "epoch": 6.868131868131869, + "grad_norm": 17.562522888183594, + "learning_rate": 4.656593406593407e-05, + "loss": 0.6367, + "step": 2500 + }, + { + "epoch": 6.8708791208791204, + "grad_norm": 17.386722564697266, + "learning_rate": 4.656456043956044e-05, + "loss": 0.5731, + "step": 2501 + }, + { + "epoch": 6.873626373626374, + "grad_norm": 11.363360404968262, + "learning_rate": 4.6563186813186815e-05, + "loss": 0.5843, + "step": 2502 + }, + { + "epoch": 6.876373626373626, + "grad_norm": 12.104889869689941, + "learning_rate": 4.656181318681319e-05, + "loss": 0.6405, + "step": 2503 + }, + { + "epoch": 6.8791208791208796, + "grad_norm": 11.990538597106934, + "learning_rate": 4.656043956043956e-05, + "loss": 0.3795, + "step": 2504 + }, + { + "epoch": 6.881868131868131, + "grad_norm": 12.48770809173584, + "learning_rate": 4.655906593406594e-05, + "loss": 0.6128, + "step": 2505 + }, + { + "epoch": 6.884615384615385, + "grad_norm": 8.952713966369629, + "learning_rate": 4.655769230769231e-05, + "loss": 0.3194, + "step": 2506 + }, + { + "epoch": 6.887362637362637, + "grad_norm": 13.934737205505371, + "learning_rate": 4.6556318681318686e-05, + "loss": 0.6083, + "step": 2507 + }, + { + "epoch": 6.8901098901098905, + "grad_norm": 15.538232803344727, + "learning_rate": 4.655494505494506e-05, + "loss": 0.5047, + "step": 2508 + }, + { + "epoch": 6.892857142857143, + "grad_norm": 17.274185180664062, + "learning_rate": 4.655357142857143e-05, + "loss": 0.8175, + "step": 2509 + }, + { + "epoch": 6.895604395604396, + "grad_norm": 11.262225151062012, + "learning_rate": 4.65521978021978e-05, + "loss": 0.4307, + "step": 2510 + }, + { + "epoch": 6.898351648351649, + "grad_norm": 13.09830379486084, + "learning_rate": 4.655082417582418e-05, + "loss": 0.6029, + "step": 2511 + }, + { + "epoch": 6.9010989010989015, + "grad_norm": 14.445871353149414, + "learning_rate": 4.654945054945055e-05, + "loss": 0.946, + "step": 2512 + }, + { + "epoch": 6.903846153846154, + "grad_norm": 20.21465492248535, + "learning_rate": 4.6548076923076926e-05, + "loss": 1.1843, + "step": 2513 + }, + { + "epoch": 6.906593406593407, + "grad_norm": 11.102828025817871, + "learning_rate": 4.6546703296703296e-05, + "loss": 0.5209, + "step": 2514 + }, + { + "epoch": 6.90934065934066, + "grad_norm": 17.654176712036133, + "learning_rate": 4.654532967032967e-05, + "loss": 0.6591, + "step": 2515 + }, + { + "epoch": 6.912087912087912, + "grad_norm": 17.924190521240234, + "learning_rate": 4.654395604395604e-05, + "loss": 0.939, + "step": 2516 + }, + { + "epoch": 6.914835164835165, + "grad_norm": 17.101531982421875, + "learning_rate": 4.654258241758242e-05, + "loss": 0.6733, + "step": 2517 + }, + { + "epoch": 6.917582417582418, + "grad_norm": 14.901361465454102, + "learning_rate": 4.6541208791208796e-05, + "loss": 0.838, + "step": 2518 + }, + { + "epoch": 6.920329670329671, + "grad_norm": 17.613279342651367, + "learning_rate": 4.6539835164835166e-05, + "loss": 0.7802, + "step": 2519 + }, + { + "epoch": 6.923076923076923, + "grad_norm": 10.22826862335205, + "learning_rate": 4.653846153846154e-05, + "loss": 0.4068, + "step": 2520 + }, + { + "epoch": 6.925824175824176, + "grad_norm": 9.967215538024902, + "learning_rate": 4.653708791208791e-05, + "loss": 0.5022, + "step": 2521 + }, + { + "epoch": 6.928571428571429, + "grad_norm": 14.190114974975586, + "learning_rate": 4.653571428571429e-05, + "loss": 0.4297, + "step": 2522 + }, + { + "epoch": 6.931318681318682, + "grad_norm": 7.2005934715271, + "learning_rate": 4.653434065934067e-05, + "loss": 0.2736, + "step": 2523 + }, + { + "epoch": 6.934065934065934, + "grad_norm": 15.573386192321777, + "learning_rate": 4.653296703296704e-05, + "loss": 0.7365, + "step": 2524 + }, + { + "epoch": 6.936813186813187, + "grad_norm": 11.508296012878418, + "learning_rate": 4.653159340659341e-05, + "loss": 0.4062, + "step": 2525 + }, + { + "epoch": 6.93956043956044, + "grad_norm": 12.224074363708496, + "learning_rate": 4.6530219780219784e-05, + "loss": 0.4884, + "step": 2526 + }, + { + "epoch": 6.9423076923076925, + "grad_norm": 18.681442260742188, + "learning_rate": 4.6528846153846154e-05, + "loss": 0.7774, + "step": 2527 + }, + { + "epoch": 6.945054945054945, + "grad_norm": 15.029083251953125, + "learning_rate": 4.652747252747253e-05, + "loss": 0.6601, + "step": 2528 + }, + { + "epoch": 6.947802197802198, + "grad_norm": 14.770222663879395, + "learning_rate": 4.65260989010989e-05, + "loss": 0.5958, + "step": 2529 + }, + { + "epoch": 6.950549450549451, + "grad_norm": 11.350419998168945, + "learning_rate": 4.652472527472528e-05, + "loss": 0.3561, + "step": 2530 + }, + { + "epoch": 6.9532967032967035, + "grad_norm": 13.642738342285156, + "learning_rate": 4.652335164835165e-05, + "loss": 0.7524, + "step": 2531 + }, + { + "epoch": 6.956043956043956, + "grad_norm": 16.12249755859375, + "learning_rate": 4.6521978021978024e-05, + "loss": 0.6928, + "step": 2532 + }, + { + "epoch": 6.958791208791209, + "grad_norm": 12.175159454345703, + "learning_rate": 4.65206043956044e-05, + "loss": 0.5735, + "step": 2533 + }, + { + "epoch": 6.961538461538462, + "grad_norm": 14.966527938842773, + "learning_rate": 4.651923076923077e-05, + "loss": 0.865, + "step": 2534 + }, + { + "epoch": 6.964285714285714, + "grad_norm": 18.331472396850586, + "learning_rate": 4.651785714285715e-05, + "loss": 1.1375, + "step": 2535 + }, + { + "epoch": 6.967032967032967, + "grad_norm": 14.764216423034668, + "learning_rate": 4.651648351648352e-05, + "loss": 0.6042, + "step": 2536 + }, + { + "epoch": 6.96978021978022, + "grad_norm": 15.22944164276123, + "learning_rate": 4.6515109890109895e-05, + "loss": 0.6827, + "step": 2537 + }, + { + "epoch": 6.972527472527473, + "grad_norm": 15.411703109741211, + "learning_rate": 4.651373626373627e-05, + "loss": 0.6571, + "step": 2538 + }, + { + "epoch": 6.975274725274725, + "grad_norm": 11.530426025390625, + "learning_rate": 4.651236263736264e-05, + "loss": 0.436, + "step": 2539 + }, + { + "epoch": 6.978021978021978, + "grad_norm": 14.049763679504395, + "learning_rate": 4.651098901098901e-05, + "loss": 0.6378, + "step": 2540 + }, + { + "epoch": 6.980769230769231, + "grad_norm": 11.450155258178711, + "learning_rate": 4.650961538461539e-05, + "loss": 0.2837, + "step": 2541 + }, + { + "epoch": 6.983516483516484, + "grad_norm": 13.312076568603516, + "learning_rate": 4.650824175824176e-05, + "loss": 0.52, + "step": 2542 + }, + { + "epoch": 6.986263736263736, + "grad_norm": 13.643149375915527, + "learning_rate": 4.6506868131868135e-05, + "loss": 0.8072, + "step": 2543 + }, + { + "epoch": 6.989010989010989, + "grad_norm": 16.265132904052734, + "learning_rate": 4.6505494505494505e-05, + "loss": 0.6156, + "step": 2544 + }, + { + "epoch": 6.991758241758242, + "grad_norm": 16.70711326599121, + "learning_rate": 4.650412087912088e-05, + "loss": 0.8342, + "step": 2545 + }, + { + "epoch": 6.9945054945054945, + "grad_norm": 14.100566864013672, + "learning_rate": 4.650274725274725e-05, + "loss": 0.684, + "step": 2546 + }, + { + "epoch": 6.997252747252747, + "grad_norm": 13.663581848144531, + "learning_rate": 4.650137362637363e-05, + "loss": 0.6481, + "step": 2547 + }, + { + "epoch": 7.0, + "grad_norm": 41.598140716552734, + "learning_rate": 4.6500000000000005e-05, + "loss": 1.4282, + "step": 2548 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.4573002754820937, + "eval_f1": 0.4141154005783266, + "eval_f1_DuraRiadoRio_64x64": 0.5146726862302483, + "eval_f1_Mole_64x64": 0.5088161209068011, + "eval_f1_Quebrado_64x64": 0.11764705882352941, + "eval_f1_RiadoRio_64x64": 0.4097560975609756, + "eval_f1_RioFechado_64x64": 0.5196850393700787, + "eval_loss": 2.2074365615844727, + "eval_precision": 0.632443786730087, + "eval_precision_DuraRiadoRio_64x64": 0.38127090301003347, + "eval_precision_Mole_64x64": 0.39920948616600793, + "eval_precision_Quebrado_64x64": 1.0, + "eval_precision_RiadoRio_64x64": 0.7924528301886793, + "eval_precision_RioFechado_64x64": 0.5892857142857143, + "eval_recall": 0.4593320154847212, + "eval_recall_DuraRiadoRio_64x64": 0.7916666666666666, + "eval_recall_Mole_64x64": 0.7013888888888888, + "eval_recall_Quebrado_64x64": 0.0625, + "eval_recall_RiadoRio_64x64": 0.27631578947368424, + "eval_recall_RioFechado_64x64": 0.4647887323943662, + "eval_runtime": 1.7075, + "eval_samples_per_second": 425.181, + "eval_steps_per_second": 26.94, + "step": 2548 + }, + { + "epoch": 7.002747252747253, + "grad_norm": 16.196378707885742, + "learning_rate": 4.6498626373626375e-05, + "loss": 0.496, + "step": 2549 + }, + { + "epoch": 7.0054945054945055, + "grad_norm": 14.369813919067383, + "learning_rate": 4.649725274725275e-05, + "loss": 0.5823, + "step": 2550 + }, + { + "epoch": 7.008241758241758, + "grad_norm": 12.27263355255127, + "learning_rate": 4.649587912087912e-05, + "loss": 0.7029, + "step": 2551 + }, + { + "epoch": 7.010989010989011, + "grad_norm": 12.171524047851562, + "learning_rate": 4.64945054945055e-05, + "loss": 0.4747, + "step": 2552 + }, + { + "epoch": 7.013736263736264, + "grad_norm": 11.612302780151367, + "learning_rate": 4.6493131868131876e-05, + "loss": 0.5287, + "step": 2553 + }, + { + "epoch": 7.016483516483516, + "grad_norm": 14.270370483398438, + "learning_rate": 4.6491758241758246e-05, + "loss": 0.6955, + "step": 2554 + }, + { + "epoch": 7.019230769230769, + "grad_norm": 9.821734428405762, + "learning_rate": 4.6490384615384616e-05, + "loss": 0.4049, + "step": 2555 + }, + { + "epoch": 7.021978021978022, + "grad_norm": 11.975263595581055, + "learning_rate": 4.6489010989010986e-05, + "loss": 0.4585, + "step": 2556 + }, + { + "epoch": 7.024725274725275, + "grad_norm": 16.94623565673828, + "learning_rate": 4.648763736263736e-05, + "loss": 0.6959, + "step": 2557 + }, + { + "epoch": 7.027472527472527, + "grad_norm": 12.887588500976562, + "learning_rate": 4.648626373626374e-05, + "loss": 0.4566, + "step": 2558 + }, + { + "epoch": 7.03021978021978, + "grad_norm": 12.772314071655273, + "learning_rate": 4.648489010989011e-05, + "loss": 0.4523, + "step": 2559 + }, + { + "epoch": 7.032967032967033, + "grad_norm": 13.908536911010742, + "learning_rate": 4.6483516483516486e-05, + "loss": 0.6709, + "step": 2560 + }, + { + "epoch": 7.035714285714286, + "grad_norm": 12.253188133239746, + "learning_rate": 4.6482142857142856e-05, + "loss": 0.5398, + "step": 2561 + }, + { + "epoch": 7.038461538461538, + "grad_norm": 16.559200286865234, + "learning_rate": 4.648076923076923e-05, + "loss": 0.6481, + "step": 2562 + }, + { + "epoch": 7.041208791208791, + "grad_norm": 11.166229248046875, + "learning_rate": 4.647939560439561e-05, + "loss": 0.6337, + "step": 2563 + }, + { + "epoch": 7.043956043956044, + "grad_norm": 11.865071296691895, + "learning_rate": 4.647802197802198e-05, + "loss": 0.5178, + "step": 2564 + }, + { + "epoch": 7.0467032967032965, + "grad_norm": 11.238410949707031, + "learning_rate": 4.647664835164836e-05, + "loss": 0.5463, + "step": 2565 + }, + { + "epoch": 7.049450549450549, + "grad_norm": 11.719343185424805, + "learning_rate": 4.647527472527473e-05, + "loss": 0.7534, + "step": 2566 + }, + { + "epoch": 7.052197802197802, + "grad_norm": 10.047380447387695, + "learning_rate": 4.6473901098901104e-05, + "loss": 0.333, + "step": 2567 + }, + { + "epoch": 7.054945054945055, + "grad_norm": 9.952554702758789, + "learning_rate": 4.647252747252748e-05, + "loss": 0.4229, + "step": 2568 + }, + { + "epoch": 7.0576923076923075, + "grad_norm": 15.234945297241211, + "learning_rate": 4.647115384615385e-05, + "loss": 0.6538, + "step": 2569 + }, + { + "epoch": 7.06043956043956, + "grad_norm": 9.05521011352539, + "learning_rate": 4.646978021978022e-05, + "loss": 0.3119, + "step": 2570 + }, + { + "epoch": 7.063186813186813, + "grad_norm": 19.98065948486328, + "learning_rate": 4.646840659340659e-05, + "loss": 0.9333, + "step": 2571 + }, + { + "epoch": 7.065934065934066, + "grad_norm": 12.642918586730957, + "learning_rate": 4.646703296703297e-05, + "loss": 0.4748, + "step": 2572 + }, + { + "epoch": 7.068681318681318, + "grad_norm": 16.192792892456055, + "learning_rate": 4.6465659340659344e-05, + "loss": 1.004, + "step": 2573 + }, + { + "epoch": 7.071428571428571, + "grad_norm": 12.754257202148438, + "learning_rate": 4.6464285714285714e-05, + "loss": 0.5561, + "step": 2574 + }, + { + "epoch": 7.074175824175824, + "grad_norm": 14.4180326461792, + "learning_rate": 4.646291208791209e-05, + "loss": 0.8144, + "step": 2575 + }, + { + "epoch": 7.076923076923077, + "grad_norm": 11.612858772277832, + "learning_rate": 4.646153846153846e-05, + "loss": 0.4758, + "step": 2576 + }, + { + "epoch": 7.079670329670329, + "grad_norm": 12.645294189453125, + "learning_rate": 4.646016483516484e-05, + "loss": 0.5702, + "step": 2577 + }, + { + "epoch": 7.082417582417582, + "grad_norm": 13.932577133178711, + "learning_rate": 4.6458791208791214e-05, + "loss": 0.6303, + "step": 2578 + }, + { + "epoch": 7.085164835164835, + "grad_norm": 20.73105239868164, + "learning_rate": 4.6457417582417584e-05, + "loss": 1.1036, + "step": 2579 + }, + { + "epoch": 7.087912087912088, + "grad_norm": 13.236849784851074, + "learning_rate": 4.645604395604396e-05, + "loss": 0.6911, + "step": 2580 + }, + { + "epoch": 7.09065934065934, + "grad_norm": 10.129514694213867, + "learning_rate": 4.645467032967033e-05, + "loss": 0.3303, + "step": 2581 + }, + { + "epoch": 7.093406593406593, + "grad_norm": 13.942323684692383, + "learning_rate": 4.645329670329671e-05, + "loss": 0.6159, + "step": 2582 + }, + { + "epoch": 7.096153846153846, + "grad_norm": 9.826959609985352, + "learning_rate": 4.6451923076923085e-05, + "loss": 0.4498, + "step": 2583 + }, + { + "epoch": 7.0989010989010985, + "grad_norm": 9.218853950500488, + "learning_rate": 4.6450549450549455e-05, + "loss": 0.41, + "step": 2584 + }, + { + "epoch": 7.101648351648351, + "grad_norm": 13.662128448486328, + "learning_rate": 4.6449175824175825e-05, + "loss": 0.4976, + "step": 2585 + }, + { + "epoch": 7.104395604395604, + "grad_norm": 17.20374870300293, + "learning_rate": 4.6447802197802195e-05, + "loss": 0.7587, + "step": 2586 + }, + { + "epoch": 7.107142857142857, + "grad_norm": 14.374102592468262, + "learning_rate": 4.644642857142857e-05, + "loss": 0.6005, + "step": 2587 + }, + { + "epoch": 7.1098901098901095, + "grad_norm": 8.071090698242188, + "learning_rate": 4.644505494505495e-05, + "loss": 0.3009, + "step": 2588 + }, + { + "epoch": 7.112637362637362, + "grad_norm": 14.301705360412598, + "learning_rate": 4.644368131868132e-05, + "loss": 0.5871, + "step": 2589 + }, + { + "epoch": 7.115384615384615, + "grad_norm": 12.911857604980469, + "learning_rate": 4.6442307692307695e-05, + "loss": 0.5659, + "step": 2590 + }, + { + "epoch": 7.118131868131868, + "grad_norm": 15.013937950134277, + "learning_rate": 4.6440934065934065e-05, + "loss": 0.6399, + "step": 2591 + }, + { + "epoch": 7.1208791208791204, + "grad_norm": 17.9877986907959, + "learning_rate": 4.643956043956044e-05, + "loss": 0.8771, + "step": 2592 + }, + { + "epoch": 7.123626373626374, + "grad_norm": 15.916648864746094, + "learning_rate": 4.643818681318682e-05, + "loss": 0.6834, + "step": 2593 + }, + { + "epoch": 7.126373626373626, + "grad_norm": 13.831721305847168, + "learning_rate": 4.643681318681319e-05, + "loss": 0.7133, + "step": 2594 + }, + { + "epoch": 7.1291208791208796, + "grad_norm": 13.653879165649414, + "learning_rate": 4.6435439560439566e-05, + "loss": 0.5672, + "step": 2595 + }, + { + "epoch": 7.131868131868132, + "grad_norm": 15.677621841430664, + "learning_rate": 4.6434065934065936e-05, + "loss": 0.5966, + "step": 2596 + }, + { + "epoch": 7.134615384615385, + "grad_norm": 9.57106876373291, + "learning_rate": 4.643269230769231e-05, + "loss": 0.4478, + "step": 2597 + }, + { + "epoch": 7.137362637362638, + "grad_norm": 17.323486328125, + "learning_rate": 4.643131868131869e-05, + "loss": 1.0533, + "step": 2598 + }, + { + "epoch": 7.1401098901098905, + "grad_norm": 14.572265625, + "learning_rate": 4.642994505494506e-05, + "loss": 0.6619, + "step": 2599 + }, + { + "epoch": 7.142857142857143, + "grad_norm": 12.647554397583008, + "learning_rate": 4.642857142857143e-05, + "loss": 0.4278, + "step": 2600 + }, + { + "epoch": 7.145604395604396, + "grad_norm": 13.381975173950195, + "learning_rate": 4.64271978021978e-05, + "loss": 0.4198, + "step": 2601 + }, + { + "epoch": 7.148351648351649, + "grad_norm": 11.876761436462402, + "learning_rate": 4.6425824175824176e-05, + "loss": 0.6182, + "step": 2602 + }, + { + "epoch": 7.1510989010989015, + "grad_norm": 19.318937301635742, + "learning_rate": 4.642445054945055e-05, + "loss": 1.3902, + "step": 2603 + }, + { + "epoch": 7.153846153846154, + "grad_norm": 14.3508939743042, + "learning_rate": 4.642307692307692e-05, + "loss": 0.669, + "step": 2604 + }, + { + "epoch": 7.156593406593407, + "grad_norm": 17.993955612182617, + "learning_rate": 4.64217032967033e-05, + "loss": 0.9614, + "step": 2605 + }, + { + "epoch": 7.15934065934066, + "grad_norm": 19.748313903808594, + "learning_rate": 4.642032967032967e-05, + "loss": 0.8684, + "step": 2606 + }, + { + "epoch": 7.162087912087912, + "grad_norm": 11.554266929626465, + "learning_rate": 4.6418956043956047e-05, + "loss": 0.5099, + "step": 2607 + }, + { + "epoch": 7.164835164835165, + "grad_norm": 14.074213981628418, + "learning_rate": 4.641758241758242e-05, + "loss": 0.6407, + "step": 2608 + }, + { + "epoch": 7.167582417582418, + "grad_norm": 14.407166481018066, + "learning_rate": 4.6416208791208793e-05, + "loss": 0.7485, + "step": 2609 + }, + { + "epoch": 7.170329670329671, + "grad_norm": 11.639347076416016, + "learning_rate": 4.641483516483517e-05, + "loss": 0.5814, + "step": 2610 + }, + { + "epoch": 7.173076923076923, + "grad_norm": 10.694490432739258, + "learning_rate": 4.641346153846154e-05, + "loss": 0.5006, + "step": 2611 + }, + { + "epoch": 7.175824175824176, + "grad_norm": 14.56292724609375, + "learning_rate": 4.641208791208792e-05, + "loss": 0.5413, + "step": 2612 + }, + { + "epoch": 7.178571428571429, + "grad_norm": 15.116785049438477, + "learning_rate": 4.6410714285714294e-05, + "loss": 0.6604, + "step": 2613 + }, + { + "epoch": 7.181318681318682, + "grad_norm": 8.540352821350098, + "learning_rate": 4.6409340659340664e-05, + "loss": 0.351, + "step": 2614 + }, + { + "epoch": 7.184065934065934, + "grad_norm": 10.74270248413086, + "learning_rate": 4.6407967032967034e-05, + "loss": 0.4614, + "step": 2615 + }, + { + "epoch": 7.186813186813187, + "grad_norm": 19.492298126220703, + "learning_rate": 4.6406593406593404e-05, + "loss": 0.9973, + "step": 2616 + }, + { + "epoch": 7.18956043956044, + "grad_norm": 12.21657943725586, + "learning_rate": 4.640521978021978e-05, + "loss": 0.4964, + "step": 2617 + }, + { + "epoch": 7.1923076923076925, + "grad_norm": 15.15424919128418, + "learning_rate": 4.640384615384615e-05, + "loss": 0.769, + "step": 2618 + }, + { + "epoch": 7.195054945054945, + "grad_norm": 13.260683059692383, + "learning_rate": 4.640247252747253e-05, + "loss": 0.5446, + "step": 2619 + }, + { + "epoch": 7.197802197802198, + "grad_norm": 19.79872703552246, + "learning_rate": 4.6401098901098904e-05, + "loss": 0.9729, + "step": 2620 + }, + { + "epoch": 7.200549450549451, + "grad_norm": 11.638460159301758, + "learning_rate": 4.6399725274725274e-05, + "loss": 0.4774, + "step": 2621 + }, + { + "epoch": 7.2032967032967035, + "grad_norm": 13.273284912109375, + "learning_rate": 4.639835164835165e-05, + "loss": 0.5273, + "step": 2622 + }, + { + "epoch": 7.206043956043956, + "grad_norm": 14.556699752807617, + "learning_rate": 4.639697802197802e-05, + "loss": 0.5574, + "step": 2623 + }, + { + "epoch": 7.208791208791209, + "grad_norm": 18.595827102661133, + "learning_rate": 4.63956043956044e-05, + "loss": 0.7145, + "step": 2624 + }, + { + "epoch": 7.211538461538462, + "grad_norm": 13.964397430419922, + "learning_rate": 4.6394230769230775e-05, + "loss": 0.9658, + "step": 2625 + }, + { + "epoch": 7.214285714285714, + "grad_norm": 11.57116413116455, + "learning_rate": 4.6392857142857145e-05, + "loss": 0.5236, + "step": 2626 + }, + { + "epoch": 7.217032967032967, + "grad_norm": 14.714824676513672, + "learning_rate": 4.639148351648352e-05, + "loss": 0.691, + "step": 2627 + }, + { + "epoch": 7.21978021978022, + "grad_norm": 12.363405227661133, + "learning_rate": 4.639010989010989e-05, + "loss": 0.4012, + "step": 2628 + }, + { + "epoch": 7.222527472527473, + "grad_norm": 10.483065605163574, + "learning_rate": 4.638873626373627e-05, + "loss": 0.3985, + "step": 2629 + }, + { + "epoch": 7.225274725274725, + "grad_norm": 9.454229354858398, + "learning_rate": 4.638736263736264e-05, + "loss": 0.2946, + "step": 2630 + }, + { + "epoch": 7.228021978021978, + "grad_norm": 14.027496337890625, + "learning_rate": 4.638598901098901e-05, + "loss": 0.5995, + "step": 2631 + }, + { + "epoch": 7.230769230769231, + "grad_norm": 12.188474655151367, + "learning_rate": 4.6384615384615385e-05, + "loss": 0.6567, + "step": 2632 + }, + { + "epoch": 7.233516483516484, + "grad_norm": 9.554971694946289, + "learning_rate": 4.6383241758241755e-05, + "loss": 0.3948, + "step": 2633 + }, + { + "epoch": 7.236263736263736, + "grad_norm": 8.726463317871094, + "learning_rate": 4.638186813186813e-05, + "loss": 0.313, + "step": 2634 + }, + { + "epoch": 7.239010989010989, + "grad_norm": 14.404788970947266, + "learning_rate": 4.638049450549451e-05, + "loss": 0.6744, + "step": 2635 + }, + { + "epoch": 7.241758241758242, + "grad_norm": 14.686065673828125, + "learning_rate": 4.637912087912088e-05, + "loss": 0.5493, + "step": 2636 + }, + { + "epoch": 7.2445054945054945, + "grad_norm": 14.414834976196289, + "learning_rate": 4.6377747252747256e-05, + "loss": 0.5293, + "step": 2637 + }, + { + "epoch": 7.247252747252747, + "grad_norm": 7.646644592285156, + "learning_rate": 4.6376373626373626e-05, + "loss": 0.2523, + "step": 2638 + }, + { + "epoch": 7.25, + "grad_norm": 14.713813781738281, + "learning_rate": 4.6375e-05, + "loss": 0.4809, + "step": 2639 + }, + { + "epoch": 7.252747252747253, + "grad_norm": 13.96348762512207, + "learning_rate": 4.637362637362638e-05, + "loss": 0.5689, + "step": 2640 + }, + { + "epoch": 7.2554945054945055, + "grad_norm": 10.436104774475098, + "learning_rate": 4.637225274725275e-05, + "loss": 0.4031, + "step": 2641 + }, + { + "epoch": 7.258241758241758, + "grad_norm": 11.782792091369629, + "learning_rate": 4.6370879120879126e-05, + "loss": 0.3586, + "step": 2642 + }, + { + "epoch": 7.260989010989011, + "grad_norm": 9.964699745178223, + "learning_rate": 4.6369505494505496e-05, + "loss": 0.5357, + "step": 2643 + }, + { + "epoch": 7.263736263736264, + "grad_norm": 11.16984748840332, + "learning_rate": 4.636813186813187e-05, + "loss": 0.4595, + "step": 2644 + }, + { + "epoch": 7.266483516483516, + "grad_norm": 12.795028686523438, + "learning_rate": 4.636675824175824e-05, + "loss": 0.6193, + "step": 2645 + }, + { + "epoch": 7.269230769230769, + "grad_norm": 14.918221473693848, + "learning_rate": 4.636538461538461e-05, + "loss": 0.7211, + "step": 2646 + }, + { + "epoch": 7.271978021978022, + "grad_norm": 12.501459121704102, + "learning_rate": 4.636401098901099e-05, + "loss": 0.5964, + "step": 2647 + }, + { + "epoch": 7.274725274725275, + "grad_norm": 9.584349632263184, + "learning_rate": 4.636263736263736e-05, + "loss": 0.393, + "step": 2648 + }, + { + "epoch": 7.277472527472527, + "grad_norm": 9.161460876464844, + "learning_rate": 4.6361263736263736e-05, + "loss": 0.3869, + "step": 2649 + }, + { + "epoch": 7.28021978021978, + "grad_norm": 10.700600624084473, + "learning_rate": 4.635989010989011e-05, + "loss": 0.2901, + "step": 2650 + }, + { + "epoch": 7.282967032967033, + "grad_norm": 13.173033714294434, + "learning_rate": 4.635851648351648e-05, + "loss": 0.5967, + "step": 2651 + }, + { + "epoch": 7.285714285714286, + "grad_norm": 20.635652542114258, + "learning_rate": 4.635714285714286e-05, + "loss": 1.007, + "step": 2652 + }, + { + "epoch": 7.288461538461538, + "grad_norm": 10.839397430419922, + "learning_rate": 4.635576923076923e-05, + "loss": 0.4842, + "step": 2653 + }, + { + "epoch": 7.291208791208791, + "grad_norm": 16.735368728637695, + "learning_rate": 4.635439560439561e-05, + "loss": 0.9358, + "step": 2654 + }, + { + "epoch": 7.293956043956044, + "grad_norm": 15.610974311828613, + "learning_rate": 4.6353021978021984e-05, + "loss": 0.4249, + "step": 2655 + }, + { + "epoch": 7.2967032967032965, + "grad_norm": 15.311429023742676, + "learning_rate": 4.6351648351648354e-05, + "loss": 0.5937, + "step": 2656 + }, + { + "epoch": 7.299450549450549, + "grad_norm": 17.293821334838867, + "learning_rate": 4.635027472527473e-05, + "loss": 0.8899, + "step": 2657 + }, + { + "epoch": 7.302197802197802, + "grad_norm": 12.03342342376709, + "learning_rate": 4.63489010989011e-05, + "loss": 0.7048, + "step": 2658 + }, + { + "epoch": 7.304945054945055, + "grad_norm": 14.157901763916016, + "learning_rate": 4.634752747252748e-05, + "loss": 0.5604, + "step": 2659 + }, + { + "epoch": 7.3076923076923075, + "grad_norm": 17.649667739868164, + "learning_rate": 4.634615384615385e-05, + "loss": 0.9101, + "step": 2660 + }, + { + "epoch": 7.31043956043956, + "grad_norm": 11.92165756225586, + "learning_rate": 4.634478021978022e-05, + "loss": 0.4752, + "step": 2661 + }, + { + "epoch": 7.313186813186813, + "grad_norm": 16.82352066040039, + "learning_rate": 4.6343406593406594e-05, + "loss": 0.8505, + "step": 2662 + }, + { + "epoch": 7.315934065934066, + "grad_norm": 16.907291412353516, + "learning_rate": 4.6342032967032964e-05, + "loss": 0.6417, + "step": 2663 + }, + { + "epoch": 7.318681318681318, + "grad_norm": 16.395591735839844, + "learning_rate": 4.634065934065934e-05, + "loss": 0.7696, + "step": 2664 + }, + { + "epoch": 7.321428571428571, + "grad_norm": 17.1440372467041, + "learning_rate": 4.633928571428572e-05, + "loss": 1.1415, + "step": 2665 + }, + { + "epoch": 7.324175824175824, + "grad_norm": 18.58583641052246, + "learning_rate": 4.633791208791209e-05, + "loss": 0.8077, + "step": 2666 + }, + { + "epoch": 7.326923076923077, + "grad_norm": 13.650349617004395, + "learning_rate": 4.6336538461538465e-05, + "loss": 0.4973, + "step": 2667 + }, + { + "epoch": 7.329670329670329, + "grad_norm": 11.921208381652832, + "learning_rate": 4.6335164835164835e-05, + "loss": 0.5095, + "step": 2668 + }, + { + "epoch": 7.332417582417582, + "grad_norm": 13.613518714904785, + "learning_rate": 4.633379120879121e-05, + "loss": 0.612, + "step": 2669 + }, + { + "epoch": 7.335164835164835, + "grad_norm": 13.343140602111816, + "learning_rate": 4.633241758241759e-05, + "loss": 0.6715, + "step": 2670 + }, + { + "epoch": 7.337912087912088, + "grad_norm": 13.18339729309082, + "learning_rate": 4.633104395604396e-05, + "loss": 0.8083, + "step": 2671 + }, + { + "epoch": 7.34065934065934, + "grad_norm": 13.663662910461426, + "learning_rate": 4.6329670329670335e-05, + "loss": 0.6502, + "step": 2672 + }, + { + "epoch": 7.343406593406593, + "grad_norm": 13.30273151397705, + "learning_rate": 4.6328296703296705e-05, + "loss": 0.6089, + "step": 2673 + }, + { + "epoch": 7.346153846153846, + "grad_norm": 18.13620948791504, + "learning_rate": 4.632692307692308e-05, + "loss": 0.9648, + "step": 2674 + }, + { + "epoch": 7.3489010989010985, + "grad_norm": 10.3305082321167, + "learning_rate": 4.632554945054945e-05, + "loss": 0.5401, + "step": 2675 + }, + { + "epoch": 7.351648351648351, + "grad_norm": 12.742277145385742, + "learning_rate": 4.632417582417582e-05, + "loss": 0.468, + "step": 2676 + }, + { + "epoch": 7.354395604395604, + "grad_norm": 12.891582489013672, + "learning_rate": 4.63228021978022e-05, + "loss": 0.649, + "step": 2677 + }, + { + "epoch": 7.357142857142857, + "grad_norm": 13.121954917907715, + "learning_rate": 4.632142857142857e-05, + "loss": 0.6048, + "step": 2678 + }, + { + "epoch": 7.3598901098901095, + "grad_norm": 12.690694808959961, + "learning_rate": 4.6320054945054945e-05, + "loss": 0.5578, + "step": 2679 + }, + { + "epoch": 7.362637362637362, + "grad_norm": 11.875875473022461, + "learning_rate": 4.631868131868132e-05, + "loss": 0.3519, + "step": 2680 + }, + { + "epoch": 7.365384615384615, + "grad_norm": 6.495707988739014, + "learning_rate": 4.631730769230769e-05, + "loss": 0.2322, + "step": 2681 + }, + { + "epoch": 7.368131868131869, + "grad_norm": 13.172040939331055, + "learning_rate": 4.631593406593407e-05, + "loss": 0.5714, + "step": 2682 + }, + { + "epoch": 7.3708791208791204, + "grad_norm": 14.523499488830566, + "learning_rate": 4.631456043956044e-05, + "loss": 0.656, + "step": 2683 + }, + { + "epoch": 7.373626373626374, + "grad_norm": 17.049890518188477, + "learning_rate": 4.6313186813186816e-05, + "loss": 0.7062, + "step": 2684 + }, + { + "epoch": 7.376373626373626, + "grad_norm": 12.739666938781738, + "learning_rate": 4.631181318681319e-05, + "loss": 0.5727, + "step": 2685 + }, + { + "epoch": 7.3791208791208796, + "grad_norm": 11.706123352050781, + "learning_rate": 4.631043956043956e-05, + "loss": 0.4112, + "step": 2686 + }, + { + "epoch": 7.381868131868131, + "grad_norm": 12.48083782196045, + "learning_rate": 4.630906593406594e-05, + "loss": 0.5242, + "step": 2687 + }, + { + "epoch": 7.384615384615385, + "grad_norm": 8.475325584411621, + "learning_rate": 4.630769230769231e-05, + "loss": 0.2662, + "step": 2688 + }, + { + "epoch": 7.387362637362638, + "grad_norm": 15.004555702209473, + "learning_rate": 4.6306318681318686e-05, + "loss": 0.8603, + "step": 2689 + }, + { + "epoch": 7.3901098901098905, + "grad_norm": 14.927389144897461, + "learning_rate": 4.6304945054945056e-05, + "loss": 0.7038, + "step": 2690 + }, + { + "epoch": 7.392857142857143, + "grad_norm": 14.568015098571777, + "learning_rate": 4.6303571428571426e-05, + "loss": 0.5183, + "step": 2691 + }, + { + "epoch": 7.395604395604396, + "grad_norm": 12.497570991516113, + "learning_rate": 4.63021978021978e-05, + "loss": 0.4337, + "step": 2692 + }, + { + "epoch": 7.398351648351649, + "grad_norm": 13.210328102111816, + "learning_rate": 4.630082417582417e-05, + "loss": 0.4615, + "step": 2693 + }, + { + "epoch": 7.4010989010989015, + "grad_norm": 10.168069839477539, + "learning_rate": 4.629945054945055e-05, + "loss": 0.5127, + "step": 2694 + }, + { + "epoch": 7.403846153846154, + "grad_norm": 20.574722290039062, + "learning_rate": 4.629807692307693e-05, + "loss": 0.909, + "step": 2695 + }, + { + "epoch": 7.406593406593407, + "grad_norm": 11.534417152404785, + "learning_rate": 4.62967032967033e-05, + "loss": 0.5746, + "step": 2696 + }, + { + "epoch": 7.40934065934066, + "grad_norm": 13.472307205200195, + "learning_rate": 4.6295329670329674e-05, + "loss": 0.5402, + "step": 2697 + }, + { + "epoch": 7.412087912087912, + "grad_norm": 9.11225700378418, + "learning_rate": 4.6293956043956044e-05, + "loss": 0.2762, + "step": 2698 + }, + { + "epoch": 7.414835164835165, + "grad_norm": 17.578170776367188, + "learning_rate": 4.629258241758242e-05, + "loss": 0.8288, + "step": 2699 + }, + { + "epoch": 7.417582417582418, + "grad_norm": 14.126459121704102, + "learning_rate": 4.62912087912088e-05, + "loss": 0.7392, + "step": 2700 + }, + { + "epoch": 7.420329670329671, + "grad_norm": 11.12666130065918, + "learning_rate": 4.628983516483517e-05, + "loss": 0.4023, + "step": 2701 + }, + { + "epoch": 7.423076923076923, + "grad_norm": 20.945945739746094, + "learning_rate": 4.6288461538461544e-05, + "loss": 1.1298, + "step": 2702 + }, + { + "epoch": 7.425824175824176, + "grad_norm": 18.772733688354492, + "learning_rate": 4.6287087912087914e-05, + "loss": 0.8917, + "step": 2703 + }, + { + "epoch": 7.428571428571429, + "grad_norm": 8.083663940429688, + "learning_rate": 4.628571428571429e-05, + "loss": 0.2811, + "step": 2704 + }, + { + "epoch": 7.431318681318682, + "grad_norm": 9.012843132019043, + "learning_rate": 4.628434065934066e-05, + "loss": 0.3692, + "step": 2705 + }, + { + "epoch": 7.434065934065934, + "grad_norm": 15.573989868164062, + "learning_rate": 4.628296703296703e-05, + "loss": 0.9058, + "step": 2706 + }, + { + "epoch": 7.436813186813187, + "grad_norm": 16.27947235107422, + "learning_rate": 4.628159340659341e-05, + "loss": 0.891, + "step": 2707 + }, + { + "epoch": 7.43956043956044, + "grad_norm": 18.032413482666016, + "learning_rate": 4.628021978021978e-05, + "loss": 0.7393, + "step": 2708 + }, + { + "epoch": 7.4423076923076925, + "grad_norm": 12.148624420166016, + "learning_rate": 4.6278846153846154e-05, + "loss": 0.4613, + "step": 2709 + }, + { + "epoch": 7.445054945054945, + "grad_norm": 8.158241271972656, + "learning_rate": 4.627747252747253e-05, + "loss": 0.334, + "step": 2710 + }, + { + "epoch": 7.447802197802198, + "grad_norm": 14.220664024353027, + "learning_rate": 4.62760989010989e-05, + "loss": 0.575, + "step": 2711 + }, + { + "epoch": 7.450549450549451, + "grad_norm": 6.620748996734619, + "learning_rate": 4.627472527472528e-05, + "loss": 0.2927, + "step": 2712 + }, + { + "epoch": 7.4532967032967035, + "grad_norm": 7.287067413330078, + "learning_rate": 4.627335164835165e-05, + "loss": 0.222, + "step": 2713 + }, + { + "epoch": 7.456043956043956, + "grad_norm": 8.791528701782227, + "learning_rate": 4.6271978021978025e-05, + "loss": 0.3141, + "step": 2714 + }, + { + "epoch": 7.458791208791209, + "grad_norm": 11.71854019165039, + "learning_rate": 4.62706043956044e-05, + "loss": 0.6947, + "step": 2715 + }, + { + "epoch": 7.461538461538462, + "grad_norm": 18.931171417236328, + "learning_rate": 4.626923076923077e-05, + "loss": 0.8209, + "step": 2716 + }, + { + "epoch": 7.464285714285714, + "grad_norm": 15.024423599243164, + "learning_rate": 4.626785714285715e-05, + "loss": 0.677, + "step": 2717 + }, + { + "epoch": 7.467032967032967, + "grad_norm": 8.625986099243164, + "learning_rate": 4.626648351648352e-05, + "loss": 0.3339, + "step": 2718 + }, + { + "epoch": 7.46978021978022, + "grad_norm": 12.594048500061035, + "learning_rate": 4.6265109890109895e-05, + "loss": 0.4457, + "step": 2719 + }, + { + "epoch": 7.472527472527473, + "grad_norm": 9.937516212463379, + "learning_rate": 4.6263736263736265e-05, + "loss": 0.3721, + "step": 2720 + }, + { + "epoch": 7.475274725274725, + "grad_norm": 16.422101974487305, + "learning_rate": 4.6262362637362635e-05, + "loss": 0.9603, + "step": 2721 + }, + { + "epoch": 7.478021978021978, + "grad_norm": 12.256248474121094, + "learning_rate": 4.626098901098901e-05, + "loss": 0.4914, + "step": 2722 + }, + { + "epoch": 7.480769230769231, + "grad_norm": 12.832304000854492, + "learning_rate": 4.625961538461538e-05, + "loss": 0.7614, + "step": 2723 + }, + { + "epoch": 7.483516483516484, + "grad_norm": 12.39186954498291, + "learning_rate": 4.625824175824176e-05, + "loss": 0.5908, + "step": 2724 + }, + { + "epoch": 7.486263736263736, + "grad_norm": 22.699243545532227, + "learning_rate": 4.6256868131868136e-05, + "loss": 1.3204, + "step": 2725 + }, + { + "epoch": 7.489010989010989, + "grad_norm": 8.953751564025879, + "learning_rate": 4.6255494505494506e-05, + "loss": 0.3744, + "step": 2726 + }, + { + "epoch": 7.491758241758242, + "grad_norm": 13.25493335723877, + "learning_rate": 4.625412087912088e-05, + "loss": 0.4777, + "step": 2727 + }, + { + "epoch": 7.4945054945054945, + "grad_norm": 13.060187339782715, + "learning_rate": 4.625274725274725e-05, + "loss": 0.5655, + "step": 2728 + }, + { + "epoch": 7.497252747252747, + "grad_norm": 12.350716590881348, + "learning_rate": 4.625137362637363e-05, + "loss": 0.4501, + "step": 2729 + }, + { + "epoch": 7.5, + "grad_norm": 12.899587631225586, + "learning_rate": 4.6250000000000006e-05, + "loss": 0.7272, + "step": 2730 + }, + { + "epoch": 7.502747252747253, + "grad_norm": 18.572736740112305, + "learning_rate": 4.6248626373626376e-05, + "loss": 0.6092, + "step": 2731 + }, + { + "epoch": 7.5054945054945055, + "grad_norm": 16.57925033569336, + "learning_rate": 4.624725274725275e-05, + "loss": 1.0617, + "step": 2732 + }, + { + "epoch": 7.508241758241758, + "grad_norm": 15.857089042663574, + "learning_rate": 4.624587912087912e-05, + "loss": 0.5983, + "step": 2733 + }, + { + "epoch": 7.510989010989011, + "grad_norm": 10.225573539733887, + "learning_rate": 4.62445054945055e-05, + "loss": 0.3806, + "step": 2734 + }, + { + "epoch": 7.513736263736264, + "grad_norm": 11.31294059753418, + "learning_rate": 4.624313186813187e-05, + "loss": 0.4451, + "step": 2735 + }, + { + "epoch": 7.516483516483516, + "grad_norm": 15.147214889526367, + "learning_rate": 4.624175824175824e-05, + "loss": 0.6945, + "step": 2736 + }, + { + "epoch": 7.519230769230769, + "grad_norm": 12.763422012329102, + "learning_rate": 4.6240384615384617e-05, + "loss": 0.7604, + "step": 2737 + }, + { + "epoch": 7.521978021978022, + "grad_norm": 14.475162506103516, + "learning_rate": 4.6239010989010987e-05, + "loss": 0.7735, + "step": 2738 + }, + { + "epoch": 7.524725274725275, + "grad_norm": 11.677862167358398, + "learning_rate": 4.623763736263736e-05, + "loss": 0.5455, + "step": 2739 + }, + { + "epoch": 7.527472527472527, + "grad_norm": 13.3921537399292, + "learning_rate": 4.623626373626374e-05, + "loss": 0.731, + "step": 2740 + }, + { + "epoch": 7.53021978021978, + "grad_norm": 10.043436050415039, + "learning_rate": 4.623489010989011e-05, + "loss": 0.5569, + "step": 2741 + }, + { + "epoch": 7.532967032967033, + "grad_norm": 9.847211837768555, + "learning_rate": 4.623351648351649e-05, + "loss": 0.337, + "step": 2742 + }, + { + "epoch": 7.535714285714286, + "grad_norm": 19.197885513305664, + "learning_rate": 4.623214285714286e-05, + "loss": 0.8904, + "step": 2743 + }, + { + "epoch": 7.538461538461538, + "grad_norm": 16.69031524658203, + "learning_rate": 4.6230769230769234e-05, + "loss": 0.6857, + "step": 2744 + }, + { + "epoch": 7.541208791208791, + "grad_norm": 12.04238510131836, + "learning_rate": 4.622939560439561e-05, + "loss": 0.705, + "step": 2745 + }, + { + "epoch": 7.543956043956044, + "grad_norm": 13.036534309387207, + "learning_rate": 4.622802197802198e-05, + "loss": 0.8526, + "step": 2746 + }, + { + "epoch": 7.5467032967032965, + "grad_norm": 7.215691089630127, + "learning_rate": 4.622664835164836e-05, + "loss": 0.1738, + "step": 2747 + }, + { + "epoch": 7.549450549450549, + "grad_norm": 11.395613670349121, + "learning_rate": 4.622527472527473e-05, + "loss": 0.3661, + "step": 2748 + }, + { + "epoch": 7.552197802197802, + "grad_norm": 13.760514259338379, + "learning_rate": 4.6223901098901104e-05, + "loss": 0.5796, + "step": 2749 + }, + { + "epoch": 7.554945054945055, + "grad_norm": 17.435087203979492, + "learning_rate": 4.6222527472527474e-05, + "loss": 0.8003, + "step": 2750 + }, + { + "epoch": 7.5576923076923075, + "grad_norm": 13.2011137008667, + "learning_rate": 4.6221153846153844e-05, + "loss": 0.5279, + "step": 2751 + }, + { + "epoch": 7.56043956043956, + "grad_norm": 11.22888469696045, + "learning_rate": 4.621978021978022e-05, + "loss": 0.5447, + "step": 2752 + }, + { + "epoch": 7.563186813186813, + "grad_norm": 13.63586711883545, + "learning_rate": 4.621840659340659e-05, + "loss": 0.6979, + "step": 2753 + }, + { + "epoch": 7.565934065934066, + "grad_norm": 19.624441146850586, + "learning_rate": 4.621703296703297e-05, + "loss": 0.8744, + "step": 2754 + }, + { + "epoch": 7.568681318681318, + "grad_norm": 16.63457679748535, + "learning_rate": 4.6215659340659345e-05, + "loss": 0.8193, + "step": 2755 + }, + { + "epoch": 7.571428571428571, + "grad_norm": 19.243459701538086, + "learning_rate": 4.6214285714285715e-05, + "loss": 1.0583, + "step": 2756 + }, + { + "epoch": 7.574175824175824, + "grad_norm": 10.312565803527832, + "learning_rate": 4.621291208791209e-05, + "loss": 0.3489, + "step": 2757 + }, + { + "epoch": 7.576923076923077, + "grad_norm": 11.408676147460938, + "learning_rate": 4.621153846153846e-05, + "loss": 0.4019, + "step": 2758 + }, + { + "epoch": 7.579670329670329, + "grad_norm": 15.557141304016113, + "learning_rate": 4.621016483516484e-05, + "loss": 0.5102, + "step": 2759 + }, + { + "epoch": 7.582417582417582, + "grad_norm": 8.964559555053711, + "learning_rate": 4.6208791208791215e-05, + "loss": 0.3835, + "step": 2760 + }, + { + "epoch": 7.585164835164835, + "grad_norm": 15.253222465515137, + "learning_rate": 4.6207417582417585e-05, + "loss": 0.5867, + "step": 2761 + }, + { + "epoch": 7.587912087912088, + "grad_norm": 13.776189804077148, + "learning_rate": 4.620604395604396e-05, + "loss": 0.523, + "step": 2762 + }, + { + "epoch": 7.59065934065934, + "grad_norm": 18.57472038269043, + "learning_rate": 4.620467032967033e-05, + "loss": 0.8644, + "step": 2763 + }, + { + "epoch": 7.593406593406593, + "grad_norm": 9.351615905761719, + "learning_rate": 4.620329670329671e-05, + "loss": 0.3994, + "step": 2764 + }, + { + "epoch": 7.596153846153846, + "grad_norm": 10.250563621520996, + "learning_rate": 4.620192307692308e-05, + "loss": 0.3985, + "step": 2765 + }, + { + "epoch": 7.5989010989010985, + "grad_norm": 13.328774452209473, + "learning_rate": 4.620054945054945e-05, + "loss": 0.4771, + "step": 2766 + }, + { + "epoch": 7.601648351648351, + "grad_norm": 13.232091903686523, + "learning_rate": 4.6199175824175826e-05, + "loss": 0.5809, + "step": 2767 + }, + { + "epoch": 7.604395604395604, + "grad_norm": 12.975154876708984, + "learning_rate": 4.6197802197802196e-05, + "loss": 0.53, + "step": 2768 + }, + { + "epoch": 7.607142857142857, + "grad_norm": 17.356176376342773, + "learning_rate": 4.619642857142857e-05, + "loss": 0.7432, + "step": 2769 + }, + { + "epoch": 7.6098901098901095, + "grad_norm": 13.359146118164062, + "learning_rate": 4.619505494505495e-05, + "loss": 0.6548, + "step": 2770 + }, + { + "epoch": 7.612637362637363, + "grad_norm": 14.533524513244629, + "learning_rate": 4.619368131868132e-05, + "loss": 0.5413, + "step": 2771 + }, + { + "epoch": 7.615384615384615, + "grad_norm": 16.520862579345703, + "learning_rate": 4.6192307692307696e-05, + "loss": 0.9405, + "step": 2772 + }, + { + "epoch": 7.618131868131869, + "grad_norm": 18.1629638671875, + "learning_rate": 4.6190934065934066e-05, + "loss": 0.83, + "step": 2773 + }, + { + "epoch": 7.6208791208791204, + "grad_norm": 11.561117172241211, + "learning_rate": 4.618956043956044e-05, + "loss": 0.487, + "step": 2774 + }, + { + "epoch": 7.623626373626374, + "grad_norm": 11.218109130859375, + "learning_rate": 4.618818681318682e-05, + "loss": 0.4878, + "step": 2775 + }, + { + "epoch": 7.626373626373626, + "grad_norm": 15.403046607971191, + "learning_rate": 4.618681318681319e-05, + "loss": 0.7563, + "step": 2776 + }, + { + "epoch": 7.6291208791208796, + "grad_norm": 12.280186653137207, + "learning_rate": 4.6185439560439566e-05, + "loss": 0.4659, + "step": 2777 + }, + { + "epoch": 7.631868131868131, + "grad_norm": 16.718120574951172, + "learning_rate": 4.6184065934065936e-05, + "loss": 0.6934, + "step": 2778 + }, + { + "epoch": 7.634615384615385, + "grad_norm": 8.055961608886719, + "learning_rate": 4.618269230769231e-05, + "loss": 0.2764, + "step": 2779 + }, + { + "epoch": 7.637362637362637, + "grad_norm": 14.995570182800293, + "learning_rate": 4.618131868131868e-05, + "loss": 0.7179, + "step": 2780 + }, + { + "epoch": 7.6401098901098905, + "grad_norm": 10.758801460266113, + "learning_rate": 4.617994505494505e-05, + "loss": 0.4656, + "step": 2781 + }, + { + "epoch": 7.642857142857143, + "grad_norm": 8.46735668182373, + "learning_rate": 4.617857142857143e-05, + "loss": 0.3565, + "step": 2782 + }, + { + "epoch": 7.645604395604396, + "grad_norm": 13.2058744430542, + "learning_rate": 4.61771978021978e-05, + "loss": 0.5559, + "step": 2783 + }, + { + "epoch": 7.648351648351649, + "grad_norm": 8.161993980407715, + "learning_rate": 4.617582417582418e-05, + "loss": 0.2959, + "step": 2784 + }, + { + "epoch": 7.6510989010989015, + "grad_norm": 16.1104793548584, + "learning_rate": 4.6174450549450554e-05, + "loss": 0.911, + "step": 2785 + }, + { + "epoch": 7.653846153846154, + "grad_norm": 10.016940116882324, + "learning_rate": 4.6173076923076924e-05, + "loss": 0.3233, + "step": 2786 + }, + { + "epoch": 7.656593406593407, + "grad_norm": 16.043176651000977, + "learning_rate": 4.61717032967033e-05, + "loss": 0.5104, + "step": 2787 + }, + { + "epoch": 7.65934065934066, + "grad_norm": 16.98271942138672, + "learning_rate": 4.617032967032967e-05, + "loss": 0.7229, + "step": 2788 + }, + { + "epoch": 7.662087912087912, + "grad_norm": 11.085813522338867, + "learning_rate": 4.616895604395605e-05, + "loss": 0.4353, + "step": 2789 + }, + { + "epoch": 7.664835164835165, + "grad_norm": 8.229021072387695, + "learning_rate": 4.6167582417582424e-05, + "loss": 0.3189, + "step": 2790 + }, + { + "epoch": 7.667582417582418, + "grad_norm": 12.006905555725098, + "learning_rate": 4.6166208791208794e-05, + "loss": 0.565, + "step": 2791 + }, + { + "epoch": 7.670329670329671, + "grad_norm": 7.759079456329346, + "learning_rate": 4.616483516483517e-05, + "loss": 0.2396, + "step": 2792 + }, + { + "epoch": 7.673076923076923, + "grad_norm": 15.461600303649902, + "learning_rate": 4.616346153846154e-05, + "loss": 0.7754, + "step": 2793 + }, + { + "epoch": 7.675824175824176, + "grad_norm": 12.965706825256348, + "learning_rate": 4.616208791208792e-05, + "loss": 0.4608, + "step": 2794 + }, + { + "epoch": 7.678571428571429, + "grad_norm": 15.446773529052734, + "learning_rate": 4.616071428571429e-05, + "loss": 0.6314, + "step": 2795 + }, + { + "epoch": 7.681318681318682, + "grad_norm": 10.091814994812012, + "learning_rate": 4.615934065934066e-05, + "loss": 0.3976, + "step": 2796 + }, + { + "epoch": 7.684065934065934, + "grad_norm": 9.450323104858398, + "learning_rate": 4.6157967032967035e-05, + "loss": 0.2893, + "step": 2797 + }, + { + "epoch": 7.686813186813187, + "grad_norm": 16.092519760131836, + "learning_rate": 4.6156593406593405e-05, + "loss": 0.5438, + "step": 2798 + }, + { + "epoch": 7.68956043956044, + "grad_norm": 16.539216995239258, + "learning_rate": 4.615521978021978e-05, + "loss": 0.7484, + "step": 2799 + }, + { + "epoch": 7.6923076923076925, + "grad_norm": 14.573563575744629, + "learning_rate": 4.615384615384616e-05, + "loss": 0.7659, + "step": 2800 + }, + { + "epoch": 7.695054945054945, + "grad_norm": 11.006186485290527, + "learning_rate": 4.615247252747253e-05, + "loss": 0.4282, + "step": 2801 + }, + { + "epoch": 7.697802197802198, + "grad_norm": 17.832355499267578, + "learning_rate": 4.6151098901098905e-05, + "loss": 0.7322, + "step": 2802 + }, + { + "epoch": 7.700549450549451, + "grad_norm": 13.301580429077148, + "learning_rate": 4.6149725274725275e-05, + "loss": 0.5531, + "step": 2803 + }, + { + "epoch": 7.7032967032967035, + "grad_norm": 9.424972534179688, + "learning_rate": 4.614835164835165e-05, + "loss": 0.3054, + "step": 2804 + }, + { + "epoch": 7.706043956043956, + "grad_norm": 17.154125213623047, + "learning_rate": 4.614697802197803e-05, + "loss": 0.7283, + "step": 2805 + }, + { + "epoch": 7.708791208791209, + "grad_norm": 15.793336868286133, + "learning_rate": 4.61456043956044e-05, + "loss": 0.7923, + "step": 2806 + }, + { + "epoch": 7.711538461538462, + "grad_norm": 16.967931747436523, + "learning_rate": 4.6144230769230775e-05, + "loss": 0.7115, + "step": 2807 + }, + { + "epoch": 7.714285714285714, + "grad_norm": 14.458186149597168, + "learning_rate": 4.6142857142857145e-05, + "loss": 0.7397, + "step": 2808 + }, + { + "epoch": 7.717032967032967, + "grad_norm": 14.569135665893555, + "learning_rate": 4.614148351648352e-05, + "loss": 0.8904, + "step": 2809 + }, + { + "epoch": 7.71978021978022, + "grad_norm": 17.58612632751465, + "learning_rate": 4.614010989010989e-05, + "loss": 0.8001, + "step": 2810 + }, + { + "epoch": 7.722527472527473, + "grad_norm": 16.591501235961914, + "learning_rate": 4.613873626373626e-05, + "loss": 0.9323, + "step": 2811 + }, + { + "epoch": 7.725274725274725, + "grad_norm": 14.035099029541016, + "learning_rate": 4.613736263736264e-05, + "loss": 0.66, + "step": 2812 + }, + { + "epoch": 7.728021978021978, + "grad_norm": 11.918166160583496, + "learning_rate": 4.613598901098901e-05, + "loss": 0.4079, + "step": 2813 + }, + { + "epoch": 7.730769230769231, + "grad_norm": 13.043932914733887, + "learning_rate": 4.6134615384615386e-05, + "loss": 0.6086, + "step": 2814 + }, + { + "epoch": 7.733516483516484, + "grad_norm": 10.027658462524414, + "learning_rate": 4.613324175824176e-05, + "loss": 0.4031, + "step": 2815 + }, + { + "epoch": 7.736263736263736, + "grad_norm": 15.339384078979492, + "learning_rate": 4.613186813186813e-05, + "loss": 0.4982, + "step": 2816 + }, + { + "epoch": 7.739010989010989, + "grad_norm": 12.448575019836426, + "learning_rate": 4.613049450549451e-05, + "loss": 0.4743, + "step": 2817 + }, + { + "epoch": 7.741758241758242, + "grad_norm": 15.945208549499512, + "learning_rate": 4.612912087912088e-05, + "loss": 0.587, + "step": 2818 + }, + { + "epoch": 7.7445054945054945, + "grad_norm": 11.47579574584961, + "learning_rate": 4.6127747252747256e-05, + "loss": 0.5342, + "step": 2819 + }, + { + "epoch": 7.747252747252747, + "grad_norm": 16.93745231628418, + "learning_rate": 4.612637362637363e-05, + "loss": 0.9763, + "step": 2820 + }, + { + "epoch": 7.75, + "grad_norm": 9.941747665405273, + "learning_rate": 4.6125e-05, + "loss": 0.27, + "step": 2821 + }, + { + "epoch": 7.752747252747253, + "grad_norm": 12.252495765686035, + "learning_rate": 4.612362637362638e-05, + "loss": 0.4394, + "step": 2822 + }, + { + "epoch": 7.7554945054945055, + "grad_norm": 13.333915710449219, + "learning_rate": 4.612225274725275e-05, + "loss": 0.6124, + "step": 2823 + }, + { + "epoch": 7.758241758241758, + "grad_norm": 15.924127578735352, + "learning_rate": 4.612087912087913e-05, + "loss": 0.5337, + "step": 2824 + }, + { + "epoch": 7.760989010989011, + "grad_norm": 13.136165618896484, + "learning_rate": 4.61195054945055e-05, + "loss": 0.4499, + "step": 2825 + }, + { + "epoch": 7.763736263736264, + "grad_norm": 12.985946655273438, + "learning_rate": 4.611813186813187e-05, + "loss": 0.4744, + "step": 2826 + }, + { + "epoch": 7.766483516483516, + "grad_norm": 16.76839256286621, + "learning_rate": 4.6116758241758244e-05, + "loss": 0.7816, + "step": 2827 + }, + { + "epoch": 7.769230769230769, + "grad_norm": 19.483783721923828, + "learning_rate": 4.6115384615384614e-05, + "loss": 0.9278, + "step": 2828 + }, + { + "epoch": 7.771978021978022, + "grad_norm": 14.59335708618164, + "learning_rate": 4.611401098901099e-05, + "loss": 0.6313, + "step": 2829 + }, + { + "epoch": 7.774725274725275, + "grad_norm": 16.934492111206055, + "learning_rate": 4.611263736263737e-05, + "loss": 0.6625, + "step": 2830 + }, + { + "epoch": 7.777472527472527, + "grad_norm": 9.180108070373535, + "learning_rate": 4.611126373626374e-05, + "loss": 0.3389, + "step": 2831 + }, + { + "epoch": 7.78021978021978, + "grad_norm": 20.618799209594727, + "learning_rate": 4.6109890109890114e-05, + "loss": 0.8085, + "step": 2832 + }, + { + "epoch": 7.782967032967033, + "grad_norm": 11.972612380981445, + "learning_rate": 4.6108516483516484e-05, + "loss": 0.4367, + "step": 2833 + }, + { + "epoch": 7.785714285714286, + "grad_norm": 13.596381187438965, + "learning_rate": 4.610714285714286e-05, + "loss": 0.5362, + "step": 2834 + }, + { + "epoch": 7.788461538461538, + "grad_norm": 13.80854606628418, + "learning_rate": 4.610576923076924e-05, + "loss": 0.4024, + "step": 2835 + }, + { + "epoch": 7.791208791208791, + "grad_norm": 8.833579063415527, + "learning_rate": 4.610439560439561e-05, + "loss": 0.295, + "step": 2836 + }, + { + "epoch": 7.793956043956044, + "grad_norm": 10.012462615966797, + "learning_rate": 4.6103021978021984e-05, + "loss": 0.3823, + "step": 2837 + }, + { + "epoch": 7.7967032967032965, + "grad_norm": 9.277077674865723, + "learning_rate": 4.6101648351648354e-05, + "loss": 0.3394, + "step": 2838 + }, + { + "epoch": 7.799450549450549, + "grad_norm": 11.420194625854492, + "learning_rate": 4.610027472527473e-05, + "loss": 0.413, + "step": 2839 + }, + { + "epoch": 7.802197802197802, + "grad_norm": 13.540287017822266, + "learning_rate": 4.60989010989011e-05, + "loss": 0.5859, + "step": 2840 + }, + { + "epoch": 7.804945054945055, + "grad_norm": 17.822307586669922, + "learning_rate": 4.609752747252747e-05, + "loss": 0.9717, + "step": 2841 + }, + { + "epoch": 7.8076923076923075, + "grad_norm": 17.587072372436523, + "learning_rate": 4.609615384615385e-05, + "loss": 0.533, + "step": 2842 + }, + { + "epoch": 7.81043956043956, + "grad_norm": 9.90713119506836, + "learning_rate": 4.609478021978022e-05, + "loss": 0.534, + "step": 2843 + }, + { + "epoch": 7.813186813186813, + "grad_norm": 22.060056686401367, + "learning_rate": 4.6093406593406595e-05, + "loss": 1.1492, + "step": 2844 + }, + { + "epoch": 7.815934065934066, + "grad_norm": 12.384928703308105, + "learning_rate": 4.6092032967032965e-05, + "loss": 0.4462, + "step": 2845 + }, + { + "epoch": 7.818681318681318, + "grad_norm": 16.73975372314453, + "learning_rate": 4.609065934065934e-05, + "loss": 0.636, + "step": 2846 + }, + { + "epoch": 7.821428571428571, + "grad_norm": 13.658682823181152, + "learning_rate": 4.608928571428572e-05, + "loss": 0.4725, + "step": 2847 + }, + { + "epoch": 7.824175824175824, + "grad_norm": 11.104522705078125, + "learning_rate": 4.608791208791209e-05, + "loss": 0.366, + "step": 2848 + }, + { + "epoch": 7.826923076923077, + "grad_norm": 15.40419864654541, + "learning_rate": 4.6086538461538465e-05, + "loss": 0.535, + "step": 2849 + }, + { + "epoch": 7.829670329670329, + "grad_norm": 13.360269546508789, + "learning_rate": 4.6085164835164835e-05, + "loss": 0.6075, + "step": 2850 + }, + { + "epoch": 7.832417582417582, + "grad_norm": 18.38500213623047, + "learning_rate": 4.608379120879121e-05, + "loss": 0.9955, + "step": 2851 + }, + { + "epoch": 7.835164835164835, + "grad_norm": 14.508176803588867, + "learning_rate": 4.608241758241759e-05, + "loss": 0.4464, + "step": 2852 + }, + { + "epoch": 7.837912087912088, + "grad_norm": 13.625961303710938, + "learning_rate": 4.608104395604396e-05, + "loss": 0.7429, + "step": 2853 + }, + { + "epoch": 7.84065934065934, + "grad_norm": 11.753037452697754, + "learning_rate": 4.6079670329670336e-05, + "loss": 0.3537, + "step": 2854 + }, + { + "epoch": 7.843406593406593, + "grad_norm": 13.774680137634277, + "learning_rate": 4.6078296703296706e-05, + "loss": 0.6495, + "step": 2855 + }, + { + "epoch": 7.846153846153846, + "grad_norm": 5.855600833892822, + "learning_rate": 4.6076923076923076e-05, + "loss": 0.1478, + "step": 2856 + }, + { + "epoch": 7.8489010989010985, + "grad_norm": 16.224403381347656, + "learning_rate": 4.607554945054945e-05, + "loss": 0.6571, + "step": 2857 + }, + { + "epoch": 7.851648351648351, + "grad_norm": 10.850955963134766, + "learning_rate": 4.607417582417582e-05, + "loss": 0.319, + "step": 2858 + }, + { + "epoch": 7.854395604395604, + "grad_norm": 16.848726272583008, + "learning_rate": 4.60728021978022e-05, + "loss": 0.6323, + "step": 2859 + }, + { + "epoch": 7.857142857142857, + "grad_norm": 10.244348526000977, + "learning_rate": 4.607142857142857e-05, + "loss": 0.3429, + "step": 2860 + }, + { + "epoch": 7.8598901098901095, + "grad_norm": 9.005125045776367, + "learning_rate": 4.6070054945054946e-05, + "loss": 0.2809, + "step": 2861 + }, + { + "epoch": 7.862637362637363, + "grad_norm": 15.556509971618652, + "learning_rate": 4.606868131868132e-05, + "loss": 0.6409, + "step": 2862 + }, + { + "epoch": 7.865384615384615, + "grad_norm": 16.184282302856445, + "learning_rate": 4.606730769230769e-05, + "loss": 0.6976, + "step": 2863 + }, + { + "epoch": 7.868131868131869, + "grad_norm": 13.715841293334961, + "learning_rate": 4.606593406593407e-05, + "loss": 0.6986, + "step": 2864 + }, + { + "epoch": 7.8708791208791204, + "grad_norm": 13.707592010498047, + "learning_rate": 4.606456043956044e-05, + "loss": 0.5022, + "step": 2865 + }, + { + "epoch": 7.873626373626374, + "grad_norm": 18.539499282836914, + "learning_rate": 4.6063186813186817e-05, + "loss": 1.0591, + "step": 2866 + }, + { + "epoch": 7.876373626373626, + "grad_norm": 14.500576972961426, + "learning_rate": 4.606181318681319e-05, + "loss": 0.473, + "step": 2867 + }, + { + "epoch": 7.8791208791208796, + "grad_norm": 14.125558853149414, + "learning_rate": 4.606043956043956e-05, + "loss": 0.4806, + "step": 2868 + }, + { + "epoch": 7.881868131868131, + "grad_norm": 12.401162147521973, + "learning_rate": 4.605906593406594e-05, + "loss": 0.3186, + "step": 2869 + }, + { + "epoch": 7.884615384615385, + "grad_norm": 13.074682235717773, + "learning_rate": 4.605769230769231e-05, + "loss": 0.6984, + "step": 2870 + }, + { + "epoch": 7.887362637362637, + "grad_norm": 17.037702560424805, + "learning_rate": 4.605631868131868e-05, + "loss": 0.6061, + "step": 2871 + }, + { + "epoch": 7.8901098901098905, + "grad_norm": 17.846405029296875, + "learning_rate": 4.605494505494506e-05, + "loss": 0.8868, + "step": 2872 + }, + { + "epoch": 7.892857142857143, + "grad_norm": 10.258648872375488, + "learning_rate": 4.605357142857143e-05, + "loss": 0.29, + "step": 2873 + }, + { + "epoch": 7.895604395604396, + "grad_norm": 16.43813133239746, + "learning_rate": 4.6052197802197804e-05, + "loss": 0.9098, + "step": 2874 + }, + { + "epoch": 7.898351648351649, + "grad_norm": 11.19149112701416, + "learning_rate": 4.6050824175824174e-05, + "loss": 0.4772, + "step": 2875 + }, + { + "epoch": 7.9010989010989015, + "grad_norm": 11.604917526245117, + "learning_rate": 4.604945054945055e-05, + "loss": 0.3731, + "step": 2876 + }, + { + "epoch": 7.903846153846154, + "grad_norm": 13.134469985961914, + "learning_rate": 4.604807692307693e-05, + "loss": 0.4476, + "step": 2877 + }, + { + "epoch": 7.906593406593407, + "grad_norm": 13.846549987792969, + "learning_rate": 4.60467032967033e-05, + "loss": 0.5404, + "step": 2878 + }, + { + "epoch": 7.90934065934066, + "grad_norm": 10.60960578918457, + "learning_rate": 4.6045329670329674e-05, + "loss": 0.3183, + "step": 2879 + }, + { + "epoch": 7.912087912087912, + "grad_norm": 15.983661651611328, + "learning_rate": 4.6043956043956044e-05, + "loss": 0.7377, + "step": 2880 + }, + { + "epoch": 7.914835164835165, + "grad_norm": 10.981123924255371, + "learning_rate": 4.604258241758242e-05, + "loss": 0.4106, + "step": 2881 + }, + { + "epoch": 7.917582417582418, + "grad_norm": 10.933618545532227, + "learning_rate": 4.60412087912088e-05, + "loss": 0.3053, + "step": 2882 + }, + { + "epoch": 7.920329670329671, + "grad_norm": 17.74830436706543, + "learning_rate": 4.603983516483517e-05, + "loss": 0.8233, + "step": 2883 + }, + { + "epoch": 7.923076923076923, + "grad_norm": 11.087935447692871, + "learning_rate": 4.6038461538461545e-05, + "loss": 0.4704, + "step": 2884 + }, + { + "epoch": 7.925824175824176, + "grad_norm": 14.142638206481934, + "learning_rate": 4.6037087912087915e-05, + "loss": 0.5509, + "step": 2885 + }, + { + "epoch": 7.928571428571429, + "grad_norm": 15.853981971740723, + "learning_rate": 4.6035714285714285e-05, + "loss": 0.9144, + "step": 2886 + }, + { + "epoch": 7.931318681318682, + "grad_norm": 11.195783615112305, + "learning_rate": 4.603434065934066e-05, + "loss": 0.6161, + "step": 2887 + }, + { + "epoch": 7.934065934065934, + "grad_norm": 17.573881149291992, + "learning_rate": 4.603296703296703e-05, + "loss": 0.8937, + "step": 2888 + }, + { + "epoch": 7.936813186813187, + "grad_norm": 11.131484985351562, + "learning_rate": 4.603159340659341e-05, + "loss": 0.5202, + "step": 2889 + }, + { + "epoch": 7.93956043956044, + "grad_norm": 13.386231422424316, + "learning_rate": 4.603021978021978e-05, + "loss": 0.4778, + "step": 2890 + }, + { + "epoch": 7.9423076923076925, + "grad_norm": 19.958389282226562, + "learning_rate": 4.6028846153846155e-05, + "loss": 0.7603, + "step": 2891 + }, + { + "epoch": 7.945054945054945, + "grad_norm": 14.966949462890625, + "learning_rate": 4.602747252747253e-05, + "loss": 0.6276, + "step": 2892 + }, + { + "epoch": 7.947802197802198, + "grad_norm": 16.012250900268555, + "learning_rate": 4.60260989010989e-05, + "loss": 0.5521, + "step": 2893 + }, + { + "epoch": 7.950549450549451, + "grad_norm": 13.901983261108398, + "learning_rate": 4.602472527472528e-05, + "loss": 0.6716, + "step": 2894 + }, + { + "epoch": 7.9532967032967035, + "grad_norm": 15.347372055053711, + "learning_rate": 4.602335164835165e-05, + "loss": 0.7232, + "step": 2895 + }, + { + "epoch": 7.956043956043956, + "grad_norm": 14.14265251159668, + "learning_rate": 4.6021978021978026e-05, + "loss": 0.5615, + "step": 2896 + }, + { + "epoch": 7.958791208791209, + "grad_norm": 15.139132499694824, + "learning_rate": 4.60206043956044e-05, + "loss": 0.6609, + "step": 2897 + }, + { + "epoch": 7.961538461538462, + "grad_norm": 9.309677124023438, + "learning_rate": 4.601923076923077e-05, + "loss": 0.3796, + "step": 2898 + }, + { + "epoch": 7.964285714285714, + "grad_norm": 8.859094619750977, + "learning_rate": 4.601785714285715e-05, + "loss": 0.3366, + "step": 2899 + }, + { + "epoch": 7.967032967032967, + "grad_norm": 11.297155380249023, + "learning_rate": 4.601648351648352e-05, + "loss": 0.3341, + "step": 2900 + }, + { + "epoch": 7.96978021978022, + "grad_norm": 11.353927612304688, + "learning_rate": 4.601510989010989e-05, + "loss": 0.4295, + "step": 2901 + }, + { + "epoch": 7.972527472527473, + "grad_norm": 11.478813171386719, + "learning_rate": 4.6013736263736266e-05, + "loss": 0.5304, + "step": 2902 + }, + { + "epoch": 7.975274725274725, + "grad_norm": 16.016170501708984, + "learning_rate": 4.6012362637362636e-05, + "loss": 0.8207, + "step": 2903 + }, + { + "epoch": 7.978021978021978, + "grad_norm": 12.128875732421875, + "learning_rate": 4.601098901098901e-05, + "loss": 0.4337, + "step": 2904 + }, + { + "epoch": 7.980769230769231, + "grad_norm": 10.065195083618164, + "learning_rate": 4.600961538461538e-05, + "loss": 0.3967, + "step": 2905 + }, + { + "epoch": 7.983516483516484, + "grad_norm": 19.434770584106445, + "learning_rate": 4.600824175824176e-05, + "loss": 0.8409, + "step": 2906 + }, + { + "epoch": 7.986263736263736, + "grad_norm": 15.969730377197266, + "learning_rate": 4.6006868131868136e-05, + "loss": 0.5305, + "step": 2907 + }, + { + "epoch": 7.989010989010989, + "grad_norm": 21.20648765563965, + "learning_rate": 4.6005494505494506e-05, + "loss": 0.9844, + "step": 2908 + }, + { + "epoch": 7.991758241758242, + "grad_norm": 18.67546844482422, + "learning_rate": 4.600412087912088e-05, + "loss": 0.9114, + "step": 2909 + }, + { + "epoch": 7.9945054945054945, + "grad_norm": 18.402647018432617, + "learning_rate": 4.600274725274725e-05, + "loss": 0.8547, + "step": 2910 + }, + { + "epoch": 7.997252747252747, + "grad_norm": 13.51319408416748, + "learning_rate": 4.600137362637363e-05, + "loss": 0.578, + "step": 2911 + }, + { + "epoch": 8.0, + "grad_norm": 38.97821044921875, + "learning_rate": 4.600000000000001e-05, + "loss": 0.8611, + "step": 2912 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7878787878787878, + "eval_f1": 0.7744163897961297, + "eval_f1_DuraRiadoRio_64x64": 0.5767441860465117, + "eval_f1_Mole_64x64": 0.8788927335640139, + "eval_f1_Quebrado_64x64": 0.89937106918239, + "eval_f1_RiadoRio_64x64": 0.7027027027027027, + "eval_f1_RioFechado_64x64": 0.8143712574850299, + "eval_loss": 0.7746490240097046, + "eval_precision": 0.8002992283201122, + "eval_precision_DuraRiadoRio_64x64": 0.8732394366197183, + "eval_precision_Mole_64x64": 0.8758620689655172, + "eval_precision_Quebrado_64x64": 0.8218390804597702, + "eval_precision_RiadoRio_64x64": 0.7222222222222222, + "eval_precision_RioFechado_64x64": 0.7083333333333334, + "eval_recall": 0.7895025121489169, + "eval_recall_DuraRiadoRio_64x64": 0.4305555555555556, + "eval_recall_Mole_64x64": 0.8819444444444444, + "eval_recall_Quebrado_64x64": 0.9930555555555556, + "eval_recall_RiadoRio_64x64": 0.6842105263157895, + "eval_recall_RioFechado_64x64": 0.9577464788732394, + "eval_runtime": 1.7955, + "eval_samples_per_second": 404.335, + "eval_steps_per_second": 25.619, + "step": 2912 + }, + { + "epoch": 8.002747252747254, + "grad_norm": 8.78028392791748, + "learning_rate": 4.599862637362638e-05, + "loss": 0.3627, + "step": 2913 + }, + { + "epoch": 8.005494505494505, + "grad_norm": 19.220962524414062, + "learning_rate": 4.5997252747252754e-05, + "loss": 0.8623, + "step": 2914 + }, + { + "epoch": 8.008241758241759, + "grad_norm": 12.002206802368164, + "learning_rate": 4.5995879120879124e-05, + "loss": 0.6237, + "step": 2915 + }, + { + "epoch": 8.010989010989011, + "grad_norm": 12.868818283081055, + "learning_rate": 4.5994505494505494e-05, + "loss": 0.4112, + "step": 2916 + }, + { + "epoch": 8.013736263736265, + "grad_norm": 18.259340286254883, + "learning_rate": 4.599313186813187e-05, + "loss": 0.674, + "step": 2917 + }, + { + "epoch": 8.016483516483516, + "grad_norm": 5.914481163024902, + "learning_rate": 4.599175824175824e-05, + "loss": 0.2094, + "step": 2918 + }, + { + "epoch": 8.01923076923077, + "grad_norm": 12.552376747131348, + "learning_rate": 4.599038461538462e-05, + "loss": 0.6571, + "step": 2919 + }, + { + "epoch": 8.021978021978022, + "grad_norm": 18.872705459594727, + "learning_rate": 4.598901098901099e-05, + "loss": 0.8666, + "step": 2920 + }, + { + "epoch": 8.024725274725276, + "grad_norm": 16.47540855407715, + "learning_rate": 4.5987637362637364e-05, + "loss": 0.7115, + "step": 2921 + }, + { + "epoch": 8.027472527472527, + "grad_norm": 11.25133228302002, + "learning_rate": 4.598626373626374e-05, + "loss": 0.4176, + "step": 2922 + }, + { + "epoch": 8.030219780219781, + "grad_norm": 15.850499153137207, + "learning_rate": 4.598489010989011e-05, + "loss": 0.6523, + "step": 2923 + }, + { + "epoch": 8.032967032967033, + "grad_norm": 14.17114543914795, + "learning_rate": 4.598351648351649e-05, + "loss": 0.6302, + "step": 2924 + }, + { + "epoch": 8.035714285714286, + "grad_norm": 13.278406143188477, + "learning_rate": 4.598214285714286e-05, + "loss": 0.6204, + "step": 2925 + }, + { + "epoch": 8.038461538461538, + "grad_norm": 13.623250007629395, + "learning_rate": 4.5980769230769235e-05, + "loss": 0.4595, + "step": 2926 + }, + { + "epoch": 8.041208791208792, + "grad_norm": 11.759716033935547, + "learning_rate": 4.597939560439561e-05, + "loss": 0.4479, + "step": 2927 + }, + { + "epoch": 8.043956043956044, + "grad_norm": 18.126976013183594, + "learning_rate": 4.597802197802198e-05, + "loss": 1.1064, + "step": 2928 + }, + { + "epoch": 8.046703296703297, + "grad_norm": 15.081695556640625, + "learning_rate": 4.597664835164836e-05, + "loss": 0.6009, + "step": 2929 + }, + { + "epoch": 8.04945054945055, + "grad_norm": 11.362120628356934, + "learning_rate": 4.597527472527473e-05, + "loss": 0.4934, + "step": 2930 + }, + { + "epoch": 8.052197802197803, + "grad_norm": 19.720252990722656, + "learning_rate": 4.59739010989011e-05, + "loss": 0.7875, + "step": 2931 + }, + { + "epoch": 8.054945054945055, + "grad_norm": 13.362395286560059, + "learning_rate": 4.5972527472527475e-05, + "loss": 0.4849, + "step": 2932 + }, + { + "epoch": 8.057692307692308, + "grad_norm": 17.61749267578125, + "learning_rate": 4.5971153846153845e-05, + "loss": 0.6875, + "step": 2933 + }, + { + "epoch": 8.06043956043956, + "grad_norm": 11.398611068725586, + "learning_rate": 4.596978021978022e-05, + "loss": 0.5095, + "step": 2934 + }, + { + "epoch": 8.063186813186814, + "grad_norm": 8.273728370666504, + "learning_rate": 4.596840659340659e-05, + "loss": 0.3586, + "step": 2935 + }, + { + "epoch": 8.065934065934066, + "grad_norm": 16.745800018310547, + "learning_rate": 4.596703296703297e-05, + "loss": 0.6419, + "step": 2936 + }, + { + "epoch": 8.06868131868132, + "grad_norm": 15.102184295654297, + "learning_rate": 4.5965659340659345e-05, + "loss": 0.5787, + "step": 2937 + }, + { + "epoch": 8.071428571428571, + "grad_norm": 9.467080116271973, + "learning_rate": 4.5964285714285715e-05, + "loss": 0.3315, + "step": 2938 + }, + { + "epoch": 8.074175824175825, + "grad_norm": 8.850454330444336, + "learning_rate": 4.596291208791209e-05, + "loss": 0.316, + "step": 2939 + }, + { + "epoch": 8.076923076923077, + "grad_norm": 7.608499526977539, + "learning_rate": 4.596153846153846e-05, + "loss": 0.3993, + "step": 2940 + }, + { + "epoch": 8.07967032967033, + "grad_norm": 15.512845993041992, + "learning_rate": 4.596016483516484e-05, + "loss": 0.8293, + "step": 2941 + }, + { + "epoch": 8.082417582417582, + "grad_norm": 14.224342346191406, + "learning_rate": 4.5958791208791216e-05, + "loss": 0.5367, + "step": 2942 + }, + { + "epoch": 8.085164835164836, + "grad_norm": 11.16100788116455, + "learning_rate": 4.5957417582417586e-05, + "loss": 0.4916, + "step": 2943 + }, + { + "epoch": 8.087912087912088, + "grad_norm": 8.86666202545166, + "learning_rate": 4.595604395604396e-05, + "loss": 0.3366, + "step": 2944 + }, + { + "epoch": 8.090659340659341, + "grad_norm": 13.204610824584961, + "learning_rate": 4.595467032967033e-05, + "loss": 0.7766, + "step": 2945 + }, + { + "epoch": 8.093406593406593, + "grad_norm": 9.951101303100586, + "learning_rate": 4.59532967032967e-05, + "loss": 0.3786, + "step": 2946 + }, + { + "epoch": 8.096153846153847, + "grad_norm": 13.67656421661377, + "learning_rate": 4.595192307692308e-05, + "loss": 0.4435, + "step": 2947 + }, + { + "epoch": 8.098901098901099, + "grad_norm": 10.30819320678711, + "learning_rate": 4.595054945054945e-05, + "loss": 0.4607, + "step": 2948 + }, + { + "epoch": 8.101648351648352, + "grad_norm": 16.40850830078125, + "learning_rate": 4.5949175824175826e-05, + "loss": 0.6673, + "step": 2949 + }, + { + "epoch": 8.104395604395604, + "grad_norm": 16.305782318115234, + "learning_rate": 4.5947802197802196e-05, + "loss": 0.6318, + "step": 2950 + }, + { + "epoch": 8.107142857142858, + "grad_norm": 8.485063552856445, + "learning_rate": 4.594642857142857e-05, + "loss": 0.3005, + "step": 2951 + }, + { + "epoch": 8.10989010989011, + "grad_norm": 14.836073875427246, + "learning_rate": 4.594505494505495e-05, + "loss": 0.802, + "step": 2952 + }, + { + "epoch": 8.112637362637363, + "grad_norm": 15.263893127441406, + "learning_rate": 4.594368131868132e-05, + "loss": 0.5848, + "step": 2953 + }, + { + "epoch": 8.115384615384615, + "grad_norm": 15.846710205078125, + "learning_rate": 4.59423076923077e-05, + "loss": 0.5349, + "step": 2954 + }, + { + "epoch": 8.118131868131869, + "grad_norm": 12.495291709899902, + "learning_rate": 4.594093406593407e-05, + "loss": 0.4668, + "step": 2955 + }, + { + "epoch": 8.12087912087912, + "grad_norm": 19.98910140991211, + "learning_rate": 4.5939560439560443e-05, + "loss": 1.0913, + "step": 2956 + }, + { + "epoch": 8.123626373626374, + "grad_norm": 16.285186767578125, + "learning_rate": 4.593818681318682e-05, + "loss": 0.7392, + "step": 2957 + }, + { + "epoch": 8.126373626373626, + "grad_norm": 9.057930946350098, + "learning_rate": 4.593681318681319e-05, + "loss": 0.2159, + "step": 2958 + }, + { + "epoch": 8.12912087912088, + "grad_norm": 13.995107650756836, + "learning_rate": 4.593543956043957e-05, + "loss": 0.7296, + "step": 2959 + }, + { + "epoch": 8.131868131868131, + "grad_norm": 13.9113187789917, + "learning_rate": 4.593406593406594e-05, + "loss": 0.4855, + "step": 2960 + }, + { + "epoch": 8.134615384615385, + "grad_norm": 13.236214637756348, + "learning_rate": 4.593269230769231e-05, + "loss": 0.7726, + "step": 2961 + }, + { + "epoch": 8.137362637362637, + "grad_norm": 11.899454116821289, + "learning_rate": 4.5931318681318684e-05, + "loss": 0.7975, + "step": 2962 + }, + { + "epoch": 8.14010989010989, + "grad_norm": 14.710234642028809, + "learning_rate": 4.5929945054945054e-05, + "loss": 0.6098, + "step": 2963 + }, + { + "epoch": 8.142857142857142, + "grad_norm": 15.006961822509766, + "learning_rate": 4.592857142857143e-05, + "loss": 0.5824, + "step": 2964 + }, + { + "epoch": 8.145604395604396, + "grad_norm": 12.348369598388672, + "learning_rate": 4.59271978021978e-05, + "loss": 0.554, + "step": 2965 + }, + { + "epoch": 8.148351648351648, + "grad_norm": 19.368864059448242, + "learning_rate": 4.592582417582418e-05, + "loss": 0.7373, + "step": 2966 + }, + { + "epoch": 8.151098901098901, + "grad_norm": 11.495586395263672, + "learning_rate": 4.5924450549450554e-05, + "loss": 0.4089, + "step": 2967 + }, + { + "epoch": 8.153846153846153, + "grad_norm": 16.18409538269043, + "learning_rate": 4.5923076923076924e-05, + "loss": 0.6284, + "step": 2968 + }, + { + "epoch": 8.156593406593407, + "grad_norm": 10.670654296875, + "learning_rate": 4.59217032967033e-05, + "loss": 0.449, + "step": 2969 + }, + { + "epoch": 8.159340659340659, + "grad_norm": 18.222148895263672, + "learning_rate": 4.592032967032967e-05, + "loss": 0.9519, + "step": 2970 + }, + { + "epoch": 8.162087912087912, + "grad_norm": 10.566802024841309, + "learning_rate": 4.591895604395605e-05, + "loss": 0.4097, + "step": 2971 + }, + { + "epoch": 8.164835164835164, + "grad_norm": 10.000975608825684, + "learning_rate": 4.5917582417582425e-05, + "loss": 0.3929, + "step": 2972 + }, + { + "epoch": 8.167582417582418, + "grad_norm": 16.577537536621094, + "learning_rate": 4.5916208791208795e-05, + "loss": 0.889, + "step": 2973 + }, + { + "epoch": 8.17032967032967, + "grad_norm": 14.495469093322754, + "learning_rate": 4.591483516483517e-05, + "loss": 0.9273, + "step": 2974 + }, + { + "epoch": 8.173076923076923, + "grad_norm": 11.561259269714355, + "learning_rate": 4.591346153846154e-05, + "loss": 0.4079, + "step": 2975 + }, + { + "epoch": 8.175824175824175, + "grad_norm": 15.602472305297852, + "learning_rate": 4.591208791208791e-05, + "loss": 0.6656, + "step": 2976 + }, + { + "epoch": 8.178571428571429, + "grad_norm": 9.93869400024414, + "learning_rate": 4.591071428571429e-05, + "loss": 0.337, + "step": 2977 + }, + { + "epoch": 8.18131868131868, + "grad_norm": 13.745155334472656, + "learning_rate": 4.590934065934066e-05, + "loss": 0.5256, + "step": 2978 + }, + { + "epoch": 8.184065934065934, + "grad_norm": 14.969755172729492, + "learning_rate": 4.5907967032967035e-05, + "loss": 0.6887, + "step": 2979 + }, + { + "epoch": 8.186813186813186, + "grad_norm": 10.961463928222656, + "learning_rate": 4.5906593406593405e-05, + "loss": 0.4161, + "step": 2980 + }, + { + "epoch": 8.18956043956044, + "grad_norm": 11.469545364379883, + "learning_rate": 4.590521978021978e-05, + "loss": 0.3561, + "step": 2981 + }, + { + "epoch": 8.192307692307692, + "grad_norm": 14.150470733642578, + "learning_rate": 4.590384615384616e-05, + "loss": 0.6694, + "step": 2982 + }, + { + "epoch": 8.195054945054945, + "grad_norm": 11.328240394592285, + "learning_rate": 4.590247252747253e-05, + "loss": 0.5085, + "step": 2983 + }, + { + "epoch": 8.197802197802197, + "grad_norm": 5.739175319671631, + "learning_rate": 4.5901098901098906e-05, + "loss": 0.224, + "step": 2984 + }, + { + "epoch": 8.20054945054945, + "grad_norm": 11.956316947937012, + "learning_rate": 4.5899725274725276e-05, + "loss": 0.4791, + "step": 2985 + }, + { + "epoch": 8.203296703296703, + "grad_norm": 14.700742721557617, + "learning_rate": 4.589835164835165e-05, + "loss": 0.4728, + "step": 2986 + }, + { + "epoch": 8.206043956043956, + "grad_norm": 22.345083236694336, + "learning_rate": 4.589697802197803e-05, + "loss": 0.9256, + "step": 2987 + }, + { + "epoch": 8.208791208791208, + "grad_norm": 15.096616744995117, + "learning_rate": 4.58956043956044e-05, + "loss": 0.725, + "step": 2988 + }, + { + "epoch": 8.211538461538462, + "grad_norm": 17.547077178955078, + "learning_rate": 4.5894230769230776e-05, + "loss": 0.6598, + "step": 2989 + }, + { + "epoch": 8.214285714285714, + "grad_norm": 16.209766387939453, + "learning_rate": 4.5892857142857146e-05, + "loss": 0.6979, + "step": 2990 + }, + { + "epoch": 8.217032967032967, + "grad_norm": 14.856477737426758, + "learning_rate": 4.5891483516483516e-05, + "loss": 0.57, + "step": 2991 + }, + { + "epoch": 8.219780219780219, + "grad_norm": 13.854763984680176, + "learning_rate": 4.589010989010989e-05, + "loss": 0.4269, + "step": 2992 + }, + { + "epoch": 8.222527472527473, + "grad_norm": 10.456400871276855, + "learning_rate": 4.588873626373626e-05, + "loss": 0.3307, + "step": 2993 + }, + { + "epoch": 8.225274725274724, + "grad_norm": 10.347318649291992, + "learning_rate": 4.588736263736264e-05, + "loss": 0.5822, + "step": 2994 + }, + { + "epoch": 8.228021978021978, + "grad_norm": 11.149473190307617, + "learning_rate": 4.588598901098901e-05, + "loss": 0.4389, + "step": 2995 + }, + { + "epoch": 8.23076923076923, + "grad_norm": 13.966972351074219, + "learning_rate": 4.5884615384615387e-05, + "loss": 0.9256, + "step": 2996 + }, + { + "epoch": 8.233516483516484, + "grad_norm": 14.807673454284668, + "learning_rate": 4.588324175824176e-05, + "loss": 0.5761, + "step": 2997 + }, + { + "epoch": 8.236263736263735, + "grad_norm": 18.253795623779297, + "learning_rate": 4.588186813186813e-05, + "loss": 0.7607, + "step": 2998 + }, + { + "epoch": 8.239010989010989, + "grad_norm": 9.357004165649414, + "learning_rate": 4.588049450549451e-05, + "loss": 0.4991, + "step": 2999 + }, + { + "epoch": 8.241758241758241, + "grad_norm": 15.006227493286133, + "learning_rate": 4.587912087912088e-05, + "loss": 0.6519, + "step": 3000 + }, + { + "epoch": 8.244505494505495, + "grad_norm": 16.083187103271484, + "learning_rate": 4.587774725274726e-05, + "loss": 0.5821, + "step": 3001 + }, + { + "epoch": 8.247252747252748, + "grad_norm": 14.382303237915039, + "learning_rate": 4.5876373626373634e-05, + "loss": 0.8164, + "step": 3002 + }, + { + "epoch": 8.25, + "grad_norm": 9.732848167419434, + "learning_rate": 4.5875000000000004e-05, + "loss": 0.3562, + "step": 3003 + }, + { + "epoch": 8.252747252747252, + "grad_norm": 14.951460838317871, + "learning_rate": 4.5873626373626374e-05, + "loss": 0.5917, + "step": 3004 + }, + { + "epoch": 8.255494505494505, + "grad_norm": 12.189262390136719, + "learning_rate": 4.587225274725275e-05, + "loss": 0.5702, + "step": 3005 + }, + { + "epoch": 8.258241758241759, + "grad_norm": 8.26356315612793, + "learning_rate": 4.587087912087912e-05, + "loss": 0.3811, + "step": 3006 + }, + { + "epoch": 8.260989010989011, + "grad_norm": 13.89671802520752, + "learning_rate": 4.58695054945055e-05, + "loss": 0.7571, + "step": 3007 + }, + { + "epoch": 8.263736263736265, + "grad_norm": 14.113932609558105, + "learning_rate": 4.586813186813187e-05, + "loss": 0.4398, + "step": 3008 + }, + { + "epoch": 8.266483516483516, + "grad_norm": 17.85199546813965, + "learning_rate": 4.5866758241758244e-05, + "loss": 1.2554, + "step": 3009 + }, + { + "epoch": 8.26923076923077, + "grad_norm": 13.865694046020508, + "learning_rate": 4.5865384615384614e-05, + "loss": 0.8115, + "step": 3010 + }, + { + "epoch": 8.271978021978022, + "grad_norm": 8.650030136108398, + "learning_rate": 4.586401098901099e-05, + "loss": 0.3444, + "step": 3011 + }, + { + "epoch": 8.274725274725276, + "grad_norm": 11.74370288848877, + "learning_rate": 4.586263736263737e-05, + "loss": 0.5147, + "step": 3012 + }, + { + "epoch": 8.277472527472527, + "grad_norm": 6.4902496337890625, + "learning_rate": 4.586126373626374e-05, + "loss": 0.2169, + "step": 3013 + }, + { + "epoch": 8.280219780219781, + "grad_norm": 19.71916961669922, + "learning_rate": 4.5859890109890115e-05, + "loss": 1.0092, + "step": 3014 + }, + { + "epoch": 8.282967032967033, + "grad_norm": 17.19866943359375, + "learning_rate": 4.5858516483516485e-05, + "loss": 0.7351, + "step": 3015 + }, + { + "epoch": 8.285714285714286, + "grad_norm": 16.387216567993164, + "learning_rate": 4.585714285714286e-05, + "loss": 0.6496, + "step": 3016 + }, + { + "epoch": 8.288461538461538, + "grad_norm": 13.978226661682129, + "learning_rate": 4.585576923076924e-05, + "loss": 0.5613, + "step": 3017 + }, + { + "epoch": 8.291208791208792, + "grad_norm": 15.262354850769043, + "learning_rate": 4.585439560439561e-05, + "loss": 0.7255, + "step": 3018 + }, + { + "epoch": 8.293956043956044, + "grad_norm": 11.096461296081543, + "learning_rate": 4.585302197802198e-05, + "loss": 0.4044, + "step": 3019 + }, + { + "epoch": 8.296703296703297, + "grad_norm": 10.26732349395752, + "learning_rate": 4.5851648351648355e-05, + "loss": 0.4001, + "step": 3020 + }, + { + "epoch": 8.29945054945055, + "grad_norm": 14.792302131652832, + "learning_rate": 4.5850274725274725e-05, + "loss": 0.5785, + "step": 3021 + }, + { + "epoch": 8.302197802197803, + "grad_norm": 8.466326713562012, + "learning_rate": 4.58489010989011e-05, + "loss": 0.2796, + "step": 3022 + }, + { + "epoch": 8.304945054945055, + "grad_norm": 13.412755012512207, + "learning_rate": 4.584752747252747e-05, + "loss": 0.6419, + "step": 3023 + }, + { + "epoch": 8.307692307692308, + "grad_norm": 9.900323867797852, + "learning_rate": 4.584615384615385e-05, + "loss": 0.3464, + "step": 3024 + }, + { + "epoch": 8.31043956043956, + "grad_norm": 14.20952033996582, + "learning_rate": 4.584478021978022e-05, + "loss": 0.5955, + "step": 3025 + }, + { + "epoch": 8.313186813186814, + "grad_norm": 10.81110954284668, + "learning_rate": 4.5843406593406596e-05, + "loss": 0.4766, + "step": 3026 + }, + { + "epoch": 8.315934065934066, + "grad_norm": 12.039955139160156, + "learning_rate": 4.584203296703297e-05, + "loss": 0.4822, + "step": 3027 + }, + { + "epoch": 8.31868131868132, + "grad_norm": 6.48356294631958, + "learning_rate": 4.584065934065934e-05, + "loss": 0.2439, + "step": 3028 + }, + { + "epoch": 8.321428571428571, + "grad_norm": 14.752033233642578, + "learning_rate": 4.583928571428572e-05, + "loss": 0.808, + "step": 3029 + }, + { + "epoch": 8.324175824175825, + "grad_norm": 16.735578536987305, + "learning_rate": 4.583791208791209e-05, + "loss": 0.579, + "step": 3030 + }, + { + "epoch": 8.326923076923077, + "grad_norm": 21.221389770507812, + "learning_rate": 4.5836538461538466e-05, + "loss": 1.3235, + "step": 3031 + }, + { + "epoch": 8.32967032967033, + "grad_norm": 10.439717292785645, + "learning_rate": 4.583516483516484e-05, + "loss": 0.3731, + "step": 3032 + }, + { + "epoch": 8.332417582417582, + "grad_norm": 12.3917875289917, + "learning_rate": 4.583379120879121e-05, + "loss": 0.46, + "step": 3033 + }, + { + "epoch": 8.335164835164836, + "grad_norm": 11.089614868164062, + "learning_rate": 4.583241758241758e-05, + "loss": 0.2892, + "step": 3034 + }, + { + "epoch": 8.337912087912088, + "grad_norm": 12.418868064880371, + "learning_rate": 4.583104395604396e-05, + "loss": 0.4965, + "step": 3035 + }, + { + "epoch": 8.340659340659341, + "grad_norm": 17.106735229492188, + "learning_rate": 4.582967032967033e-05, + "loss": 0.8382, + "step": 3036 + }, + { + "epoch": 8.343406593406593, + "grad_norm": 8.548680305480957, + "learning_rate": 4.5828296703296706e-05, + "loss": 0.2802, + "step": 3037 + }, + { + "epoch": 8.346153846153847, + "grad_norm": 10.277823448181152, + "learning_rate": 4.5826923076923076e-05, + "loss": 0.3128, + "step": 3038 + }, + { + "epoch": 8.348901098901099, + "grad_norm": 6.378070831298828, + "learning_rate": 4.582554945054945e-05, + "loss": 0.1992, + "step": 3039 + }, + { + "epoch": 8.351648351648352, + "grad_norm": 18.11420249938965, + "learning_rate": 4.582417582417582e-05, + "loss": 0.5813, + "step": 3040 + }, + { + "epoch": 8.354395604395604, + "grad_norm": 13.762532234191895, + "learning_rate": 4.58228021978022e-05, + "loss": 0.5972, + "step": 3041 + }, + { + "epoch": 8.357142857142858, + "grad_norm": 9.656848907470703, + "learning_rate": 4.582142857142858e-05, + "loss": 0.3458, + "step": 3042 + }, + { + "epoch": 8.35989010989011, + "grad_norm": 10.11975383758545, + "learning_rate": 4.582005494505495e-05, + "loss": 0.3613, + "step": 3043 + }, + { + "epoch": 8.362637362637363, + "grad_norm": 11.402962684631348, + "learning_rate": 4.5818681318681324e-05, + "loss": 0.4267, + "step": 3044 + }, + { + "epoch": 8.365384615384615, + "grad_norm": 13.79867172241211, + "learning_rate": 4.5817307692307694e-05, + "loss": 0.5747, + "step": 3045 + }, + { + "epoch": 8.368131868131869, + "grad_norm": 15.802892684936523, + "learning_rate": 4.581593406593407e-05, + "loss": 0.7172, + "step": 3046 + }, + { + "epoch": 8.37087912087912, + "grad_norm": 14.865583419799805, + "learning_rate": 4.581456043956045e-05, + "loss": 0.9959, + "step": 3047 + }, + { + "epoch": 8.373626373626374, + "grad_norm": 14.789447784423828, + "learning_rate": 4.581318681318682e-05, + "loss": 0.5443, + "step": 3048 + }, + { + "epoch": 8.376373626373626, + "grad_norm": 15.20505142211914, + "learning_rate": 4.581181318681319e-05, + "loss": 0.5469, + "step": 3049 + }, + { + "epoch": 8.37912087912088, + "grad_norm": 15.661688804626465, + "learning_rate": 4.5810439560439564e-05, + "loss": 0.4849, + "step": 3050 + }, + { + "epoch": 8.381868131868131, + "grad_norm": 11.022067070007324, + "learning_rate": 4.5809065934065934e-05, + "loss": 0.4129, + "step": 3051 + }, + { + "epoch": 8.384615384615385, + "grad_norm": 16.686195373535156, + "learning_rate": 4.580769230769231e-05, + "loss": 0.6065, + "step": 3052 + }, + { + "epoch": 8.387362637362637, + "grad_norm": 15.440815925598145, + "learning_rate": 4.580631868131868e-05, + "loss": 0.7405, + "step": 3053 + }, + { + "epoch": 8.39010989010989, + "grad_norm": 12.214265823364258, + "learning_rate": 4.580494505494506e-05, + "loss": 0.3813, + "step": 3054 + }, + { + "epoch": 8.392857142857142, + "grad_norm": 12.499283790588379, + "learning_rate": 4.580357142857143e-05, + "loss": 0.49, + "step": 3055 + }, + { + "epoch": 8.395604395604396, + "grad_norm": 13.120756149291992, + "learning_rate": 4.5802197802197804e-05, + "loss": 0.5968, + "step": 3056 + }, + { + "epoch": 8.398351648351648, + "grad_norm": 11.838634490966797, + "learning_rate": 4.580082417582418e-05, + "loss": 0.3097, + "step": 3057 + }, + { + "epoch": 8.401098901098901, + "grad_norm": 11.908446311950684, + "learning_rate": 4.579945054945055e-05, + "loss": 0.3798, + "step": 3058 + }, + { + "epoch": 8.403846153846153, + "grad_norm": 18.384830474853516, + "learning_rate": 4.579807692307693e-05, + "loss": 0.8704, + "step": 3059 + }, + { + "epoch": 8.406593406593407, + "grad_norm": 10.879990577697754, + "learning_rate": 4.57967032967033e-05, + "loss": 0.4157, + "step": 3060 + }, + { + "epoch": 8.409340659340659, + "grad_norm": 10.986507415771484, + "learning_rate": 4.5795329670329675e-05, + "loss": 0.6069, + "step": 3061 + }, + { + "epoch": 8.412087912087912, + "grad_norm": 10.177727699279785, + "learning_rate": 4.579395604395605e-05, + "loss": 0.5045, + "step": 3062 + }, + { + "epoch": 8.414835164835164, + "grad_norm": 18.802406311035156, + "learning_rate": 4.579258241758242e-05, + "loss": 0.9525, + "step": 3063 + }, + { + "epoch": 8.417582417582418, + "grad_norm": 11.13134765625, + "learning_rate": 4.579120879120879e-05, + "loss": 0.5618, + "step": 3064 + }, + { + "epoch": 8.42032967032967, + "grad_norm": 16.793685913085938, + "learning_rate": 4.578983516483517e-05, + "loss": 0.8993, + "step": 3065 + }, + { + "epoch": 8.423076923076923, + "grad_norm": 15.10982894897461, + "learning_rate": 4.578846153846154e-05, + "loss": 0.5744, + "step": 3066 + }, + { + "epoch": 8.425824175824175, + "grad_norm": 7.077610969543457, + "learning_rate": 4.5787087912087915e-05, + "loss": 0.2204, + "step": 3067 + }, + { + "epoch": 8.428571428571429, + "grad_norm": 17.911325454711914, + "learning_rate": 4.5785714285714285e-05, + "loss": 0.7928, + "step": 3068 + }, + { + "epoch": 8.43131868131868, + "grad_norm": 11.385720252990723, + "learning_rate": 4.578434065934066e-05, + "loss": 0.4173, + "step": 3069 + }, + { + "epoch": 8.434065934065934, + "grad_norm": 13.101485252380371, + "learning_rate": 4.578296703296703e-05, + "loss": 0.835, + "step": 3070 + }, + { + "epoch": 8.436813186813186, + "grad_norm": 18.24617576599121, + "learning_rate": 4.578159340659341e-05, + "loss": 0.605, + "step": 3071 + }, + { + "epoch": 8.43956043956044, + "grad_norm": 18.37336540222168, + "learning_rate": 4.578021978021978e-05, + "loss": 0.6399, + "step": 3072 + }, + { + "epoch": 8.442307692307692, + "grad_norm": 13.746720314025879, + "learning_rate": 4.5778846153846156e-05, + "loss": 0.7542, + "step": 3073 + }, + { + "epoch": 8.445054945054945, + "grad_norm": 9.713496208190918, + "learning_rate": 4.577747252747253e-05, + "loss": 0.347, + "step": 3074 + }, + { + "epoch": 8.447802197802197, + "grad_norm": 11.508280754089355, + "learning_rate": 4.57760989010989e-05, + "loss": 0.4224, + "step": 3075 + }, + { + "epoch": 8.45054945054945, + "grad_norm": 11.765969276428223, + "learning_rate": 4.577472527472528e-05, + "loss": 0.4373, + "step": 3076 + }, + { + "epoch": 8.453296703296703, + "grad_norm": 10.844145774841309, + "learning_rate": 4.577335164835165e-05, + "loss": 0.5253, + "step": 3077 + }, + { + "epoch": 8.456043956043956, + "grad_norm": 10.853275299072266, + "learning_rate": 4.5771978021978026e-05, + "loss": 0.4684, + "step": 3078 + }, + { + "epoch": 8.458791208791208, + "grad_norm": 11.692671775817871, + "learning_rate": 4.5770604395604396e-05, + "loss": 0.5565, + "step": 3079 + }, + { + "epoch": 8.461538461538462, + "grad_norm": 10.663459777832031, + "learning_rate": 4.576923076923077e-05, + "loss": 0.3837, + "step": 3080 + }, + { + "epoch": 8.464285714285714, + "grad_norm": 14.603250503540039, + "learning_rate": 4.576785714285714e-05, + "loss": 0.507, + "step": 3081 + }, + { + "epoch": 8.467032967032967, + "grad_norm": 20.166353225708008, + "learning_rate": 4.576648351648351e-05, + "loss": 0.8113, + "step": 3082 + }, + { + "epoch": 8.469780219780219, + "grad_norm": 16.1197509765625, + "learning_rate": 4.576510989010989e-05, + "loss": 0.7788, + "step": 3083 + }, + { + "epoch": 8.472527472527473, + "grad_norm": 19.209333419799805, + "learning_rate": 4.576373626373627e-05, + "loss": 1.1447, + "step": 3084 + }, + { + "epoch": 8.475274725274724, + "grad_norm": 13.475423812866211, + "learning_rate": 4.576236263736264e-05, + "loss": 0.7703, + "step": 3085 + }, + { + "epoch": 8.478021978021978, + "grad_norm": 20.40512466430664, + "learning_rate": 4.5760989010989013e-05, + "loss": 0.8728, + "step": 3086 + }, + { + "epoch": 8.48076923076923, + "grad_norm": 10.344709396362305, + "learning_rate": 4.5759615384615383e-05, + "loss": 0.3531, + "step": 3087 + }, + { + "epoch": 8.483516483516484, + "grad_norm": 12.811279296875, + "learning_rate": 4.575824175824176e-05, + "loss": 0.5804, + "step": 3088 + }, + { + "epoch": 8.486263736263735, + "grad_norm": 9.691773414611816, + "learning_rate": 4.575686813186814e-05, + "loss": 0.4175, + "step": 3089 + }, + { + "epoch": 8.489010989010989, + "grad_norm": 17.755348205566406, + "learning_rate": 4.575549450549451e-05, + "loss": 0.8154, + "step": 3090 + }, + { + "epoch": 8.491758241758241, + "grad_norm": 13.650099754333496, + "learning_rate": 4.5754120879120884e-05, + "loss": 0.4383, + "step": 3091 + }, + { + "epoch": 8.494505494505495, + "grad_norm": 17.952974319458008, + "learning_rate": 4.5752747252747254e-05, + "loss": 0.7995, + "step": 3092 + }, + { + "epoch": 8.497252747252748, + "grad_norm": 9.793290138244629, + "learning_rate": 4.575137362637363e-05, + "loss": 0.3985, + "step": 3093 + }, + { + "epoch": 8.5, + "grad_norm": 13.49299430847168, + "learning_rate": 4.575e-05, + "loss": 0.6244, + "step": 3094 + }, + { + "epoch": 8.502747252747252, + "grad_norm": 15.959529876708984, + "learning_rate": 4.574862637362638e-05, + "loss": 0.7384, + "step": 3095 + }, + { + "epoch": 8.505494505494505, + "grad_norm": 14.281515121459961, + "learning_rate": 4.574725274725275e-05, + "loss": 0.4733, + "step": 3096 + }, + { + "epoch": 8.508241758241759, + "grad_norm": 11.862913131713867, + "learning_rate": 4.574587912087912e-05, + "loss": 0.6616, + "step": 3097 + }, + { + "epoch": 8.510989010989011, + "grad_norm": 14.461541175842285, + "learning_rate": 4.5744505494505494e-05, + "loss": 0.5914, + "step": 3098 + }, + { + "epoch": 8.513736263736263, + "grad_norm": 9.961856842041016, + "learning_rate": 4.574313186813187e-05, + "loss": 0.3215, + "step": 3099 + }, + { + "epoch": 8.516483516483516, + "grad_norm": 12.472250938415527, + "learning_rate": 4.574175824175824e-05, + "loss": 0.4634, + "step": 3100 + }, + { + "epoch": 8.51923076923077, + "grad_norm": 17.21784210205078, + "learning_rate": 4.574038461538462e-05, + "loss": 0.641, + "step": 3101 + }, + { + "epoch": 8.521978021978022, + "grad_norm": 18.505088806152344, + "learning_rate": 4.573901098901099e-05, + "loss": 0.9413, + "step": 3102 + }, + { + "epoch": 8.524725274725276, + "grad_norm": 13.532179832458496, + "learning_rate": 4.5737637362637365e-05, + "loss": 0.5874, + "step": 3103 + }, + { + "epoch": 8.527472527472527, + "grad_norm": 15.431078910827637, + "learning_rate": 4.573626373626374e-05, + "loss": 0.5357, + "step": 3104 + }, + { + "epoch": 8.530219780219781, + "grad_norm": 9.872710227966309, + "learning_rate": 4.573489010989011e-05, + "loss": 0.369, + "step": 3105 + }, + { + "epoch": 8.532967032967033, + "grad_norm": 16.569368362426758, + "learning_rate": 4.573351648351649e-05, + "loss": 0.5516, + "step": 3106 + }, + { + "epoch": 8.535714285714286, + "grad_norm": 12.173046112060547, + "learning_rate": 4.573214285714286e-05, + "loss": 0.6677, + "step": 3107 + }, + { + "epoch": 8.538461538461538, + "grad_norm": 22.781827926635742, + "learning_rate": 4.5730769230769235e-05, + "loss": 1.046, + "step": 3108 + }, + { + "epoch": 8.541208791208792, + "grad_norm": 16.003559112548828, + "learning_rate": 4.5729395604395605e-05, + "loss": 0.7763, + "step": 3109 + }, + { + "epoch": 8.543956043956044, + "grad_norm": 14.569315910339355, + "learning_rate": 4.572802197802198e-05, + "loss": 0.5545, + "step": 3110 + }, + { + "epoch": 8.546703296703297, + "grad_norm": 21.110271453857422, + "learning_rate": 4.572664835164835e-05, + "loss": 1.2143, + "step": 3111 + }, + { + "epoch": 8.54945054945055, + "grad_norm": 12.688735961914062, + "learning_rate": 4.572527472527472e-05, + "loss": 0.4538, + "step": 3112 + }, + { + "epoch": 8.552197802197803, + "grad_norm": 12.852532386779785, + "learning_rate": 4.57239010989011e-05, + "loss": 0.6449, + "step": 3113 + }, + { + "epoch": 8.554945054945055, + "grad_norm": 10.122232437133789, + "learning_rate": 4.5722527472527476e-05, + "loss": 0.4302, + "step": 3114 + }, + { + "epoch": 8.557692307692308, + "grad_norm": 11.652806282043457, + "learning_rate": 4.5721153846153846e-05, + "loss": 0.4724, + "step": 3115 + }, + { + "epoch": 8.56043956043956, + "grad_norm": 18.488971710205078, + "learning_rate": 4.571978021978022e-05, + "loss": 0.9706, + "step": 3116 + }, + { + "epoch": 8.563186813186814, + "grad_norm": 17.82693862915039, + "learning_rate": 4.571840659340659e-05, + "loss": 0.7612, + "step": 3117 + }, + { + "epoch": 8.565934065934066, + "grad_norm": 14.277449607849121, + "learning_rate": 4.571703296703297e-05, + "loss": 0.5485, + "step": 3118 + }, + { + "epoch": 8.56868131868132, + "grad_norm": 8.976397514343262, + "learning_rate": 4.5715659340659346e-05, + "loss": 0.3581, + "step": 3119 + }, + { + "epoch": 8.571428571428571, + "grad_norm": 4.474623680114746, + "learning_rate": 4.5714285714285716e-05, + "loss": 0.1439, + "step": 3120 + }, + { + "epoch": 8.574175824175825, + "grad_norm": 9.955979347229004, + "learning_rate": 4.571291208791209e-05, + "loss": 0.3644, + "step": 3121 + }, + { + "epoch": 8.576923076923077, + "grad_norm": 4.729223251342773, + "learning_rate": 4.571153846153846e-05, + "loss": 0.1399, + "step": 3122 + }, + { + "epoch": 8.57967032967033, + "grad_norm": 10.23276424407959, + "learning_rate": 4.571016483516484e-05, + "loss": 0.414, + "step": 3123 + }, + { + "epoch": 8.582417582417582, + "grad_norm": 9.482268333435059, + "learning_rate": 4.570879120879121e-05, + "loss": 0.382, + "step": 3124 + }, + { + "epoch": 8.585164835164836, + "grad_norm": 14.155045509338379, + "learning_rate": 4.5707417582417587e-05, + "loss": 0.5868, + "step": 3125 + }, + { + "epoch": 8.587912087912088, + "grad_norm": 21.720294952392578, + "learning_rate": 4.5706043956043957e-05, + "loss": 0.871, + "step": 3126 + }, + { + "epoch": 8.590659340659341, + "grad_norm": 15.266056060791016, + "learning_rate": 4.5704670329670327e-05, + "loss": 0.7459, + "step": 3127 + }, + { + "epoch": 8.593406593406593, + "grad_norm": 9.634064674377441, + "learning_rate": 4.57032967032967e-05, + "loss": 0.2714, + "step": 3128 + }, + { + "epoch": 8.596153846153847, + "grad_norm": 9.288034439086914, + "learning_rate": 4.570192307692308e-05, + "loss": 0.3872, + "step": 3129 + }, + { + "epoch": 8.598901098901099, + "grad_norm": 17.678010940551758, + "learning_rate": 4.570054945054945e-05, + "loss": 0.6147, + "step": 3130 + }, + { + "epoch": 8.601648351648352, + "grad_norm": 13.520967483520508, + "learning_rate": 4.569917582417583e-05, + "loss": 0.5573, + "step": 3131 + }, + { + "epoch": 8.604395604395604, + "grad_norm": 19.078447341918945, + "learning_rate": 4.56978021978022e-05, + "loss": 1.0347, + "step": 3132 + }, + { + "epoch": 8.607142857142858, + "grad_norm": 17.46770668029785, + "learning_rate": 4.5696428571428574e-05, + "loss": 0.6134, + "step": 3133 + }, + { + "epoch": 8.60989010989011, + "grad_norm": 14.13709831237793, + "learning_rate": 4.569505494505495e-05, + "loss": 0.7705, + "step": 3134 + }, + { + "epoch": 8.612637362637363, + "grad_norm": 12.868316650390625, + "learning_rate": 4.569368131868132e-05, + "loss": 0.5849, + "step": 3135 + }, + { + "epoch": 8.615384615384615, + "grad_norm": 12.457307815551758, + "learning_rate": 4.56923076923077e-05, + "loss": 0.497, + "step": 3136 + }, + { + "epoch": 8.618131868131869, + "grad_norm": 12.21854019165039, + "learning_rate": 4.569093406593407e-05, + "loss": 0.4881, + "step": 3137 + }, + { + "epoch": 8.62087912087912, + "grad_norm": 11.979886054992676, + "learning_rate": 4.5689560439560444e-05, + "loss": 0.415, + "step": 3138 + }, + { + "epoch": 8.623626373626374, + "grad_norm": 24.762407302856445, + "learning_rate": 4.5688186813186814e-05, + "loss": 1.5597, + "step": 3139 + }, + { + "epoch": 8.626373626373626, + "grad_norm": 12.748909950256348, + "learning_rate": 4.568681318681319e-05, + "loss": 0.4186, + "step": 3140 + }, + { + "epoch": 8.62912087912088, + "grad_norm": 13.164185523986816, + "learning_rate": 4.568543956043956e-05, + "loss": 0.4154, + "step": 3141 + }, + { + "epoch": 8.631868131868131, + "grad_norm": 12.119552612304688, + "learning_rate": 4.568406593406593e-05, + "loss": 0.3805, + "step": 3142 + }, + { + "epoch": 8.634615384615385, + "grad_norm": 10.229643821716309, + "learning_rate": 4.568269230769231e-05, + "loss": 0.3727, + "step": 3143 + }, + { + "epoch": 8.637362637362637, + "grad_norm": 12.972190856933594, + "learning_rate": 4.5681318681318685e-05, + "loss": 0.539, + "step": 3144 + }, + { + "epoch": 8.64010989010989, + "grad_norm": 11.199519157409668, + "learning_rate": 4.5679945054945055e-05, + "loss": 0.3552, + "step": 3145 + }, + { + "epoch": 8.642857142857142, + "grad_norm": 11.424439430236816, + "learning_rate": 4.567857142857143e-05, + "loss": 0.4375, + "step": 3146 + }, + { + "epoch": 8.645604395604396, + "grad_norm": 10.395254135131836, + "learning_rate": 4.56771978021978e-05, + "loss": 0.378, + "step": 3147 + }, + { + "epoch": 8.648351648351648, + "grad_norm": 14.548558235168457, + "learning_rate": 4.567582417582418e-05, + "loss": 0.7592, + "step": 3148 + }, + { + "epoch": 8.651098901098901, + "grad_norm": 16.210386276245117, + "learning_rate": 4.5674450549450555e-05, + "loss": 0.6545, + "step": 3149 + }, + { + "epoch": 8.653846153846153, + "grad_norm": 14.69741439819336, + "learning_rate": 4.5673076923076925e-05, + "loss": 0.5495, + "step": 3150 + }, + { + "epoch": 8.656593406593407, + "grad_norm": 12.505964279174805, + "learning_rate": 4.56717032967033e-05, + "loss": 0.4055, + "step": 3151 + }, + { + "epoch": 8.659340659340659, + "grad_norm": 9.427135467529297, + "learning_rate": 4.567032967032967e-05, + "loss": 0.3104, + "step": 3152 + }, + { + "epoch": 8.662087912087912, + "grad_norm": 13.225785255432129, + "learning_rate": 4.566895604395605e-05, + "loss": 0.6133, + "step": 3153 + }, + { + "epoch": 8.664835164835164, + "grad_norm": 8.744283676147461, + "learning_rate": 4.566758241758242e-05, + "loss": 0.3517, + "step": 3154 + }, + { + "epoch": 8.667582417582418, + "grad_norm": 6.137974739074707, + "learning_rate": 4.5666208791208795e-05, + "loss": 0.1517, + "step": 3155 + }, + { + "epoch": 8.67032967032967, + "grad_norm": 22.203575134277344, + "learning_rate": 4.5664835164835166e-05, + "loss": 1.4101, + "step": 3156 + }, + { + "epoch": 8.673076923076923, + "grad_norm": 18.67082977294922, + "learning_rate": 4.5663461538461536e-05, + "loss": 0.9213, + "step": 3157 + }, + { + "epoch": 8.675824175824175, + "grad_norm": 15.988119125366211, + "learning_rate": 4.566208791208791e-05, + "loss": 0.7063, + "step": 3158 + }, + { + "epoch": 8.678571428571429, + "grad_norm": 11.547896385192871, + "learning_rate": 4.566071428571429e-05, + "loss": 0.5266, + "step": 3159 + }, + { + "epoch": 8.68131868131868, + "grad_norm": 10.333834648132324, + "learning_rate": 4.565934065934066e-05, + "loss": 0.3943, + "step": 3160 + }, + { + "epoch": 8.684065934065934, + "grad_norm": 7.402255058288574, + "learning_rate": 4.5657967032967036e-05, + "loss": 0.2065, + "step": 3161 + }, + { + "epoch": 8.686813186813186, + "grad_norm": 11.928838729858398, + "learning_rate": 4.5656593406593406e-05, + "loss": 0.851, + "step": 3162 + }, + { + "epoch": 8.68956043956044, + "grad_norm": 21.881103515625, + "learning_rate": 4.565521978021978e-05, + "loss": 1.1134, + "step": 3163 + }, + { + "epoch": 8.692307692307692, + "grad_norm": 14.271710395812988, + "learning_rate": 4.565384615384616e-05, + "loss": 0.7764, + "step": 3164 + }, + { + "epoch": 8.695054945054945, + "grad_norm": 8.619462013244629, + "learning_rate": 4.565247252747253e-05, + "loss": 0.2506, + "step": 3165 + }, + { + "epoch": 8.697802197802197, + "grad_norm": 18.57211685180664, + "learning_rate": 4.5651098901098906e-05, + "loss": 1.0764, + "step": 3166 + }, + { + "epoch": 8.70054945054945, + "grad_norm": 13.956514358520508, + "learning_rate": 4.5649725274725276e-05, + "loss": 0.6673, + "step": 3167 + }, + { + "epoch": 8.703296703296703, + "grad_norm": 12.557063102722168, + "learning_rate": 4.564835164835165e-05, + "loss": 0.3811, + "step": 3168 + }, + { + "epoch": 8.706043956043956, + "grad_norm": 17.39226531982422, + "learning_rate": 4.564697802197802e-05, + "loss": 0.8271, + "step": 3169 + }, + { + "epoch": 8.708791208791208, + "grad_norm": 7.3121185302734375, + "learning_rate": 4.56456043956044e-05, + "loss": 0.1924, + "step": 3170 + }, + { + "epoch": 8.711538461538462, + "grad_norm": 16.53993034362793, + "learning_rate": 4.564423076923077e-05, + "loss": 0.8053, + "step": 3171 + }, + { + "epoch": 8.714285714285714, + "grad_norm": 19.076255798339844, + "learning_rate": 4.564285714285714e-05, + "loss": 0.8929, + "step": 3172 + }, + { + "epoch": 8.717032967032967, + "grad_norm": 11.166154861450195, + "learning_rate": 4.564148351648352e-05, + "loss": 0.4904, + "step": 3173 + }, + { + "epoch": 8.719780219780219, + "grad_norm": 18.91844367980957, + "learning_rate": 4.5640109890109894e-05, + "loss": 0.8855, + "step": 3174 + }, + { + "epoch": 8.722527472527473, + "grad_norm": 10.589899063110352, + "learning_rate": 4.5638736263736264e-05, + "loss": 0.449, + "step": 3175 + }, + { + "epoch": 8.725274725274724, + "grad_norm": 12.81711196899414, + "learning_rate": 4.563736263736264e-05, + "loss": 0.3834, + "step": 3176 + }, + { + "epoch": 8.728021978021978, + "grad_norm": 12.162862777709961, + "learning_rate": 4.563598901098901e-05, + "loss": 0.5117, + "step": 3177 + }, + { + "epoch": 8.73076923076923, + "grad_norm": 17.88814353942871, + "learning_rate": 4.563461538461539e-05, + "loss": 1.0023, + "step": 3178 + }, + { + "epoch": 8.733516483516484, + "grad_norm": 13.692009925842285, + "learning_rate": 4.5633241758241764e-05, + "loss": 0.4701, + "step": 3179 + }, + { + "epoch": 8.736263736263737, + "grad_norm": 14.436851501464844, + "learning_rate": 4.5631868131868134e-05, + "loss": 0.5734, + "step": 3180 + }, + { + "epoch": 8.739010989010989, + "grad_norm": 14.269552230834961, + "learning_rate": 4.563049450549451e-05, + "loss": 0.5797, + "step": 3181 + }, + { + "epoch": 8.741758241758241, + "grad_norm": 17.770782470703125, + "learning_rate": 4.562912087912088e-05, + "loss": 1.0077, + "step": 3182 + }, + { + "epoch": 8.744505494505495, + "grad_norm": 14.242012023925781, + "learning_rate": 4.562774725274726e-05, + "loss": 0.5844, + "step": 3183 + }, + { + "epoch": 8.747252747252748, + "grad_norm": 15.553943634033203, + "learning_rate": 4.562637362637363e-05, + "loss": 0.5989, + "step": 3184 + }, + { + "epoch": 8.75, + "grad_norm": 14.64834213256836, + "learning_rate": 4.5625e-05, + "loss": 0.6111, + "step": 3185 + }, + { + "epoch": 8.752747252747252, + "grad_norm": 6.728370666503906, + "learning_rate": 4.5623626373626374e-05, + "loss": 0.2606, + "step": 3186 + }, + { + "epoch": 8.755494505494505, + "grad_norm": 15.954706192016602, + "learning_rate": 4.5622252747252745e-05, + "loss": 0.8177, + "step": 3187 + }, + { + "epoch": 8.758241758241759, + "grad_norm": 12.42354679107666, + "learning_rate": 4.562087912087912e-05, + "loss": 0.4855, + "step": 3188 + }, + { + "epoch": 8.760989010989011, + "grad_norm": 15.221386909484863, + "learning_rate": 4.56195054945055e-05, + "loss": 1.0096, + "step": 3189 + }, + { + "epoch": 8.763736263736263, + "grad_norm": 10.977704048156738, + "learning_rate": 4.561813186813187e-05, + "loss": 0.4322, + "step": 3190 + }, + { + "epoch": 8.766483516483516, + "grad_norm": 13.261281967163086, + "learning_rate": 4.5616758241758245e-05, + "loss": 0.8135, + "step": 3191 + }, + { + "epoch": 8.76923076923077, + "grad_norm": 17.135162353515625, + "learning_rate": 4.5615384615384615e-05, + "loss": 0.9625, + "step": 3192 + }, + { + "epoch": 8.771978021978022, + "grad_norm": 14.954371452331543, + "learning_rate": 4.561401098901099e-05, + "loss": 0.6454, + "step": 3193 + }, + { + "epoch": 8.774725274725276, + "grad_norm": 13.063876152038574, + "learning_rate": 4.561263736263737e-05, + "loss": 0.43, + "step": 3194 + }, + { + "epoch": 8.777472527472527, + "grad_norm": 16.256311416625977, + "learning_rate": 4.561126373626374e-05, + "loss": 0.898, + "step": 3195 + }, + { + "epoch": 8.780219780219781, + "grad_norm": 11.076983451843262, + "learning_rate": 4.5609890109890115e-05, + "loss": 0.5553, + "step": 3196 + }, + { + "epoch": 8.782967032967033, + "grad_norm": 10.665117263793945, + "learning_rate": 4.5608516483516485e-05, + "loss": 0.4822, + "step": 3197 + }, + { + "epoch": 8.785714285714286, + "grad_norm": 17.834415435791016, + "learning_rate": 4.560714285714286e-05, + "loss": 0.8057, + "step": 3198 + }, + { + "epoch": 8.788461538461538, + "grad_norm": 12.848676681518555, + "learning_rate": 4.560576923076923e-05, + "loss": 0.6202, + "step": 3199 + }, + { + "epoch": 8.791208791208792, + "grad_norm": 13.562824249267578, + "learning_rate": 4.56043956043956e-05, + "loss": 0.728, + "step": 3200 + }, + { + "epoch": 8.793956043956044, + "grad_norm": 14.618701934814453, + "learning_rate": 4.560302197802198e-05, + "loss": 0.6073, + "step": 3201 + }, + { + "epoch": 8.796703296703297, + "grad_norm": 11.527606010437012, + "learning_rate": 4.560164835164835e-05, + "loss": 0.5375, + "step": 3202 + }, + { + "epoch": 8.79945054945055, + "grad_norm": 17.571022033691406, + "learning_rate": 4.5600274725274726e-05, + "loss": 0.7662, + "step": 3203 + }, + { + "epoch": 8.802197802197803, + "grad_norm": 9.650001525878906, + "learning_rate": 4.55989010989011e-05, + "loss": 0.3944, + "step": 3204 + }, + { + "epoch": 8.804945054945055, + "grad_norm": 12.936376571655273, + "learning_rate": 4.559752747252747e-05, + "loss": 0.5475, + "step": 3205 + }, + { + "epoch": 8.807692307692308, + "grad_norm": 13.656991004943848, + "learning_rate": 4.559615384615385e-05, + "loss": 0.5081, + "step": 3206 + }, + { + "epoch": 8.81043956043956, + "grad_norm": 11.084982872009277, + "learning_rate": 4.559478021978022e-05, + "loss": 0.4943, + "step": 3207 + }, + { + "epoch": 8.813186813186814, + "grad_norm": 13.323628425598145, + "learning_rate": 4.5593406593406596e-05, + "loss": 0.613, + "step": 3208 + }, + { + "epoch": 8.815934065934066, + "grad_norm": 15.204508781433105, + "learning_rate": 4.559203296703297e-05, + "loss": 0.545, + "step": 3209 + }, + { + "epoch": 8.81868131868132, + "grad_norm": 21.2530517578125, + "learning_rate": 4.559065934065934e-05, + "loss": 0.9077, + "step": 3210 + }, + { + "epoch": 8.821428571428571, + "grad_norm": 11.105875968933105, + "learning_rate": 4.558928571428572e-05, + "loss": 0.3935, + "step": 3211 + }, + { + "epoch": 8.824175824175825, + "grad_norm": 10.01102352142334, + "learning_rate": 4.558791208791209e-05, + "loss": 0.3075, + "step": 3212 + }, + { + "epoch": 8.826923076923077, + "grad_norm": 13.56716251373291, + "learning_rate": 4.558653846153847e-05, + "loss": 0.4566, + "step": 3213 + }, + { + "epoch": 8.82967032967033, + "grad_norm": 17.164209365844727, + "learning_rate": 4.558516483516484e-05, + "loss": 1.1112, + "step": 3214 + }, + { + "epoch": 8.832417582417582, + "grad_norm": 12.398136138916016, + "learning_rate": 4.558379120879121e-05, + "loss": 0.4157, + "step": 3215 + }, + { + "epoch": 8.835164835164836, + "grad_norm": 7.311124324798584, + "learning_rate": 4.5582417582417583e-05, + "loss": 0.2195, + "step": 3216 + }, + { + "epoch": 8.837912087912088, + "grad_norm": 11.937407493591309, + "learning_rate": 4.5581043956043953e-05, + "loss": 0.4614, + "step": 3217 + }, + { + "epoch": 8.840659340659341, + "grad_norm": 14.88707447052002, + "learning_rate": 4.557967032967033e-05, + "loss": 0.5089, + "step": 3218 + }, + { + "epoch": 8.843406593406593, + "grad_norm": 14.310063362121582, + "learning_rate": 4.557829670329671e-05, + "loss": 0.6295, + "step": 3219 + }, + { + "epoch": 8.846153846153847, + "grad_norm": 13.792305946350098, + "learning_rate": 4.557692307692308e-05, + "loss": 0.4785, + "step": 3220 + }, + { + "epoch": 8.848901098901099, + "grad_norm": 6.259251117706299, + "learning_rate": 4.5575549450549454e-05, + "loss": 0.195, + "step": 3221 + }, + { + "epoch": 8.851648351648352, + "grad_norm": 15.971683502197266, + "learning_rate": 4.5574175824175824e-05, + "loss": 0.668, + "step": 3222 + }, + { + "epoch": 8.854395604395604, + "grad_norm": 15.653207778930664, + "learning_rate": 4.55728021978022e-05, + "loss": 0.6442, + "step": 3223 + }, + { + "epoch": 8.857142857142858, + "grad_norm": 17.45726203918457, + "learning_rate": 4.557142857142858e-05, + "loss": 0.8909, + "step": 3224 + }, + { + "epoch": 8.85989010989011, + "grad_norm": 21.771644592285156, + "learning_rate": 4.557005494505495e-05, + "loss": 1.4646, + "step": 3225 + }, + { + "epoch": 8.862637362637363, + "grad_norm": 9.808745384216309, + "learning_rate": 4.5568681318681324e-05, + "loss": 0.3046, + "step": 3226 + }, + { + "epoch": 8.865384615384615, + "grad_norm": 14.262948989868164, + "learning_rate": 4.5567307692307694e-05, + "loss": 0.5386, + "step": 3227 + }, + { + "epoch": 8.868131868131869, + "grad_norm": 12.741817474365234, + "learning_rate": 4.556593406593407e-05, + "loss": 0.532, + "step": 3228 + }, + { + "epoch": 8.87087912087912, + "grad_norm": 11.289917945861816, + "learning_rate": 4.556456043956044e-05, + "loss": 0.575, + "step": 3229 + }, + { + "epoch": 8.873626373626374, + "grad_norm": 10.548943519592285, + "learning_rate": 4.556318681318681e-05, + "loss": 0.4398, + "step": 3230 + }, + { + "epoch": 8.876373626373626, + "grad_norm": 10.59855842590332, + "learning_rate": 4.556181318681319e-05, + "loss": 0.4291, + "step": 3231 + }, + { + "epoch": 8.87912087912088, + "grad_norm": 13.968793869018555, + "learning_rate": 4.556043956043956e-05, + "loss": 0.4852, + "step": 3232 + }, + { + "epoch": 8.881868131868131, + "grad_norm": 8.068833351135254, + "learning_rate": 4.5559065934065935e-05, + "loss": 0.3078, + "step": 3233 + }, + { + "epoch": 8.884615384615385, + "grad_norm": 16.168968200683594, + "learning_rate": 4.555769230769231e-05, + "loss": 0.7496, + "step": 3234 + }, + { + "epoch": 8.887362637362637, + "grad_norm": 12.129348754882812, + "learning_rate": 4.555631868131868e-05, + "loss": 0.4025, + "step": 3235 + }, + { + "epoch": 8.89010989010989, + "grad_norm": 11.963101387023926, + "learning_rate": 4.555494505494506e-05, + "loss": 0.4472, + "step": 3236 + }, + { + "epoch": 8.892857142857142, + "grad_norm": 18.477083206176758, + "learning_rate": 4.555357142857143e-05, + "loss": 0.9015, + "step": 3237 + }, + { + "epoch": 8.895604395604396, + "grad_norm": 13.77047348022461, + "learning_rate": 4.5552197802197805e-05, + "loss": 0.5244, + "step": 3238 + }, + { + "epoch": 8.898351648351648, + "grad_norm": 13.509657859802246, + "learning_rate": 4.555082417582418e-05, + "loss": 0.5702, + "step": 3239 + }, + { + "epoch": 8.901098901098901, + "grad_norm": 12.086492538452148, + "learning_rate": 4.554945054945055e-05, + "loss": 0.3787, + "step": 3240 + }, + { + "epoch": 8.903846153846153, + "grad_norm": 12.466296195983887, + "learning_rate": 4.554807692307693e-05, + "loss": 0.8011, + "step": 3241 + }, + { + "epoch": 8.906593406593407, + "grad_norm": 16.7324275970459, + "learning_rate": 4.55467032967033e-05, + "loss": 0.739, + "step": 3242 + }, + { + "epoch": 8.909340659340659, + "grad_norm": 14.421697616577148, + "learning_rate": 4.5545329670329676e-05, + "loss": 0.5496, + "step": 3243 + }, + { + "epoch": 8.912087912087912, + "grad_norm": 13.856643676757812, + "learning_rate": 4.5543956043956046e-05, + "loss": 0.5672, + "step": 3244 + }, + { + "epoch": 8.914835164835164, + "grad_norm": 19.6488094329834, + "learning_rate": 4.5542582417582416e-05, + "loss": 0.9416, + "step": 3245 + }, + { + "epoch": 8.917582417582418, + "grad_norm": 7.072961807250977, + "learning_rate": 4.554120879120879e-05, + "loss": 0.2504, + "step": 3246 + }, + { + "epoch": 8.92032967032967, + "grad_norm": 16.030166625976562, + "learning_rate": 4.553983516483516e-05, + "loss": 0.7652, + "step": 3247 + }, + { + "epoch": 8.923076923076923, + "grad_norm": 11.56762981414795, + "learning_rate": 4.553846153846154e-05, + "loss": 0.4736, + "step": 3248 + }, + { + "epoch": 8.925824175824175, + "grad_norm": 11.181938171386719, + "learning_rate": 4.5537087912087916e-05, + "loss": 0.479, + "step": 3249 + }, + { + "epoch": 8.928571428571429, + "grad_norm": 10.146078109741211, + "learning_rate": 4.5535714285714286e-05, + "loss": 0.4546, + "step": 3250 + }, + { + "epoch": 8.93131868131868, + "grad_norm": 12.668216705322266, + "learning_rate": 4.553434065934066e-05, + "loss": 0.4063, + "step": 3251 + }, + { + "epoch": 8.934065934065934, + "grad_norm": 14.669557571411133, + "learning_rate": 4.553296703296703e-05, + "loss": 0.6167, + "step": 3252 + }, + { + "epoch": 8.936813186813186, + "grad_norm": 12.601616859436035, + "learning_rate": 4.553159340659341e-05, + "loss": 0.5444, + "step": 3253 + }, + { + "epoch": 8.93956043956044, + "grad_norm": 11.612359046936035, + "learning_rate": 4.5530219780219786e-05, + "loss": 0.4026, + "step": 3254 + }, + { + "epoch": 8.942307692307692, + "grad_norm": 11.928210258483887, + "learning_rate": 4.5528846153846157e-05, + "loss": 0.4778, + "step": 3255 + }, + { + "epoch": 8.945054945054945, + "grad_norm": 18.08989906311035, + "learning_rate": 4.552747252747253e-05, + "loss": 0.8174, + "step": 3256 + }, + { + "epoch": 8.947802197802197, + "grad_norm": 10.470930099487305, + "learning_rate": 4.55260989010989e-05, + "loss": 0.3679, + "step": 3257 + }, + { + "epoch": 8.95054945054945, + "grad_norm": 13.720728874206543, + "learning_rate": 4.552472527472528e-05, + "loss": 0.572, + "step": 3258 + }, + { + "epoch": 8.953296703296703, + "grad_norm": 12.929862976074219, + "learning_rate": 4.552335164835165e-05, + "loss": 0.4582, + "step": 3259 + }, + { + "epoch": 8.956043956043956, + "grad_norm": 14.562843322753906, + "learning_rate": 4.552197802197802e-05, + "loss": 0.5924, + "step": 3260 + }, + { + "epoch": 8.958791208791208, + "grad_norm": 15.719204902648926, + "learning_rate": 4.55206043956044e-05, + "loss": 0.5795, + "step": 3261 + }, + { + "epoch": 8.961538461538462, + "grad_norm": 14.993578910827637, + "learning_rate": 4.551923076923077e-05, + "loss": 0.5621, + "step": 3262 + }, + { + "epoch": 8.964285714285714, + "grad_norm": 17.23745346069336, + "learning_rate": 4.5517857142857144e-05, + "loss": 0.7237, + "step": 3263 + }, + { + "epoch": 8.967032967032967, + "grad_norm": 12.58370304107666, + "learning_rate": 4.551648351648352e-05, + "loss": 0.4667, + "step": 3264 + }, + { + "epoch": 8.969780219780219, + "grad_norm": 10.689604759216309, + "learning_rate": 4.551510989010989e-05, + "loss": 0.3914, + "step": 3265 + }, + { + "epoch": 8.972527472527473, + "grad_norm": 10.901738166809082, + "learning_rate": 4.551373626373627e-05, + "loss": 0.537, + "step": 3266 + }, + { + "epoch": 8.975274725274724, + "grad_norm": 20.346092224121094, + "learning_rate": 4.551236263736264e-05, + "loss": 1.0586, + "step": 3267 + }, + { + "epoch": 8.978021978021978, + "grad_norm": 18.650550842285156, + "learning_rate": 4.5510989010989014e-05, + "loss": 0.6555, + "step": 3268 + }, + { + "epoch": 8.98076923076923, + "grad_norm": 11.60534381866455, + "learning_rate": 4.550961538461539e-05, + "loss": 0.542, + "step": 3269 + }, + { + "epoch": 8.983516483516484, + "grad_norm": 8.464818954467773, + "learning_rate": 4.550824175824176e-05, + "loss": 0.378, + "step": 3270 + }, + { + "epoch": 8.986263736263737, + "grad_norm": 9.385807037353516, + "learning_rate": 4.550686813186814e-05, + "loss": 0.3663, + "step": 3271 + }, + { + "epoch": 8.989010989010989, + "grad_norm": 11.88023853302002, + "learning_rate": 4.550549450549451e-05, + "loss": 0.3888, + "step": 3272 + }, + { + "epoch": 8.991758241758241, + "grad_norm": 12.125711441040039, + "learning_rate": 4.5504120879120885e-05, + "loss": 0.5941, + "step": 3273 + }, + { + "epoch": 8.994505494505495, + "grad_norm": 13.467037200927734, + "learning_rate": 4.5502747252747255e-05, + "loss": 0.4778, + "step": 3274 + }, + { + "epoch": 8.997252747252748, + "grad_norm": 13.959638595581055, + "learning_rate": 4.5501373626373625e-05, + "loss": 0.5732, + "step": 3275 + }, + { + "epoch": 9.0, + "grad_norm": 35.43592071533203, + "learning_rate": 4.55e-05, + "loss": 0.9293, + "step": 3276 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7107438016528925, + "eval_f1": 0.7095049502041085, + "eval_f1_DuraRiadoRio_64x64": 0.6120689655172413, + "eval_f1_Mole_64x64": 0.8267716535433071, + "eval_f1_Quebrado_64x64": 0.7480519480519481, + "eval_f1_RiadoRio_64x64": 0.625, + "eval_f1_RioFechado_64x64": 0.735632183908046, + "eval_loss": 1.040939450263977, + "eval_precision": 0.7521669588242691, + "eval_precision_DuraRiadoRio_64x64": 0.8068181818181818, + "eval_precision_Mole_64x64": 0.9545454545454546, + "eval_precision_Quebrado_64x64": 0.5975103734439834, + "eval_precision_RiadoRio_64x64": 0.5952380952380952, + "eval_precision_RioFechado_64x64": 0.8067226890756303, + "eval_recall": 0.7112346594184993, + "eval_recall_DuraRiadoRio_64x64": 0.4930555555555556, + "eval_recall_Mole_64x64": 0.7291666666666666, + "eval_recall_Quebrado_64x64": 1.0, + "eval_recall_RiadoRio_64x64": 0.6578947368421053, + "eval_recall_RioFechado_64x64": 0.676056338028169, + "eval_runtime": 1.7462, + "eval_samples_per_second": 415.763, + "eval_steps_per_second": 26.343, + "step": 3276 + }, + { + "epoch": 9.002747252747254, + "grad_norm": 20.246498107910156, + "learning_rate": 4.549862637362637e-05, + "loss": 1.0521, + "step": 3277 + }, + { + "epoch": 9.005494505494505, + "grad_norm": 14.403979301452637, + "learning_rate": 4.549725274725275e-05, + "loss": 0.7185, + "step": 3278 + }, + { + "epoch": 9.008241758241759, + "grad_norm": 10.476969718933105, + "learning_rate": 4.5495879120879125e-05, + "loss": 0.3782, + "step": 3279 + }, + { + "epoch": 9.010989010989011, + "grad_norm": 9.03640365600586, + "learning_rate": 4.5494505494505495e-05, + "loss": 0.3841, + "step": 3280 + }, + { + "epoch": 9.013736263736265, + "grad_norm": 12.489935874938965, + "learning_rate": 4.549313186813187e-05, + "loss": 0.5553, + "step": 3281 + }, + { + "epoch": 9.016483516483516, + "grad_norm": 11.39212417602539, + "learning_rate": 4.549175824175824e-05, + "loss": 0.4805, + "step": 3282 + }, + { + "epoch": 9.01923076923077, + "grad_norm": 10.266518592834473, + "learning_rate": 4.549038461538462e-05, + "loss": 0.5114, + "step": 3283 + }, + { + "epoch": 9.021978021978022, + "grad_norm": 14.895232200622559, + "learning_rate": 4.5489010989010995e-05, + "loss": 0.5797, + "step": 3284 + }, + { + "epoch": 9.024725274725276, + "grad_norm": 19.278085708618164, + "learning_rate": 4.5487637362637365e-05, + "loss": 0.6972, + "step": 3285 + }, + { + "epoch": 9.027472527472527, + "grad_norm": 15.527153968811035, + "learning_rate": 4.548626373626374e-05, + "loss": 0.61, + "step": 3286 + }, + { + "epoch": 9.030219780219781, + "grad_norm": 15.11829948425293, + "learning_rate": 4.548489010989011e-05, + "loss": 0.572, + "step": 3287 + }, + { + "epoch": 9.032967032967033, + "grad_norm": 15.684015274047852, + "learning_rate": 4.548351648351649e-05, + "loss": 0.6162, + "step": 3288 + }, + { + "epoch": 9.035714285714286, + "grad_norm": 15.275558471679688, + "learning_rate": 4.548214285714286e-05, + "loss": 0.5315, + "step": 3289 + }, + { + "epoch": 9.038461538461538, + "grad_norm": 13.82695484161377, + "learning_rate": 4.548076923076923e-05, + "loss": 0.42, + "step": 3290 + }, + { + "epoch": 9.041208791208792, + "grad_norm": 16.846702575683594, + "learning_rate": 4.5479395604395606e-05, + "loss": 0.8249, + "step": 3291 + }, + { + "epoch": 9.043956043956044, + "grad_norm": 13.697308540344238, + "learning_rate": 4.5478021978021976e-05, + "loss": 0.6027, + "step": 3292 + }, + { + "epoch": 9.046703296703297, + "grad_norm": 6.010608673095703, + "learning_rate": 4.547664835164835e-05, + "loss": 0.217, + "step": 3293 + }, + { + "epoch": 9.04945054945055, + "grad_norm": 10.204061508178711, + "learning_rate": 4.547527472527473e-05, + "loss": 0.3421, + "step": 3294 + }, + { + "epoch": 9.052197802197803, + "grad_norm": 7.707108020782471, + "learning_rate": 4.54739010989011e-05, + "loss": 0.374, + "step": 3295 + }, + { + "epoch": 9.054945054945055, + "grad_norm": 9.847957611083984, + "learning_rate": 4.5472527472527476e-05, + "loss": 0.323, + "step": 3296 + }, + { + "epoch": 9.057692307692308, + "grad_norm": 12.898515701293945, + "learning_rate": 4.5471153846153846e-05, + "loss": 0.3418, + "step": 3297 + }, + { + "epoch": 9.06043956043956, + "grad_norm": 11.069341659545898, + "learning_rate": 4.546978021978022e-05, + "loss": 0.385, + "step": 3298 + }, + { + "epoch": 9.063186813186814, + "grad_norm": 15.625402450561523, + "learning_rate": 4.546840659340659e-05, + "loss": 0.5864, + "step": 3299 + }, + { + "epoch": 9.065934065934066, + "grad_norm": 11.615623474121094, + "learning_rate": 4.546703296703297e-05, + "loss": 0.3765, + "step": 3300 + }, + { + "epoch": 9.06868131868132, + "grad_norm": 17.85849380493164, + "learning_rate": 4.546565934065935e-05, + "loss": 0.609, + "step": 3301 + }, + { + "epoch": 9.071428571428571, + "grad_norm": 13.724383354187012, + "learning_rate": 4.546428571428572e-05, + "loss": 0.649, + "step": 3302 + }, + { + "epoch": 9.074175824175825, + "grad_norm": 22.132055282592773, + "learning_rate": 4.5462912087912094e-05, + "loss": 1.2268, + "step": 3303 + }, + { + "epoch": 9.076923076923077, + "grad_norm": 18.860774993896484, + "learning_rate": 4.5461538461538464e-05, + "loss": 0.9504, + "step": 3304 + }, + { + "epoch": 9.07967032967033, + "grad_norm": 12.04716968536377, + "learning_rate": 4.5460164835164834e-05, + "loss": 0.4204, + "step": 3305 + }, + { + "epoch": 9.082417582417582, + "grad_norm": 13.22218132019043, + "learning_rate": 4.545879120879121e-05, + "loss": 0.6852, + "step": 3306 + }, + { + "epoch": 9.085164835164836, + "grad_norm": 14.35710620880127, + "learning_rate": 4.545741758241758e-05, + "loss": 0.54, + "step": 3307 + }, + { + "epoch": 9.087912087912088, + "grad_norm": 12.613524436950684, + "learning_rate": 4.545604395604396e-05, + "loss": 0.6586, + "step": 3308 + }, + { + "epoch": 9.090659340659341, + "grad_norm": 14.7123441696167, + "learning_rate": 4.545467032967033e-05, + "loss": 0.6594, + "step": 3309 + }, + { + "epoch": 9.093406593406593, + "grad_norm": 11.90976333618164, + "learning_rate": 4.5453296703296704e-05, + "loss": 0.5811, + "step": 3310 + }, + { + "epoch": 9.096153846153847, + "grad_norm": 21.074649810791016, + "learning_rate": 4.545192307692308e-05, + "loss": 0.8077, + "step": 3311 + }, + { + "epoch": 9.098901098901099, + "grad_norm": 12.342068672180176, + "learning_rate": 4.545054945054945e-05, + "loss": 0.6779, + "step": 3312 + }, + { + "epoch": 9.101648351648352, + "grad_norm": 10.438471794128418, + "learning_rate": 4.544917582417583e-05, + "loss": 0.5281, + "step": 3313 + }, + { + "epoch": 9.104395604395604, + "grad_norm": 11.613525390625, + "learning_rate": 4.54478021978022e-05, + "loss": 0.4663, + "step": 3314 + }, + { + "epoch": 9.107142857142858, + "grad_norm": 9.72909927368164, + "learning_rate": 4.5446428571428574e-05, + "loss": 0.2809, + "step": 3315 + }, + { + "epoch": 9.10989010989011, + "grad_norm": 12.160820960998535, + "learning_rate": 4.544505494505495e-05, + "loss": 0.5187, + "step": 3316 + }, + { + "epoch": 9.112637362637363, + "grad_norm": 13.230887413024902, + "learning_rate": 4.544368131868132e-05, + "loss": 0.2784, + "step": 3317 + }, + { + "epoch": 9.115384615384615, + "grad_norm": 9.96566104888916, + "learning_rate": 4.54423076923077e-05, + "loss": 0.382, + "step": 3318 + }, + { + "epoch": 9.118131868131869, + "grad_norm": 11.73149585723877, + "learning_rate": 4.544093406593407e-05, + "loss": 0.3955, + "step": 3319 + }, + { + "epoch": 9.12087912087912, + "grad_norm": 20.50703239440918, + "learning_rate": 4.543956043956044e-05, + "loss": 0.8429, + "step": 3320 + }, + { + "epoch": 9.123626373626374, + "grad_norm": 12.33705997467041, + "learning_rate": 4.5438186813186815e-05, + "loss": 0.5038, + "step": 3321 + }, + { + "epoch": 9.126373626373626, + "grad_norm": 14.853391647338867, + "learning_rate": 4.5436813186813185e-05, + "loss": 0.6549, + "step": 3322 + }, + { + "epoch": 9.12912087912088, + "grad_norm": 14.533729553222656, + "learning_rate": 4.543543956043956e-05, + "loss": 0.6365, + "step": 3323 + }, + { + "epoch": 9.131868131868131, + "grad_norm": 13.55894947052002, + "learning_rate": 4.543406593406593e-05, + "loss": 0.5075, + "step": 3324 + }, + { + "epoch": 9.134615384615385, + "grad_norm": 15.831229209899902, + "learning_rate": 4.543269230769231e-05, + "loss": 0.7735, + "step": 3325 + }, + { + "epoch": 9.137362637362637, + "grad_norm": 13.619685173034668, + "learning_rate": 4.5431318681318685e-05, + "loss": 0.575, + "step": 3326 + }, + { + "epoch": 9.14010989010989, + "grad_norm": 9.327371597290039, + "learning_rate": 4.5429945054945055e-05, + "loss": 0.4065, + "step": 3327 + }, + { + "epoch": 9.142857142857142, + "grad_norm": 9.402037620544434, + "learning_rate": 4.542857142857143e-05, + "loss": 0.2432, + "step": 3328 + }, + { + "epoch": 9.145604395604396, + "grad_norm": 5.617231369018555, + "learning_rate": 4.54271978021978e-05, + "loss": 0.1427, + "step": 3329 + }, + { + "epoch": 9.148351648351648, + "grad_norm": 18.64061164855957, + "learning_rate": 4.542582417582418e-05, + "loss": 0.8571, + "step": 3330 + }, + { + "epoch": 9.151098901098901, + "grad_norm": 12.851651191711426, + "learning_rate": 4.5424450549450556e-05, + "loss": 0.4191, + "step": 3331 + }, + { + "epoch": 9.153846153846153, + "grad_norm": 13.940690994262695, + "learning_rate": 4.5423076923076926e-05, + "loss": 0.4656, + "step": 3332 + }, + { + "epoch": 9.156593406593407, + "grad_norm": 14.255372047424316, + "learning_rate": 4.54217032967033e-05, + "loss": 0.4595, + "step": 3333 + }, + { + "epoch": 9.159340659340659, + "grad_norm": 14.03138256072998, + "learning_rate": 4.542032967032967e-05, + "loss": 0.6802, + "step": 3334 + }, + { + "epoch": 9.162087912087912, + "grad_norm": 12.96867561340332, + "learning_rate": 4.541895604395604e-05, + "loss": 0.6605, + "step": 3335 + }, + { + "epoch": 9.164835164835164, + "grad_norm": 12.98653793334961, + "learning_rate": 4.541758241758242e-05, + "loss": 0.3535, + "step": 3336 + }, + { + "epoch": 9.167582417582418, + "grad_norm": 11.469747543334961, + "learning_rate": 4.541620879120879e-05, + "loss": 0.4406, + "step": 3337 + }, + { + "epoch": 9.17032967032967, + "grad_norm": 10.309654235839844, + "learning_rate": 4.5414835164835166e-05, + "loss": 0.254, + "step": 3338 + }, + { + "epoch": 9.173076923076923, + "grad_norm": 17.56523895263672, + "learning_rate": 4.5413461538461536e-05, + "loss": 0.9129, + "step": 3339 + }, + { + "epoch": 9.175824175824175, + "grad_norm": 18.927549362182617, + "learning_rate": 4.541208791208791e-05, + "loss": 0.9159, + "step": 3340 + }, + { + "epoch": 9.178571428571429, + "grad_norm": 14.249682426452637, + "learning_rate": 4.541071428571429e-05, + "loss": 0.4522, + "step": 3341 + }, + { + "epoch": 9.18131868131868, + "grad_norm": 12.634782791137695, + "learning_rate": 4.540934065934066e-05, + "loss": 0.5892, + "step": 3342 + }, + { + "epoch": 9.184065934065934, + "grad_norm": 17.125864028930664, + "learning_rate": 4.540796703296704e-05, + "loss": 0.8124, + "step": 3343 + }, + { + "epoch": 9.186813186813186, + "grad_norm": 9.600407600402832, + "learning_rate": 4.540659340659341e-05, + "loss": 0.4164, + "step": 3344 + }, + { + "epoch": 9.18956043956044, + "grad_norm": 11.077255249023438, + "learning_rate": 4.5405219780219783e-05, + "loss": 0.5313, + "step": 3345 + }, + { + "epoch": 9.192307692307692, + "grad_norm": 7.640988349914551, + "learning_rate": 4.540384615384616e-05, + "loss": 0.2929, + "step": 3346 + }, + { + "epoch": 9.195054945054945, + "grad_norm": 14.208203315734863, + "learning_rate": 4.540247252747253e-05, + "loss": 0.6203, + "step": 3347 + }, + { + "epoch": 9.197802197802197, + "grad_norm": 5.9599714279174805, + "learning_rate": 4.540109890109891e-05, + "loss": 0.2298, + "step": 3348 + }, + { + "epoch": 9.20054945054945, + "grad_norm": 10.994632720947266, + "learning_rate": 4.539972527472528e-05, + "loss": 0.339, + "step": 3349 + }, + { + "epoch": 9.203296703296703, + "grad_norm": 6.639439105987549, + "learning_rate": 4.539835164835165e-05, + "loss": 0.205, + "step": 3350 + }, + { + "epoch": 9.206043956043956, + "grad_norm": 14.493724822998047, + "learning_rate": 4.5396978021978024e-05, + "loss": 0.5137, + "step": 3351 + }, + { + "epoch": 9.208791208791208, + "grad_norm": 13.898791313171387, + "learning_rate": 4.5395604395604394e-05, + "loss": 0.4381, + "step": 3352 + }, + { + "epoch": 9.211538461538462, + "grad_norm": 11.646848678588867, + "learning_rate": 4.539423076923077e-05, + "loss": 0.5033, + "step": 3353 + }, + { + "epoch": 9.214285714285714, + "grad_norm": 17.349035263061523, + "learning_rate": 4.539285714285714e-05, + "loss": 0.8979, + "step": 3354 + }, + { + "epoch": 9.217032967032967, + "grad_norm": 18.50515365600586, + "learning_rate": 4.539148351648352e-05, + "loss": 0.8208, + "step": 3355 + }, + { + "epoch": 9.219780219780219, + "grad_norm": 17.905014038085938, + "learning_rate": 4.5390109890109894e-05, + "loss": 0.7316, + "step": 3356 + }, + { + "epoch": 9.222527472527473, + "grad_norm": 15.531579971313477, + "learning_rate": 4.5388736263736264e-05, + "loss": 0.6444, + "step": 3357 + }, + { + "epoch": 9.225274725274724, + "grad_norm": 17.150484085083008, + "learning_rate": 4.538736263736264e-05, + "loss": 0.8815, + "step": 3358 + }, + { + "epoch": 9.228021978021978, + "grad_norm": 9.615921974182129, + "learning_rate": 4.538598901098901e-05, + "loss": 0.3706, + "step": 3359 + }, + { + "epoch": 9.23076923076923, + "grad_norm": 15.032639503479004, + "learning_rate": 4.538461538461539e-05, + "loss": 0.564, + "step": 3360 + }, + { + "epoch": 9.233516483516484, + "grad_norm": 13.034996032714844, + "learning_rate": 4.5383241758241765e-05, + "loss": 0.5143, + "step": 3361 + }, + { + "epoch": 9.236263736263735, + "grad_norm": 11.299793243408203, + "learning_rate": 4.5381868131868135e-05, + "loss": 0.5429, + "step": 3362 + }, + { + "epoch": 9.239010989010989, + "grad_norm": 22.372486114501953, + "learning_rate": 4.538049450549451e-05, + "loss": 1.0437, + "step": 3363 + }, + { + "epoch": 9.241758241758241, + "grad_norm": 10.794919967651367, + "learning_rate": 4.537912087912088e-05, + "loss": 0.454, + "step": 3364 + }, + { + "epoch": 9.244505494505495, + "grad_norm": 18.089414596557617, + "learning_rate": 4.537774725274725e-05, + "loss": 1.0136, + "step": 3365 + }, + { + "epoch": 9.247252747252748, + "grad_norm": 13.570367813110352, + "learning_rate": 4.537637362637363e-05, + "loss": 0.3839, + "step": 3366 + }, + { + "epoch": 9.25, + "grad_norm": 10.054828643798828, + "learning_rate": 4.5375e-05, + "loss": 0.4211, + "step": 3367 + }, + { + "epoch": 9.252747252747252, + "grad_norm": 16.961414337158203, + "learning_rate": 4.5373626373626375e-05, + "loss": 1.0217, + "step": 3368 + }, + { + "epoch": 9.255494505494505, + "grad_norm": 12.238274574279785, + "learning_rate": 4.5372252747252745e-05, + "loss": 0.4189, + "step": 3369 + }, + { + "epoch": 9.258241758241759, + "grad_norm": 12.148070335388184, + "learning_rate": 4.537087912087912e-05, + "loss": 0.466, + "step": 3370 + }, + { + "epoch": 9.260989010989011, + "grad_norm": 14.407248497009277, + "learning_rate": 4.53695054945055e-05, + "loss": 0.7115, + "step": 3371 + }, + { + "epoch": 9.263736263736265, + "grad_norm": 13.71741008758545, + "learning_rate": 4.536813186813187e-05, + "loss": 0.5789, + "step": 3372 + }, + { + "epoch": 9.266483516483516, + "grad_norm": 16.970857620239258, + "learning_rate": 4.5366758241758246e-05, + "loss": 0.7853, + "step": 3373 + }, + { + "epoch": 9.26923076923077, + "grad_norm": 11.671156883239746, + "learning_rate": 4.5365384615384616e-05, + "loss": 0.4702, + "step": 3374 + }, + { + "epoch": 9.271978021978022, + "grad_norm": 14.857378005981445, + "learning_rate": 4.536401098901099e-05, + "loss": 0.6178, + "step": 3375 + }, + { + "epoch": 9.274725274725276, + "grad_norm": 9.20972728729248, + "learning_rate": 4.536263736263737e-05, + "loss": 0.3492, + "step": 3376 + }, + { + "epoch": 9.277472527472527, + "grad_norm": 15.487483978271484, + "learning_rate": 4.536126373626374e-05, + "loss": 0.7123, + "step": 3377 + }, + { + "epoch": 9.280219780219781, + "grad_norm": 19.10435676574707, + "learning_rate": 4.5359890109890116e-05, + "loss": 0.8926, + "step": 3378 + }, + { + "epoch": 9.282967032967033, + "grad_norm": 15.95564079284668, + "learning_rate": 4.5358516483516486e-05, + "loss": 0.5861, + "step": 3379 + }, + { + "epoch": 9.285714285714286, + "grad_norm": 12.743132591247559, + "learning_rate": 4.5357142857142856e-05, + "loss": 0.6412, + "step": 3380 + }, + { + "epoch": 9.288461538461538, + "grad_norm": 14.954278945922852, + "learning_rate": 4.535576923076923e-05, + "loss": 0.8877, + "step": 3381 + }, + { + "epoch": 9.291208791208792, + "grad_norm": 16.02338409423828, + "learning_rate": 4.53543956043956e-05, + "loss": 0.7537, + "step": 3382 + }, + { + "epoch": 9.293956043956044, + "grad_norm": 6.806321144104004, + "learning_rate": 4.535302197802198e-05, + "loss": 0.2318, + "step": 3383 + }, + { + "epoch": 9.296703296703297, + "grad_norm": 13.893876075744629, + "learning_rate": 4.535164835164835e-05, + "loss": 0.3849, + "step": 3384 + }, + { + "epoch": 9.29945054945055, + "grad_norm": 11.924678802490234, + "learning_rate": 4.5350274725274726e-05, + "loss": 0.5709, + "step": 3385 + }, + { + "epoch": 9.302197802197803, + "grad_norm": 11.490449905395508, + "learning_rate": 4.53489010989011e-05, + "loss": 0.4543, + "step": 3386 + }, + { + "epoch": 9.304945054945055, + "grad_norm": 20.0833797454834, + "learning_rate": 4.534752747252747e-05, + "loss": 0.8811, + "step": 3387 + }, + { + "epoch": 9.307692307692308, + "grad_norm": 11.786768913269043, + "learning_rate": 4.534615384615385e-05, + "loss": 0.2968, + "step": 3388 + }, + { + "epoch": 9.31043956043956, + "grad_norm": 13.908963203430176, + "learning_rate": 4.534478021978022e-05, + "loss": 0.6897, + "step": 3389 + }, + { + "epoch": 9.313186813186814, + "grad_norm": 13.129705429077148, + "learning_rate": 4.53434065934066e-05, + "loss": 0.641, + "step": 3390 + }, + { + "epoch": 9.315934065934066, + "grad_norm": 11.508498191833496, + "learning_rate": 4.5342032967032974e-05, + "loss": 0.4116, + "step": 3391 + }, + { + "epoch": 9.31868131868132, + "grad_norm": 11.079371452331543, + "learning_rate": 4.5340659340659344e-05, + "loss": 0.325, + "step": 3392 + }, + { + "epoch": 9.321428571428571, + "grad_norm": 15.280088424682617, + "learning_rate": 4.533928571428572e-05, + "loss": 0.8999, + "step": 3393 + }, + { + "epoch": 9.324175824175825, + "grad_norm": 20.771718978881836, + "learning_rate": 4.533791208791209e-05, + "loss": 0.9274, + "step": 3394 + }, + { + "epoch": 9.326923076923077, + "grad_norm": 11.696106910705566, + "learning_rate": 4.533653846153846e-05, + "loss": 0.684, + "step": 3395 + }, + { + "epoch": 9.32967032967033, + "grad_norm": 12.03084659576416, + "learning_rate": 4.533516483516484e-05, + "loss": 0.5138, + "step": 3396 + }, + { + "epoch": 9.332417582417582, + "grad_norm": 13.332611083984375, + "learning_rate": 4.533379120879121e-05, + "loss": 0.5762, + "step": 3397 + }, + { + "epoch": 9.335164835164836, + "grad_norm": 11.983054161071777, + "learning_rate": 4.5332417582417584e-05, + "loss": 0.5524, + "step": 3398 + }, + { + "epoch": 9.337912087912088, + "grad_norm": 13.053563117980957, + "learning_rate": 4.5331043956043954e-05, + "loss": 0.6113, + "step": 3399 + }, + { + "epoch": 9.340659340659341, + "grad_norm": 13.720677375793457, + "learning_rate": 4.532967032967033e-05, + "loss": 0.7803, + "step": 3400 + }, + { + "epoch": 9.343406593406593, + "grad_norm": 10.621399879455566, + "learning_rate": 4.532829670329671e-05, + "loss": 0.4327, + "step": 3401 + }, + { + "epoch": 9.346153846153847, + "grad_norm": 15.535439491271973, + "learning_rate": 4.532692307692308e-05, + "loss": 0.5884, + "step": 3402 + }, + { + "epoch": 9.348901098901099, + "grad_norm": 9.501497268676758, + "learning_rate": 4.5325549450549455e-05, + "loss": 0.2309, + "step": 3403 + }, + { + "epoch": 9.351648351648352, + "grad_norm": 16.401601791381836, + "learning_rate": 4.5324175824175825e-05, + "loss": 0.721, + "step": 3404 + }, + { + "epoch": 9.354395604395604, + "grad_norm": 17.519222259521484, + "learning_rate": 4.53228021978022e-05, + "loss": 0.7144, + "step": 3405 + }, + { + "epoch": 9.357142857142858, + "grad_norm": 9.969748497009277, + "learning_rate": 4.532142857142858e-05, + "loss": 0.3605, + "step": 3406 + }, + { + "epoch": 9.35989010989011, + "grad_norm": 13.754478454589844, + "learning_rate": 4.532005494505495e-05, + "loss": 0.5459, + "step": 3407 + }, + { + "epoch": 9.362637362637363, + "grad_norm": 11.197543144226074, + "learning_rate": 4.5318681318681325e-05, + "loss": 0.6558, + "step": 3408 + }, + { + "epoch": 9.365384615384615, + "grad_norm": 18.891250610351562, + "learning_rate": 4.5317307692307695e-05, + "loss": 0.9201, + "step": 3409 + }, + { + "epoch": 9.368131868131869, + "grad_norm": 12.927350997924805, + "learning_rate": 4.5315934065934065e-05, + "loss": 0.5076, + "step": 3410 + }, + { + "epoch": 9.37087912087912, + "grad_norm": 12.322718620300293, + "learning_rate": 4.531456043956044e-05, + "loss": 0.5702, + "step": 3411 + }, + { + "epoch": 9.373626373626374, + "grad_norm": 12.855001449584961, + "learning_rate": 4.531318681318681e-05, + "loss": 0.5665, + "step": 3412 + }, + { + "epoch": 9.376373626373626, + "grad_norm": 15.874263763427734, + "learning_rate": 4.531181318681319e-05, + "loss": 0.7986, + "step": 3413 + }, + { + "epoch": 9.37912087912088, + "grad_norm": 9.410042762756348, + "learning_rate": 4.531043956043956e-05, + "loss": 0.415, + "step": 3414 + }, + { + "epoch": 9.381868131868131, + "grad_norm": 8.330790519714355, + "learning_rate": 4.5309065934065935e-05, + "loss": 0.3453, + "step": 3415 + }, + { + "epoch": 9.384615384615385, + "grad_norm": 10.403327941894531, + "learning_rate": 4.530769230769231e-05, + "loss": 0.4215, + "step": 3416 + }, + { + "epoch": 9.387362637362637, + "grad_norm": 8.312435150146484, + "learning_rate": 4.530631868131868e-05, + "loss": 0.3426, + "step": 3417 + }, + { + "epoch": 9.39010989010989, + "grad_norm": 17.513744354248047, + "learning_rate": 4.530494505494506e-05, + "loss": 0.6936, + "step": 3418 + }, + { + "epoch": 9.392857142857142, + "grad_norm": 7.447754859924316, + "learning_rate": 4.530357142857143e-05, + "loss": 0.3496, + "step": 3419 + }, + { + "epoch": 9.395604395604396, + "grad_norm": 8.2235107421875, + "learning_rate": 4.5302197802197806e-05, + "loss": 0.3558, + "step": 3420 + }, + { + "epoch": 9.398351648351648, + "grad_norm": 10.768842697143555, + "learning_rate": 4.530082417582418e-05, + "loss": 0.4477, + "step": 3421 + }, + { + "epoch": 9.401098901098901, + "grad_norm": 9.951850891113281, + "learning_rate": 4.529945054945055e-05, + "loss": 0.4725, + "step": 3422 + }, + { + "epoch": 9.403846153846153, + "grad_norm": 9.66226577758789, + "learning_rate": 4.529807692307693e-05, + "loss": 0.4657, + "step": 3423 + }, + { + "epoch": 9.406593406593407, + "grad_norm": 17.845699310302734, + "learning_rate": 4.52967032967033e-05, + "loss": 0.5511, + "step": 3424 + }, + { + "epoch": 9.409340659340659, + "grad_norm": 16.955554962158203, + "learning_rate": 4.529532967032967e-05, + "loss": 0.6692, + "step": 3425 + }, + { + "epoch": 9.412087912087912, + "grad_norm": 11.294910430908203, + "learning_rate": 4.5293956043956046e-05, + "loss": 0.3359, + "step": 3426 + }, + { + "epoch": 9.414835164835164, + "grad_norm": 11.888102531433105, + "learning_rate": 4.5292582417582416e-05, + "loss": 0.3003, + "step": 3427 + }, + { + "epoch": 9.417582417582418, + "grad_norm": 15.488094329833984, + "learning_rate": 4.529120879120879e-05, + "loss": 0.5124, + "step": 3428 + }, + { + "epoch": 9.42032967032967, + "grad_norm": 21.277509689331055, + "learning_rate": 4.528983516483516e-05, + "loss": 0.8129, + "step": 3429 + }, + { + "epoch": 9.423076923076923, + "grad_norm": 18.240825653076172, + "learning_rate": 4.528846153846154e-05, + "loss": 0.8229, + "step": 3430 + }, + { + "epoch": 9.425824175824175, + "grad_norm": 9.512568473815918, + "learning_rate": 4.528708791208792e-05, + "loss": 0.4424, + "step": 3431 + }, + { + "epoch": 9.428571428571429, + "grad_norm": 12.079129219055176, + "learning_rate": 4.528571428571429e-05, + "loss": 0.4579, + "step": 3432 + }, + { + "epoch": 9.43131868131868, + "grad_norm": 11.792241096496582, + "learning_rate": 4.5284340659340664e-05, + "loss": 0.4764, + "step": 3433 + }, + { + "epoch": 9.434065934065934, + "grad_norm": 17.189382553100586, + "learning_rate": 4.5282967032967034e-05, + "loss": 0.7187, + "step": 3434 + }, + { + "epoch": 9.436813186813186, + "grad_norm": 6.864469528198242, + "learning_rate": 4.528159340659341e-05, + "loss": 0.2833, + "step": 3435 + }, + { + "epoch": 9.43956043956044, + "grad_norm": 14.647485733032227, + "learning_rate": 4.528021978021979e-05, + "loss": 0.7781, + "step": 3436 + }, + { + "epoch": 9.442307692307692, + "grad_norm": 11.043107986450195, + "learning_rate": 4.527884615384616e-05, + "loss": 0.3774, + "step": 3437 + }, + { + "epoch": 9.445054945054945, + "grad_norm": 11.507941246032715, + "learning_rate": 4.5277472527472534e-05, + "loss": 0.6025, + "step": 3438 + }, + { + "epoch": 9.447802197802197, + "grad_norm": 11.932701110839844, + "learning_rate": 4.5276098901098904e-05, + "loss": 0.4463, + "step": 3439 + }, + { + "epoch": 9.45054945054945, + "grad_norm": 12.221341133117676, + "learning_rate": 4.5274725274725274e-05, + "loss": 0.4242, + "step": 3440 + }, + { + "epoch": 9.453296703296703, + "grad_norm": 14.675457000732422, + "learning_rate": 4.527335164835165e-05, + "loss": 0.6736, + "step": 3441 + }, + { + "epoch": 9.456043956043956, + "grad_norm": 15.49506664276123, + "learning_rate": 4.527197802197802e-05, + "loss": 0.7176, + "step": 3442 + }, + { + "epoch": 9.458791208791208, + "grad_norm": 11.923432350158691, + "learning_rate": 4.52706043956044e-05, + "loss": 0.4, + "step": 3443 + }, + { + "epoch": 9.461538461538462, + "grad_norm": 14.213737487792969, + "learning_rate": 4.526923076923077e-05, + "loss": 0.7907, + "step": 3444 + }, + { + "epoch": 9.464285714285714, + "grad_norm": 17.381317138671875, + "learning_rate": 4.5267857142857144e-05, + "loss": 0.8769, + "step": 3445 + }, + { + "epoch": 9.467032967032967, + "grad_norm": 12.878256797790527, + "learning_rate": 4.526648351648352e-05, + "loss": 0.473, + "step": 3446 + }, + { + "epoch": 9.469780219780219, + "grad_norm": 10.044303894042969, + "learning_rate": 4.526510989010989e-05, + "loss": 0.3645, + "step": 3447 + }, + { + "epoch": 9.472527472527473, + "grad_norm": 12.355035781860352, + "learning_rate": 4.526373626373627e-05, + "loss": 0.4099, + "step": 3448 + }, + { + "epoch": 9.475274725274724, + "grad_norm": 10.664010047912598, + "learning_rate": 4.526236263736264e-05, + "loss": 0.4117, + "step": 3449 + }, + { + "epoch": 9.478021978021978, + "grad_norm": 11.479026794433594, + "learning_rate": 4.5260989010989015e-05, + "loss": 0.4974, + "step": 3450 + }, + { + "epoch": 9.48076923076923, + "grad_norm": 14.647159576416016, + "learning_rate": 4.525961538461539e-05, + "loss": 0.7149, + "step": 3451 + }, + { + "epoch": 9.483516483516484, + "grad_norm": 14.457521438598633, + "learning_rate": 4.525824175824176e-05, + "loss": 0.6146, + "step": 3452 + }, + { + "epoch": 9.486263736263735, + "grad_norm": 10.739633560180664, + "learning_rate": 4.525686813186814e-05, + "loss": 0.4044, + "step": 3453 + }, + { + "epoch": 9.489010989010989, + "grad_norm": 14.757638931274414, + "learning_rate": 4.525549450549451e-05, + "loss": 0.3859, + "step": 3454 + }, + { + "epoch": 9.491758241758241, + "grad_norm": 11.771687507629395, + "learning_rate": 4.525412087912088e-05, + "loss": 0.3658, + "step": 3455 + }, + { + "epoch": 9.494505494505495, + "grad_norm": 14.22956371307373, + "learning_rate": 4.5252747252747255e-05, + "loss": 0.5856, + "step": 3456 + }, + { + "epoch": 9.497252747252748, + "grad_norm": 14.802605628967285, + "learning_rate": 4.5251373626373625e-05, + "loss": 0.5755, + "step": 3457 + }, + { + "epoch": 9.5, + "grad_norm": 8.045891761779785, + "learning_rate": 4.525e-05, + "loss": 0.3775, + "step": 3458 + }, + { + "epoch": 9.502747252747252, + "grad_norm": 11.161466598510742, + "learning_rate": 4.524862637362637e-05, + "loss": 0.4692, + "step": 3459 + }, + { + "epoch": 9.505494505494505, + "grad_norm": 16.27707862854004, + "learning_rate": 4.524725274725275e-05, + "loss": 0.6881, + "step": 3460 + }, + { + "epoch": 9.508241758241759, + "grad_norm": 10.488993644714355, + "learning_rate": 4.5245879120879126e-05, + "loss": 0.4254, + "step": 3461 + }, + { + "epoch": 9.510989010989011, + "grad_norm": 16.271493911743164, + "learning_rate": 4.5244505494505496e-05, + "loss": 0.6177, + "step": 3462 + }, + { + "epoch": 9.513736263736263, + "grad_norm": 12.415010452270508, + "learning_rate": 4.524313186813187e-05, + "loss": 0.4532, + "step": 3463 + }, + { + "epoch": 9.516483516483516, + "grad_norm": 9.958379745483398, + "learning_rate": 4.524175824175824e-05, + "loss": 0.3481, + "step": 3464 + }, + { + "epoch": 9.51923076923077, + "grad_norm": 14.456110000610352, + "learning_rate": 4.524038461538462e-05, + "loss": 0.7129, + "step": 3465 + }, + { + "epoch": 9.521978021978022, + "grad_norm": 19.537817001342773, + "learning_rate": 4.5239010989010996e-05, + "loss": 0.8788, + "step": 3466 + }, + { + "epoch": 9.524725274725276, + "grad_norm": 11.25784969329834, + "learning_rate": 4.5237637362637366e-05, + "loss": 0.6318, + "step": 3467 + }, + { + "epoch": 9.527472527472527, + "grad_norm": 11.340076446533203, + "learning_rate": 4.523626373626374e-05, + "loss": 0.4147, + "step": 3468 + }, + { + "epoch": 9.530219780219781, + "grad_norm": 15.690706253051758, + "learning_rate": 4.523489010989011e-05, + "loss": 0.8044, + "step": 3469 + }, + { + "epoch": 9.532967032967033, + "grad_norm": 8.961565971374512, + "learning_rate": 4.523351648351648e-05, + "loss": 0.3176, + "step": 3470 + }, + { + "epoch": 9.535714285714286, + "grad_norm": 26.386022567749023, + "learning_rate": 4.523214285714286e-05, + "loss": 1.2103, + "step": 3471 + }, + { + "epoch": 9.538461538461538, + "grad_norm": 16.458377838134766, + "learning_rate": 4.523076923076923e-05, + "loss": 0.7194, + "step": 3472 + }, + { + "epoch": 9.541208791208792, + "grad_norm": 12.65361499786377, + "learning_rate": 4.522939560439561e-05, + "loss": 0.604, + "step": 3473 + }, + { + "epoch": 9.543956043956044, + "grad_norm": 9.177652359008789, + "learning_rate": 4.522802197802198e-05, + "loss": 0.3335, + "step": 3474 + }, + { + "epoch": 9.546703296703297, + "grad_norm": 13.577775955200195, + "learning_rate": 4.5226648351648353e-05, + "loss": 0.6313, + "step": 3475 + }, + { + "epoch": 9.54945054945055, + "grad_norm": 16.641122817993164, + "learning_rate": 4.522527472527473e-05, + "loss": 0.8978, + "step": 3476 + }, + { + "epoch": 9.552197802197803, + "grad_norm": 19.821758270263672, + "learning_rate": 4.52239010989011e-05, + "loss": 0.7922, + "step": 3477 + }, + { + "epoch": 9.554945054945055, + "grad_norm": 12.34482479095459, + "learning_rate": 4.522252747252748e-05, + "loss": 0.405, + "step": 3478 + }, + { + "epoch": 9.557692307692308, + "grad_norm": 15.158738136291504, + "learning_rate": 4.522115384615385e-05, + "loss": 0.5567, + "step": 3479 + }, + { + "epoch": 9.56043956043956, + "grad_norm": 9.239667892456055, + "learning_rate": 4.5219780219780224e-05, + "loss": 0.2257, + "step": 3480 + }, + { + "epoch": 9.563186813186814, + "grad_norm": 11.333401679992676, + "learning_rate": 4.52184065934066e-05, + "loss": 0.4302, + "step": 3481 + }, + { + "epoch": 9.565934065934066, + "grad_norm": 16.137454986572266, + "learning_rate": 4.521703296703297e-05, + "loss": 0.409, + "step": 3482 + }, + { + "epoch": 9.56868131868132, + "grad_norm": 14.350340843200684, + "learning_rate": 4.521565934065935e-05, + "loss": 0.4948, + "step": 3483 + }, + { + "epoch": 9.571428571428571, + "grad_norm": 13.627368927001953, + "learning_rate": 4.521428571428572e-05, + "loss": 0.6211, + "step": 3484 + }, + { + "epoch": 9.574175824175825, + "grad_norm": 16.328458786010742, + "learning_rate": 4.521291208791209e-05, + "loss": 0.6658, + "step": 3485 + }, + { + "epoch": 9.576923076923077, + "grad_norm": 8.561708450317383, + "learning_rate": 4.5211538461538464e-05, + "loss": 0.2804, + "step": 3486 + }, + { + "epoch": 9.57967032967033, + "grad_norm": 13.873937606811523, + "learning_rate": 4.5210164835164834e-05, + "loss": 0.5918, + "step": 3487 + }, + { + "epoch": 9.582417582417582, + "grad_norm": 12.370808601379395, + "learning_rate": 4.520879120879121e-05, + "loss": 0.4636, + "step": 3488 + }, + { + "epoch": 9.585164835164836, + "grad_norm": 12.966353416442871, + "learning_rate": 4.520741758241758e-05, + "loss": 0.5708, + "step": 3489 + }, + { + "epoch": 9.587912087912088, + "grad_norm": 10.931273460388184, + "learning_rate": 4.520604395604396e-05, + "loss": 0.6592, + "step": 3490 + }, + { + "epoch": 9.590659340659341, + "grad_norm": 19.88690948486328, + "learning_rate": 4.5204670329670335e-05, + "loss": 0.7332, + "step": 3491 + }, + { + "epoch": 9.593406593406593, + "grad_norm": 16.425615310668945, + "learning_rate": 4.5203296703296705e-05, + "loss": 0.8036, + "step": 3492 + }, + { + "epoch": 9.596153846153847, + "grad_norm": 12.216520309448242, + "learning_rate": 4.520192307692308e-05, + "loss": 0.5792, + "step": 3493 + }, + { + "epoch": 9.598901098901099, + "grad_norm": 8.735668182373047, + "learning_rate": 4.520054945054945e-05, + "loss": 0.2825, + "step": 3494 + }, + { + "epoch": 9.601648351648352, + "grad_norm": 13.81529712677002, + "learning_rate": 4.519917582417583e-05, + "loss": 0.449, + "step": 3495 + }, + { + "epoch": 9.604395604395604, + "grad_norm": 13.370238304138184, + "learning_rate": 4.5197802197802205e-05, + "loss": 0.6052, + "step": 3496 + }, + { + "epoch": 9.607142857142858, + "grad_norm": 8.965913772583008, + "learning_rate": 4.5196428571428575e-05, + "loss": 0.4199, + "step": 3497 + }, + { + "epoch": 9.60989010989011, + "grad_norm": 10.736780166625977, + "learning_rate": 4.519505494505495e-05, + "loss": 0.3714, + "step": 3498 + }, + { + "epoch": 9.612637362637363, + "grad_norm": 10.543012619018555, + "learning_rate": 4.519368131868132e-05, + "loss": 0.6273, + "step": 3499 + }, + { + "epoch": 9.615384615384615, + "grad_norm": 14.801488876342773, + "learning_rate": 4.519230769230769e-05, + "loss": 0.5062, + "step": 3500 + }, + { + "epoch": 9.618131868131869, + "grad_norm": 16.004926681518555, + "learning_rate": 4.519093406593407e-05, + "loss": 0.6155, + "step": 3501 + }, + { + "epoch": 9.62087912087912, + "grad_norm": 16.191099166870117, + "learning_rate": 4.518956043956044e-05, + "loss": 0.5853, + "step": 3502 + }, + { + "epoch": 9.623626373626374, + "grad_norm": 12.762660026550293, + "learning_rate": 4.5188186813186816e-05, + "loss": 0.3348, + "step": 3503 + }, + { + "epoch": 9.626373626373626, + "grad_norm": 15.93148136138916, + "learning_rate": 4.5186813186813186e-05, + "loss": 0.6824, + "step": 3504 + }, + { + "epoch": 9.62912087912088, + "grad_norm": 9.667213439941406, + "learning_rate": 4.518543956043956e-05, + "loss": 0.4127, + "step": 3505 + }, + { + "epoch": 9.631868131868131, + "grad_norm": 15.821977615356445, + "learning_rate": 4.518406593406594e-05, + "loss": 0.9566, + "step": 3506 + }, + { + "epoch": 9.634615384615385, + "grad_norm": 15.196727752685547, + "learning_rate": 4.518269230769231e-05, + "loss": 0.6283, + "step": 3507 + }, + { + "epoch": 9.637362637362637, + "grad_norm": 10.079445838928223, + "learning_rate": 4.5181318681318686e-05, + "loss": 0.2484, + "step": 3508 + }, + { + "epoch": 9.64010989010989, + "grad_norm": 9.453990936279297, + "learning_rate": 4.5179945054945056e-05, + "loss": 0.3063, + "step": 3509 + }, + { + "epoch": 9.642857142857142, + "grad_norm": 17.781530380249023, + "learning_rate": 4.517857142857143e-05, + "loss": 1.282, + "step": 3510 + }, + { + "epoch": 9.645604395604396, + "grad_norm": 19.28397560119629, + "learning_rate": 4.517719780219781e-05, + "loss": 0.9771, + "step": 3511 + }, + { + "epoch": 9.648351648351648, + "grad_norm": 12.505898475646973, + "learning_rate": 4.517582417582418e-05, + "loss": 0.7021, + "step": 3512 + }, + { + "epoch": 9.651098901098901, + "grad_norm": 13.746596336364746, + "learning_rate": 4.5174450549450556e-05, + "loss": 0.6722, + "step": 3513 + }, + { + "epoch": 9.653846153846153, + "grad_norm": 14.967217445373535, + "learning_rate": 4.5173076923076926e-05, + "loss": 0.5583, + "step": 3514 + }, + { + "epoch": 9.656593406593407, + "grad_norm": 12.533987998962402, + "learning_rate": 4.5171703296703296e-05, + "loss": 0.5481, + "step": 3515 + }, + { + "epoch": 9.659340659340659, + "grad_norm": 13.471561431884766, + "learning_rate": 4.517032967032967e-05, + "loss": 0.642, + "step": 3516 + }, + { + "epoch": 9.662087912087912, + "grad_norm": 16.132896423339844, + "learning_rate": 4.516895604395604e-05, + "loss": 0.6063, + "step": 3517 + }, + { + "epoch": 9.664835164835164, + "grad_norm": 16.569150924682617, + "learning_rate": 4.516758241758242e-05, + "loss": 0.8281, + "step": 3518 + }, + { + "epoch": 9.667582417582418, + "grad_norm": 10.359013557434082, + "learning_rate": 4.516620879120879e-05, + "loss": 0.2942, + "step": 3519 + }, + { + "epoch": 9.67032967032967, + "grad_norm": 14.9059476852417, + "learning_rate": 4.516483516483517e-05, + "loss": 0.6824, + "step": 3520 + }, + { + "epoch": 9.673076923076923, + "grad_norm": 12.586658477783203, + "learning_rate": 4.5163461538461544e-05, + "loss": 0.5221, + "step": 3521 + }, + { + "epoch": 9.675824175824175, + "grad_norm": 5.4318037033081055, + "learning_rate": 4.5162087912087914e-05, + "loss": 0.2436, + "step": 3522 + }, + { + "epoch": 9.678571428571429, + "grad_norm": 16.057531356811523, + "learning_rate": 4.516071428571429e-05, + "loss": 0.8092, + "step": 3523 + }, + { + "epoch": 9.68131868131868, + "grad_norm": 24.811147689819336, + "learning_rate": 4.515934065934066e-05, + "loss": 1.5662, + "step": 3524 + }, + { + "epoch": 9.684065934065934, + "grad_norm": 7.159148216247559, + "learning_rate": 4.515796703296704e-05, + "loss": 0.2577, + "step": 3525 + }, + { + "epoch": 9.686813186813186, + "grad_norm": 10.960245132446289, + "learning_rate": 4.5156593406593414e-05, + "loss": 0.4664, + "step": 3526 + }, + { + "epoch": 9.68956043956044, + "grad_norm": 17.580076217651367, + "learning_rate": 4.5155219780219784e-05, + "loss": 0.9006, + "step": 3527 + }, + { + "epoch": 9.692307692307692, + "grad_norm": 11.979397773742676, + "learning_rate": 4.515384615384616e-05, + "loss": 0.4572, + "step": 3528 + }, + { + "epoch": 9.695054945054945, + "grad_norm": 9.275666236877441, + "learning_rate": 4.515247252747253e-05, + "loss": 0.3376, + "step": 3529 + }, + { + "epoch": 9.697802197802197, + "grad_norm": 10.804064750671387, + "learning_rate": 4.51510989010989e-05, + "loss": 0.4335, + "step": 3530 + }, + { + "epoch": 9.70054945054945, + "grad_norm": 11.874817848205566, + "learning_rate": 4.514972527472527e-05, + "loss": 0.4502, + "step": 3531 + }, + { + "epoch": 9.703296703296703, + "grad_norm": 13.571765899658203, + "learning_rate": 4.514835164835165e-05, + "loss": 0.5276, + "step": 3532 + }, + { + "epoch": 9.706043956043956, + "grad_norm": 10.586847305297852, + "learning_rate": 4.5146978021978025e-05, + "loss": 0.3913, + "step": 3533 + }, + { + "epoch": 9.708791208791208, + "grad_norm": 9.49067211151123, + "learning_rate": 4.5145604395604395e-05, + "loss": 0.2617, + "step": 3534 + }, + { + "epoch": 9.711538461538462, + "grad_norm": 14.077157020568848, + "learning_rate": 4.514423076923077e-05, + "loss": 0.5807, + "step": 3535 + }, + { + "epoch": 9.714285714285714, + "grad_norm": 15.081523895263672, + "learning_rate": 4.514285714285714e-05, + "loss": 0.6101, + "step": 3536 + }, + { + "epoch": 9.717032967032967, + "grad_norm": 7.714298725128174, + "learning_rate": 4.514148351648352e-05, + "loss": 0.2347, + "step": 3537 + }, + { + "epoch": 9.719780219780219, + "grad_norm": 11.83521842956543, + "learning_rate": 4.5140109890109895e-05, + "loss": 0.4308, + "step": 3538 + }, + { + "epoch": 9.722527472527473, + "grad_norm": 12.453317642211914, + "learning_rate": 4.5138736263736265e-05, + "loss": 0.5399, + "step": 3539 + }, + { + "epoch": 9.725274725274724, + "grad_norm": 9.745162963867188, + "learning_rate": 4.513736263736264e-05, + "loss": 0.3807, + "step": 3540 + }, + { + "epoch": 9.728021978021978, + "grad_norm": 14.712671279907227, + "learning_rate": 4.513598901098901e-05, + "loss": 0.5444, + "step": 3541 + }, + { + "epoch": 9.73076923076923, + "grad_norm": 10.644003868103027, + "learning_rate": 4.513461538461539e-05, + "loss": 0.4617, + "step": 3542 + }, + { + "epoch": 9.733516483516484, + "grad_norm": 10.025932312011719, + "learning_rate": 4.5133241758241765e-05, + "loss": 0.3125, + "step": 3543 + }, + { + "epoch": 9.736263736263737, + "grad_norm": 9.637622833251953, + "learning_rate": 4.5131868131868135e-05, + "loss": 0.3261, + "step": 3544 + }, + { + "epoch": 9.739010989010989, + "grad_norm": 11.551977157592773, + "learning_rate": 4.5130494505494505e-05, + "loss": 0.554, + "step": 3545 + }, + { + "epoch": 9.741758241758241, + "grad_norm": 12.309918403625488, + "learning_rate": 4.5129120879120875e-05, + "loss": 0.3675, + "step": 3546 + }, + { + "epoch": 9.744505494505495, + "grad_norm": 13.531700134277344, + "learning_rate": 4.512774725274725e-05, + "loss": 0.5783, + "step": 3547 + }, + { + "epoch": 9.747252747252748, + "grad_norm": 12.208170890808105, + "learning_rate": 4.512637362637363e-05, + "loss": 0.6396, + "step": 3548 + }, + { + "epoch": 9.75, + "grad_norm": 13.599942207336426, + "learning_rate": 4.5125e-05, + "loss": 0.6723, + "step": 3549 + }, + { + "epoch": 9.752747252747252, + "grad_norm": 13.510129928588867, + "learning_rate": 4.5123626373626376e-05, + "loss": 0.4147, + "step": 3550 + }, + { + "epoch": 9.755494505494505, + "grad_norm": 13.31808853149414, + "learning_rate": 4.5122252747252746e-05, + "loss": 0.8025, + "step": 3551 + }, + { + "epoch": 9.758241758241759, + "grad_norm": 16.800819396972656, + "learning_rate": 4.512087912087912e-05, + "loss": 0.7756, + "step": 3552 + }, + { + "epoch": 9.760989010989011, + "grad_norm": 17.7878475189209, + "learning_rate": 4.51195054945055e-05, + "loss": 0.8994, + "step": 3553 + }, + { + "epoch": 9.763736263736263, + "grad_norm": 14.690296173095703, + "learning_rate": 4.511813186813187e-05, + "loss": 0.5767, + "step": 3554 + }, + { + "epoch": 9.766483516483516, + "grad_norm": 8.633861541748047, + "learning_rate": 4.5116758241758246e-05, + "loss": 0.4244, + "step": 3555 + }, + { + "epoch": 9.76923076923077, + "grad_norm": 10.9052152633667, + "learning_rate": 4.5115384615384616e-05, + "loss": 0.4532, + "step": 3556 + }, + { + "epoch": 9.771978021978022, + "grad_norm": 7.318408966064453, + "learning_rate": 4.511401098901099e-05, + "loss": 0.2373, + "step": 3557 + }, + { + "epoch": 9.774725274725276, + "grad_norm": 7.798877239227295, + "learning_rate": 4.511263736263737e-05, + "loss": 0.2773, + "step": 3558 + }, + { + "epoch": 9.777472527472527, + "grad_norm": 17.040742874145508, + "learning_rate": 4.511126373626374e-05, + "loss": 0.6019, + "step": 3559 + }, + { + "epoch": 9.780219780219781, + "grad_norm": 20.127958297729492, + "learning_rate": 4.510989010989011e-05, + "loss": 0.8365, + "step": 3560 + }, + { + "epoch": 9.782967032967033, + "grad_norm": 15.738155364990234, + "learning_rate": 4.510851648351648e-05, + "loss": 0.6257, + "step": 3561 + }, + { + "epoch": 9.785714285714286, + "grad_norm": 16.792001724243164, + "learning_rate": 4.510714285714286e-05, + "loss": 0.5731, + "step": 3562 + }, + { + "epoch": 9.788461538461538, + "grad_norm": 12.72128677368164, + "learning_rate": 4.5105769230769234e-05, + "loss": 0.4927, + "step": 3563 + }, + { + "epoch": 9.791208791208792, + "grad_norm": 12.008317947387695, + "learning_rate": 4.5104395604395604e-05, + "loss": 0.3577, + "step": 3564 + }, + { + "epoch": 9.793956043956044, + "grad_norm": 8.60240364074707, + "learning_rate": 4.510302197802198e-05, + "loss": 0.3041, + "step": 3565 + }, + { + "epoch": 9.796703296703297, + "grad_norm": 8.146905899047852, + "learning_rate": 4.510164835164835e-05, + "loss": 0.2201, + "step": 3566 + }, + { + "epoch": 9.79945054945055, + "grad_norm": 9.162959098815918, + "learning_rate": 4.510027472527473e-05, + "loss": 0.3626, + "step": 3567 + }, + { + "epoch": 9.802197802197803, + "grad_norm": 7.8583807945251465, + "learning_rate": 4.5098901098901104e-05, + "loss": 0.2447, + "step": 3568 + }, + { + "epoch": 9.804945054945055, + "grad_norm": 8.895145416259766, + "learning_rate": 4.5097527472527474e-05, + "loss": 0.2714, + "step": 3569 + }, + { + "epoch": 9.807692307692308, + "grad_norm": 4.969665050506592, + "learning_rate": 4.509615384615385e-05, + "loss": 0.1778, + "step": 3570 + }, + { + "epoch": 9.81043956043956, + "grad_norm": 19.24249839782715, + "learning_rate": 4.509478021978022e-05, + "loss": 0.7501, + "step": 3571 + }, + { + "epoch": 9.813186813186814, + "grad_norm": 18.557064056396484, + "learning_rate": 4.50934065934066e-05, + "loss": 0.7664, + "step": 3572 + }, + { + "epoch": 9.815934065934066, + "grad_norm": 5.735690116882324, + "learning_rate": 4.5092032967032974e-05, + "loss": 0.1726, + "step": 3573 + }, + { + "epoch": 9.81868131868132, + "grad_norm": 7.886556625366211, + "learning_rate": 4.5090659340659344e-05, + "loss": 0.1899, + "step": 3574 + }, + { + "epoch": 9.821428571428571, + "grad_norm": 20.72349739074707, + "learning_rate": 4.5089285714285714e-05, + "loss": 1.1257, + "step": 3575 + }, + { + "epoch": 9.824175824175825, + "grad_norm": 9.249990463256836, + "learning_rate": 4.5087912087912084e-05, + "loss": 0.3377, + "step": 3576 + }, + { + "epoch": 9.826923076923077, + "grad_norm": 12.887482643127441, + "learning_rate": 4.508653846153846e-05, + "loss": 0.65, + "step": 3577 + }, + { + "epoch": 9.82967032967033, + "grad_norm": 12.135053634643555, + "learning_rate": 4.508516483516484e-05, + "loss": 0.4297, + "step": 3578 + }, + { + "epoch": 9.832417582417582, + "grad_norm": 20.875150680541992, + "learning_rate": 4.508379120879121e-05, + "loss": 1.0861, + "step": 3579 + }, + { + "epoch": 9.835164835164836, + "grad_norm": 11.891955375671387, + "learning_rate": 4.5082417582417585e-05, + "loss": 0.628, + "step": 3580 + }, + { + "epoch": 9.837912087912088, + "grad_norm": 17.612510681152344, + "learning_rate": 4.5081043956043955e-05, + "loss": 0.9385, + "step": 3581 + }, + { + "epoch": 9.840659340659341, + "grad_norm": 14.54854965209961, + "learning_rate": 4.507967032967033e-05, + "loss": 0.4584, + "step": 3582 + }, + { + "epoch": 9.843406593406593, + "grad_norm": 18.773334503173828, + "learning_rate": 4.507829670329671e-05, + "loss": 0.7194, + "step": 3583 + }, + { + "epoch": 9.846153846153847, + "grad_norm": 15.858711242675781, + "learning_rate": 4.507692307692308e-05, + "loss": 0.5611, + "step": 3584 + }, + { + "epoch": 9.848901098901099, + "grad_norm": 10.572458267211914, + "learning_rate": 4.5075549450549455e-05, + "loss": 0.4128, + "step": 3585 + }, + { + "epoch": 9.851648351648352, + "grad_norm": 9.454532623291016, + "learning_rate": 4.5074175824175825e-05, + "loss": 0.3592, + "step": 3586 + }, + { + "epoch": 9.854395604395604, + "grad_norm": 23.7873592376709, + "learning_rate": 4.50728021978022e-05, + "loss": 1.1315, + "step": 3587 + }, + { + "epoch": 9.857142857142858, + "grad_norm": 11.219834327697754, + "learning_rate": 4.507142857142858e-05, + "loss": 0.5558, + "step": 3588 + }, + { + "epoch": 9.85989010989011, + "grad_norm": 18.75878143310547, + "learning_rate": 4.507005494505495e-05, + "loss": 0.9021, + "step": 3589 + }, + { + "epoch": 9.862637362637363, + "grad_norm": 15.957250595092773, + "learning_rate": 4.506868131868132e-05, + "loss": 0.4898, + "step": 3590 + }, + { + "epoch": 9.865384615384615, + "grad_norm": 10.247462272644043, + "learning_rate": 4.506730769230769e-05, + "loss": 0.469, + "step": 3591 + }, + { + "epoch": 9.868131868131869, + "grad_norm": 16.101865768432617, + "learning_rate": 4.5065934065934066e-05, + "loss": 0.5756, + "step": 3592 + }, + { + "epoch": 9.87087912087912, + "grad_norm": 9.73345947265625, + "learning_rate": 4.506456043956044e-05, + "loss": 0.392, + "step": 3593 + }, + { + "epoch": 9.873626373626374, + "grad_norm": 10.911571502685547, + "learning_rate": 4.506318681318681e-05, + "loss": 0.3394, + "step": 3594 + }, + { + "epoch": 9.876373626373626, + "grad_norm": 12.902626037597656, + "learning_rate": 4.506181318681319e-05, + "loss": 0.535, + "step": 3595 + }, + { + "epoch": 9.87912087912088, + "grad_norm": 9.386209487915039, + "learning_rate": 4.506043956043956e-05, + "loss": 0.3853, + "step": 3596 + }, + { + "epoch": 9.881868131868131, + "grad_norm": 14.871570587158203, + "learning_rate": 4.5059065934065936e-05, + "loss": 0.7378, + "step": 3597 + }, + { + "epoch": 9.884615384615385, + "grad_norm": 6.685184955596924, + "learning_rate": 4.505769230769231e-05, + "loss": 0.2153, + "step": 3598 + }, + { + "epoch": 9.887362637362637, + "grad_norm": 8.254502296447754, + "learning_rate": 4.505631868131868e-05, + "loss": 0.3362, + "step": 3599 + }, + { + "epoch": 9.89010989010989, + "grad_norm": 11.648550987243652, + "learning_rate": 4.505494505494506e-05, + "loss": 0.4308, + "step": 3600 + }, + { + "epoch": 9.892857142857142, + "grad_norm": 11.649450302124023, + "learning_rate": 4.505357142857143e-05, + "loss": 0.5244, + "step": 3601 + }, + { + "epoch": 9.895604395604396, + "grad_norm": 16.59109878540039, + "learning_rate": 4.5052197802197807e-05, + "loss": 0.6584, + "step": 3602 + }, + { + "epoch": 9.898351648351648, + "grad_norm": 16.994140625, + "learning_rate": 4.5050824175824183e-05, + "loss": 0.6919, + "step": 3603 + }, + { + "epoch": 9.901098901098901, + "grad_norm": 18.63201904296875, + "learning_rate": 4.5049450549450553e-05, + "loss": 1.1845, + "step": 3604 + }, + { + "epoch": 9.903846153846153, + "grad_norm": 8.337308883666992, + "learning_rate": 4.5048076923076923e-05, + "loss": 0.3867, + "step": 3605 + }, + { + "epoch": 9.906593406593407, + "grad_norm": 14.49372386932373, + "learning_rate": 4.5046703296703293e-05, + "loss": 0.6282, + "step": 3606 + }, + { + "epoch": 9.909340659340659, + "grad_norm": 9.754169464111328, + "learning_rate": 4.504532967032967e-05, + "loss": 0.3844, + "step": 3607 + }, + { + "epoch": 9.912087912087912, + "grad_norm": 13.42047119140625, + "learning_rate": 4.504395604395605e-05, + "loss": 0.3981, + "step": 3608 + }, + { + "epoch": 9.914835164835164, + "grad_norm": 10.764372825622559, + "learning_rate": 4.504258241758242e-05, + "loss": 0.2951, + "step": 3609 + }, + { + "epoch": 9.917582417582418, + "grad_norm": 13.174395561218262, + "learning_rate": 4.5041208791208794e-05, + "loss": 0.5373, + "step": 3610 + }, + { + "epoch": 9.92032967032967, + "grad_norm": 14.662847518920898, + "learning_rate": 4.5039835164835164e-05, + "loss": 0.6132, + "step": 3611 + }, + { + "epoch": 9.923076923076923, + "grad_norm": 14.208292961120605, + "learning_rate": 4.503846153846154e-05, + "loss": 0.5198, + "step": 3612 + }, + { + "epoch": 9.925824175824175, + "grad_norm": 6.946134090423584, + "learning_rate": 4.503708791208792e-05, + "loss": 0.292, + "step": 3613 + }, + { + "epoch": 9.928571428571429, + "grad_norm": 10.933822631835938, + "learning_rate": 4.503571428571429e-05, + "loss": 0.3956, + "step": 3614 + }, + { + "epoch": 9.93131868131868, + "grad_norm": 11.179559707641602, + "learning_rate": 4.5034340659340664e-05, + "loss": 0.4163, + "step": 3615 + }, + { + "epoch": 9.934065934065934, + "grad_norm": 12.885475158691406, + "learning_rate": 4.5032967032967034e-05, + "loss": 0.4698, + "step": 3616 + }, + { + "epoch": 9.936813186813186, + "grad_norm": 16.722572326660156, + "learning_rate": 4.503159340659341e-05, + "loss": 0.6684, + "step": 3617 + }, + { + "epoch": 9.93956043956044, + "grad_norm": 16.5261173248291, + "learning_rate": 4.503021978021978e-05, + "loss": 0.7218, + "step": 3618 + }, + { + "epoch": 9.942307692307692, + "grad_norm": 8.52740478515625, + "learning_rate": 4.502884615384616e-05, + "loss": 0.289, + "step": 3619 + }, + { + "epoch": 9.945054945054945, + "grad_norm": 18.954153060913086, + "learning_rate": 4.502747252747253e-05, + "loss": 1.0486, + "step": 3620 + }, + { + "epoch": 9.947802197802197, + "grad_norm": 17.549388885498047, + "learning_rate": 4.50260989010989e-05, + "loss": 0.6943, + "step": 3621 + }, + { + "epoch": 9.95054945054945, + "grad_norm": 15.023763656616211, + "learning_rate": 4.5024725274725275e-05, + "loss": 0.5681, + "step": 3622 + }, + { + "epoch": 9.953296703296703, + "grad_norm": 7.517483234405518, + "learning_rate": 4.502335164835165e-05, + "loss": 0.2068, + "step": 3623 + }, + { + "epoch": 9.956043956043956, + "grad_norm": 12.888446807861328, + "learning_rate": 4.502197802197802e-05, + "loss": 0.4982, + "step": 3624 + }, + { + "epoch": 9.958791208791208, + "grad_norm": 12.511934280395508, + "learning_rate": 4.50206043956044e-05, + "loss": 0.6637, + "step": 3625 + }, + { + "epoch": 9.961538461538462, + "grad_norm": 15.991985321044922, + "learning_rate": 4.501923076923077e-05, + "loss": 0.6251, + "step": 3626 + }, + { + "epoch": 9.964285714285714, + "grad_norm": 13.40742301940918, + "learning_rate": 4.5017857142857145e-05, + "loss": 0.5254, + "step": 3627 + }, + { + "epoch": 9.967032967032967, + "grad_norm": 14.572123527526855, + "learning_rate": 4.501648351648352e-05, + "loss": 0.6049, + "step": 3628 + }, + { + "epoch": 9.969780219780219, + "grad_norm": 13.002614974975586, + "learning_rate": 4.501510989010989e-05, + "loss": 0.3374, + "step": 3629 + }, + { + "epoch": 9.972527472527473, + "grad_norm": 16.022380828857422, + "learning_rate": 4.501373626373627e-05, + "loss": 0.6181, + "step": 3630 + }, + { + "epoch": 9.975274725274724, + "grad_norm": 12.999805450439453, + "learning_rate": 4.501236263736264e-05, + "loss": 0.554, + "step": 3631 + }, + { + "epoch": 9.978021978021978, + "grad_norm": 9.87100601196289, + "learning_rate": 4.5010989010989016e-05, + "loss": 0.2529, + "step": 3632 + }, + { + "epoch": 9.98076923076923, + "grad_norm": 10.962029457092285, + "learning_rate": 4.5009615384615386e-05, + "loss": 0.484, + "step": 3633 + }, + { + "epoch": 9.983516483516484, + "grad_norm": 10.836342811584473, + "learning_rate": 4.500824175824176e-05, + "loss": 0.5176, + "step": 3634 + }, + { + "epoch": 9.986263736263737, + "grad_norm": 8.664877891540527, + "learning_rate": 4.500686813186813e-05, + "loss": 0.4789, + "step": 3635 + }, + { + "epoch": 9.989010989010989, + "grad_norm": 10.395407676696777, + "learning_rate": 4.50054945054945e-05, + "loss": 0.388, + "step": 3636 + }, + { + "epoch": 9.991758241758241, + "grad_norm": 16.197147369384766, + "learning_rate": 4.500412087912088e-05, + "loss": 0.6163, + "step": 3637 + }, + { + "epoch": 9.994505494505495, + "grad_norm": 11.192273139953613, + "learning_rate": 4.5002747252747256e-05, + "loss": 0.4296, + "step": 3638 + }, + { + "epoch": 9.997252747252748, + "grad_norm": 18.90943145751953, + "learning_rate": 4.5001373626373626e-05, + "loss": 1.0056, + "step": 3639 + }, + { + "epoch": 10.0, + "grad_norm": 8.420125961303711, + "learning_rate": 4.5e-05, + "loss": 0.1113, + "step": 3640 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.6473829201101928, + "eval_f1": 0.6271738057494781, + "eval_f1_DuraRiadoRio_64x64": 0.5507246376811594, + "eval_f1_Mole_64x64": 0.6214442013129103, + "eval_f1_Quebrado_64x64": 0.7419354838709677, + "eval_f1_RiadoRio_64x64": 0.41, + "eval_f1_RioFechado_64x64": 0.8117647058823529, + "eval_loss": 1.331787109375, + "eval_precision": 0.7588375548838806, + "eval_precision_DuraRiadoRio_64x64": 0.9047619047619048, + "eval_precision_Mole_64x64": 0.4536741214057508, + "eval_precision_Quebrado_64x64": 0.8846153846153846, + "eval_precision_RiadoRio_64x64": 0.8541666666666666, + "eval_precision_RioFechado_64x64": 0.696969696969697, + "eval_recall": 0.6524802322708179, + "eval_recall_DuraRiadoRio_64x64": 0.3958333333333333, + "eval_recall_Mole_64x64": 0.9861111111111112, + "eval_recall_Quebrado_64x64": 0.6388888888888888, + "eval_recall_RiadoRio_64x64": 0.26973684210526316, + "eval_recall_RioFechado_64x64": 0.971830985915493, + "eval_runtime": 1.7318, + "eval_samples_per_second": 419.209, + "eval_steps_per_second": 26.561, + "step": 3640 + }, + { + "epoch": 10.002747252747254, + "grad_norm": 13.515734672546387, + "learning_rate": 4.499862637362637e-05, + "loss": 0.5477, + "step": 3641 + }, + { + "epoch": 10.005494505494505, + "grad_norm": 12.839634895324707, + "learning_rate": 4.499725274725275e-05, + "loss": 0.38, + "step": 3642 + }, + { + "epoch": 10.008241758241759, + "grad_norm": 13.09783935546875, + "learning_rate": 4.4995879120879126e-05, + "loss": 0.5965, + "step": 3643 + }, + { + "epoch": 10.010989010989011, + "grad_norm": 11.61003589630127, + "learning_rate": 4.4994505494505496e-05, + "loss": 0.5158, + "step": 3644 + }, + { + "epoch": 10.013736263736265, + "grad_norm": 8.148268699645996, + "learning_rate": 4.499313186813187e-05, + "loss": 0.276, + "step": 3645 + }, + { + "epoch": 10.016483516483516, + "grad_norm": 18.228206634521484, + "learning_rate": 4.499175824175824e-05, + "loss": 0.7315, + "step": 3646 + }, + { + "epoch": 10.01923076923077, + "grad_norm": 13.839861869812012, + "learning_rate": 4.499038461538462e-05, + "loss": 0.5858, + "step": 3647 + }, + { + "epoch": 10.021978021978022, + "grad_norm": 17.083187103271484, + "learning_rate": 4.498901098901099e-05, + "loss": 0.5192, + "step": 3648 + }, + { + "epoch": 10.024725274725276, + "grad_norm": 15.326313018798828, + "learning_rate": 4.498763736263737e-05, + "loss": 0.5522, + "step": 3649 + }, + { + "epoch": 10.027472527472527, + "grad_norm": 8.963342666625977, + "learning_rate": 4.498626373626374e-05, + "loss": 0.2744, + "step": 3650 + }, + { + "epoch": 10.030219780219781, + "grad_norm": 13.621835708618164, + "learning_rate": 4.498489010989011e-05, + "loss": 0.4837, + "step": 3651 + }, + { + "epoch": 10.032967032967033, + "grad_norm": 9.634900093078613, + "learning_rate": 4.4983516483516484e-05, + "loss": 0.343, + "step": 3652 + }, + { + "epoch": 10.035714285714286, + "grad_norm": 11.341564178466797, + "learning_rate": 4.498214285714286e-05, + "loss": 0.4051, + "step": 3653 + }, + { + "epoch": 10.038461538461538, + "grad_norm": 8.468554496765137, + "learning_rate": 4.498076923076923e-05, + "loss": 0.276, + "step": 3654 + }, + { + "epoch": 10.041208791208792, + "grad_norm": 11.822296142578125, + "learning_rate": 4.497939560439561e-05, + "loss": 0.3101, + "step": 3655 + }, + { + "epoch": 10.043956043956044, + "grad_norm": 19.249385833740234, + "learning_rate": 4.497802197802198e-05, + "loss": 0.7789, + "step": 3656 + }, + { + "epoch": 10.046703296703297, + "grad_norm": 15.864392280578613, + "learning_rate": 4.4976648351648354e-05, + "loss": 0.5119, + "step": 3657 + }, + { + "epoch": 10.04945054945055, + "grad_norm": 9.257916450500488, + "learning_rate": 4.497527472527473e-05, + "loss": 0.3176, + "step": 3658 + }, + { + "epoch": 10.052197802197803, + "grad_norm": 11.184690475463867, + "learning_rate": 4.49739010989011e-05, + "loss": 0.4909, + "step": 3659 + }, + { + "epoch": 10.054945054945055, + "grad_norm": 10.212104797363281, + "learning_rate": 4.497252747252748e-05, + "loss": 0.3921, + "step": 3660 + }, + { + "epoch": 10.057692307692308, + "grad_norm": 24.579626083374023, + "learning_rate": 4.497115384615385e-05, + "loss": 1.0261, + "step": 3661 + }, + { + "epoch": 10.06043956043956, + "grad_norm": 13.835305213928223, + "learning_rate": 4.4969780219780225e-05, + "loss": 0.4961, + "step": 3662 + }, + { + "epoch": 10.063186813186814, + "grad_norm": 8.790030479431152, + "learning_rate": 4.4968406593406595e-05, + "loss": 0.3975, + "step": 3663 + }, + { + "epoch": 10.065934065934066, + "grad_norm": 17.754945755004883, + "learning_rate": 4.496703296703297e-05, + "loss": 0.7154, + "step": 3664 + }, + { + "epoch": 10.06868131868132, + "grad_norm": 11.161182403564453, + "learning_rate": 4.496565934065934e-05, + "loss": 0.2856, + "step": 3665 + }, + { + "epoch": 10.071428571428571, + "grad_norm": 15.655505180358887, + "learning_rate": 4.496428571428571e-05, + "loss": 0.4194, + "step": 3666 + }, + { + "epoch": 10.074175824175825, + "grad_norm": 16.935047149658203, + "learning_rate": 4.496291208791209e-05, + "loss": 0.7909, + "step": 3667 + }, + { + "epoch": 10.076923076923077, + "grad_norm": 6.934863567352295, + "learning_rate": 4.4961538461538465e-05, + "loss": 0.1909, + "step": 3668 + }, + { + "epoch": 10.07967032967033, + "grad_norm": 15.382360458374023, + "learning_rate": 4.4960164835164835e-05, + "loss": 0.4993, + "step": 3669 + }, + { + "epoch": 10.082417582417582, + "grad_norm": 14.881192207336426, + "learning_rate": 4.495879120879121e-05, + "loss": 0.6317, + "step": 3670 + }, + { + "epoch": 10.085164835164836, + "grad_norm": 9.610764503479004, + "learning_rate": 4.495741758241758e-05, + "loss": 0.3011, + "step": 3671 + }, + { + "epoch": 10.087912087912088, + "grad_norm": 9.843636512756348, + "learning_rate": 4.495604395604396e-05, + "loss": 0.2796, + "step": 3672 + }, + { + "epoch": 10.090659340659341, + "grad_norm": 14.268589973449707, + "learning_rate": 4.4954670329670335e-05, + "loss": 0.6678, + "step": 3673 + }, + { + "epoch": 10.093406593406593, + "grad_norm": 11.585519790649414, + "learning_rate": 4.4953296703296705e-05, + "loss": 0.4628, + "step": 3674 + }, + { + "epoch": 10.096153846153847, + "grad_norm": 14.805119514465332, + "learning_rate": 4.495192307692308e-05, + "loss": 0.5263, + "step": 3675 + }, + { + "epoch": 10.098901098901099, + "grad_norm": 13.709455490112305, + "learning_rate": 4.495054945054945e-05, + "loss": 0.8862, + "step": 3676 + }, + { + "epoch": 10.101648351648352, + "grad_norm": 14.933768272399902, + "learning_rate": 4.494917582417583e-05, + "loss": 0.5213, + "step": 3677 + }, + { + "epoch": 10.104395604395604, + "grad_norm": 13.00874137878418, + "learning_rate": 4.49478021978022e-05, + "loss": 0.4878, + "step": 3678 + }, + { + "epoch": 10.107142857142858, + "grad_norm": 9.869813919067383, + "learning_rate": 4.4946428571428576e-05, + "loss": 0.3777, + "step": 3679 + }, + { + "epoch": 10.10989010989011, + "grad_norm": 14.2601318359375, + "learning_rate": 4.4945054945054946e-05, + "loss": 0.5428, + "step": 3680 + }, + { + "epoch": 10.112637362637363, + "grad_norm": 15.974414825439453, + "learning_rate": 4.4943681318681316e-05, + "loss": 0.8783, + "step": 3681 + }, + { + "epoch": 10.115384615384615, + "grad_norm": 10.862194061279297, + "learning_rate": 4.494230769230769e-05, + "loss": 0.2423, + "step": 3682 + }, + { + "epoch": 10.118131868131869, + "grad_norm": 11.304253578186035, + "learning_rate": 4.494093406593407e-05, + "loss": 0.4468, + "step": 3683 + }, + { + "epoch": 10.12087912087912, + "grad_norm": 10.890806198120117, + "learning_rate": 4.493956043956044e-05, + "loss": 0.3699, + "step": 3684 + }, + { + "epoch": 10.123626373626374, + "grad_norm": 5.112331390380859, + "learning_rate": 4.4938186813186816e-05, + "loss": 0.1226, + "step": 3685 + }, + { + "epoch": 10.126373626373626, + "grad_norm": 7.748462200164795, + "learning_rate": 4.4936813186813186e-05, + "loss": 0.2517, + "step": 3686 + }, + { + "epoch": 10.12912087912088, + "grad_norm": 10.001758575439453, + "learning_rate": 4.493543956043956e-05, + "loss": 0.4504, + "step": 3687 + }, + { + "epoch": 10.131868131868131, + "grad_norm": 13.796475410461426, + "learning_rate": 4.493406593406594e-05, + "loss": 0.4223, + "step": 3688 + }, + { + "epoch": 10.134615384615385, + "grad_norm": 7.088019847869873, + "learning_rate": 4.493269230769231e-05, + "loss": 0.2264, + "step": 3689 + }, + { + "epoch": 10.137362637362637, + "grad_norm": 14.90166187286377, + "learning_rate": 4.493131868131869e-05, + "loss": 0.5622, + "step": 3690 + }, + { + "epoch": 10.14010989010989, + "grad_norm": 17.443744659423828, + "learning_rate": 4.492994505494506e-05, + "loss": 0.6048, + "step": 3691 + }, + { + "epoch": 10.142857142857142, + "grad_norm": 18.46187973022461, + "learning_rate": 4.4928571428571434e-05, + "loss": 0.8979, + "step": 3692 + }, + { + "epoch": 10.145604395604396, + "grad_norm": 20.397377014160156, + "learning_rate": 4.4927197802197804e-05, + "loss": 0.9798, + "step": 3693 + }, + { + "epoch": 10.148351648351648, + "grad_norm": 15.958083152770996, + "learning_rate": 4.492582417582418e-05, + "loss": 0.7115, + "step": 3694 + }, + { + "epoch": 10.151098901098901, + "grad_norm": 10.373092651367188, + "learning_rate": 4.492445054945055e-05, + "loss": 0.3119, + "step": 3695 + }, + { + "epoch": 10.153846153846153, + "grad_norm": 19.677343368530273, + "learning_rate": 4.492307692307692e-05, + "loss": 0.8333, + "step": 3696 + }, + { + "epoch": 10.156593406593407, + "grad_norm": 15.347228050231934, + "learning_rate": 4.49217032967033e-05, + "loss": 0.7837, + "step": 3697 + }, + { + "epoch": 10.159340659340659, + "grad_norm": 8.505817413330078, + "learning_rate": 4.4920329670329674e-05, + "loss": 0.35, + "step": 3698 + }, + { + "epoch": 10.162087912087912, + "grad_norm": 5.3461503982543945, + "learning_rate": 4.4918956043956044e-05, + "loss": 0.1845, + "step": 3699 + }, + { + "epoch": 10.164835164835164, + "grad_norm": 15.383482933044434, + "learning_rate": 4.491758241758242e-05, + "loss": 0.5103, + "step": 3700 + }, + { + "epoch": 10.167582417582418, + "grad_norm": 15.307927131652832, + "learning_rate": 4.491620879120879e-05, + "loss": 0.3774, + "step": 3701 + }, + { + "epoch": 10.17032967032967, + "grad_norm": 10.810093879699707, + "learning_rate": 4.491483516483517e-05, + "loss": 0.3174, + "step": 3702 + }, + { + "epoch": 10.173076923076923, + "grad_norm": 11.18254280090332, + "learning_rate": 4.4913461538461544e-05, + "loss": 0.4091, + "step": 3703 + }, + { + "epoch": 10.175824175824175, + "grad_norm": 14.701115608215332, + "learning_rate": 4.4912087912087914e-05, + "loss": 0.6846, + "step": 3704 + }, + { + "epoch": 10.178571428571429, + "grad_norm": 8.18221378326416, + "learning_rate": 4.491071428571429e-05, + "loss": 0.2123, + "step": 3705 + }, + { + "epoch": 10.18131868131868, + "grad_norm": 14.758180618286133, + "learning_rate": 4.490934065934066e-05, + "loss": 0.6582, + "step": 3706 + }, + { + "epoch": 10.184065934065934, + "grad_norm": 9.89427375793457, + "learning_rate": 4.490796703296704e-05, + "loss": 0.2619, + "step": 3707 + }, + { + "epoch": 10.186813186813186, + "grad_norm": 13.078579902648926, + "learning_rate": 4.490659340659341e-05, + "loss": 0.5765, + "step": 3708 + }, + { + "epoch": 10.18956043956044, + "grad_norm": 13.753803253173828, + "learning_rate": 4.4905219780219785e-05, + "loss": 0.6334, + "step": 3709 + }, + { + "epoch": 10.192307692307692, + "grad_norm": 15.362019538879395, + "learning_rate": 4.4903846153846155e-05, + "loss": 0.7117, + "step": 3710 + }, + { + "epoch": 10.195054945054945, + "grad_norm": 8.197586059570312, + "learning_rate": 4.4902472527472525e-05, + "loss": 0.2537, + "step": 3711 + }, + { + "epoch": 10.197802197802197, + "grad_norm": 8.562808990478516, + "learning_rate": 4.49010989010989e-05, + "loss": 0.3671, + "step": 3712 + }, + { + "epoch": 10.20054945054945, + "grad_norm": 11.27086353302002, + "learning_rate": 4.489972527472528e-05, + "loss": 0.3361, + "step": 3713 + }, + { + "epoch": 10.203296703296703, + "grad_norm": 13.323612213134766, + "learning_rate": 4.489835164835165e-05, + "loss": 0.4494, + "step": 3714 + }, + { + "epoch": 10.206043956043956, + "grad_norm": 11.578031539916992, + "learning_rate": 4.4896978021978025e-05, + "loss": 0.2933, + "step": 3715 + }, + { + "epoch": 10.208791208791208, + "grad_norm": 17.027191162109375, + "learning_rate": 4.4895604395604395e-05, + "loss": 0.7097, + "step": 3716 + }, + { + "epoch": 10.211538461538462, + "grad_norm": 8.221196174621582, + "learning_rate": 4.489423076923077e-05, + "loss": 0.2881, + "step": 3717 + }, + { + "epoch": 10.214285714285714, + "grad_norm": 17.38507843017578, + "learning_rate": 4.489285714285715e-05, + "loss": 0.6758, + "step": 3718 + }, + { + "epoch": 10.217032967032967, + "grad_norm": 12.717047691345215, + "learning_rate": 4.489148351648352e-05, + "loss": 0.3935, + "step": 3719 + }, + { + "epoch": 10.219780219780219, + "grad_norm": 13.041645050048828, + "learning_rate": 4.4890109890109896e-05, + "loss": 0.5102, + "step": 3720 + }, + { + "epoch": 10.222527472527473, + "grad_norm": 11.100045204162598, + "learning_rate": 4.4888736263736266e-05, + "loss": 0.4461, + "step": 3721 + }, + { + "epoch": 10.225274725274724, + "grad_norm": 16.28251838684082, + "learning_rate": 4.488736263736264e-05, + "loss": 0.7621, + "step": 3722 + }, + { + "epoch": 10.228021978021978, + "grad_norm": 17.044523239135742, + "learning_rate": 4.488598901098901e-05, + "loss": 0.6107, + "step": 3723 + }, + { + "epoch": 10.23076923076923, + "grad_norm": 12.142094612121582, + "learning_rate": 4.488461538461539e-05, + "loss": 0.514, + "step": 3724 + }, + { + "epoch": 10.233516483516484, + "grad_norm": 8.316386222839355, + "learning_rate": 4.488324175824176e-05, + "loss": 0.2478, + "step": 3725 + }, + { + "epoch": 10.236263736263735, + "grad_norm": 8.066507339477539, + "learning_rate": 4.488186813186813e-05, + "loss": 0.3746, + "step": 3726 + }, + { + "epoch": 10.239010989010989, + "grad_norm": 12.554228782653809, + "learning_rate": 4.4880494505494506e-05, + "loss": 0.5748, + "step": 3727 + }, + { + "epoch": 10.241758241758241, + "grad_norm": 10.94845962524414, + "learning_rate": 4.487912087912088e-05, + "loss": 0.5611, + "step": 3728 + }, + { + "epoch": 10.244505494505495, + "grad_norm": 10.184335708618164, + "learning_rate": 4.487774725274725e-05, + "loss": 0.2773, + "step": 3729 + }, + { + "epoch": 10.247252747252748, + "grad_norm": 9.380012512207031, + "learning_rate": 4.487637362637363e-05, + "loss": 0.2754, + "step": 3730 + }, + { + "epoch": 10.25, + "grad_norm": 12.281593322753906, + "learning_rate": 4.4875e-05, + "loss": 0.5422, + "step": 3731 + }, + { + "epoch": 10.252747252747252, + "grad_norm": 13.898869514465332, + "learning_rate": 4.4873626373626377e-05, + "loss": 0.5132, + "step": 3732 + }, + { + "epoch": 10.255494505494505, + "grad_norm": 16.435834884643555, + "learning_rate": 4.487225274725275e-05, + "loss": 0.8768, + "step": 3733 + }, + { + "epoch": 10.258241758241759, + "grad_norm": 17.63187026977539, + "learning_rate": 4.4870879120879123e-05, + "loss": 0.4723, + "step": 3734 + }, + { + "epoch": 10.260989010989011, + "grad_norm": 6.231398105621338, + "learning_rate": 4.48695054945055e-05, + "loss": 0.2429, + "step": 3735 + }, + { + "epoch": 10.263736263736265, + "grad_norm": 14.80846881866455, + "learning_rate": 4.486813186813187e-05, + "loss": 0.4028, + "step": 3736 + }, + { + "epoch": 10.266483516483516, + "grad_norm": 15.681675910949707, + "learning_rate": 4.486675824175825e-05, + "loss": 0.4875, + "step": 3737 + }, + { + "epoch": 10.26923076923077, + "grad_norm": 14.607465744018555, + "learning_rate": 4.486538461538462e-05, + "loss": 0.497, + "step": 3738 + }, + { + "epoch": 10.271978021978022, + "grad_norm": 11.493581771850586, + "learning_rate": 4.4864010989010994e-05, + "loss": 0.4429, + "step": 3739 + }, + { + "epoch": 10.274725274725276, + "grad_norm": 13.373882293701172, + "learning_rate": 4.4862637362637364e-05, + "loss": 0.4549, + "step": 3740 + }, + { + "epoch": 10.277472527472527, + "grad_norm": 13.615224838256836, + "learning_rate": 4.4861263736263734e-05, + "loss": 0.4524, + "step": 3741 + }, + { + "epoch": 10.280219780219781, + "grad_norm": 12.867456436157227, + "learning_rate": 4.485989010989011e-05, + "loss": 0.4459, + "step": 3742 + }, + { + "epoch": 10.282967032967033, + "grad_norm": 12.841781616210938, + "learning_rate": 4.485851648351649e-05, + "loss": 0.5061, + "step": 3743 + }, + { + "epoch": 10.285714285714286, + "grad_norm": 16.091651916503906, + "learning_rate": 4.485714285714286e-05, + "loss": 0.649, + "step": 3744 + }, + { + "epoch": 10.288461538461538, + "grad_norm": 10.649213790893555, + "learning_rate": 4.4855769230769234e-05, + "loss": 0.5381, + "step": 3745 + }, + { + "epoch": 10.291208791208792, + "grad_norm": 9.825004577636719, + "learning_rate": 4.4854395604395604e-05, + "loss": 0.3346, + "step": 3746 + }, + { + "epoch": 10.293956043956044, + "grad_norm": 16.422199249267578, + "learning_rate": 4.485302197802198e-05, + "loss": 0.8598, + "step": 3747 + }, + { + "epoch": 10.296703296703297, + "grad_norm": 16.166414260864258, + "learning_rate": 4.485164835164836e-05, + "loss": 0.5913, + "step": 3748 + }, + { + "epoch": 10.29945054945055, + "grad_norm": 19.9667911529541, + "learning_rate": 4.485027472527473e-05, + "loss": 1.0479, + "step": 3749 + }, + { + "epoch": 10.302197802197803, + "grad_norm": 8.257761001586914, + "learning_rate": 4.4848901098901105e-05, + "loss": 0.2962, + "step": 3750 + }, + { + "epoch": 10.304945054945055, + "grad_norm": 9.980966567993164, + "learning_rate": 4.4847527472527475e-05, + "loss": 0.3021, + "step": 3751 + }, + { + "epoch": 10.307692307692308, + "grad_norm": 7.795393466949463, + "learning_rate": 4.484615384615385e-05, + "loss": 0.2567, + "step": 3752 + }, + { + "epoch": 10.31043956043956, + "grad_norm": 13.28386402130127, + "learning_rate": 4.484478021978022e-05, + "loss": 0.5258, + "step": 3753 + }, + { + "epoch": 10.313186813186814, + "grad_norm": 11.915828704833984, + "learning_rate": 4.48434065934066e-05, + "loss": 0.3747, + "step": 3754 + }, + { + "epoch": 10.315934065934066, + "grad_norm": 15.605371475219727, + "learning_rate": 4.484203296703297e-05, + "loss": 0.6353, + "step": 3755 + }, + { + "epoch": 10.31868131868132, + "grad_norm": 15.815308570861816, + "learning_rate": 4.484065934065934e-05, + "loss": 0.7043, + "step": 3756 + }, + { + "epoch": 10.321428571428571, + "grad_norm": 9.038290977478027, + "learning_rate": 4.4839285714285715e-05, + "loss": 0.3638, + "step": 3757 + }, + { + "epoch": 10.324175824175825, + "grad_norm": 14.031686782836914, + "learning_rate": 4.4837912087912085e-05, + "loss": 0.4189, + "step": 3758 + }, + { + "epoch": 10.326923076923077, + "grad_norm": 10.781455039978027, + "learning_rate": 4.483653846153846e-05, + "loss": 0.322, + "step": 3759 + }, + { + "epoch": 10.32967032967033, + "grad_norm": 15.936960220336914, + "learning_rate": 4.483516483516484e-05, + "loss": 0.5635, + "step": 3760 + }, + { + "epoch": 10.332417582417582, + "grad_norm": 15.725106239318848, + "learning_rate": 4.483379120879121e-05, + "loss": 0.6105, + "step": 3761 + }, + { + "epoch": 10.335164835164836, + "grad_norm": 15.023571014404297, + "learning_rate": 4.4832417582417586e-05, + "loss": 0.6816, + "step": 3762 + }, + { + "epoch": 10.337912087912088, + "grad_norm": 15.754302024841309, + "learning_rate": 4.4831043956043956e-05, + "loss": 0.7364, + "step": 3763 + }, + { + "epoch": 10.340659340659341, + "grad_norm": 13.79681396484375, + "learning_rate": 4.482967032967033e-05, + "loss": 0.3891, + "step": 3764 + }, + { + "epoch": 10.343406593406593, + "grad_norm": 9.84841537475586, + "learning_rate": 4.482829670329671e-05, + "loss": 0.3989, + "step": 3765 + }, + { + "epoch": 10.346153846153847, + "grad_norm": 14.843132972717285, + "learning_rate": 4.482692307692308e-05, + "loss": 0.404, + "step": 3766 + }, + { + "epoch": 10.348901098901099, + "grad_norm": 17.688554763793945, + "learning_rate": 4.4825549450549456e-05, + "loss": 0.7113, + "step": 3767 + }, + { + "epoch": 10.351648351648352, + "grad_norm": 18.053890228271484, + "learning_rate": 4.4824175824175826e-05, + "loss": 0.577, + "step": 3768 + }, + { + "epoch": 10.354395604395604, + "grad_norm": 16.20226287841797, + "learning_rate": 4.48228021978022e-05, + "loss": 0.5585, + "step": 3769 + }, + { + "epoch": 10.357142857142858, + "grad_norm": 11.245142936706543, + "learning_rate": 4.482142857142857e-05, + "loss": 0.4037, + "step": 3770 + }, + { + "epoch": 10.35989010989011, + "grad_norm": 15.682464599609375, + "learning_rate": 4.482005494505494e-05, + "loss": 0.5406, + "step": 3771 + }, + { + "epoch": 10.362637362637363, + "grad_norm": 9.083024978637695, + "learning_rate": 4.481868131868132e-05, + "loss": 0.3445, + "step": 3772 + }, + { + "epoch": 10.365384615384615, + "grad_norm": 13.364893913269043, + "learning_rate": 4.481730769230769e-05, + "loss": 0.44, + "step": 3773 + }, + { + "epoch": 10.368131868131869, + "grad_norm": 12.724578857421875, + "learning_rate": 4.4815934065934066e-05, + "loss": 0.6842, + "step": 3774 + }, + { + "epoch": 10.37087912087912, + "grad_norm": 12.423768043518066, + "learning_rate": 4.481456043956044e-05, + "loss": 0.5093, + "step": 3775 + }, + { + "epoch": 10.373626373626374, + "grad_norm": 12.59946346282959, + "learning_rate": 4.481318681318681e-05, + "loss": 0.4712, + "step": 3776 + }, + { + "epoch": 10.376373626373626, + "grad_norm": 9.649556159973145, + "learning_rate": 4.481181318681319e-05, + "loss": 0.3381, + "step": 3777 + }, + { + "epoch": 10.37912087912088, + "grad_norm": 11.950714111328125, + "learning_rate": 4.481043956043956e-05, + "loss": 0.2907, + "step": 3778 + }, + { + "epoch": 10.381868131868131, + "grad_norm": 23.609859466552734, + "learning_rate": 4.480906593406594e-05, + "loss": 1.2273, + "step": 3779 + }, + { + "epoch": 10.384615384615385, + "grad_norm": 14.585309982299805, + "learning_rate": 4.4807692307692314e-05, + "loss": 0.5441, + "step": 3780 + }, + { + "epoch": 10.387362637362637, + "grad_norm": 6.0797882080078125, + "learning_rate": 4.4806318681318684e-05, + "loss": 0.1507, + "step": 3781 + }, + { + "epoch": 10.39010989010989, + "grad_norm": 8.820293426513672, + "learning_rate": 4.480494505494506e-05, + "loss": 0.4451, + "step": 3782 + }, + { + "epoch": 10.392857142857142, + "grad_norm": 8.532896041870117, + "learning_rate": 4.480357142857143e-05, + "loss": 0.2687, + "step": 3783 + }, + { + "epoch": 10.395604395604396, + "grad_norm": 15.866132736206055, + "learning_rate": 4.480219780219781e-05, + "loss": 0.6337, + "step": 3784 + }, + { + "epoch": 10.398351648351648, + "grad_norm": 9.303363800048828, + "learning_rate": 4.480082417582418e-05, + "loss": 0.231, + "step": 3785 + }, + { + "epoch": 10.401098901098901, + "grad_norm": 17.187488555908203, + "learning_rate": 4.479945054945055e-05, + "loss": 0.5703, + "step": 3786 + }, + { + "epoch": 10.403846153846153, + "grad_norm": 13.391166687011719, + "learning_rate": 4.4798076923076924e-05, + "loss": 0.3605, + "step": 3787 + }, + { + "epoch": 10.406593406593407, + "grad_norm": 15.322218894958496, + "learning_rate": 4.4796703296703294e-05, + "loss": 0.4594, + "step": 3788 + }, + { + "epoch": 10.409340659340659, + "grad_norm": 16.281404495239258, + "learning_rate": 4.479532967032967e-05, + "loss": 0.7, + "step": 3789 + }, + { + "epoch": 10.412087912087912, + "grad_norm": 10.433601379394531, + "learning_rate": 4.479395604395605e-05, + "loss": 0.434, + "step": 3790 + }, + { + "epoch": 10.414835164835164, + "grad_norm": 9.678287506103516, + "learning_rate": 4.479258241758242e-05, + "loss": 0.3354, + "step": 3791 + }, + { + "epoch": 10.417582417582418, + "grad_norm": 11.101856231689453, + "learning_rate": 4.4791208791208795e-05, + "loss": 0.4027, + "step": 3792 + }, + { + "epoch": 10.42032967032967, + "grad_norm": 14.491065979003906, + "learning_rate": 4.4789835164835165e-05, + "loss": 0.6173, + "step": 3793 + }, + { + "epoch": 10.423076923076923, + "grad_norm": 8.872698783874512, + "learning_rate": 4.478846153846154e-05, + "loss": 0.2291, + "step": 3794 + }, + { + "epoch": 10.425824175824175, + "grad_norm": 20.704269409179688, + "learning_rate": 4.478708791208792e-05, + "loss": 0.8335, + "step": 3795 + }, + { + "epoch": 10.428571428571429, + "grad_norm": 18.22821807861328, + "learning_rate": 4.478571428571429e-05, + "loss": 0.6453, + "step": 3796 + }, + { + "epoch": 10.43131868131868, + "grad_norm": 13.812302589416504, + "learning_rate": 4.4784340659340665e-05, + "loss": 0.5153, + "step": 3797 + }, + { + "epoch": 10.434065934065934, + "grad_norm": 12.288105964660645, + "learning_rate": 4.4782967032967035e-05, + "loss": 0.4671, + "step": 3798 + }, + { + "epoch": 10.436813186813186, + "grad_norm": 14.02379322052002, + "learning_rate": 4.478159340659341e-05, + "loss": 0.6117, + "step": 3799 + }, + { + "epoch": 10.43956043956044, + "grad_norm": 17.2179012298584, + "learning_rate": 4.478021978021978e-05, + "loss": 0.6662, + "step": 3800 + }, + { + "epoch": 10.442307692307692, + "grad_norm": 17.573869705200195, + "learning_rate": 4.477884615384615e-05, + "loss": 0.8228, + "step": 3801 + }, + { + "epoch": 10.445054945054945, + "grad_norm": 14.60477352142334, + "learning_rate": 4.477747252747253e-05, + "loss": 0.4821, + "step": 3802 + }, + { + "epoch": 10.447802197802197, + "grad_norm": 9.2405424118042, + "learning_rate": 4.47760989010989e-05, + "loss": 0.2858, + "step": 3803 + }, + { + "epoch": 10.45054945054945, + "grad_norm": 13.770122528076172, + "learning_rate": 4.4774725274725275e-05, + "loss": 0.5344, + "step": 3804 + }, + { + "epoch": 10.453296703296703, + "grad_norm": 19.25586700439453, + "learning_rate": 4.477335164835165e-05, + "loss": 0.6347, + "step": 3805 + }, + { + "epoch": 10.456043956043956, + "grad_norm": 15.314568519592285, + "learning_rate": 4.477197802197802e-05, + "loss": 0.6231, + "step": 3806 + }, + { + "epoch": 10.458791208791208, + "grad_norm": 16.3738956451416, + "learning_rate": 4.47706043956044e-05, + "loss": 0.6152, + "step": 3807 + }, + { + "epoch": 10.461538461538462, + "grad_norm": 11.386672973632812, + "learning_rate": 4.476923076923077e-05, + "loss": 0.4085, + "step": 3808 + }, + { + "epoch": 10.464285714285714, + "grad_norm": 14.17333698272705, + "learning_rate": 4.4767857142857146e-05, + "loss": 0.6399, + "step": 3809 + }, + { + "epoch": 10.467032967032967, + "grad_norm": 12.770115852355957, + "learning_rate": 4.476648351648352e-05, + "loss": 0.4627, + "step": 3810 + }, + { + "epoch": 10.469780219780219, + "grad_norm": 5.884739875793457, + "learning_rate": 4.476510989010989e-05, + "loss": 0.1353, + "step": 3811 + }, + { + "epoch": 10.472527472527473, + "grad_norm": 16.929447174072266, + "learning_rate": 4.476373626373627e-05, + "loss": 0.7092, + "step": 3812 + }, + { + "epoch": 10.475274725274724, + "grad_norm": 9.280831336975098, + "learning_rate": 4.476236263736264e-05, + "loss": 0.3247, + "step": 3813 + }, + { + "epoch": 10.478021978021978, + "grad_norm": 11.103588104248047, + "learning_rate": 4.476098901098901e-05, + "loss": 0.4617, + "step": 3814 + }, + { + "epoch": 10.48076923076923, + "grad_norm": 12.34941577911377, + "learning_rate": 4.4759615384615386e-05, + "loss": 0.5029, + "step": 3815 + }, + { + "epoch": 10.483516483516484, + "grad_norm": 11.125038146972656, + "learning_rate": 4.4758241758241756e-05, + "loss": 0.5087, + "step": 3816 + }, + { + "epoch": 10.486263736263735, + "grad_norm": 15.542630195617676, + "learning_rate": 4.475686813186813e-05, + "loss": 0.7062, + "step": 3817 + }, + { + "epoch": 10.489010989010989, + "grad_norm": 17.312744140625, + "learning_rate": 4.47554945054945e-05, + "loss": 0.5593, + "step": 3818 + }, + { + "epoch": 10.491758241758241, + "grad_norm": 8.086196899414062, + "learning_rate": 4.475412087912088e-05, + "loss": 0.2603, + "step": 3819 + }, + { + "epoch": 10.494505494505495, + "grad_norm": 15.337956428527832, + "learning_rate": 4.475274725274726e-05, + "loss": 0.5717, + "step": 3820 + }, + { + "epoch": 10.497252747252748, + "grad_norm": 13.8220853805542, + "learning_rate": 4.475137362637363e-05, + "loss": 0.5368, + "step": 3821 + }, + { + "epoch": 10.5, + "grad_norm": 12.428874015808105, + "learning_rate": 4.4750000000000004e-05, + "loss": 0.4196, + "step": 3822 + }, + { + "epoch": 10.502747252747252, + "grad_norm": 13.312653541564941, + "learning_rate": 4.4748626373626374e-05, + "loss": 0.4095, + "step": 3823 + }, + { + "epoch": 10.505494505494505, + "grad_norm": 13.02059555053711, + "learning_rate": 4.474725274725275e-05, + "loss": 0.6042, + "step": 3824 + }, + { + "epoch": 10.508241758241759, + "grad_norm": 5.946722507476807, + "learning_rate": 4.474587912087913e-05, + "loss": 0.1589, + "step": 3825 + }, + { + "epoch": 10.510989010989011, + "grad_norm": 14.598626136779785, + "learning_rate": 4.47445054945055e-05, + "loss": 0.816, + "step": 3826 + }, + { + "epoch": 10.513736263736263, + "grad_norm": 11.403963088989258, + "learning_rate": 4.4743131868131874e-05, + "loss": 0.3592, + "step": 3827 + }, + { + "epoch": 10.516483516483516, + "grad_norm": 13.483524322509766, + "learning_rate": 4.4741758241758244e-05, + "loss": 0.6212, + "step": 3828 + }, + { + "epoch": 10.51923076923077, + "grad_norm": 19.254199981689453, + "learning_rate": 4.4740384615384614e-05, + "loss": 0.9058, + "step": 3829 + }, + { + "epoch": 10.521978021978022, + "grad_norm": 11.265302658081055, + "learning_rate": 4.473901098901099e-05, + "loss": 0.3823, + "step": 3830 + }, + { + "epoch": 10.524725274725276, + "grad_norm": 13.180863380432129, + "learning_rate": 4.473763736263736e-05, + "loss": 0.6017, + "step": 3831 + }, + { + "epoch": 10.527472527472527, + "grad_norm": 13.187911987304688, + "learning_rate": 4.473626373626374e-05, + "loss": 0.6378, + "step": 3832 + }, + { + "epoch": 10.530219780219781, + "grad_norm": 16.671600341796875, + "learning_rate": 4.473489010989011e-05, + "loss": 0.5836, + "step": 3833 + }, + { + "epoch": 10.532967032967033, + "grad_norm": 9.281047821044922, + "learning_rate": 4.4733516483516484e-05, + "loss": 0.3114, + "step": 3834 + }, + { + "epoch": 10.535714285714286, + "grad_norm": 11.089178085327148, + "learning_rate": 4.473214285714286e-05, + "loss": 0.2949, + "step": 3835 + }, + { + "epoch": 10.538461538461538, + "grad_norm": 12.644865989685059, + "learning_rate": 4.473076923076923e-05, + "loss": 0.316, + "step": 3836 + }, + { + "epoch": 10.541208791208792, + "grad_norm": 19.621313095092773, + "learning_rate": 4.472939560439561e-05, + "loss": 0.6469, + "step": 3837 + }, + { + "epoch": 10.543956043956044, + "grad_norm": 10.090466499328613, + "learning_rate": 4.472802197802198e-05, + "loss": 0.3904, + "step": 3838 + }, + { + "epoch": 10.546703296703297, + "grad_norm": 13.20129680633545, + "learning_rate": 4.4726648351648355e-05, + "loss": 0.5493, + "step": 3839 + }, + { + "epoch": 10.54945054945055, + "grad_norm": 20.435665130615234, + "learning_rate": 4.472527472527473e-05, + "loss": 0.9776, + "step": 3840 + }, + { + "epoch": 10.552197802197803, + "grad_norm": 13.920609474182129, + "learning_rate": 4.47239010989011e-05, + "loss": 0.5429, + "step": 3841 + }, + { + "epoch": 10.554945054945055, + "grad_norm": 9.031139373779297, + "learning_rate": 4.472252747252748e-05, + "loss": 0.3468, + "step": 3842 + }, + { + "epoch": 10.557692307692308, + "grad_norm": 11.579024314880371, + "learning_rate": 4.472115384615385e-05, + "loss": 0.3109, + "step": 3843 + }, + { + "epoch": 10.56043956043956, + "grad_norm": 19.742778778076172, + "learning_rate": 4.471978021978022e-05, + "loss": 0.8191, + "step": 3844 + }, + { + "epoch": 10.563186813186814, + "grad_norm": 9.945218086242676, + "learning_rate": 4.4718406593406595e-05, + "loss": 0.308, + "step": 3845 + }, + { + "epoch": 10.565934065934066, + "grad_norm": 15.768914222717285, + "learning_rate": 4.4717032967032965e-05, + "loss": 0.7176, + "step": 3846 + }, + { + "epoch": 10.56868131868132, + "grad_norm": 12.774392127990723, + "learning_rate": 4.471565934065934e-05, + "loss": 0.5363, + "step": 3847 + }, + { + "epoch": 10.571428571428571, + "grad_norm": 15.662930488586426, + "learning_rate": 4.471428571428571e-05, + "loss": 0.6802, + "step": 3848 + }, + { + "epoch": 10.574175824175825, + "grad_norm": 14.199695587158203, + "learning_rate": 4.471291208791209e-05, + "loss": 0.4114, + "step": 3849 + }, + { + "epoch": 10.576923076923077, + "grad_norm": 9.237469673156738, + "learning_rate": 4.4711538461538466e-05, + "loss": 0.2303, + "step": 3850 + }, + { + "epoch": 10.57967032967033, + "grad_norm": 18.040952682495117, + "learning_rate": 4.4710164835164836e-05, + "loss": 0.7669, + "step": 3851 + }, + { + "epoch": 10.582417582417582, + "grad_norm": 13.204828262329102, + "learning_rate": 4.470879120879121e-05, + "loss": 0.7061, + "step": 3852 + }, + { + "epoch": 10.585164835164836, + "grad_norm": 13.5301513671875, + "learning_rate": 4.470741758241758e-05, + "loss": 0.6076, + "step": 3853 + }, + { + "epoch": 10.587912087912088, + "grad_norm": 14.693580627441406, + "learning_rate": 4.470604395604396e-05, + "loss": 0.6007, + "step": 3854 + }, + { + "epoch": 10.590659340659341, + "grad_norm": 10.387557029724121, + "learning_rate": 4.4704670329670336e-05, + "loss": 0.3778, + "step": 3855 + }, + { + "epoch": 10.593406593406593, + "grad_norm": 10.535540580749512, + "learning_rate": 4.4703296703296706e-05, + "loss": 0.4169, + "step": 3856 + }, + { + "epoch": 10.596153846153847, + "grad_norm": 18.819278717041016, + "learning_rate": 4.470192307692308e-05, + "loss": 1.0922, + "step": 3857 + }, + { + "epoch": 10.598901098901099, + "grad_norm": 14.894027709960938, + "learning_rate": 4.470054945054945e-05, + "loss": 0.7006, + "step": 3858 + }, + { + "epoch": 10.601648351648352, + "grad_norm": 14.666962623596191, + "learning_rate": 4.469917582417582e-05, + "loss": 0.4769, + "step": 3859 + }, + { + "epoch": 10.604395604395604, + "grad_norm": 9.397297859191895, + "learning_rate": 4.46978021978022e-05, + "loss": 0.2459, + "step": 3860 + }, + { + "epoch": 10.607142857142858, + "grad_norm": 18.279855728149414, + "learning_rate": 4.469642857142857e-05, + "loss": 0.5408, + "step": 3861 + }, + { + "epoch": 10.60989010989011, + "grad_norm": 12.153090476989746, + "learning_rate": 4.4695054945054947e-05, + "loss": 0.4381, + "step": 3862 + }, + { + "epoch": 10.612637362637363, + "grad_norm": 17.10555648803711, + "learning_rate": 4.4693681318681317e-05, + "loss": 0.8431, + "step": 3863 + }, + { + "epoch": 10.615384615384615, + "grad_norm": 23.2812557220459, + "learning_rate": 4.4692307692307693e-05, + "loss": 1.2033, + "step": 3864 + }, + { + "epoch": 10.618131868131869, + "grad_norm": 10.963319778442383, + "learning_rate": 4.469093406593407e-05, + "loss": 0.3029, + "step": 3865 + }, + { + "epoch": 10.62087912087912, + "grad_norm": 12.850751876831055, + "learning_rate": 4.468956043956044e-05, + "loss": 0.5809, + "step": 3866 + }, + { + "epoch": 10.623626373626374, + "grad_norm": 9.888993263244629, + "learning_rate": 4.468818681318682e-05, + "loss": 0.2868, + "step": 3867 + }, + { + "epoch": 10.626373626373626, + "grad_norm": 7.301464557647705, + "learning_rate": 4.468681318681319e-05, + "loss": 0.2448, + "step": 3868 + }, + { + "epoch": 10.62912087912088, + "grad_norm": 15.759703636169434, + "learning_rate": 4.4685439560439564e-05, + "loss": 0.7459, + "step": 3869 + }, + { + "epoch": 10.631868131868131, + "grad_norm": 11.739058494567871, + "learning_rate": 4.468406593406594e-05, + "loss": 0.45, + "step": 3870 + }, + { + "epoch": 10.634615384615385, + "grad_norm": 11.133711814880371, + "learning_rate": 4.468269230769231e-05, + "loss": 0.3546, + "step": 3871 + }, + { + "epoch": 10.637362637362637, + "grad_norm": 11.753990173339844, + "learning_rate": 4.468131868131869e-05, + "loss": 0.3469, + "step": 3872 + }, + { + "epoch": 10.64010989010989, + "grad_norm": 8.931634902954102, + "learning_rate": 4.467994505494506e-05, + "loss": 0.3611, + "step": 3873 + }, + { + "epoch": 10.642857142857142, + "grad_norm": 16.77058982849121, + "learning_rate": 4.467857142857143e-05, + "loss": 0.8837, + "step": 3874 + }, + { + "epoch": 10.645604395604396, + "grad_norm": 9.649072647094727, + "learning_rate": 4.4677197802197804e-05, + "loss": 0.3193, + "step": 3875 + }, + { + "epoch": 10.648351648351648, + "grad_norm": 14.111461639404297, + "learning_rate": 4.4675824175824174e-05, + "loss": 0.6265, + "step": 3876 + }, + { + "epoch": 10.651098901098901, + "grad_norm": 10.304774284362793, + "learning_rate": 4.467445054945055e-05, + "loss": 0.5444, + "step": 3877 + }, + { + "epoch": 10.653846153846153, + "grad_norm": 8.104211807250977, + "learning_rate": 4.467307692307692e-05, + "loss": 0.2305, + "step": 3878 + }, + { + "epoch": 10.656593406593407, + "grad_norm": 9.067239761352539, + "learning_rate": 4.46717032967033e-05, + "loss": 0.3042, + "step": 3879 + }, + { + "epoch": 10.659340659340659, + "grad_norm": 18.75584602355957, + "learning_rate": 4.4670329670329675e-05, + "loss": 0.7332, + "step": 3880 + }, + { + "epoch": 10.662087912087912, + "grad_norm": 13.447474479675293, + "learning_rate": 4.4668956043956045e-05, + "loss": 0.4634, + "step": 3881 + }, + { + "epoch": 10.664835164835164, + "grad_norm": 13.679316520690918, + "learning_rate": 4.466758241758242e-05, + "loss": 0.4933, + "step": 3882 + }, + { + "epoch": 10.667582417582418, + "grad_norm": 18.310693740844727, + "learning_rate": 4.466620879120879e-05, + "loss": 0.7754, + "step": 3883 + }, + { + "epoch": 10.67032967032967, + "grad_norm": 16.47719955444336, + "learning_rate": 4.466483516483517e-05, + "loss": 0.6498, + "step": 3884 + }, + { + "epoch": 10.673076923076923, + "grad_norm": 11.73328685760498, + "learning_rate": 4.4663461538461545e-05, + "loss": 0.48, + "step": 3885 + }, + { + "epoch": 10.675824175824175, + "grad_norm": 13.652495384216309, + "learning_rate": 4.4662087912087915e-05, + "loss": 0.3831, + "step": 3886 + }, + { + "epoch": 10.678571428571429, + "grad_norm": 13.818142890930176, + "learning_rate": 4.466071428571429e-05, + "loss": 0.6747, + "step": 3887 + }, + { + "epoch": 10.68131868131868, + "grad_norm": 17.121427536010742, + "learning_rate": 4.465934065934066e-05, + "loss": 0.6625, + "step": 3888 + }, + { + "epoch": 10.684065934065934, + "grad_norm": 12.041855812072754, + "learning_rate": 4.465796703296703e-05, + "loss": 0.3918, + "step": 3889 + }, + { + "epoch": 10.686813186813186, + "grad_norm": 8.850809097290039, + "learning_rate": 4.465659340659341e-05, + "loss": 0.3657, + "step": 3890 + }, + { + "epoch": 10.68956043956044, + "grad_norm": 9.884147644042969, + "learning_rate": 4.465521978021978e-05, + "loss": 0.3738, + "step": 3891 + }, + { + "epoch": 10.692307692307692, + "grad_norm": 16.8331298828125, + "learning_rate": 4.4653846153846156e-05, + "loss": 0.6565, + "step": 3892 + }, + { + "epoch": 10.695054945054945, + "grad_norm": 12.186689376831055, + "learning_rate": 4.4652472527472526e-05, + "loss": 0.4096, + "step": 3893 + }, + { + "epoch": 10.697802197802197, + "grad_norm": 12.587782859802246, + "learning_rate": 4.46510989010989e-05, + "loss": 0.3514, + "step": 3894 + }, + { + "epoch": 10.70054945054945, + "grad_norm": 16.075010299682617, + "learning_rate": 4.464972527472528e-05, + "loss": 0.8118, + "step": 3895 + }, + { + "epoch": 10.703296703296703, + "grad_norm": 13.948249816894531, + "learning_rate": 4.464835164835165e-05, + "loss": 0.4713, + "step": 3896 + }, + { + "epoch": 10.706043956043956, + "grad_norm": 15.565829277038574, + "learning_rate": 4.4646978021978026e-05, + "loss": 0.664, + "step": 3897 + }, + { + "epoch": 10.708791208791208, + "grad_norm": 9.154350280761719, + "learning_rate": 4.4645604395604396e-05, + "loss": 0.2637, + "step": 3898 + }, + { + "epoch": 10.711538461538462, + "grad_norm": 14.044010162353516, + "learning_rate": 4.464423076923077e-05, + "loss": 0.6475, + "step": 3899 + }, + { + "epoch": 10.714285714285714, + "grad_norm": 12.077484130859375, + "learning_rate": 4.464285714285715e-05, + "loss": 0.3798, + "step": 3900 + }, + { + "epoch": 10.717032967032967, + "grad_norm": 8.18334674835205, + "learning_rate": 4.464148351648352e-05, + "loss": 0.2601, + "step": 3901 + }, + { + "epoch": 10.719780219780219, + "grad_norm": 8.095651626586914, + "learning_rate": 4.4640109890109896e-05, + "loss": 0.2929, + "step": 3902 + }, + { + "epoch": 10.722527472527473, + "grad_norm": 17.482257843017578, + "learning_rate": 4.4638736263736266e-05, + "loss": 0.8808, + "step": 3903 + }, + { + "epoch": 10.725274725274724, + "grad_norm": 16.062664031982422, + "learning_rate": 4.4637362637362636e-05, + "loss": 0.5488, + "step": 3904 + }, + { + "epoch": 10.728021978021978, + "grad_norm": 14.300386428833008, + "learning_rate": 4.463598901098901e-05, + "loss": 0.523, + "step": 3905 + }, + { + "epoch": 10.73076923076923, + "grad_norm": 8.476550102233887, + "learning_rate": 4.463461538461538e-05, + "loss": 0.2599, + "step": 3906 + }, + { + "epoch": 10.733516483516484, + "grad_norm": 17.321619033813477, + "learning_rate": 4.463324175824176e-05, + "loss": 0.8613, + "step": 3907 + }, + { + "epoch": 10.736263736263737, + "grad_norm": 13.415711402893066, + "learning_rate": 4.463186813186813e-05, + "loss": 0.5488, + "step": 3908 + }, + { + "epoch": 10.739010989010989, + "grad_norm": 12.79286003112793, + "learning_rate": 4.463049450549451e-05, + "loss": 0.74, + "step": 3909 + }, + { + "epoch": 10.741758241758241, + "grad_norm": 11.575928688049316, + "learning_rate": 4.4629120879120884e-05, + "loss": 0.4307, + "step": 3910 + }, + { + "epoch": 10.744505494505495, + "grad_norm": 9.291680335998535, + "learning_rate": 4.4627747252747254e-05, + "loss": 0.2834, + "step": 3911 + }, + { + "epoch": 10.747252747252748, + "grad_norm": 18.70722007751465, + "learning_rate": 4.462637362637363e-05, + "loss": 0.64, + "step": 3912 + }, + { + "epoch": 10.75, + "grad_norm": 6.484196662902832, + "learning_rate": 4.4625e-05, + "loss": 0.3002, + "step": 3913 + }, + { + "epoch": 10.752747252747252, + "grad_norm": 11.944745063781738, + "learning_rate": 4.462362637362638e-05, + "loss": 0.3826, + "step": 3914 + }, + { + "epoch": 10.755494505494505, + "grad_norm": 15.085958480834961, + "learning_rate": 4.4622252747252754e-05, + "loss": 0.5877, + "step": 3915 + }, + { + "epoch": 10.758241758241759, + "grad_norm": 14.027709007263184, + "learning_rate": 4.4620879120879124e-05, + "loss": 0.4893, + "step": 3916 + }, + { + "epoch": 10.760989010989011, + "grad_norm": 15.706252098083496, + "learning_rate": 4.46195054945055e-05, + "loss": 0.8278, + "step": 3917 + }, + { + "epoch": 10.763736263736263, + "grad_norm": 14.272819519042969, + "learning_rate": 4.461813186813187e-05, + "loss": 0.6666, + "step": 3918 + }, + { + "epoch": 10.766483516483516, + "grad_norm": 14.558602333068848, + "learning_rate": 4.461675824175824e-05, + "loss": 0.4638, + "step": 3919 + }, + { + "epoch": 10.76923076923077, + "grad_norm": 13.274178504943848, + "learning_rate": 4.461538461538462e-05, + "loss": 0.4714, + "step": 3920 + }, + { + "epoch": 10.771978021978022, + "grad_norm": 13.493009567260742, + "learning_rate": 4.461401098901099e-05, + "loss": 0.4037, + "step": 3921 + }, + { + "epoch": 10.774725274725276, + "grad_norm": 19.37114906311035, + "learning_rate": 4.4612637362637365e-05, + "loss": 0.5862, + "step": 3922 + }, + { + "epoch": 10.777472527472527, + "grad_norm": 16.430400848388672, + "learning_rate": 4.4611263736263735e-05, + "loss": 0.6216, + "step": 3923 + }, + { + "epoch": 10.780219780219781, + "grad_norm": 13.773836135864258, + "learning_rate": 4.460989010989011e-05, + "loss": 0.4875, + "step": 3924 + }, + { + "epoch": 10.782967032967033, + "grad_norm": 13.62440299987793, + "learning_rate": 4.460851648351649e-05, + "loss": 0.5429, + "step": 3925 + }, + { + "epoch": 10.785714285714286, + "grad_norm": 13.171361923217773, + "learning_rate": 4.460714285714286e-05, + "loss": 0.4489, + "step": 3926 + }, + { + "epoch": 10.788461538461538, + "grad_norm": 12.177175521850586, + "learning_rate": 4.4605769230769235e-05, + "loss": 0.5297, + "step": 3927 + }, + { + "epoch": 10.791208791208792, + "grad_norm": 16.166954040527344, + "learning_rate": 4.4604395604395605e-05, + "loss": 0.6913, + "step": 3928 + }, + { + "epoch": 10.793956043956044, + "grad_norm": 13.69798469543457, + "learning_rate": 4.460302197802198e-05, + "loss": 0.6251, + "step": 3929 + }, + { + "epoch": 10.796703296703297, + "grad_norm": 7.555395126342773, + "learning_rate": 4.460164835164836e-05, + "loss": 0.2229, + "step": 3930 + }, + { + "epoch": 10.79945054945055, + "grad_norm": 11.187055587768555, + "learning_rate": 4.460027472527473e-05, + "loss": 0.5707, + "step": 3931 + }, + { + "epoch": 10.802197802197803, + "grad_norm": 11.45589542388916, + "learning_rate": 4.4598901098901105e-05, + "loss": 0.4117, + "step": 3932 + }, + { + "epoch": 10.804945054945055, + "grad_norm": 15.209145545959473, + "learning_rate": 4.4597527472527475e-05, + "loss": 0.5328, + "step": 3933 + }, + { + "epoch": 10.807692307692308, + "grad_norm": 11.721179008483887, + "learning_rate": 4.4596153846153845e-05, + "loss": 0.3431, + "step": 3934 + }, + { + "epoch": 10.81043956043956, + "grad_norm": 9.791288375854492, + "learning_rate": 4.459478021978022e-05, + "loss": 0.5139, + "step": 3935 + }, + { + "epoch": 10.813186813186814, + "grad_norm": 14.786367416381836, + "learning_rate": 4.459340659340659e-05, + "loss": 0.5333, + "step": 3936 + }, + { + "epoch": 10.815934065934066, + "grad_norm": 16.980066299438477, + "learning_rate": 4.459203296703297e-05, + "loss": 0.8516, + "step": 3937 + }, + { + "epoch": 10.81868131868132, + "grad_norm": 7.7261433601379395, + "learning_rate": 4.459065934065934e-05, + "loss": 0.3525, + "step": 3938 + }, + { + "epoch": 10.821428571428571, + "grad_norm": 12.744989395141602, + "learning_rate": 4.4589285714285716e-05, + "loss": 0.3314, + "step": 3939 + }, + { + "epoch": 10.824175824175825, + "grad_norm": 13.007109642028809, + "learning_rate": 4.458791208791209e-05, + "loss": 0.4064, + "step": 3940 + }, + { + "epoch": 10.826923076923077, + "grad_norm": 14.657024383544922, + "learning_rate": 4.458653846153846e-05, + "loss": 0.5952, + "step": 3941 + }, + { + "epoch": 10.82967032967033, + "grad_norm": 17.88890266418457, + "learning_rate": 4.458516483516484e-05, + "loss": 0.9563, + "step": 3942 + }, + { + "epoch": 10.832417582417582, + "grad_norm": 13.089701652526855, + "learning_rate": 4.458379120879121e-05, + "loss": 0.3406, + "step": 3943 + }, + { + "epoch": 10.835164835164836, + "grad_norm": 13.194331169128418, + "learning_rate": 4.4582417582417586e-05, + "loss": 0.4914, + "step": 3944 + }, + { + "epoch": 10.837912087912088, + "grad_norm": 10.016522407531738, + "learning_rate": 4.458104395604396e-05, + "loss": 0.4269, + "step": 3945 + }, + { + "epoch": 10.840659340659341, + "grad_norm": 13.886174201965332, + "learning_rate": 4.457967032967033e-05, + "loss": 0.7359, + "step": 3946 + }, + { + "epoch": 10.843406593406593, + "grad_norm": 16.596769332885742, + "learning_rate": 4.457829670329671e-05, + "loss": 0.7473, + "step": 3947 + }, + { + "epoch": 10.846153846153847, + "grad_norm": 8.518327713012695, + "learning_rate": 4.457692307692308e-05, + "loss": 0.2457, + "step": 3948 + }, + { + "epoch": 10.848901098901099, + "grad_norm": 9.77546215057373, + "learning_rate": 4.457554945054945e-05, + "loss": 0.2253, + "step": 3949 + }, + { + "epoch": 10.851648351648352, + "grad_norm": 15.476545333862305, + "learning_rate": 4.457417582417583e-05, + "loss": 0.6699, + "step": 3950 + }, + { + "epoch": 10.854395604395604, + "grad_norm": 16.76980209350586, + "learning_rate": 4.45728021978022e-05, + "loss": 0.7832, + "step": 3951 + }, + { + "epoch": 10.857142857142858, + "grad_norm": 10.720650672912598, + "learning_rate": 4.4571428571428574e-05, + "loss": 0.3347, + "step": 3952 + }, + { + "epoch": 10.85989010989011, + "grad_norm": 7.945478916168213, + "learning_rate": 4.4570054945054944e-05, + "loss": 0.2893, + "step": 3953 + }, + { + "epoch": 10.862637362637363, + "grad_norm": 15.52692985534668, + "learning_rate": 4.456868131868132e-05, + "loss": 0.5742, + "step": 3954 + }, + { + "epoch": 10.865384615384615, + "grad_norm": 15.529376029968262, + "learning_rate": 4.45673076923077e-05, + "loss": 0.3848, + "step": 3955 + }, + { + "epoch": 10.868131868131869, + "grad_norm": 10.225431442260742, + "learning_rate": 4.456593406593407e-05, + "loss": 0.325, + "step": 3956 + }, + { + "epoch": 10.87087912087912, + "grad_norm": 8.25500202178955, + "learning_rate": 4.4564560439560444e-05, + "loss": 0.3046, + "step": 3957 + }, + { + "epoch": 10.873626373626374, + "grad_norm": 16.968189239501953, + "learning_rate": 4.4563186813186814e-05, + "loss": 0.7252, + "step": 3958 + }, + { + "epoch": 10.876373626373626, + "grad_norm": 9.481592178344727, + "learning_rate": 4.456181318681319e-05, + "loss": 0.3356, + "step": 3959 + }, + { + "epoch": 10.87912087912088, + "grad_norm": 14.505577087402344, + "learning_rate": 4.456043956043957e-05, + "loss": 0.6092, + "step": 3960 + }, + { + "epoch": 10.881868131868131, + "grad_norm": 18.704265594482422, + "learning_rate": 4.455906593406594e-05, + "loss": 0.886, + "step": 3961 + }, + { + "epoch": 10.884615384615385, + "grad_norm": 15.614664077758789, + "learning_rate": 4.4557692307692314e-05, + "loss": 0.5091, + "step": 3962 + }, + { + "epoch": 10.887362637362637, + "grad_norm": 11.03896713256836, + "learning_rate": 4.4556318681318684e-05, + "loss": 0.4364, + "step": 3963 + }, + { + "epoch": 10.89010989010989, + "grad_norm": 14.130993843078613, + "learning_rate": 4.4554945054945054e-05, + "loss": 0.5654, + "step": 3964 + }, + { + "epoch": 10.892857142857142, + "grad_norm": 14.02644157409668, + "learning_rate": 4.455357142857143e-05, + "loss": 0.8904, + "step": 3965 + }, + { + "epoch": 10.895604395604396, + "grad_norm": 11.837194442749023, + "learning_rate": 4.45521978021978e-05, + "loss": 0.3752, + "step": 3966 + }, + { + "epoch": 10.898351648351648, + "grad_norm": 14.399565696716309, + "learning_rate": 4.455082417582418e-05, + "loss": 0.4798, + "step": 3967 + }, + { + "epoch": 10.901098901098901, + "grad_norm": 15.684873580932617, + "learning_rate": 4.454945054945055e-05, + "loss": 0.6328, + "step": 3968 + }, + { + "epoch": 10.903846153846153, + "grad_norm": 15.071587562561035, + "learning_rate": 4.4548076923076925e-05, + "loss": 0.7982, + "step": 3969 + }, + { + "epoch": 10.906593406593407, + "grad_norm": 7.5801801681518555, + "learning_rate": 4.45467032967033e-05, + "loss": 0.2101, + "step": 3970 + }, + { + "epoch": 10.909340659340659, + "grad_norm": 16.48130989074707, + "learning_rate": 4.454532967032967e-05, + "loss": 0.6176, + "step": 3971 + }, + { + "epoch": 10.912087912087912, + "grad_norm": 12.847129821777344, + "learning_rate": 4.454395604395605e-05, + "loss": 0.5436, + "step": 3972 + }, + { + "epoch": 10.914835164835164, + "grad_norm": 13.10358715057373, + "learning_rate": 4.454258241758242e-05, + "loss": 0.6054, + "step": 3973 + }, + { + "epoch": 10.917582417582418, + "grad_norm": 13.178669929504395, + "learning_rate": 4.4541208791208795e-05, + "loss": 0.7836, + "step": 3974 + }, + { + "epoch": 10.92032967032967, + "grad_norm": 15.935235023498535, + "learning_rate": 4.453983516483517e-05, + "loss": 0.823, + "step": 3975 + }, + { + "epoch": 10.923076923076923, + "grad_norm": 12.610442161560059, + "learning_rate": 4.453846153846154e-05, + "loss": 0.5234, + "step": 3976 + }, + { + "epoch": 10.925824175824175, + "grad_norm": 20.409095764160156, + "learning_rate": 4.453708791208792e-05, + "loss": 1.0511, + "step": 3977 + }, + { + "epoch": 10.928571428571429, + "grad_norm": 11.13054084777832, + "learning_rate": 4.453571428571429e-05, + "loss": 0.3907, + "step": 3978 + }, + { + "epoch": 10.93131868131868, + "grad_norm": 11.963383674621582, + "learning_rate": 4.453434065934066e-05, + "loss": 0.5222, + "step": 3979 + }, + { + "epoch": 10.934065934065934, + "grad_norm": 10.200756072998047, + "learning_rate": 4.4532967032967036e-05, + "loss": 0.3174, + "step": 3980 + }, + { + "epoch": 10.936813186813186, + "grad_norm": 12.911836624145508, + "learning_rate": 4.4531593406593406e-05, + "loss": 0.3708, + "step": 3981 + }, + { + "epoch": 10.93956043956044, + "grad_norm": 13.854068756103516, + "learning_rate": 4.453021978021978e-05, + "loss": 0.5004, + "step": 3982 + }, + { + "epoch": 10.942307692307692, + "grad_norm": 18.10212516784668, + "learning_rate": 4.452884615384615e-05, + "loss": 0.8211, + "step": 3983 + }, + { + "epoch": 10.945054945054945, + "grad_norm": 10.29050064086914, + "learning_rate": 4.452747252747253e-05, + "loss": 0.2772, + "step": 3984 + }, + { + "epoch": 10.947802197802197, + "grad_norm": 16.027698516845703, + "learning_rate": 4.45260989010989e-05, + "loss": 0.5273, + "step": 3985 + }, + { + "epoch": 10.95054945054945, + "grad_norm": 16.118898391723633, + "learning_rate": 4.4524725274725276e-05, + "loss": 0.4724, + "step": 3986 + }, + { + "epoch": 10.953296703296703, + "grad_norm": 14.997008323669434, + "learning_rate": 4.452335164835165e-05, + "loss": 0.5184, + "step": 3987 + }, + { + "epoch": 10.956043956043956, + "grad_norm": 21.319787979125977, + "learning_rate": 4.452197802197802e-05, + "loss": 0.815, + "step": 3988 + }, + { + "epoch": 10.958791208791208, + "grad_norm": 6.668311595916748, + "learning_rate": 4.45206043956044e-05, + "loss": 0.2416, + "step": 3989 + }, + { + "epoch": 10.961538461538462, + "grad_norm": 7.7369866371154785, + "learning_rate": 4.451923076923077e-05, + "loss": 0.1881, + "step": 3990 + }, + { + "epoch": 10.964285714285714, + "grad_norm": 13.357747077941895, + "learning_rate": 4.4517857142857147e-05, + "loss": 0.4203, + "step": 3991 + }, + { + "epoch": 10.967032967032967, + "grad_norm": 17.58279037475586, + "learning_rate": 4.451648351648352e-05, + "loss": 0.7866, + "step": 3992 + }, + { + "epoch": 10.969780219780219, + "grad_norm": 16.446531295776367, + "learning_rate": 4.451510989010989e-05, + "loss": 0.7595, + "step": 3993 + }, + { + "epoch": 10.972527472527473, + "grad_norm": 16.855121612548828, + "learning_rate": 4.4513736263736263e-05, + "loss": 0.7215, + "step": 3994 + }, + { + "epoch": 10.975274725274724, + "grad_norm": 10.497660636901855, + "learning_rate": 4.4512362637362633e-05, + "loss": 0.3691, + "step": 3995 + }, + { + "epoch": 10.978021978021978, + "grad_norm": 19.021636962890625, + "learning_rate": 4.451098901098901e-05, + "loss": 0.8369, + "step": 3996 + }, + { + "epoch": 10.98076923076923, + "grad_norm": 9.111525535583496, + "learning_rate": 4.450961538461539e-05, + "loss": 0.3462, + "step": 3997 + }, + { + "epoch": 10.983516483516484, + "grad_norm": 12.566564559936523, + "learning_rate": 4.450824175824176e-05, + "loss": 0.3238, + "step": 3998 + }, + { + "epoch": 10.986263736263737, + "grad_norm": 11.876863479614258, + "learning_rate": 4.4506868131868134e-05, + "loss": 0.4626, + "step": 3999 + }, + { + "epoch": 10.989010989010989, + "grad_norm": 5.716113567352295, + "learning_rate": 4.4505494505494504e-05, + "loss": 0.182, + "step": 4000 + }, + { + "epoch": 10.991758241758241, + "grad_norm": 15.869014739990234, + "learning_rate": 4.450412087912088e-05, + "loss": 0.7462, + "step": 4001 + }, + { + "epoch": 10.994505494505495, + "grad_norm": 19.066150665283203, + "learning_rate": 4.450274725274726e-05, + "loss": 0.8442, + "step": 4002 + }, + { + "epoch": 10.997252747252748, + "grad_norm": 17.737388610839844, + "learning_rate": 4.450137362637363e-05, + "loss": 0.861, + "step": 4003 + }, + { + "epoch": 11.0, + "grad_norm": 29.335420608520508, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.5141, + "step": 4004 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.6652892561983471, + "eval_f1": 0.648495713887079, + "eval_f1_DuraRiadoRio_64x64": 0.6130434782608696, + "eval_f1_Mole_64x64": 0.3333333333333333, + "eval_f1_Quebrado_64x64": 0.8339483394833949, + "eval_f1_RiadoRio_64x64": 0.63003663003663, + "eval_f1_RioFechado_64x64": 0.8321167883211679, + "eval_loss": 1.3893048763275146, + "eval_precision": 0.7754026286258103, + "eval_precision_DuraRiadoRio_64x64": 0.4462025316455696, + "eval_precision_Mole_64x64": 0.9666666666666667, + "eval_precision_Quebrado_64x64": 0.889763779527559, + "eval_precision_RiadoRio_64x64": 0.7107438016528925, + "eval_precision_RioFechado_64x64": 0.8636363636363636, + "eval_recall": 0.6667768305740879, + "eval_recall_DuraRiadoRio_64x64": 0.9791666666666666, + "eval_recall_Mole_64x64": 0.2013888888888889, + "eval_recall_Quebrado_64x64": 0.7847222222222222, + "eval_recall_RiadoRio_64x64": 0.5657894736842105, + "eval_recall_RioFechado_64x64": 0.8028169014084507, + "eval_runtime": 1.7855, + "eval_samples_per_second": 406.615, + "eval_steps_per_second": 25.763, + "step": 4004 + }, + { + "epoch": 11.002747252747254, + "grad_norm": 16.50416374206543, + "learning_rate": 4.4498626373626374e-05, + "loss": 0.5755, + "step": 4005 + }, + { + "epoch": 11.005494505494505, + "grad_norm": 13.63148307800293, + "learning_rate": 4.449725274725275e-05, + "loss": 0.5631, + "step": 4006 + }, + { + "epoch": 11.008241758241759, + "grad_norm": 21.340473175048828, + "learning_rate": 4.449587912087913e-05, + "loss": 0.8559, + "step": 4007 + }, + { + "epoch": 11.010989010989011, + "grad_norm": 11.660322189331055, + "learning_rate": 4.44945054945055e-05, + "loss": 0.2814, + "step": 4008 + }, + { + "epoch": 11.013736263736265, + "grad_norm": 14.504746437072754, + "learning_rate": 4.449313186813187e-05, + "loss": 0.7324, + "step": 4009 + }, + { + "epoch": 11.016483516483516, + "grad_norm": 10.52280044555664, + "learning_rate": 4.449175824175824e-05, + "loss": 0.2876, + "step": 4010 + }, + { + "epoch": 11.01923076923077, + "grad_norm": 15.132081031799316, + "learning_rate": 4.4490384615384615e-05, + "loss": 0.5716, + "step": 4011 + }, + { + "epoch": 11.021978021978022, + "grad_norm": 15.047322273254395, + "learning_rate": 4.448901098901099e-05, + "loss": 0.4958, + "step": 4012 + }, + { + "epoch": 11.024725274725276, + "grad_norm": 10.95723819732666, + "learning_rate": 4.448763736263736e-05, + "loss": 0.3842, + "step": 4013 + }, + { + "epoch": 11.027472527472527, + "grad_norm": 17.234922409057617, + "learning_rate": 4.448626373626374e-05, + "loss": 0.6318, + "step": 4014 + }, + { + "epoch": 11.030219780219781, + "grad_norm": 11.659867286682129, + "learning_rate": 4.448489010989011e-05, + "loss": 0.3983, + "step": 4015 + }, + { + "epoch": 11.032967032967033, + "grad_norm": 13.713886260986328, + "learning_rate": 4.4483516483516485e-05, + "loss": 0.5422, + "step": 4016 + }, + { + "epoch": 11.035714285714286, + "grad_norm": 15.18952465057373, + "learning_rate": 4.448214285714286e-05, + "loss": 0.4673, + "step": 4017 + }, + { + "epoch": 11.038461538461538, + "grad_norm": 17.087608337402344, + "learning_rate": 4.448076923076923e-05, + "loss": 0.697, + "step": 4018 + }, + { + "epoch": 11.041208791208792, + "grad_norm": 14.164155006408691, + "learning_rate": 4.447939560439561e-05, + "loss": 0.8204, + "step": 4019 + }, + { + "epoch": 11.043956043956044, + "grad_norm": 13.742775917053223, + "learning_rate": 4.447802197802198e-05, + "loss": 0.5027, + "step": 4020 + }, + { + "epoch": 11.046703296703297, + "grad_norm": 12.705801963806152, + "learning_rate": 4.4476648351648356e-05, + "loss": 0.4098, + "step": 4021 + }, + { + "epoch": 11.04945054945055, + "grad_norm": 15.379034996032715, + "learning_rate": 4.447527472527473e-05, + "loss": 0.6984, + "step": 4022 + }, + { + "epoch": 11.052197802197803, + "grad_norm": 14.041420936584473, + "learning_rate": 4.44739010989011e-05, + "loss": 0.4462, + "step": 4023 + }, + { + "epoch": 11.054945054945055, + "grad_norm": 12.13542652130127, + "learning_rate": 4.447252747252747e-05, + "loss": 0.3348, + "step": 4024 + }, + { + "epoch": 11.057692307692308, + "grad_norm": 16.003559112548828, + "learning_rate": 4.447115384615384e-05, + "loss": 0.4854, + "step": 4025 + }, + { + "epoch": 11.06043956043956, + "grad_norm": 11.260887145996094, + "learning_rate": 4.446978021978022e-05, + "loss": 0.4806, + "step": 4026 + }, + { + "epoch": 11.063186813186814, + "grad_norm": 14.245100975036621, + "learning_rate": 4.4468406593406596e-05, + "loss": 0.6325, + "step": 4027 + }, + { + "epoch": 11.065934065934066, + "grad_norm": 20.400920867919922, + "learning_rate": 4.4467032967032966e-05, + "loss": 0.6261, + "step": 4028 + }, + { + "epoch": 11.06868131868132, + "grad_norm": 16.201343536376953, + "learning_rate": 4.446565934065934e-05, + "loss": 0.4727, + "step": 4029 + }, + { + "epoch": 11.071428571428571, + "grad_norm": 6.222692012786865, + "learning_rate": 4.446428571428571e-05, + "loss": 0.1608, + "step": 4030 + }, + { + "epoch": 11.074175824175825, + "grad_norm": 10.87333869934082, + "learning_rate": 4.446291208791209e-05, + "loss": 0.2948, + "step": 4031 + }, + { + "epoch": 11.076923076923077, + "grad_norm": 12.91412353515625, + "learning_rate": 4.4461538461538466e-05, + "loss": 0.4704, + "step": 4032 + }, + { + "epoch": 11.07967032967033, + "grad_norm": 16.783761978149414, + "learning_rate": 4.4460164835164836e-05, + "loss": 0.4606, + "step": 4033 + }, + { + "epoch": 11.082417582417582, + "grad_norm": 15.86851692199707, + "learning_rate": 4.445879120879121e-05, + "loss": 0.4696, + "step": 4034 + }, + { + "epoch": 11.085164835164836, + "grad_norm": 15.144553184509277, + "learning_rate": 4.445741758241758e-05, + "loss": 0.6061, + "step": 4035 + }, + { + "epoch": 11.087912087912088, + "grad_norm": 7.487969398498535, + "learning_rate": 4.445604395604396e-05, + "loss": 0.2787, + "step": 4036 + }, + { + "epoch": 11.090659340659341, + "grad_norm": 10.309576034545898, + "learning_rate": 4.445467032967034e-05, + "loss": 0.5463, + "step": 4037 + }, + { + "epoch": 11.093406593406593, + "grad_norm": 18.9135684967041, + "learning_rate": 4.445329670329671e-05, + "loss": 0.9287, + "step": 4038 + }, + { + "epoch": 11.096153846153847, + "grad_norm": 8.547481536865234, + "learning_rate": 4.445192307692308e-05, + "loss": 0.3365, + "step": 4039 + }, + { + "epoch": 11.098901098901099, + "grad_norm": 13.60904598236084, + "learning_rate": 4.445054945054945e-05, + "loss": 0.6913, + "step": 4040 + }, + { + "epoch": 11.101648351648352, + "grad_norm": 19.457731246948242, + "learning_rate": 4.4449175824175824e-05, + "loss": 0.6128, + "step": 4041 + }, + { + "epoch": 11.104395604395604, + "grad_norm": 24.460020065307617, + "learning_rate": 4.44478021978022e-05, + "loss": 1.5557, + "step": 4042 + }, + { + "epoch": 11.107142857142858, + "grad_norm": 14.204195022583008, + "learning_rate": 4.444642857142857e-05, + "loss": 0.4797, + "step": 4043 + }, + { + "epoch": 11.10989010989011, + "grad_norm": 6.649289131164551, + "learning_rate": 4.444505494505495e-05, + "loss": 0.2516, + "step": 4044 + }, + { + "epoch": 11.112637362637363, + "grad_norm": 8.413649559020996, + "learning_rate": 4.444368131868132e-05, + "loss": 0.202, + "step": 4045 + }, + { + "epoch": 11.115384615384615, + "grad_norm": 8.108405113220215, + "learning_rate": 4.4442307692307694e-05, + "loss": 0.3086, + "step": 4046 + }, + { + "epoch": 11.118131868131869, + "grad_norm": 9.918314933776855, + "learning_rate": 4.444093406593407e-05, + "loss": 0.2707, + "step": 4047 + }, + { + "epoch": 11.12087912087912, + "grad_norm": 10.466681480407715, + "learning_rate": 4.443956043956044e-05, + "loss": 0.5289, + "step": 4048 + }, + { + "epoch": 11.123626373626374, + "grad_norm": 18.454256057739258, + "learning_rate": 4.443818681318682e-05, + "loss": 0.8012, + "step": 4049 + }, + { + "epoch": 11.126373626373626, + "grad_norm": 11.045997619628906, + "learning_rate": 4.443681318681319e-05, + "loss": 0.4115, + "step": 4050 + }, + { + "epoch": 11.12912087912088, + "grad_norm": 18.06109046936035, + "learning_rate": 4.4435439560439565e-05, + "loss": 0.5479, + "step": 4051 + }, + { + "epoch": 11.131868131868131, + "grad_norm": 12.483072280883789, + "learning_rate": 4.443406593406594e-05, + "loss": 0.5286, + "step": 4052 + }, + { + "epoch": 11.134615384615385, + "grad_norm": 8.004158020019531, + "learning_rate": 4.443269230769231e-05, + "loss": 0.2138, + "step": 4053 + }, + { + "epoch": 11.137362637362637, + "grad_norm": 16.71470832824707, + "learning_rate": 4.443131868131868e-05, + "loss": 0.6344, + "step": 4054 + }, + { + "epoch": 11.14010989010989, + "grad_norm": 13.173959732055664, + "learning_rate": 4.442994505494505e-05, + "loss": 0.4712, + "step": 4055 + }, + { + "epoch": 11.142857142857142, + "grad_norm": 16.777315139770508, + "learning_rate": 4.442857142857143e-05, + "loss": 0.6623, + "step": 4056 + }, + { + "epoch": 11.145604395604396, + "grad_norm": 9.883607864379883, + "learning_rate": 4.4427197802197805e-05, + "loss": 0.3654, + "step": 4057 + }, + { + "epoch": 11.148351648351648, + "grad_norm": 8.848152160644531, + "learning_rate": 4.4425824175824175e-05, + "loss": 0.2582, + "step": 4058 + }, + { + "epoch": 11.151098901098901, + "grad_norm": 10.224408149719238, + "learning_rate": 4.442445054945055e-05, + "loss": 0.3468, + "step": 4059 + }, + { + "epoch": 11.153846153846153, + "grad_norm": 8.094329833984375, + "learning_rate": 4.442307692307692e-05, + "loss": 0.2539, + "step": 4060 + }, + { + "epoch": 11.156593406593407, + "grad_norm": 16.369508743286133, + "learning_rate": 4.44217032967033e-05, + "loss": 0.7397, + "step": 4061 + }, + { + "epoch": 11.159340659340659, + "grad_norm": 20.286745071411133, + "learning_rate": 4.4420329670329675e-05, + "loss": 0.9661, + "step": 4062 + }, + { + "epoch": 11.162087912087912, + "grad_norm": 17.15540313720703, + "learning_rate": 4.4418956043956045e-05, + "loss": 0.6111, + "step": 4063 + }, + { + "epoch": 11.164835164835164, + "grad_norm": 18.76331329345703, + "learning_rate": 4.441758241758242e-05, + "loss": 1.2957, + "step": 4064 + }, + { + "epoch": 11.167582417582418, + "grad_norm": 19.39500617980957, + "learning_rate": 4.441620879120879e-05, + "loss": 0.8708, + "step": 4065 + }, + { + "epoch": 11.17032967032967, + "grad_norm": 15.611717224121094, + "learning_rate": 4.441483516483517e-05, + "loss": 0.5296, + "step": 4066 + }, + { + "epoch": 11.173076923076923, + "grad_norm": 10.43685245513916, + "learning_rate": 4.4413461538461546e-05, + "loss": 0.2613, + "step": 4067 + }, + { + "epoch": 11.175824175824175, + "grad_norm": 7.070977210998535, + "learning_rate": 4.4412087912087916e-05, + "loss": 0.2035, + "step": 4068 + }, + { + "epoch": 11.178571428571429, + "grad_norm": 16.557327270507812, + "learning_rate": 4.4410714285714286e-05, + "loss": 0.6433, + "step": 4069 + }, + { + "epoch": 11.18131868131868, + "grad_norm": 13.710708618164062, + "learning_rate": 4.4409340659340656e-05, + "loss": 0.5253, + "step": 4070 + }, + { + "epoch": 11.184065934065934, + "grad_norm": 12.035454750061035, + "learning_rate": 4.440796703296703e-05, + "loss": 0.4968, + "step": 4071 + }, + { + "epoch": 11.186813186813186, + "grad_norm": 7.514224529266357, + "learning_rate": 4.440659340659341e-05, + "loss": 0.3001, + "step": 4072 + }, + { + "epoch": 11.18956043956044, + "grad_norm": 8.733914375305176, + "learning_rate": 4.440521978021978e-05, + "loss": 0.2926, + "step": 4073 + }, + { + "epoch": 11.192307692307692, + "grad_norm": 11.926896095275879, + "learning_rate": 4.4403846153846156e-05, + "loss": 0.4204, + "step": 4074 + }, + { + "epoch": 11.195054945054945, + "grad_norm": 11.52342414855957, + "learning_rate": 4.4402472527472526e-05, + "loss": 0.3366, + "step": 4075 + }, + { + "epoch": 11.197802197802197, + "grad_norm": 15.295391082763672, + "learning_rate": 4.44010989010989e-05, + "loss": 0.7822, + "step": 4076 + }, + { + "epoch": 11.20054945054945, + "grad_norm": 8.80688190460205, + "learning_rate": 4.439972527472528e-05, + "loss": 0.2677, + "step": 4077 + }, + { + "epoch": 11.203296703296703, + "grad_norm": 9.50202751159668, + "learning_rate": 4.439835164835165e-05, + "loss": 0.2367, + "step": 4078 + }, + { + "epoch": 11.206043956043956, + "grad_norm": 15.489167213439941, + "learning_rate": 4.439697802197803e-05, + "loss": 0.5208, + "step": 4079 + }, + { + "epoch": 11.208791208791208, + "grad_norm": 13.45568561553955, + "learning_rate": 4.43956043956044e-05, + "loss": 0.6533, + "step": 4080 + }, + { + "epoch": 11.211538461538462, + "grad_norm": 7.8928914070129395, + "learning_rate": 4.4394230769230773e-05, + "loss": 0.2072, + "step": 4081 + }, + { + "epoch": 11.214285714285714, + "grad_norm": 11.681666374206543, + "learning_rate": 4.439285714285715e-05, + "loss": 0.6301, + "step": 4082 + }, + { + "epoch": 11.217032967032967, + "grad_norm": 13.436929702758789, + "learning_rate": 4.439148351648352e-05, + "loss": 0.5908, + "step": 4083 + }, + { + "epoch": 11.219780219780219, + "grad_norm": 16.557764053344727, + "learning_rate": 4.439010989010989e-05, + "loss": 0.659, + "step": 4084 + }, + { + "epoch": 11.222527472527473, + "grad_norm": 5.00301456451416, + "learning_rate": 4.438873626373626e-05, + "loss": 0.1394, + "step": 4085 + }, + { + "epoch": 11.225274725274724, + "grad_norm": 21.088645935058594, + "learning_rate": 4.438736263736264e-05, + "loss": 0.7423, + "step": 4086 + }, + { + "epoch": 11.228021978021978, + "grad_norm": 17.354650497436523, + "learning_rate": 4.4385989010989014e-05, + "loss": 0.5233, + "step": 4087 + }, + { + "epoch": 11.23076923076923, + "grad_norm": 11.717500686645508, + "learning_rate": 4.4384615384615384e-05, + "loss": 0.3252, + "step": 4088 + }, + { + "epoch": 11.233516483516484, + "grad_norm": 13.538732528686523, + "learning_rate": 4.438324175824176e-05, + "loss": 0.6266, + "step": 4089 + }, + { + "epoch": 11.236263736263735, + "grad_norm": 13.61926555633545, + "learning_rate": 4.438186813186813e-05, + "loss": 0.417, + "step": 4090 + }, + { + "epoch": 11.239010989010989, + "grad_norm": 7.602597236633301, + "learning_rate": 4.438049450549451e-05, + "loss": 0.3317, + "step": 4091 + }, + { + "epoch": 11.241758241758241, + "grad_norm": 6.479081630706787, + "learning_rate": 4.4379120879120884e-05, + "loss": 0.1567, + "step": 4092 + }, + { + "epoch": 11.244505494505495, + "grad_norm": 10.778813362121582, + "learning_rate": 4.4377747252747254e-05, + "loss": 0.4279, + "step": 4093 + }, + { + "epoch": 11.247252747252748, + "grad_norm": 12.516231536865234, + "learning_rate": 4.437637362637363e-05, + "loss": 0.4177, + "step": 4094 + }, + { + "epoch": 11.25, + "grad_norm": 14.615567207336426, + "learning_rate": 4.4375e-05, + "loss": 0.467, + "step": 4095 + }, + { + "epoch": 11.252747252747252, + "grad_norm": 19.70941925048828, + "learning_rate": 4.437362637362638e-05, + "loss": 1.0081, + "step": 4096 + }, + { + "epoch": 11.255494505494505, + "grad_norm": 15.8104887008667, + "learning_rate": 4.4372252747252755e-05, + "loss": 0.4771, + "step": 4097 + }, + { + "epoch": 11.258241758241759, + "grad_norm": 16.83464813232422, + "learning_rate": 4.4370879120879125e-05, + "loss": 0.6645, + "step": 4098 + }, + { + "epoch": 11.260989010989011, + "grad_norm": 14.367871284484863, + "learning_rate": 4.4369505494505495e-05, + "loss": 0.4513, + "step": 4099 + }, + { + "epoch": 11.263736263736265, + "grad_norm": 15.363637924194336, + "learning_rate": 4.4368131868131865e-05, + "loss": 0.3591, + "step": 4100 + }, + { + "epoch": 11.266483516483516, + "grad_norm": 14.742627143859863, + "learning_rate": 4.436675824175824e-05, + "loss": 0.7697, + "step": 4101 + }, + { + "epoch": 11.26923076923077, + "grad_norm": 14.408865928649902, + "learning_rate": 4.436538461538462e-05, + "loss": 0.788, + "step": 4102 + }, + { + "epoch": 11.271978021978022, + "grad_norm": 23.2949275970459, + "learning_rate": 4.436401098901099e-05, + "loss": 1.0519, + "step": 4103 + }, + { + "epoch": 11.274725274725276, + "grad_norm": 20.769161224365234, + "learning_rate": 4.4362637362637365e-05, + "loss": 0.7159, + "step": 4104 + }, + { + "epoch": 11.277472527472527, + "grad_norm": 14.153400421142578, + "learning_rate": 4.4361263736263735e-05, + "loss": 0.434, + "step": 4105 + }, + { + "epoch": 11.280219780219781, + "grad_norm": 9.56127643585205, + "learning_rate": 4.435989010989011e-05, + "loss": 0.3054, + "step": 4106 + }, + { + "epoch": 11.282967032967033, + "grad_norm": 9.358528137207031, + "learning_rate": 4.435851648351649e-05, + "loss": 0.3965, + "step": 4107 + }, + { + "epoch": 11.285714285714286, + "grad_norm": 10.878030776977539, + "learning_rate": 4.435714285714286e-05, + "loss": 0.3468, + "step": 4108 + }, + { + "epoch": 11.288461538461538, + "grad_norm": 13.9888277053833, + "learning_rate": 4.4355769230769236e-05, + "loss": 0.4617, + "step": 4109 + }, + { + "epoch": 11.291208791208792, + "grad_norm": 11.29294204711914, + "learning_rate": 4.4354395604395606e-05, + "loss": 0.5303, + "step": 4110 + }, + { + "epoch": 11.293956043956044, + "grad_norm": 10.148286819458008, + "learning_rate": 4.435302197802198e-05, + "loss": 0.2411, + "step": 4111 + }, + { + "epoch": 11.296703296703297, + "grad_norm": 13.407221794128418, + "learning_rate": 4.435164835164836e-05, + "loss": 0.563, + "step": 4112 + }, + { + "epoch": 11.29945054945055, + "grad_norm": 20.442466735839844, + "learning_rate": 4.435027472527473e-05, + "loss": 0.6494, + "step": 4113 + }, + { + "epoch": 11.302197802197803, + "grad_norm": 10.263169288635254, + "learning_rate": 4.43489010989011e-05, + "loss": 0.3751, + "step": 4114 + }, + { + "epoch": 11.304945054945055, + "grad_norm": 12.673134803771973, + "learning_rate": 4.434752747252747e-05, + "loss": 0.49, + "step": 4115 + }, + { + "epoch": 11.307692307692308, + "grad_norm": 13.025894165039062, + "learning_rate": 4.4346153846153846e-05, + "loss": 0.4066, + "step": 4116 + }, + { + "epoch": 11.31043956043956, + "grad_norm": 14.268576622009277, + "learning_rate": 4.434478021978022e-05, + "loss": 0.6185, + "step": 4117 + }, + { + "epoch": 11.313186813186814, + "grad_norm": 4.146722793579102, + "learning_rate": 4.434340659340659e-05, + "loss": 0.1251, + "step": 4118 + }, + { + "epoch": 11.315934065934066, + "grad_norm": 14.031461715698242, + "learning_rate": 4.434203296703297e-05, + "loss": 0.6426, + "step": 4119 + }, + { + "epoch": 11.31868131868132, + "grad_norm": 16.625638961791992, + "learning_rate": 4.434065934065934e-05, + "loss": 0.6343, + "step": 4120 + }, + { + "epoch": 11.321428571428571, + "grad_norm": 8.19240951538086, + "learning_rate": 4.4339285714285717e-05, + "loss": 0.3306, + "step": 4121 + }, + { + "epoch": 11.324175824175825, + "grad_norm": 12.853283882141113, + "learning_rate": 4.433791208791209e-05, + "loss": 0.5392, + "step": 4122 + }, + { + "epoch": 11.326923076923077, + "grad_norm": 14.253045082092285, + "learning_rate": 4.433653846153846e-05, + "loss": 0.5733, + "step": 4123 + }, + { + "epoch": 11.32967032967033, + "grad_norm": 19.816017150878906, + "learning_rate": 4.433516483516484e-05, + "loss": 0.8965, + "step": 4124 + }, + { + "epoch": 11.332417582417582, + "grad_norm": 6.3505401611328125, + "learning_rate": 4.433379120879121e-05, + "loss": 0.146, + "step": 4125 + }, + { + "epoch": 11.335164835164836, + "grad_norm": 7.5587477684021, + "learning_rate": 4.433241758241759e-05, + "loss": 0.1994, + "step": 4126 + }, + { + "epoch": 11.337912087912088, + "grad_norm": 20.970712661743164, + "learning_rate": 4.4331043956043964e-05, + "loss": 1.1363, + "step": 4127 + }, + { + "epoch": 11.340659340659341, + "grad_norm": 13.195354461669922, + "learning_rate": 4.4329670329670334e-05, + "loss": 0.4261, + "step": 4128 + }, + { + "epoch": 11.343406593406593, + "grad_norm": 10.83464241027832, + "learning_rate": 4.4328296703296704e-05, + "loss": 0.6219, + "step": 4129 + }, + { + "epoch": 11.346153846153847, + "grad_norm": 10.432086944580078, + "learning_rate": 4.4326923076923074e-05, + "loss": 0.3724, + "step": 4130 + }, + { + "epoch": 11.348901098901099, + "grad_norm": 22.29634666442871, + "learning_rate": 4.432554945054945e-05, + "loss": 0.6915, + "step": 4131 + }, + { + "epoch": 11.351648351648352, + "grad_norm": 11.766610145568848, + "learning_rate": 4.432417582417583e-05, + "loss": 0.5853, + "step": 4132 + }, + { + "epoch": 11.354395604395604, + "grad_norm": 16.448617935180664, + "learning_rate": 4.43228021978022e-05, + "loss": 0.4362, + "step": 4133 + }, + { + "epoch": 11.357142857142858, + "grad_norm": 11.752654075622559, + "learning_rate": 4.4321428571428574e-05, + "loss": 0.5323, + "step": 4134 + }, + { + "epoch": 11.35989010989011, + "grad_norm": 17.946903228759766, + "learning_rate": 4.4320054945054944e-05, + "loss": 0.7767, + "step": 4135 + }, + { + "epoch": 11.362637362637363, + "grad_norm": 15.823177337646484, + "learning_rate": 4.431868131868132e-05, + "loss": 0.5391, + "step": 4136 + }, + { + "epoch": 11.365384615384615, + "grad_norm": 15.039158821105957, + "learning_rate": 4.43173076923077e-05, + "loss": 0.6916, + "step": 4137 + }, + { + "epoch": 11.368131868131869, + "grad_norm": 16.342239379882812, + "learning_rate": 4.431593406593407e-05, + "loss": 0.568, + "step": 4138 + }, + { + "epoch": 11.37087912087912, + "grad_norm": 15.77834701538086, + "learning_rate": 4.4314560439560445e-05, + "loss": 0.6507, + "step": 4139 + }, + { + "epoch": 11.373626373626374, + "grad_norm": 16.939987182617188, + "learning_rate": 4.4313186813186815e-05, + "loss": 0.6698, + "step": 4140 + }, + { + "epoch": 11.376373626373626, + "grad_norm": 16.76042366027832, + "learning_rate": 4.431181318681319e-05, + "loss": 0.8709, + "step": 4141 + }, + { + "epoch": 11.37912087912088, + "grad_norm": 18.63080596923828, + "learning_rate": 4.431043956043957e-05, + "loss": 0.7501, + "step": 4142 + }, + { + "epoch": 11.381868131868131, + "grad_norm": 8.695893287658691, + "learning_rate": 4.430906593406594e-05, + "loss": 0.253, + "step": 4143 + }, + { + "epoch": 11.384615384615385, + "grad_norm": 17.44205093383789, + "learning_rate": 4.430769230769231e-05, + "loss": 0.6159, + "step": 4144 + }, + { + "epoch": 11.387362637362637, + "grad_norm": 12.123458862304688, + "learning_rate": 4.430631868131868e-05, + "loss": 0.4089, + "step": 4145 + }, + { + "epoch": 11.39010989010989, + "grad_norm": 8.53605842590332, + "learning_rate": 4.4304945054945055e-05, + "loss": 0.2204, + "step": 4146 + }, + { + "epoch": 11.392857142857142, + "grad_norm": 7.183598041534424, + "learning_rate": 4.430357142857143e-05, + "loss": 0.2981, + "step": 4147 + }, + { + "epoch": 11.395604395604396, + "grad_norm": 13.345097541809082, + "learning_rate": 4.43021978021978e-05, + "loss": 0.4593, + "step": 4148 + }, + { + "epoch": 11.398351648351648, + "grad_norm": 15.705109596252441, + "learning_rate": 4.430082417582418e-05, + "loss": 0.4438, + "step": 4149 + }, + { + "epoch": 11.401098901098901, + "grad_norm": 13.033543586730957, + "learning_rate": 4.429945054945055e-05, + "loss": 0.5927, + "step": 4150 + }, + { + "epoch": 11.403846153846153, + "grad_norm": 18.392791748046875, + "learning_rate": 4.4298076923076926e-05, + "loss": 0.5645, + "step": 4151 + }, + { + "epoch": 11.406593406593407, + "grad_norm": 11.815327644348145, + "learning_rate": 4.42967032967033e-05, + "loss": 0.384, + "step": 4152 + }, + { + "epoch": 11.409340659340659, + "grad_norm": 13.359217643737793, + "learning_rate": 4.429532967032967e-05, + "loss": 0.4572, + "step": 4153 + }, + { + "epoch": 11.412087912087912, + "grad_norm": 15.071924209594727, + "learning_rate": 4.429395604395605e-05, + "loss": 0.4707, + "step": 4154 + }, + { + "epoch": 11.414835164835164, + "grad_norm": 16.466838836669922, + "learning_rate": 4.429258241758242e-05, + "loss": 0.5431, + "step": 4155 + }, + { + "epoch": 11.417582417582418, + "grad_norm": 9.985086441040039, + "learning_rate": 4.4291208791208796e-05, + "loss": 0.4411, + "step": 4156 + }, + { + "epoch": 11.42032967032967, + "grad_norm": 18.6645565032959, + "learning_rate": 4.428983516483517e-05, + "loss": 0.5124, + "step": 4157 + }, + { + "epoch": 11.423076923076923, + "grad_norm": 10.837327003479004, + "learning_rate": 4.428846153846154e-05, + "loss": 0.5507, + "step": 4158 + }, + { + "epoch": 11.425824175824175, + "grad_norm": 21.38410186767578, + "learning_rate": 4.428708791208791e-05, + "loss": 0.9236, + "step": 4159 + }, + { + "epoch": 11.428571428571429, + "grad_norm": 13.595661163330078, + "learning_rate": 4.428571428571428e-05, + "loss": 0.4274, + "step": 4160 + }, + { + "epoch": 11.43131868131868, + "grad_norm": 9.2035551071167, + "learning_rate": 4.428434065934066e-05, + "loss": 0.3763, + "step": 4161 + }, + { + "epoch": 11.434065934065934, + "grad_norm": 8.435334205627441, + "learning_rate": 4.4282967032967036e-05, + "loss": 0.2368, + "step": 4162 + }, + { + "epoch": 11.436813186813186, + "grad_norm": 9.007630348205566, + "learning_rate": 4.4281593406593406e-05, + "loss": 0.3553, + "step": 4163 + }, + { + "epoch": 11.43956043956044, + "grad_norm": 14.638371467590332, + "learning_rate": 4.428021978021978e-05, + "loss": 0.4642, + "step": 4164 + }, + { + "epoch": 11.442307692307692, + "grad_norm": 14.353384017944336, + "learning_rate": 4.427884615384615e-05, + "loss": 0.5057, + "step": 4165 + }, + { + "epoch": 11.445054945054945, + "grad_norm": 14.453520774841309, + "learning_rate": 4.427747252747253e-05, + "loss": 0.5685, + "step": 4166 + }, + { + "epoch": 11.447802197802197, + "grad_norm": 10.959239959716797, + "learning_rate": 4.427609890109891e-05, + "loss": 0.2702, + "step": 4167 + }, + { + "epoch": 11.45054945054945, + "grad_norm": 21.322223663330078, + "learning_rate": 4.427472527472528e-05, + "loss": 1.0018, + "step": 4168 + }, + { + "epoch": 11.453296703296703, + "grad_norm": 6.86259651184082, + "learning_rate": 4.4273351648351654e-05, + "loss": 0.2585, + "step": 4169 + }, + { + "epoch": 11.456043956043956, + "grad_norm": 13.555098533630371, + "learning_rate": 4.4271978021978024e-05, + "loss": 0.6364, + "step": 4170 + }, + { + "epoch": 11.458791208791208, + "grad_norm": 13.727410316467285, + "learning_rate": 4.42706043956044e-05, + "loss": 0.6576, + "step": 4171 + }, + { + "epoch": 11.461538461538462, + "grad_norm": 9.5892915725708, + "learning_rate": 4.426923076923078e-05, + "loss": 0.228, + "step": 4172 + }, + { + "epoch": 11.464285714285714, + "grad_norm": 15.156208992004395, + "learning_rate": 4.426785714285715e-05, + "loss": 0.4121, + "step": 4173 + }, + { + "epoch": 11.467032967032967, + "grad_norm": 9.606224060058594, + "learning_rate": 4.426648351648352e-05, + "loss": 0.3344, + "step": 4174 + }, + { + "epoch": 11.469780219780219, + "grad_norm": 19.420372009277344, + "learning_rate": 4.426510989010989e-05, + "loss": 1.0844, + "step": 4175 + }, + { + "epoch": 11.472527472527473, + "grad_norm": 12.49914264678955, + "learning_rate": 4.4263736263736264e-05, + "loss": 0.4713, + "step": 4176 + }, + { + "epoch": 11.475274725274724, + "grad_norm": 18.70897102355957, + "learning_rate": 4.426236263736264e-05, + "loss": 0.5138, + "step": 4177 + }, + { + "epoch": 11.478021978021978, + "grad_norm": 13.380637168884277, + "learning_rate": 4.426098901098901e-05, + "loss": 0.5769, + "step": 4178 + }, + { + "epoch": 11.48076923076923, + "grad_norm": 7.964258670806885, + "learning_rate": 4.425961538461539e-05, + "loss": 0.3132, + "step": 4179 + }, + { + "epoch": 11.483516483516484, + "grad_norm": 15.202310562133789, + "learning_rate": 4.425824175824176e-05, + "loss": 0.493, + "step": 4180 + }, + { + "epoch": 11.486263736263735, + "grad_norm": 17.329309463500977, + "learning_rate": 4.4256868131868135e-05, + "loss": 0.6866, + "step": 4181 + }, + { + "epoch": 11.489010989010989, + "grad_norm": 14.928777694702148, + "learning_rate": 4.425549450549451e-05, + "loss": 0.6388, + "step": 4182 + }, + { + "epoch": 11.491758241758241, + "grad_norm": 12.369379997253418, + "learning_rate": 4.425412087912088e-05, + "loss": 0.3742, + "step": 4183 + }, + { + "epoch": 11.494505494505495, + "grad_norm": 11.194231033325195, + "learning_rate": 4.425274725274726e-05, + "loss": 0.4334, + "step": 4184 + }, + { + "epoch": 11.497252747252748, + "grad_norm": 16.265438079833984, + "learning_rate": 4.425137362637363e-05, + "loss": 0.5416, + "step": 4185 + }, + { + "epoch": 11.5, + "grad_norm": 16.040950775146484, + "learning_rate": 4.4250000000000005e-05, + "loss": 0.7063, + "step": 4186 + }, + { + "epoch": 11.502747252747252, + "grad_norm": 12.623430252075195, + "learning_rate": 4.424862637362638e-05, + "loss": 0.4267, + "step": 4187 + }, + { + "epoch": 11.505494505494505, + "grad_norm": 19.318012237548828, + "learning_rate": 4.424725274725275e-05, + "loss": 0.7631, + "step": 4188 + }, + { + "epoch": 11.508241758241759, + "grad_norm": 14.860520362854004, + "learning_rate": 4.424587912087912e-05, + "loss": 0.485, + "step": 4189 + }, + { + "epoch": 11.510989010989011, + "grad_norm": 14.675725936889648, + "learning_rate": 4.424450549450549e-05, + "loss": 0.389, + "step": 4190 + }, + { + "epoch": 11.513736263736263, + "grad_norm": 14.606380462646484, + "learning_rate": 4.424313186813187e-05, + "loss": 0.6356, + "step": 4191 + }, + { + "epoch": 11.516483516483516, + "grad_norm": 11.35124683380127, + "learning_rate": 4.4241758241758245e-05, + "loss": 0.5319, + "step": 4192 + }, + { + "epoch": 11.51923076923077, + "grad_norm": 9.858672142028809, + "learning_rate": 4.4240384615384615e-05, + "loss": 0.3217, + "step": 4193 + }, + { + "epoch": 11.521978021978022, + "grad_norm": 17.876365661621094, + "learning_rate": 4.423901098901099e-05, + "loss": 0.9363, + "step": 4194 + }, + { + "epoch": 11.524725274725276, + "grad_norm": 10.02184772491455, + "learning_rate": 4.423763736263736e-05, + "loss": 0.4352, + "step": 4195 + }, + { + "epoch": 11.527472527472527, + "grad_norm": 16.084579467773438, + "learning_rate": 4.423626373626374e-05, + "loss": 0.6477, + "step": 4196 + }, + { + "epoch": 11.530219780219781, + "grad_norm": 18.818744659423828, + "learning_rate": 4.4234890109890116e-05, + "loss": 0.4922, + "step": 4197 + }, + { + "epoch": 11.532967032967033, + "grad_norm": 16.598764419555664, + "learning_rate": 4.4233516483516486e-05, + "loss": 0.471, + "step": 4198 + }, + { + "epoch": 11.535714285714286, + "grad_norm": 4.72271203994751, + "learning_rate": 4.423214285714286e-05, + "loss": 0.1688, + "step": 4199 + }, + { + "epoch": 11.538461538461538, + "grad_norm": 11.535364151000977, + "learning_rate": 4.423076923076923e-05, + "loss": 0.3876, + "step": 4200 + }, + { + "epoch": 11.541208791208792, + "grad_norm": 11.30101203918457, + "learning_rate": 4.422939560439561e-05, + "loss": 0.4008, + "step": 4201 + }, + { + "epoch": 11.543956043956044, + "grad_norm": 15.78536605834961, + "learning_rate": 4.4228021978021986e-05, + "loss": 0.5101, + "step": 4202 + }, + { + "epoch": 11.546703296703297, + "grad_norm": 10.570611000061035, + "learning_rate": 4.4226648351648356e-05, + "loss": 0.3338, + "step": 4203 + }, + { + "epoch": 11.54945054945055, + "grad_norm": 13.35008430480957, + "learning_rate": 4.4225274725274726e-05, + "loss": 0.3581, + "step": 4204 + }, + { + "epoch": 11.552197802197803, + "grad_norm": 11.658018112182617, + "learning_rate": 4.4223901098901096e-05, + "loss": 0.4891, + "step": 4205 + }, + { + "epoch": 11.554945054945055, + "grad_norm": 14.711888313293457, + "learning_rate": 4.422252747252747e-05, + "loss": 0.476, + "step": 4206 + }, + { + "epoch": 11.557692307692308, + "grad_norm": 6.300314426422119, + "learning_rate": 4.422115384615385e-05, + "loss": 0.1976, + "step": 4207 + }, + { + "epoch": 11.56043956043956, + "grad_norm": 19.795394897460938, + "learning_rate": 4.421978021978022e-05, + "loss": 0.7381, + "step": 4208 + }, + { + "epoch": 11.563186813186814, + "grad_norm": 5.802011013031006, + "learning_rate": 4.42184065934066e-05, + "loss": 0.2293, + "step": 4209 + }, + { + "epoch": 11.565934065934066, + "grad_norm": 16.844785690307617, + "learning_rate": 4.421703296703297e-05, + "loss": 0.7224, + "step": 4210 + }, + { + "epoch": 11.56868131868132, + "grad_norm": 14.133506774902344, + "learning_rate": 4.4215659340659343e-05, + "loss": 0.8065, + "step": 4211 + }, + { + "epoch": 11.571428571428571, + "grad_norm": 12.83301067352295, + "learning_rate": 4.4214285714285714e-05, + "loss": 0.5705, + "step": 4212 + }, + { + "epoch": 11.574175824175825, + "grad_norm": 11.617576599121094, + "learning_rate": 4.421291208791209e-05, + "loss": 0.2796, + "step": 4213 + }, + { + "epoch": 11.576923076923077, + "grad_norm": 12.646841049194336, + "learning_rate": 4.421153846153847e-05, + "loss": 0.4067, + "step": 4214 + }, + { + "epoch": 11.57967032967033, + "grad_norm": 8.139238357543945, + "learning_rate": 4.421016483516484e-05, + "loss": 0.2318, + "step": 4215 + }, + { + "epoch": 11.582417582417582, + "grad_norm": 24.87253189086914, + "learning_rate": 4.4208791208791214e-05, + "loss": 0.8228, + "step": 4216 + }, + { + "epoch": 11.585164835164836, + "grad_norm": 19.154111862182617, + "learning_rate": 4.4207417582417584e-05, + "loss": 0.9489, + "step": 4217 + }, + { + "epoch": 11.587912087912088, + "grad_norm": 7.077815532684326, + "learning_rate": 4.420604395604396e-05, + "loss": 0.2957, + "step": 4218 + }, + { + "epoch": 11.590659340659341, + "grad_norm": 13.956209182739258, + "learning_rate": 4.420467032967033e-05, + "loss": 0.5619, + "step": 4219 + }, + { + "epoch": 11.593406593406593, + "grad_norm": 13.650435447692871, + "learning_rate": 4.42032967032967e-05, + "loss": 0.5571, + "step": 4220 + }, + { + "epoch": 11.596153846153847, + "grad_norm": 15.709659576416016, + "learning_rate": 4.420192307692308e-05, + "loss": 0.7344, + "step": 4221 + }, + { + "epoch": 11.598901098901099, + "grad_norm": 8.88016414642334, + "learning_rate": 4.420054945054945e-05, + "loss": 0.2998, + "step": 4222 + }, + { + "epoch": 11.601648351648352, + "grad_norm": 13.45289134979248, + "learning_rate": 4.4199175824175824e-05, + "loss": 0.4346, + "step": 4223 + }, + { + "epoch": 11.604395604395604, + "grad_norm": 11.235252380371094, + "learning_rate": 4.41978021978022e-05, + "loss": 0.3662, + "step": 4224 + }, + { + "epoch": 11.607142857142858, + "grad_norm": 13.934978485107422, + "learning_rate": 4.419642857142857e-05, + "loss": 0.6348, + "step": 4225 + }, + { + "epoch": 11.60989010989011, + "grad_norm": 12.636359214782715, + "learning_rate": 4.419505494505495e-05, + "loss": 0.7202, + "step": 4226 + }, + { + "epoch": 11.612637362637363, + "grad_norm": 17.65861701965332, + "learning_rate": 4.419368131868132e-05, + "loss": 0.5894, + "step": 4227 + }, + { + "epoch": 11.615384615384615, + "grad_norm": 9.32744312286377, + "learning_rate": 4.4192307692307695e-05, + "loss": 0.2897, + "step": 4228 + }, + { + "epoch": 11.618131868131869, + "grad_norm": 16.09735107421875, + "learning_rate": 4.419093406593407e-05, + "loss": 0.613, + "step": 4229 + }, + { + "epoch": 11.62087912087912, + "grad_norm": 6.820662975311279, + "learning_rate": 4.418956043956044e-05, + "loss": 0.1794, + "step": 4230 + }, + { + "epoch": 11.623626373626374, + "grad_norm": 15.830245018005371, + "learning_rate": 4.418818681318682e-05, + "loss": 0.6809, + "step": 4231 + }, + { + "epoch": 11.626373626373626, + "grad_norm": 10.458910942077637, + "learning_rate": 4.418681318681319e-05, + "loss": 0.2868, + "step": 4232 + }, + { + "epoch": 11.62912087912088, + "grad_norm": 15.704001426696777, + "learning_rate": 4.4185439560439565e-05, + "loss": 0.7135, + "step": 4233 + }, + { + "epoch": 11.631868131868131, + "grad_norm": 9.049253463745117, + "learning_rate": 4.4184065934065935e-05, + "loss": 0.3492, + "step": 4234 + }, + { + "epoch": 11.634615384615385, + "grad_norm": 8.142221450805664, + "learning_rate": 4.4182692307692305e-05, + "loss": 0.2328, + "step": 4235 + }, + { + "epoch": 11.637362637362637, + "grad_norm": 6.028289318084717, + "learning_rate": 4.418131868131868e-05, + "loss": 0.2653, + "step": 4236 + }, + { + "epoch": 11.64010989010989, + "grad_norm": 18.48247718811035, + "learning_rate": 4.417994505494505e-05, + "loss": 0.6072, + "step": 4237 + }, + { + "epoch": 11.642857142857142, + "grad_norm": 14.130979537963867, + "learning_rate": 4.417857142857143e-05, + "loss": 0.5168, + "step": 4238 + }, + { + "epoch": 11.645604395604396, + "grad_norm": 17.163511276245117, + "learning_rate": 4.4177197802197806e-05, + "loss": 0.5265, + "step": 4239 + }, + { + "epoch": 11.648351648351648, + "grad_norm": 15.579244613647461, + "learning_rate": 4.4175824175824176e-05, + "loss": 0.5496, + "step": 4240 + }, + { + "epoch": 11.651098901098901, + "grad_norm": 12.85955810546875, + "learning_rate": 4.417445054945055e-05, + "loss": 0.34, + "step": 4241 + }, + { + "epoch": 11.653846153846153, + "grad_norm": 19.876205444335938, + "learning_rate": 4.417307692307692e-05, + "loss": 1.1771, + "step": 4242 + }, + { + "epoch": 11.656593406593407, + "grad_norm": 10.691503524780273, + "learning_rate": 4.41717032967033e-05, + "loss": 0.3657, + "step": 4243 + }, + { + "epoch": 11.659340659340659, + "grad_norm": 13.992874145507812, + "learning_rate": 4.4170329670329676e-05, + "loss": 0.4191, + "step": 4244 + }, + { + "epoch": 11.662087912087912, + "grad_norm": 10.751720428466797, + "learning_rate": 4.4168956043956046e-05, + "loss": 0.3455, + "step": 4245 + }, + { + "epoch": 11.664835164835164, + "grad_norm": 15.39520263671875, + "learning_rate": 4.416758241758242e-05, + "loss": 0.7487, + "step": 4246 + }, + { + "epoch": 11.667582417582418, + "grad_norm": 19.651403427124023, + "learning_rate": 4.416620879120879e-05, + "loss": 0.8509, + "step": 4247 + }, + { + "epoch": 11.67032967032967, + "grad_norm": 17.438814163208008, + "learning_rate": 4.416483516483517e-05, + "loss": 0.7524, + "step": 4248 + }, + { + "epoch": 11.673076923076923, + "grad_norm": 18.143285751342773, + "learning_rate": 4.416346153846154e-05, + "loss": 0.7098, + "step": 4249 + }, + { + "epoch": 11.675824175824175, + "grad_norm": 11.411389350891113, + "learning_rate": 4.416208791208791e-05, + "loss": 0.4835, + "step": 4250 + }, + { + "epoch": 11.678571428571429, + "grad_norm": 16.31471061706543, + "learning_rate": 4.4160714285714287e-05, + "loss": 1.0789, + "step": 4251 + }, + { + "epoch": 11.68131868131868, + "grad_norm": 20.437416076660156, + "learning_rate": 4.4159340659340657e-05, + "loss": 0.8322, + "step": 4252 + }, + { + "epoch": 11.684065934065934, + "grad_norm": 14.426778793334961, + "learning_rate": 4.415796703296703e-05, + "loss": 0.4646, + "step": 4253 + }, + { + "epoch": 11.686813186813186, + "grad_norm": 24.52228355407715, + "learning_rate": 4.415659340659341e-05, + "loss": 1.0794, + "step": 4254 + }, + { + "epoch": 11.68956043956044, + "grad_norm": 22.89060401916504, + "learning_rate": 4.415521978021978e-05, + "loss": 1.0, + "step": 4255 + }, + { + "epoch": 11.692307692307692, + "grad_norm": 13.868483543395996, + "learning_rate": 4.415384615384616e-05, + "loss": 0.7401, + "step": 4256 + }, + { + "epoch": 11.695054945054945, + "grad_norm": 16.660993576049805, + "learning_rate": 4.415247252747253e-05, + "loss": 0.7413, + "step": 4257 + }, + { + "epoch": 11.697802197802197, + "grad_norm": 8.978703498840332, + "learning_rate": 4.4151098901098904e-05, + "loss": 0.2854, + "step": 4258 + }, + { + "epoch": 11.70054945054945, + "grad_norm": 18.000558853149414, + "learning_rate": 4.414972527472528e-05, + "loss": 1.0585, + "step": 4259 + }, + { + "epoch": 11.703296703296703, + "grad_norm": 10.409247398376465, + "learning_rate": 4.414835164835165e-05, + "loss": 0.3227, + "step": 4260 + }, + { + "epoch": 11.706043956043956, + "grad_norm": 9.939135551452637, + "learning_rate": 4.414697802197803e-05, + "loss": 0.388, + "step": 4261 + }, + { + "epoch": 11.708791208791208, + "grad_norm": 20.613142013549805, + "learning_rate": 4.41456043956044e-05, + "loss": 0.7472, + "step": 4262 + }, + { + "epoch": 11.711538461538462, + "grad_norm": 13.414140701293945, + "learning_rate": 4.4144230769230774e-05, + "loss": 0.6256, + "step": 4263 + }, + { + "epoch": 11.714285714285714, + "grad_norm": 13.21255874633789, + "learning_rate": 4.4142857142857144e-05, + "loss": 0.4519, + "step": 4264 + }, + { + "epoch": 11.717032967032967, + "grad_norm": 12.260674476623535, + "learning_rate": 4.4141483516483514e-05, + "loss": 0.3792, + "step": 4265 + }, + { + "epoch": 11.719780219780219, + "grad_norm": 12.898482322692871, + "learning_rate": 4.414010989010989e-05, + "loss": 0.407, + "step": 4266 + }, + { + "epoch": 11.722527472527473, + "grad_norm": 15.84408187866211, + "learning_rate": 4.413873626373626e-05, + "loss": 0.5592, + "step": 4267 + }, + { + "epoch": 11.725274725274724, + "grad_norm": 11.908162117004395, + "learning_rate": 4.413736263736264e-05, + "loss": 0.434, + "step": 4268 + }, + { + "epoch": 11.728021978021978, + "grad_norm": 17.979644775390625, + "learning_rate": 4.4135989010989015e-05, + "loss": 0.8011, + "step": 4269 + }, + { + "epoch": 11.73076923076923, + "grad_norm": 13.911227226257324, + "learning_rate": 4.4134615384615385e-05, + "loss": 0.6933, + "step": 4270 + }, + { + "epoch": 11.733516483516484, + "grad_norm": 14.451095581054688, + "learning_rate": 4.413324175824176e-05, + "loss": 0.6632, + "step": 4271 + }, + { + "epoch": 11.736263736263737, + "grad_norm": 10.727404594421387, + "learning_rate": 4.413186813186813e-05, + "loss": 0.4003, + "step": 4272 + }, + { + "epoch": 11.739010989010989, + "grad_norm": 11.00478458404541, + "learning_rate": 4.413049450549451e-05, + "loss": 0.268, + "step": 4273 + }, + { + "epoch": 11.741758241758241, + "grad_norm": 9.305273056030273, + "learning_rate": 4.4129120879120885e-05, + "loss": 0.3059, + "step": 4274 + }, + { + "epoch": 11.744505494505495, + "grad_norm": 10.380253791809082, + "learning_rate": 4.4127747252747255e-05, + "loss": 0.4487, + "step": 4275 + }, + { + "epoch": 11.747252747252748, + "grad_norm": 14.821392059326172, + "learning_rate": 4.412637362637363e-05, + "loss": 0.5521, + "step": 4276 + }, + { + "epoch": 11.75, + "grad_norm": 19.841283798217773, + "learning_rate": 4.4125e-05, + "loss": 0.7699, + "step": 4277 + }, + { + "epoch": 11.752747252747252, + "grad_norm": 14.905745506286621, + "learning_rate": 4.412362637362638e-05, + "loss": 0.4904, + "step": 4278 + }, + { + "epoch": 11.755494505494505, + "grad_norm": 11.935315132141113, + "learning_rate": 4.412225274725275e-05, + "loss": 0.379, + "step": 4279 + }, + { + "epoch": 11.758241758241759, + "grad_norm": 8.155232429504395, + "learning_rate": 4.412087912087912e-05, + "loss": 0.2819, + "step": 4280 + }, + { + "epoch": 11.760989010989011, + "grad_norm": 11.599784851074219, + "learning_rate": 4.4119505494505496e-05, + "loss": 0.3153, + "step": 4281 + }, + { + "epoch": 11.763736263736263, + "grad_norm": 12.931781768798828, + "learning_rate": 4.4118131868131866e-05, + "loss": 0.3305, + "step": 4282 + }, + { + "epoch": 11.766483516483516, + "grad_norm": 17.600263595581055, + "learning_rate": 4.411675824175824e-05, + "loss": 0.803, + "step": 4283 + }, + { + "epoch": 11.76923076923077, + "grad_norm": 9.892910957336426, + "learning_rate": 4.411538461538462e-05, + "loss": 0.3626, + "step": 4284 + }, + { + "epoch": 11.771978021978022, + "grad_norm": 17.259737014770508, + "learning_rate": 4.411401098901099e-05, + "loss": 0.6639, + "step": 4285 + }, + { + "epoch": 11.774725274725276, + "grad_norm": 16.310211181640625, + "learning_rate": 4.4112637362637366e-05, + "loss": 0.6723, + "step": 4286 + }, + { + "epoch": 11.777472527472527, + "grad_norm": 14.2610445022583, + "learning_rate": 4.4111263736263736e-05, + "loss": 0.6577, + "step": 4287 + }, + { + "epoch": 11.780219780219781, + "grad_norm": 9.891153335571289, + "learning_rate": 4.410989010989011e-05, + "loss": 0.2715, + "step": 4288 + }, + { + "epoch": 11.782967032967033, + "grad_norm": 10.581568717956543, + "learning_rate": 4.410851648351649e-05, + "loss": 0.4327, + "step": 4289 + }, + { + "epoch": 11.785714285714286, + "grad_norm": 10.351937294006348, + "learning_rate": 4.410714285714286e-05, + "loss": 0.2403, + "step": 4290 + }, + { + "epoch": 11.788461538461538, + "grad_norm": 14.420196533203125, + "learning_rate": 4.4105769230769236e-05, + "loss": 0.452, + "step": 4291 + }, + { + "epoch": 11.791208791208792, + "grad_norm": 7.77803897857666, + "learning_rate": 4.4104395604395606e-05, + "loss": 0.2868, + "step": 4292 + }, + { + "epoch": 11.793956043956044, + "grad_norm": 7.311130046844482, + "learning_rate": 4.410302197802198e-05, + "loss": 0.2741, + "step": 4293 + }, + { + "epoch": 11.796703296703297, + "grad_norm": 16.360870361328125, + "learning_rate": 4.410164835164835e-05, + "loss": 0.661, + "step": 4294 + }, + { + "epoch": 11.79945054945055, + "grad_norm": 11.804720878601074, + "learning_rate": 4.410027472527472e-05, + "loss": 0.452, + "step": 4295 + }, + { + "epoch": 11.802197802197803, + "grad_norm": 8.055717468261719, + "learning_rate": 4.40989010989011e-05, + "loss": 0.2077, + "step": 4296 + }, + { + "epoch": 11.804945054945055, + "grad_norm": 7.818387508392334, + "learning_rate": 4.409752747252747e-05, + "loss": 0.3047, + "step": 4297 + }, + { + "epoch": 11.807692307692308, + "grad_norm": 12.824387550354004, + "learning_rate": 4.409615384615385e-05, + "loss": 0.4467, + "step": 4298 + }, + { + "epoch": 11.81043956043956, + "grad_norm": 10.49008560180664, + "learning_rate": 4.4094780219780224e-05, + "loss": 0.4022, + "step": 4299 + }, + { + "epoch": 11.813186813186814, + "grad_norm": 7.799314022064209, + "learning_rate": 4.4093406593406594e-05, + "loss": 0.2729, + "step": 4300 + }, + { + "epoch": 11.815934065934066, + "grad_norm": 11.83956527709961, + "learning_rate": 4.409203296703297e-05, + "loss": 0.5067, + "step": 4301 + }, + { + "epoch": 11.81868131868132, + "grad_norm": 7.145654678344727, + "learning_rate": 4.409065934065934e-05, + "loss": 0.3167, + "step": 4302 + }, + { + "epoch": 11.821428571428571, + "grad_norm": 7.475332260131836, + "learning_rate": 4.408928571428572e-05, + "loss": 0.314, + "step": 4303 + }, + { + "epoch": 11.824175824175825, + "grad_norm": 11.260656356811523, + "learning_rate": 4.4087912087912094e-05, + "loss": 0.2479, + "step": 4304 + }, + { + "epoch": 11.826923076923077, + "grad_norm": 10.903254508972168, + "learning_rate": 4.4086538461538464e-05, + "loss": 0.4821, + "step": 4305 + }, + { + "epoch": 11.82967032967033, + "grad_norm": 9.746467590332031, + "learning_rate": 4.408516483516484e-05, + "loss": 0.3241, + "step": 4306 + }, + { + "epoch": 11.832417582417582, + "grad_norm": 9.372220039367676, + "learning_rate": 4.408379120879121e-05, + "loss": 0.3938, + "step": 4307 + }, + { + "epoch": 11.835164835164836, + "grad_norm": 16.464534759521484, + "learning_rate": 4.408241758241759e-05, + "loss": 0.6651, + "step": 4308 + }, + { + "epoch": 11.837912087912088, + "grad_norm": 8.338237762451172, + "learning_rate": 4.408104395604396e-05, + "loss": 0.1826, + "step": 4309 + }, + { + "epoch": 11.840659340659341, + "grad_norm": 16.268281936645508, + "learning_rate": 4.407967032967033e-05, + "loss": 0.6014, + "step": 4310 + }, + { + "epoch": 11.843406593406593, + "grad_norm": 14.392302513122559, + "learning_rate": 4.4078296703296705e-05, + "loss": 0.6144, + "step": 4311 + }, + { + "epoch": 11.846153846153847, + "grad_norm": 10.55656623840332, + "learning_rate": 4.4076923076923075e-05, + "loss": 0.3546, + "step": 4312 + }, + { + "epoch": 11.848901098901099, + "grad_norm": 12.594079971313477, + "learning_rate": 4.407554945054945e-05, + "loss": 0.371, + "step": 4313 + }, + { + "epoch": 11.851648351648352, + "grad_norm": 22.7947940826416, + "learning_rate": 4.407417582417583e-05, + "loss": 1.3137, + "step": 4314 + }, + { + "epoch": 11.854395604395604, + "grad_norm": 5.000246047973633, + "learning_rate": 4.40728021978022e-05, + "loss": 0.1123, + "step": 4315 + }, + { + "epoch": 11.857142857142858, + "grad_norm": 13.911237716674805, + "learning_rate": 4.4071428571428575e-05, + "loss": 0.5498, + "step": 4316 + }, + { + "epoch": 11.85989010989011, + "grad_norm": 23.03075408935547, + "learning_rate": 4.4070054945054945e-05, + "loss": 1.125, + "step": 4317 + }, + { + "epoch": 11.862637362637363, + "grad_norm": 9.610235214233398, + "learning_rate": 4.406868131868132e-05, + "loss": 0.4295, + "step": 4318 + }, + { + "epoch": 11.865384615384615, + "grad_norm": 12.25671672821045, + "learning_rate": 4.40673076923077e-05, + "loss": 0.3895, + "step": 4319 + }, + { + "epoch": 11.868131868131869, + "grad_norm": 12.757874488830566, + "learning_rate": 4.406593406593407e-05, + "loss": 0.2553, + "step": 4320 + }, + { + "epoch": 11.87087912087912, + "grad_norm": 15.638124465942383, + "learning_rate": 4.4064560439560445e-05, + "loss": 0.5258, + "step": 4321 + }, + { + "epoch": 11.873626373626374, + "grad_norm": 16.44317626953125, + "learning_rate": 4.4063186813186815e-05, + "loss": 0.6848, + "step": 4322 + }, + { + "epoch": 11.876373626373626, + "grad_norm": 10.55500316619873, + "learning_rate": 4.406181318681319e-05, + "loss": 0.302, + "step": 4323 + }, + { + "epoch": 11.87912087912088, + "grad_norm": 8.728456497192383, + "learning_rate": 4.406043956043956e-05, + "loss": 0.3511, + "step": 4324 + }, + { + "epoch": 11.881868131868131, + "grad_norm": 12.007547378540039, + "learning_rate": 4.405906593406593e-05, + "loss": 0.4854, + "step": 4325 + }, + { + "epoch": 11.884615384615385, + "grad_norm": 16.841856002807617, + "learning_rate": 4.405769230769231e-05, + "loss": 0.6887, + "step": 4326 + }, + { + "epoch": 11.887362637362637, + "grad_norm": 13.59692096710205, + "learning_rate": 4.405631868131868e-05, + "loss": 0.5248, + "step": 4327 + }, + { + "epoch": 11.89010989010989, + "grad_norm": 7.094817161560059, + "learning_rate": 4.4054945054945056e-05, + "loss": 0.2188, + "step": 4328 + }, + { + "epoch": 11.892857142857142, + "grad_norm": 9.41871452331543, + "learning_rate": 4.405357142857143e-05, + "loss": 0.5782, + "step": 4329 + }, + { + "epoch": 11.895604395604396, + "grad_norm": 16.49758529663086, + "learning_rate": 4.40521978021978e-05, + "loss": 0.6145, + "step": 4330 + }, + { + "epoch": 11.898351648351648, + "grad_norm": 18.256027221679688, + "learning_rate": 4.405082417582418e-05, + "loss": 0.8312, + "step": 4331 + }, + { + "epoch": 11.901098901098901, + "grad_norm": 12.780855178833008, + "learning_rate": 4.404945054945055e-05, + "loss": 0.3322, + "step": 4332 + }, + { + "epoch": 11.903846153846153, + "grad_norm": 16.47693634033203, + "learning_rate": 4.4048076923076926e-05, + "loss": 0.8729, + "step": 4333 + }, + { + "epoch": 11.906593406593407, + "grad_norm": 19.144601821899414, + "learning_rate": 4.40467032967033e-05, + "loss": 0.9098, + "step": 4334 + }, + { + "epoch": 11.909340659340659, + "grad_norm": 19.743732452392578, + "learning_rate": 4.404532967032967e-05, + "loss": 1.1888, + "step": 4335 + }, + { + "epoch": 11.912087912087912, + "grad_norm": 8.36662483215332, + "learning_rate": 4.404395604395605e-05, + "loss": 0.3653, + "step": 4336 + }, + { + "epoch": 11.914835164835164, + "grad_norm": 17.413415908813477, + "learning_rate": 4.404258241758242e-05, + "loss": 0.7657, + "step": 4337 + }, + { + "epoch": 11.917582417582418, + "grad_norm": 16.08151626586914, + "learning_rate": 4.40412087912088e-05, + "loss": 0.6423, + "step": 4338 + }, + { + "epoch": 11.92032967032967, + "grad_norm": 16.748903274536133, + "learning_rate": 4.403983516483517e-05, + "loss": 0.5078, + "step": 4339 + }, + { + "epoch": 11.923076923076923, + "grad_norm": 18.51679229736328, + "learning_rate": 4.403846153846154e-05, + "loss": 0.6189, + "step": 4340 + }, + { + "epoch": 11.925824175824175, + "grad_norm": 11.343650817871094, + "learning_rate": 4.4037087912087913e-05, + "loss": 0.4858, + "step": 4341 + }, + { + "epoch": 11.928571428571429, + "grad_norm": 12.085856437683105, + "learning_rate": 4.4035714285714284e-05, + "loss": 0.4569, + "step": 4342 + }, + { + "epoch": 11.93131868131868, + "grad_norm": 8.580135345458984, + "learning_rate": 4.403434065934066e-05, + "loss": 0.4101, + "step": 4343 + }, + { + "epoch": 11.934065934065934, + "grad_norm": 17.414867401123047, + "learning_rate": 4.403296703296704e-05, + "loss": 0.6049, + "step": 4344 + }, + { + "epoch": 11.936813186813186, + "grad_norm": 5.700684547424316, + "learning_rate": 4.403159340659341e-05, + "loss": 0.1483, + "step": 4345 + }, + { + "epoch": 11.93956043956044, + "grad_norm": 9.943248748779297, + "learning_rate": 4.4030219780219784e-05, + "loss": 0.2287, + "step": 4346 + }, + { + "epoch": 11.942307692307692, + "grad_norm": 7.088542461395264, + "learning_rate": 4.4028846153846154e-05, + "loss": 0.1965, + "step": 4347 + }, + { + "epoch": 11.945054945054945, + "grad_norm": 7.612791061401367, + "learning_rate": 4.402747252747253e-05, + "loss": 0.2027, + "step": 4348 + }, + { + "epoch": 11.947802197802197, + "grad_norm": 12.104660987854004, + "learning_rate": 4.402609890109891e-05, + "loss": 0.4865, + "step": 4349 + }, + { + "epoch": 11.95054945054945, + "grad_norm": 4.622377395629883, + "learning_rate": 4.402472527472528e-05, + "loss": 0.131, + "step": 4350 + }, + { + "epoch": 11.953296703296703, + "grad_norm": 11.324217796325684, + "learning_rate": 4.4023351648351654e-05, + "loss": 0.4578, + "step": 4351 + }, + { + "epoch": 11.956043956043956, + "grad_norm": 16.51335906982422, + "learning_rate": 4.4021978021978024e-05, + "loss": 0.8016, + "step": 4352 + }, + { + "epoch": 11.958791208791208, + "grad_norm": 13.743186950683594, + "learning_rate": 4.40206043956044e-05, + "loss": 0.3932, + "step": 4353 + }, + { + "epoch": 11.961538461538462, + "grad_norm": 11.586811065673828, + "learning_rate": 4.401923076923077e-05, + "loss": 0.6269, + "step": 4354 + }, + { + "epoch": 11.964285714285714, + "grad_norm": 9.131403923034668, + "learning_rate": 4.401785714285714e-05, + "loss": 0.1885, + "step": 4355 + }, + { + "epoch": 11.967032967032967, + "grad_norm": 15.34982681274414, + "learning_rate": 4.401648351648352e-05, + "loss": 0.4276, + "step": 4356 + }, + { + "epoch": 11.969780219780219, + "grad_norm": 9.841672897338867, + "learning_rate": 4.401510989010989e-05, + "loss": 0.3644, + "step": 4357 + }, + { + "epoch": 11.972527472527473, + "grad_norm": 8.583047866821289, + "learning_rate": 4.4013736263736265e-05, + "loss": 0.1618, + "step": 4358 + }, + { + "epoch": 11.975274725274724, + "grad_norm": 17.495960235595703, + "learning_rate": 4.401236263736264e-05, + "loss": 0.6475, + "step": 4359 + }, + { + "epoch": 11.978021978021978, + "grad_norm": 9.228883743286133, + "learning_rate": 4.401098901098901e-05, + "loss": 0.3202, + "step": 4360 + }, + { + "epoch": 11.98076923076923, + "grad_norm": 15.108036994934082, + "learning_rate": 4.400961538461539e-05, + "loss": 0.5493, + "step": 4361 + }, + { + "epoch": 11.983516483516484, + "grad_norm": 16.65545082092285, + "learning_rate": 4.400824175824176e-05, + "loss": 0.8041, + "step": 4362 + }, + { + "epoch": 11.986263736263737, + "grad_norm": 13.764854431152344, + "learning_rate": 4.4006868131868135e-05, + "loss": 0.4367, + "step": 4363 + }, + { + "epoch": 11.989010989010989, + "grad_norm": 10.789657592773438, + "learning_rate": 4.400549450549451e-05, + "loss": 0.5774, + "step": 4364 + }, + { + "epoch": 11.991758241758241, + "grad_norm": 11.803166389465332, + "learning_rate": 4.400412087912088e-05, + "loss": 0.3791, + "step": 4365 + }, + { + "epoch": 11.994505494505495, + "grad_norm": 18.119583129882812, + "learning_rate": 4.400274725274726e-05, + "loss": 0.7749, + "step": 4366 + }, + { + "epoch": 11.997252747252748, + "grad_norm": 19.880632400512695, + "learning_rate": 4.400137362637363e-05, + "loss": 1.0297, + "step": 4367 + }, + { + "epoch": 12.0, + "grad_norm": 15.143667221069336, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.2037, + "step": 4368 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.640495867768595, + "eval_f1": 0.6219643210290821, + "eval_f1_DuraRiadoRio_64x64": 0.32, + "eval_f1_Mole_64x64": 0.7063829787234043, + "eval_f1_Quebrado_64x64": 0.8366013071895425, + "eval_f1_RiadoRio_64x64": 0.6566037735849056, + "eval_f1_RioFechado_64x64": 0.5902335456475584, + "eval_loss": 1.9170002937316895, + "eval_precision": 0.7595682161940466, + "eval_precision_DuraRiadoRio_64x64": 0.9032258064516129, + "eval_precision_Mole_64x64": 0.9120879120879121, + "eval_precision_Quebrado_64x64": 0.7901234567901234, + "eval_precision_RiadoRio_64x64": 0.7699115044247787, + "eval_precision_RioFechado_64x64": 0.42249240121580545, + "eval_recall": 0.6421927765422947, + "eval_recall_DuraRiadoRio_64x64": 0.19444444444444445, + "eval_recall_Mole_64x64": 0.5763888888888888, + "eval_recall_Quebrado_64x64": 0.8888888888888888, + "eval_recall_RiadoRio_64x64": 0.5723684210526315, + "eval_recall_RioFechado_64x64": 0.9788732394366197, + "eval_runtime": 1.7658, + "eval_samples_per_second": 411.153, + "eval_steps_per_second": 26.051, + "step": 4368 + }, + { + "epoch": 12.002747252747254, + "grad_norm": 17.179597854614258, + "learning_rate": 4.3998626373626376e-05, + "loss": 0.6204, + "step": 4369 + }, + { + "epoch": 12.005494505494505, + "grad_norm": 11.048657417297363, + "learning_rate": 4.3997252747252746e-05, + "loss": 0.4461, + "step": 4370 + }, + { + "epoch": 12.008241758241759, + "grad_norm": 13.814530372619629, + "learning_rate": 4.399587912087912e-05, + "loss": 0.5348, + "step": 4371 + }, + { + "epoch": 12.010989010989011, + "grad_norm": 11.75894832611084, + "learning_rate": 4.399450549450549e-05, + "loss": 0.3948, + "step": 4372 + }, + { + "epoch": 12.013736263736265, + "grad_norm": 13.82925796508789, + "learning_rate": 4.399313186813187e-05, + "loss": 0.5328, + "step": 4373 + }, + { + "epoch": 12.016483516483516, + "grad_norm": 13.659710884094238, + "learning_rate": 4.3991758241758246e-05, + "loss": 0.4206, + "step": 4374 + }, + { + "epoch": 12.01923076923077, + "grad_norm": 16.815025329589844, + "learning_rate": 4.3990384615384616e-05, + "loss": 0.8785, + "step": 4375 + }, + { + "epoch": 12.021978021978022, + "grad_norm": 6.530291557312012, + "learning_rate": 4.398901098901099e-05, + "loss": 0.2539, + "step": 4376 + }, + { + "epoch": 12.024725274725276, + "grad_norm": 17.719207763671875, + "learning_rate": 4.398763736263736e-05, + "loss": 0.7382, + "step": 4377 + }, + { + "epoch": 12.027472527472527, + "grad_norm": 16.50051498413086, + "learning_rate": 4.398626373626374e-05, + "loss": 0.6854, + "step": 4378 + }, + { + "epoch": 12.030219780219781, + "grad_norm": 11.805959701538086, + "learning_rate": 4.3984890109890116e-05, + "loss": 0.4261, + "step": 4379 + }, + { + "epoch": 12.032967032967033, + "grad_norm": 12.844244003295898, + "learning_rate": 4.3983516483516487e-05, + "loss": 0.36, + "step": 4380 + }, + { + "epoch": 12.035714285714286, + "grad_norm": 9.377798080444336, + "learning_rate": 4.398214285714286e-05, + "loss": 0.4665, + "step": 4381 + }, + { + "epoch": 12.038461538461538, + "grad_norm": 12.21772575378418, + "learning_rate": 4.398076923076923e-05, + "loss": 0.579, + "step": 4382 + }, + { + "epoch": 12.041208791208792, + "grad_norm": 4.982666015625, + "learning_rate": 4.397939560439561e-05, + "loss": 0.1349, + "step": 4383 + }, + { + "epoch": 12.043956043956044, + "grad_norm": 12.629704475402832, + "learning_rate": 4.397802197802198e-05, + "loss": 0.3768, + "step": 4384 + }, + { + "epoch": 12.046703296703297, + "grad_norm": 10.897019386291504, + "learning_rate": 4.397664835164835e-05, + "loss": 0.4484, + "step": 4385 + }, + { + "epoch": 12.04945054945055, + "grad_norm": 16.279281616210938, + "learning_rate": 4.397527472527473e-05, + "loss": 0.5216, + "step": 4386 + }, + { + "epoch": 12.052197802197803, + "grad_norm": 8.883925437927246, + "learning_rate": 4.39739010989011e-05, + "loss": 0.2105, + "step": 4387 + }, + { + "epoch": 12.054945054945055, + "grad_norm": 17.700672149658203, + "learning_rate": 4.3972527472527474e-05, + "loss": 0.6023, + "step": 4388 + }, + { + "epoch": 12.057692307692308, + "grad_norm": 16.291345596313477, + "learning_rate": 4.397115384615385e-05, + "loss": 0.6061, + "step": 4389 + }, + { + "epoch": 12.06043956043956, + "grad_norm": 9.910440444946289, + "learning_rate": 4.396978021978022e-05, + "loss": 0.2381, + "step": 4390 + }, + { + "epoch": 12.063186813186814, + "grad_norm": 17.980125427246094, + "learning_rate": 4.39684065934066e-05, + "loss": 0.596, + "step": 4391 + }, + { + "epoch": 12.065934065934066, + "grad_norm": 11.825092315673828, + "learning_rate": 4.396703296703297e-05, + "loss": 0.4995, + "step": 4392 + }, + { + "epoch": 12.06868131868132, + "grad_norm": 11.377984046936035, + "learning_rate": 4.3965659340659344e-05, + "loss": 0.4656, + "step": 4393 + }, + { + "epoch": 12.071428571428571, + "grad_norm": 17.494768142700195, + "learning_rate": 4.396428571428572e-05, + "loss": 0.5012, + "step": 4394 + }, + { + "epoch": 12.074175824175825, + "grad_norm": 12.756686210632324, + "learning_rate": 4.396291208791209e-05, + "loss": 0.6014, + "step": 4395 + }, + { + "epoch": 12.076923076923077, + "grad_norm": 9.348896026611328, + "learning_rate": 4.396153846153847e-05, + "loss": 0.2171, + "step": 4396 + }, + { + "epoch": 12.07967032967033, + "grad_norm": 12.955289840698242, + "learning_rate": 4.396016483516484e-05, + "loss": 0.5469, + "step": 4397 + }, + { + "epoch": 12.082417582417582, + "grad_norm": 15.418805122375488, + "learning_rate": 4.3958791208791215e-05, + "loss": 0.5004, + "step": 4398 + }, + { + "epoch": 12.085164835164836, + "grad_norm": 9.62559986114502, + "learning_rate": 4.3957417582417585e-05, + "loss": 0.3081, + "step": 4399 + }, + { + "epoch": 12.087912087912088, + "grad_norm": 12.596254348754883, + "learning_rate": 4.3956043956043955e-05, + "loss": 0.4966, + "step": 4400 + }, + { + "epoch": 12.090659340659341, + "grad_norm": 9.734594345092773, + "learning_rate": 4.395467032967033e-05, + "loss": 0.3789, + "step": 4401 + }, + { + "epoch": 12.093406593406593, + "grad_norm": 14.9645357131958, + "learning_rate": 4.39532967032967e-05, + "loss": 0.5206, + "step": 4402 + }, + { + "epoch": 12.096153846153847, + "grad_norm": 17.00887107849121, + "learning_rate": 4.395192307692308e-05, + "loss": 0.833, + "step": 4403 + }, + { + "epoch": 12.098901098901099, + "grad_norm": 18.283523559570312, + "learning_rate": 4.3950549450549455e-05, + "loss": 0.8707, + "step": 4404 + }, + { + "epoch": 12.101648351648352, + "grad_norm": 14.463651657104492, + "learning_rate": 4.3949175824175825e-05, + "loss": 0.3513, + "step": 4405 + }, + { + "epoch": 12.104395604395604, + "grad_norm": 18.450923919677734, + "learning_rate": 4.39478021978022e-05, + "loss": 0.7973, + "step": 4406 + }, + { + "epoch": 12.107142857142858, + "grad_norm": 14.534599304199219, + "learning_rate": 4.394642857142857e-05, + "loss": 0.6125, + "step": 4407 + }, + { + "epoch": 12.10989010989011, + "grad_norm": 17.207275390625, + "learning_rate": 4.394505494505495e-05, + "loss": 0.8116, + "step": 4408 + }, + { + "epoch": 12.112637362637363, + "grad_norm": 19.838966369628906, + "learning_rate": 4.3943681318681325e-05, + "loss": 0.7028, + "step": 4409 + }, + { + "epoch": 12.115384615384615, + "grad_norm": 11.097702026367188, + "learning_rate": 4.3942307692307695e-05, + "loss": 0.3271, + "step": 4410 + }, + { + "epoch": 12.118131868131869, + "grad_norm": 14.374701499938965, + "learning_rate": 4.394093406593407e-05, + "loss": 0.5013, + "step": 4411 + }, + { + "epoch": 12.12087912087912, + "grad_norm": 19.93117332458496, + "learning_rate": 4.393956043956044e-05, + "loss": 0.83, + "step": 4412 + }, + { + "epoch": 12.123626373626374, + "grad_norm": 12.759315490722656, + "learning_rate": 4.393818681318682e-05, + "loss": 0.5093, + "step": 4413 + }, + { + "epoch": 12.126373626373626, + "grad_norm": 11.680936813354492, + "learning_rate": 4.393681318681319e-05, + "loss": 0.3968, + "step": 4414 + }, + { + "epoch": 12.12912087912088, + "grad_norm": 13.458003997802734, + "learning_rate": 4.393543956043956e-05, + "loss": 0.5832, + "step": 4415 + }, + { + "epoch": 12.131868131868131, + "grad_norm": 14.903462409973145, + "learning_rate": 4.3934065934065936e-05, + "loss": 0.7126, + "step": 4416 + }, + { + "epoch": 12.134615384615385, + "grad_norm": 16.61503028869629, + "learning_rate": 4.3932692307692306e-05, + "loss": 0.6085, + "step": 4417 + }, + { + "epoch": 12.137362637362637, + "grad_norm": 15.952893257141113, + "learning_rate": 4.393131868131868e-05, + "loss": 0.8134, + "step": 4418 + }, + { + "epoch": 12.14010989010989, + "grad_norm": 13.000471115112305, + "learning_rate": 4.392994505494506e-05, + "loss": 0.3755, + "step": 4419 + }, + { + "epoch": 12.142857142857142, + "grad_norm": 8.615472793579102, + "learning_rate": 4.392857142857143e-05, + "loss": 0.3996, + "step": 4420 + }, + { + "epoch": 12.145604395604396, + "grad_norm": 7.269603729248047, + "learning_rate": 4.3927197802197806e-05, + "loss": 0.2271, + "step": 4421 + }, + { + "epoch": 12.148351648351648, + "grad_norm": 11.295462608337402, + "learning_rate": 4.3925824175824176e-05, + "loss": 0.4307, + "step": 4422 + }, + { + "epoch": 12.151098901098901, + "grad_norm": 7.5587544441223145, + "learning_rate": 4.392445054945055e-05, + "loss": 0.1635, + "step": 4423 + }, + { + "epoch": 12.153846153846153, + "grad_norm": 10.024930953979492, + "learning_rate": 4.392307692307693e-05, + "loss": 0.2462, + "step": 4424 + }, + { + "epoch": 12.156593406593407, + "grad_norm": 9.78458309173584, + "learning_rate": 4.39217032967033e-05, + "loss": 0.2465, + "step": 4425 + }, + { + "epoch": 12.159340659340659, + "grad_norm": 13.398746490478516, + "learning_rate": 4.392032967032968e-05, + "loss": 0.3771, + "step": 4426 + }, + { + "epoch": 12.162087912087912, + "grad_norm": 14.734793663024902, + "learning_rate": 4.391895604395605e-05, + "loss": 0.6189, + "step": 4427 + }, + { + "epoch": 12.164835164835164, + "grad_norm": 9.87999153137207, + "learning_rate": 4.391758241758242e-05, + "loss": 0.313, + "step": 4428 + }, + { + "epoch": 12.167582417582418, + "grad_norm": 7.563697814941406, + "learning_rate": 4.3916208791208794e-05, + "loss": 0.2489, + "step": 4429 + }, + { + "epoch": 12.17032967032967, + "grad_norm": 17.28541374206543, + "learning_rate": 4.3914835164835164e-05, + "loss": 0.5088, + "step": 4430 + }, + { + "epoch": 12.173076923076923, + "grad_norm": 8.132773399353027, + "learning_rate": 4.391346153846154e-05, + "loss": 0.3463, + "step": 4431 + }, + { + "epoch": 12.175824175824175, + "grad_norm": 7.841879367828369, + "learning_rate": 4.391208791208791e-05, + "loss": 0.2625, + "step": 4432 + }, + { + "epoch": 12.178571428571429, + "grad_norm": 12.935012817382812, + "learning_rate": 4.391071428571429e-05, + "loss": 0.3854, + "step": 4433 + }, + { + "epoch": 12.18131868131868, + "grad_norm": 11.203290939331055, + "learning_rate": 4.3909340659340664e-05, + "loss": 0.3035, + "step": 4434 + }, + { + "epoch": 12.184065934065934, + "grad_norm": 14.56285285949707, + "learning_rate": 4.3907967032967034e-05, + "loss": 0.6422, + "step": 4435 + }, + { + "epoch": 12.186813186813186, + "grad_norm": 13.074301719665527, + "learning_rate": 4.390659340659341e-05, + "loss": 0.4406, + "step": 4436 + }, + { + "epoch": 12.18956043956044, + "grad_norm": 14.504081726074219, + "learning_rate": 4.390521978021978e-05, + "loss": 0.5892, + "step": 4437 + }, + { + "epoch": 12.192307692307692, + "grad_norm": 13.519264221191406, + "learning_rate": 4.390384615384616e-05, + "loss": 0.4017, + "step": 4438 + }, + { + "epoch": 12.195054945054945, + "grad_norm": 12.169923782348633, + "learning_rate": 4.390247252747253e-05, + "loss": 0.3732, + "step": 4439 + }, + { + "epoch": 12.197802197802197, + "grad_norm": 13.366174697875977, + "learning_rate": 4.3901098901098904e-05, + "loss": 0.3749, + "step": 4440 + }, + { + "epoch": 12.20054945054945, + "grad_norm": 8.035226821899414, + "learning_rate": 4.389972527472528e-05, + "loss": 0.335, + "step": 4441 + }, + { + "epoch": 12.203296703296703, + "grad_norm": 14.527053833007812, + "learning_rate": 4.389835164835165e-05, + "loss": 0.5071, + "step": 4442 + }, + { + "epoch": 12.206043956043956, + "grad_norm": 9.464879035949707, + "learning_rate": 4.389697802197802e-05, + "loss": 0.2218, + "step": 4443 + }, + { + "epoch": 12.208791208791208, + "grad_norm": 18.51105499267578, + "learning_rate": 4.38956043956044e-05, + "loss": 0.6792, + "step": 4444 + }, + { + "epoch": 12.211538461538462, + "grad_norm": 6.74958610534668, + "learning_rate": 4.389423076923077e-05, + "loss": 0.1753, + "step": 4445 + }, + { + "epoch": 12.214285714285714, + "grad_norm": 9.215563774108887, + "learning_rate": 4.3892857142857145e-05, + "loss": 0.2477, + "step": 4446 + }, + { + "epoch": 12.217032967032967, + "grad_norm": 10.945121765136719, + "learning_rate": 4.3891483516483515e-05, + "loss": 0.3928, + "step": 4447 + }, + { + "epoch": 12.219780219780219, + "grad_norm": 5.910930633544922, + "learning_rate": 4.389010989010989e-05, + "loss": 0.2413, + "step": 4448 + }, + { + "epoch": 12.222527472527473, + "grad_norm": 10.267148971557617, + "learning_rate": 4.388873626373626e-05, + "loss": 0.3626, + "step": 4449 + }, + { + "epoch": 12.225274725274724, + "grad_norm": 3.132321357727051, + "learning_rate": 4.388736263736264e-05, + "loss": 0.0646, + "step": 4450 + }, + { + "epoch": 12.228021978021978, + "grad_norm": 16.586034774780273, + "learning_rate": 4.3885989010989015e-05, + "loss": 0.8891, + "step": 4451 + }, + { + "epoch": 12.23076923076923, + "grad_norm": 13.209068298339844, + "learning_rate": 4.3884615384615385e-05, + "loss": 0.4096, + "step": 4452 + }, + { + "epoch": 12.233516483516484, + "grad_norm": 13.917695999145508, + "learning_rate": 4.388324175824176e-05, + "loss": 0.5092, + "step": 4453 + }, + { + "epoch": 12.236263736263735, + "grad_norm": 6.6183390617370605, + "learning_rate": 4.388186813186813e-05, + "loss": 0.1766, + "step": 4454 + }, + { + "epoch": 12.239010989010989, + "grad_norm": 14.673247337341309, + "learning_rate": 4.388049450549451e-05, + "loss": 0.482, + "step": 4455 + }, + { + "epoch": 12.241758241758241, + "grad_norm": 19.832164764404297, + "learning_rate": 4.3879120879120886e-05, + "loss": 0.6744, + "step": 4456 + }, + { + "epoch": 12.244505494505495, + "grad_norm": 13.859118461608887, + "learning_rate": 4.3877747252747256e-05, + "loss": 0.3657, + "step": 4457 + }, + { + "epoch": 12.247252747252748, + "grad_norm": 11.91613483428955, + "learning_rate": 4.3876373626373626e-05, + "loss": 0.316, + "step": 4458 + }, + { + "epoch": 12.25, + "grad_norm": 18.02569580078125, + "learning_rate": 4.3875e-05, + "loss": 0.7418, + "step": 4459 + }, + { + "epoch": 12.252747252747252, + "grad_norm": 6.316547393798828, + "learning_rate": 4.387362637362637e-05, + "loss": 0.2677, + "step": 4460 + }, + { + "epoch": 12.255494505494505, + "grad_norm": 11.116721153259277, + "learning_rate": 4.387225274725275e-05, + "loss": 0.4893, + "step": 4461 + }, + { + "epoch": 12.258241758241759, + "grad_norm": 13.055336952209473, + "learning_rate": 4.387087912087912e-05, + "loss": 0.6439, + "step": 4462 + }, + { + "epoch": 12.260989010989011, + "grad_norm": 6.779710292816162, + "learning_rate": 4.3869505494505496e-05, + "loss": 0.1173, + "step": 4463 + }, + { + "epoch": 12.263736263736265, + "grad_norm": 19.2264461517334, + "learning_rate": 4.3868131868131866e-05, + "loss": 0.816, + "step": 4464 + }, + { + "epoch": 12.266483516483516, + "grad_norm": 15.59477710723877, + "learning_rate": 4.386675824175824e-05, + "loss": 0.4428, + "step": 4465 + }, + { + "epoch": 12.26923076923077, + "grad_norm": 17.34543228149414, + "learning_rate": 4.386538461538462e-05, + "loss": 0.9368, + "step": 4466 + }, + { + "epoch": 12.271978021978022, + "grad_norm": 9.152710914611816, + "learning_rate": 4.386401098901099e-05, + "loss": 0.3782, + "step": 4467 + }, + { + "epoch": 12.274725274725276, + "grad_norm": 14.734461784362793, + "learning_rate": 4.386263736263737e-05, + "loss": 0.5519, + "step": 4468 + }, + { + "epoch": 12.277472527472527, + "grad_norm": 9.45106315612793, + "learning_rate": 4.386126373626374e-05, + "loss": 0.2195, + "step": 4469 + }, + { + "epoch": 12.280219780219781, + "grad_norm": 13.655954360961914, + "learning_rate": 4.3859890109890113e-05, + "loss": 0.9048, + "step": 4470 + }, + { + "epoch": 12.282967032967033, + "grad_norm": 16.96833610534668, + "learning_rate": 4.385851648351649e-05, + "loss": 0.7359, + "step": 4471 + }, + { + "epoch": 12.285714285714286, + "grad_norm": 16.150039672851562, + "learning_rate": 4.385714285714286e-05, + "loss": 0.6581, + "step": 4472 + }, + { + "epoch": 12.288461538461538, + "grad_norm": 13.866188049316406, + "learning_rate": 4.385576923076923e-05, + "loss": 0.4923, + "step": 4473 + }, + { + "epoch": 12.291208791208792, + "grad_norm": 7.8362226486206055, + "learning_rate": 4.385439560439561e-05, + "loss": 0.2086, + "step": 4474 + }, + { + "epoch": 12.293956043956044, + "grad_norm": 13.3656005859375, + "learning_rate": 4.385302197802198e-05, + "loss": 0.5841, + "step": 4475 + }, + { + "epoch": 12.296703296703297, + "grad_norm": 10.253274917602539, + "learning_rate": 4.3851648351648354e-05, + "loss": 0.2488, + "step": 4476 + }, + { + "epoch": 12.29945054945055, + "grad_norm": 16.310766220092773, + "learning_rate": 4.3850274725274724e-05, + "loss": 0.441, + "step": 4477 + }, + { + "epoch": 12.302197802197803, + "grad_norm": 11.53735065460205, + "learning_rate": 4.38489010989011e-05, + "loss": 0.4405, + "step": 4478 + }, + { + "epoch": 12.304945054945055, + "grad_norm": 11.508623123168945, + "learning_rate": 4.384752747252747e-05, + "loss": 0.4923, + "step": 4479 + }, + { + "epoch": 12.307692307692308, + "grad_norm": 15.188008308410645, + "learning_rate": 4.384615384615385e-05, + "loss": 0.3912, + "step": 4480 + }, + { + "epoch": 12.31043956043956, + "grad_norm": 17.410673141479492, + "learning_rate": 4.3844780219780224e-05, + "loss": 0.7055, + "step": 4481 + }, + { + "epoch": 12.313186813186814, + "grad_norm": 9.779631614685059, + "learning_rate": 4.3843406593406594e-05, + "loss": 0.3095, + "step": 4482 + }, + { + "epoch": 12.315934065934066, + "grad_norm": 17.187631607055664, + "learning_rate": 4.384203296703297e-05, + "loss": 0.934, + "step": 4483 + }, + { + "epoch": 12.31868131868132, + "grad_norm": 20.729541778564453, + "learning_rate": 4.384065934065934e-05, + "loss": 1.1551, + "step": 4484 + }, + { + "epoch": 12.321428571428571, + "grad_norm": 12.5762300491333, + "learning_rate": 4.383928571428572e-05, + "loss": 0.3334, + "step": 4485 + }, + { + "epoch": 12.324175824175825, + "grad_norm": 15.693215370178223, + "learning_rate": 4.3837912087912095e-05, + "loss": 0.5901, + "step": 4486 + }, + { + "epoch": 12.326923076923077, + "grad_norm": 13.15086841583252, + "learning_rate": 4.3836538461538465e-05, + "loss": 0.4163, + "step": 4487 + }, + { + "epoch": 12.32967032967033, + "grad_norm": 11.942005157470703, + "learning_rate": 4.3835164835164835e-05, + "loss": 0.441, + "step": 4488 + }, + { + "epoch": 12.332417582417582, + "grad_norm": 11.875146865844727, + "learning_rate": 4.383379120879121e-05, + "loss": 0.374, + "step": 4489 + }, + { + "epoch": 12.335164835164836, + "grad_norm": 16.134201049804688, + "learning_rate": 4.383241758241758e-05, + "loss": 0.6265, + "step": 4490 + }, + { + "epoch": 12.337912087912088, + "grad_norm": 21.741668701171875, + "learning_rate": 4.383104395604396e-05, + "loss": 1.0761, + "step": 4491 + }, + { + "epoch": 12.340659340659341, + "grad_norm": 12.680625915527344, + "learning_rate": 4.382967032967033e-05, + "loss": 0.4918, + "step": 4492 + }, + { + "epoch": 12.343406593406593, + "grad_norm": 9.932820320129395, + "learning_rate": 4.3828296703296705e-05, + "loss": 0.3622, + "step": 4493 + }, + { + "epoch": 12.346153846153847, + "grad_norm": 13.189620018005371, + "learning_rate": 4.3826923076923075e-05, + "loss": 0.5579, + "step": 4494 + }, + { + "epoch": 12.348901098901099, + "grad_norm": 13.51407241821289, + "learning_rate": 4.382554945054945e-05, + "loss": 0.5455, + "step": 4495 + }, + { + "epoch": 12.351648351648352, + "grad_norm": 16.17180633544922, + "learning_rate": 4.382417582417583e-05, + "loss": 0.6535, + "step": 4496 + }, + { + "epoch": 12.354395604395604, + "grad_norm": 6.675502777099609, + "learning_rate": 4.38228021978022e-05, + "loss": 0.2556, + "step": 4497 + }, + { + "epoch": 12.357142857142858, + "grad_norm": 15.304974555969238, + "learning_rate": 4.3821428571428576e-05, + "loss": 0.5596, + "step": 4498 + }, + { + "epoch": 12.35989010989011, + "grad_norm": 19.490345001220703, + "learning_rate": 4.3820054945054946e-05, + "loss": 0.9214, + "step": 4499 + }, + { + "epoch": 12.362637362637363, + "grad_norm": 17.453414916992188, + "learning_rate": 4.381868131868132e-05, + "loss": 0.7685, + "step": 4500 + }, + { + "epoch": 12.365384615384615, + "grad_norm": 11.554492950439453, + "learning_rate": 4.38173076923077e-05, + "loss": 0.3436, + "step": 4501 + }, + { + "epoch": 12.368131868131869, + "grad_norm": 11.07406234741211, + "learning_rate": 4.381593406593407e-05, + "loss": 0.366, + "step": 4502 + }, + { + "epoch": 12.37087912087912, + "grad_norm": 16.046058654785156, + "learning_rate": 4.381456043956044e-05, + "loss": 0.516, + "step": 4503 + }, + { + "epoch": 12.373626373626374, + "grad_norm": 6.88881778717041, + "learning_rate": 4.3813186813186816e-05, + "loss": 0.3231, + "step": 4504 + }, + { + "epoch": 12.376373626373626, + "grad_norm": 10.03037166595459, + "learning_rate": 4.3811813186813186e-05, + "loss": 0.4919, + "step": 4505 + }, + { + "epoch": 12.37912087912088, + "grad_norm": 16.77999496459961, + "learning_rate": 4.381043956043956e-05, + "loss": 0.6231, + "step": 4506 + }, + { + "epoch": 12.381868131868131, + "grad_norm": 17.83268928527832, + "learning_rate": 4.380906593406593e-05, + "loss": 0.6599, + "step": 4507 + }, + { + "epoch": 12.384615384615385, + "grad_norm": 20.122478485107422, + "learning_rate": 4.380769230769231e-05, + "loss": 0.8301, + "step": 4508 + }, + { + "epoch": 12.387362637362637, + "grad_norm": 8.202431678771973, + "learning_rate": 4.380631868131868e-05, + "loss": 0.1684, + "step": 4509 + }, + { + "epoch": 12.39010989010989, + "grad_norm": 8.299907684326172, + "learning_rate": 4.3804945054945057e-05, + "loss": 0.3608, + "step": 4510 + }, + { + "epoch": 12.392857142857142, + "grad_norm": 11.810074806213379, + "learning_rate": 4.380357142857143e-05, + "loss": 0.4391, + "step": 4511 + }, + { + "epoch": 12.395604395604396, + "grad_norm": 7.026386260986328, + "learning_rate": 4.38021978021978e-05, + "loss": 0.2856, + "step": 4512 + }, + { + "epoch": 12.398351648351648, + "grad_norm": 15.57845687866211, + "learning_rate": 4.380082417582418e-05, + "loss": 0.5794, + "step": 4513 + }, + { + "epoch": 12.401098901098901, + "grad_norm": 9.131890296936035, + "learning_rate": 4.379945054945055e-05, + "loss": 0.3062, + "step": 4514 + }, + { + "epoch": 12.403846153846153, + "grad_norm": 13.857989311218262, + "learning_rate": 4.379807692307693e-05, + "loss": 0.4954, + "step": 4515 + }, + { + "epoch": 12.406593406593407, + "grad_norm": 14.473320007324219, + "learning_rate": 4.3796703296703304e-05, + "loss": 0.6295, + "step": 4516 + }, + { + "epoch": 12.409340659340659, + "grad_norm": 19.957265853881836, + "learning_rate": 4.3795329670329674e-05, + "loss": 0.7362, + "step": 4517 + }, + { + "epoch": 12.412087912087912, + "grad_norm": 9.175501823425293, + "learning_rate": 4.3793956043956044e-05, + "loss": 0.2704, + "step": 4518 + }, + { + "epoch": 12.414835164835164, + "grad_norm": 15.701393127441406, + "learning_rate": 4.379258241758242e-05, + "loss": 0.5825, + "step": 4519 + }, + { + "epoch": 12.417582417582418, + "grad_norm": 16.575265884399414, + "learning_rate": 4.379120879120879e-05, + "loss": 0.9664, + "step": 4520 + }, + { + "epoch": 12.42032967032967, + "grad_norm": 10.458659172058105, + "learning_rate": 4.378983516483517e-05, + "loss": 0.309, + "step": 4521 + }, + { + "epoch": 12.423076923076923, + "grad_norm": 14.216110229492188, + "learning_rate": 4.378846153846154e-05, + "loss": 0.8022, + "step": 4522 + }, + { + "epoch": 12.425824175824175, + "grad_norm": 16.175935745239258, + "learning_rate": 4.3787087912087914e-05, + "loss": 0.4558, + "step": 4523 + }, + { + "epoch": 12.428571428571429, + "grad_norm": 11.835165023803711, + "learning_rate": 4.3785714285714284e-05, + "loss": 0.4856, + "step": 4524 + }, + { + "epoch": 12.43131868131868, + "grad_norm": 11.965370178222656, + "learning_rate": 4.378434065934066e-05, + "loss": 0.464, + "step": 4525 + }, + { + "epoch": 12.434065934065934, + "grad_norm": 12.91832160949707, + "learning_rate": 4.378296703296704e-05, + "loss": 0.5764, + "step": 4526 + }, + { + "epoch": 12.436813186813186, + "grad_norm": 18.05847930908203, + "learning_rate": 4.378159340659341e-05, + "loss": 0.602, + "step": 4527 + }, + { + "epoch": 12.43956043956044, + "grad_norm": 11.578243255615234, + "learning_rate": 4.3780219780219785e-05, + "loss": 0.5576, + "step": 4528 + }, + { + "epoch": 12.442307692307692, + "grad_norm": 14.271818161010742, + "learning_rate": 4.3778846153846155e-05, + "loss": 0.6725, + "step": 4529 + }, + { + "epoch": 12.445054945054945, + "grad_norm": 4.690183639526367, + "learning_rate": 4.377747252747253e-05, + "loss": 0.1688, + "step": 4530 + }, + { + "epoch": 12.447802197802197, + "grad_norm": 18.3817195892334, + "learning_rate": 4.377609890109891e-05, + "loss": 0.8034, + "step": 4531 + }, + { + "epoch": 12.45054945054945, + "grad_norm": 11.670406341552734, + "learning_rate": 4.377472527472528e-05, + "loss": 0.415, + "step": 4532 + }, + { + "epoch": 12.453296703296703, + "grad_norm": 18.921066284179688, + "learning_rate": 4.377335164835165e-05, + "loss": 0.6867, + "step": 4533 + }, + { + "epoch": 12.456043956043956, + "grad_norm": 15.772001266479492, + "learning_rate": 4.3771978021978025e-05, + "loss": 0.5953, + "step": 4534 + }, + { + "epoch": 12.458791208791208, + "grad_norm": 13.75296401977539, + "learning_rate": 4.3770604395604395e-05, + "loss": 0.4362, + "step": 4535 + }, + { + "epoch": 12.461538461538462, + "grad_norm": 13.360005378723145, + "learning_rate": 4.376923076923077e-05, + "loss": 0.4218, + "step": 4536 + }, + { + "epoch": 12.464285714285714, + "grad_norm": 7.809999942779541, + "learning_rate": 4.376785714285714e-05, + "loss": 0.2337, + "step": 4537 + }, + { + "epoch": 12.467032967032967, + "grad_norm": 12.544843673706055, + "learning_rate": 4.376648351648352e-05, + "loss": 0.4016, + "step": 4538 + }, + { + "epoch": 12.469780219780219, + "grad_norm": 12.185050964355469, + "learning_rate": 4.376510989010989e-05, + "loss": 0.2787, + "step": 4539 + }, + { + "epoch": 12.472527472527473, + "grad_norm": 13.688613891601562, + "learning_rate": 4.3763736263736265e-05, + "loss": 0.424, + "step": 4540 + }, + { + "epoch": 12.475274725274724, + "grad_norm": 5.999633312225342, + "learning_rate": 4.376236263736264e-05, + "loss": 0.2551, + "step": 4541 + }, + { + "epoch": 12.478021978021978, + "grad_norm": 10.329451560974121, + "learning_rate": 4.376098901098901e-05, + "loss": 0.3866, + "step": 4542 + }, + { + "epoch": 12.48076923076923, + "grad_norm": 11.319001197814941, + "learning_rate": 4.375961538461539e-05, + "loss": 0.4575, + "step": 4543 + }, + { + "epoch": 12.483516483516484, + "grad_norm": 18.222063064575195, + "learning_rate": 4.375824175824176e-05, + "loss": 0.6178, + "step": 4544 + }, + { + "epoch": 12.486263736263735, + "grad_norm": 14.089003562927246, + "learning_rate": 4.3756868131868136e-05, + "loss": 0.7322, + "step": 4545 + }, + { + "epoch": 12.489010989010989, + "grad_norm": 14.10513687133789, + "learning_rate": 4.375549450549451e-05, + "loss": 0.5003, + "step": 4546 + }, + { + "epoch": 12.491758241758241, + "grad_norm": 11.704471588134766, + "learning_rate": 4.375412087912088e-05, + "loss": 0.3998, + "step": 4547 + }, + { + "epoch": 12.494505494505495, + "grad_norm": 18.4868106842041, + "learning_rate": 4.375274725274725e-05, + "loss": 0.8537, + "step": 4548 + }, + { + "epoch": 12.497252747252748, + "grad_norm": 8.919268608093262, + "learning_rate": 4.375137362637363e-05, + "loss": 0.2397, + "step": 4549 + }, + { + "epoch": 12.5, + "grad_norm": 12.93055534362793, + "learning_rate": 4.375e-05, + "loss": 0.404, + "step": 4550 + }, + { + "epoch": 12.502747252747252, + "grad_norm": 19.256338119506836, + "learning_rate": 4.3748626373626376e-05, + "loss": 0.9476, + "step": 4551 + }, + { + "epoch": 12.505494505494505, + "grad_norm": 17.783531188964844, + "learning_rate": 4.3747252747252746e-05, + "loss": 0.7989, + "step": 4552 + }, + { + "epoch": 12.508241758241759, + "grad_norm": 14.137761116027832, + "learning_rate": 4.374587912087912e-05, + "loss": 0.4848, + "step": 4553 + }, + { + "epoch": 12.510989010989011, + "grad_norm": 12.622154235839844, + "learning_rate": 4.374450549450549e-05, + "loss": 0.4152, + "step": 4554 + }, + { + "epoch": 12.513736263736263, + "grad_norm": 9.48965072631836, + "learning_rate": 4.374313186813187e-05, + "loss": 0.3074, + "step": 4555 + }, + { + "epoch": 12.516483516483516, + "grad_norm": 16.17853546142578, + "learning_rate": 4.374175824175825e-05, + "loss": 0.5412, + "step": 4556 + }, + { + "epoch": 12.51923076923077, + "grad_norm": 14.841958999633789, + "learning_rate": 4.374038461538462e-05, + "loss": 0.601, + "step": 4557 + }, + { + "epoch": 12.521978021978022, + "grad_norm": 8.437382698059082, + "learning_rate": 4.3739010989010994e-05, + "loss": 0.2553, + "step": 4558 + }, + { + "epoch": 12.524725274725276, + "grad_norm": 13.590347290039062, + "learning_rate": 4.3737637362637364e-05, + "loss": 0.6, + "step": 4559 + }, + { + "epoch": 12.527472527472527, + "grad_norm": 16.956836700439453, + "learning_rate": 4.373626373626374e-05, + "loss": 0.6651, + "step": 4560 + }, + { + "epoch": 12.530219780219781, + "grad_norm": 12.5696439743042, + "learning_rate": 4.373489010989012e-05, + "loss": 0.4323, + "step": 4561 + }, + { + "epoch": 12.532967032967033, + "grad_norm": 15.060051918029785, + "learning_rate": 4.373351648351649e-05, + "loss": 0.4812, + "step": 4562 + }, + { + "epoch": 12.535714285714286, + "grad_norm": 10.806258201599121, + "learning_rate": 4.373214285714286e-05, + "loss": 0.309, + "step": 4563 + }, + { + "epoch": 12.538461538461538, + "grad_norm": 12.10583209991455, + "learning_rate": 4.3730769230769234e-05, + "loss": 0.3663, + "step": 4564 + }, + { + "epoch": 12.541208791208792, + "grad_norm": 13.602435111999512, + "learning_rate": 4.3729395604395604e-05, + "loss": 0.4576, + "step": 4565 + }, + { + "epoch": 12.543956043956044, + "grad_norm": 10.138872146606445, + "learning_rate": 4.372802197802198e-05, + "loss": 0.4246, + "step": 4566 + }, + { + "epoch": 12.546703296703297, + "grad_norm": 11.83414363861084, + "learning_rate": 4.372664835164835e-05, + "loss": 0.5107, + "step": 4567 + }, + { + "epoch": 12.54945054945055, + "grad_norm": 13.628019332885742, + "learning_rate": 4.372527472527473e-05, + "loss": 0.7003, + "step": 4568 + }, + { + "epoch": 12.552197802197803, + "grad_norm": 16.155406951904297, + "learning_rate": 4.37239010989011e-05, + "loss": 0.5763, + "step": 4569 + }, + { + "epoch": 12.554945054945055, + "grad_norm": 10.649096488952637, + "learning_rate": 4.3722527472527474e-05, + "loss": 0.4937, + "step": 4570 + }, + { + "epoch": 12.557692307692308, + "grad_norm": 15.455038070678711, + "learning_rate": 4.372115384615385e-05, + "loss": 0.5853, + "step": 4571 + }, + { + "epoch": 12.56043956043956, + "grad_norm": 16.35447120666504, + "learning_rate": 4.371978021978022e-05, + "loss": 0.7436, + "step": 4572 + }, + { + "epoch": 12.563186813186814, + "grad_norm": 17.548337936401367, + "learning_rate": 4.37184065934066e-05, + "loss": 0.6375, + "step": 4573 + }, + { + "epoch": 12.565934065934066, + "grad_norm": 9.504019737243652, + "learning_rate": 4.371703296703297e-05, + "loss": 0.392, + "step": 4574 + }, + { + "epoch": 12.56868131868132, + "grad_norm": 19.873916625976562, + "learning_rate": 4.3715659340659345e-05, + "loss": 1.2104, + "step": 4575 + }, + { + "epoch": 12.571428571428571, + "grad_norm": 18.377559661865234, + "learning_rate": 4.371428571428572e-05, + "loss": 0.7146, + "step": 4576 + }, + { + "epoch": 12.574175824175825, + "grad_norm": 8.15492057800293, + "learning_rate": 4.371291208791209e-05, + "loss": 0.272, + "step": 4577 + }, + { + "epoch": 12.576923076923077, + "grad_norm": 12.441814422607422, + "learning_rate": 4.371153846153846e-05, + "loss": 0.5266, + "step": 4578 + }, + { + "epoch": 12.57967032967033, + "grad_norm": 17.47844886779785, + "learning_rate": 4.371016483516484e-05, + "loss": 0.7775, + "step": 4579 + }, + { + "epoch": 12.582417582417582, + "grad_norm": 14.760797500610352, + "learning_rate": 4.370879120879121e-05, + "loss": 0.4786, + "step": 4580 + }, + { + "epoch": 12.585164835164836, + "grad_norm": 9.495824813842773, + "learning_rate": 4.3707417582417585e-05, + "loss": 0.2463, + "step": 4581 + }, + { + "epoch": 12.587912087912088, + "grad_norm": 14.538134574890137, + "learning_rate": 4.3706043956043955e-05, + "loss": 0.4954, + "step": 4582 + }, + { + "epoch": 12.590659340659341, + "grad_norm": 16.674196243286133, + "learning_rate": 4.370467032967033e-05, + "loss": 0.9697, + "step": 4583 + }, + { + "epoch": 12.593406593406593, + "grad_norm": 10.563886642456055, + "learning_rate": 4.37032967032967e-05, + "loss": 0.3335, + "step": 4584 + }, + { + "epoch": 12.596153846153847, + "grad_norm": 18.738201141357422, + "learning_rate": 4.370192307692308e-05, + "loss": 0.9522, + "step": 4585 + }, + { + "epoch": 12.598901098901099, + "grad_norm": 13.007959365844727, + "learning_rate": 4.3700549450549456e-05, + "loss": 0.3663, + "step": 4586 + }, + { + "epoch": 12.601648351648352, + "grad_norm": 14.451300621032715, + "learning_rate": 4.3699175824175826e-05, + "loss": 0.4384, + "step": 4587 + }, + { + "epoch": 12.604395604395604, + "grad_norm": 13.77453327178955, + "learning_rate": 4.36978021978022e-05, + "loss": 0.4043, + "step": 4588 + }, + { + "epoch": 12.607142857142858, + "grad_norm": 14.720355033874512, + "learning_rate": 4.369642857142857e-05, + "loss": 0.51, + "step": 4589 + }, + { + "epoch": 12.60989010989011, + "grad_norm": 14.595205307006836, + "learning_rate": 4.369505494505495e-05, + "loss": 0.3684, + "step": 4590 + }, + { + "epoch": 12.612637362637363, + "grad_norm": 10.511310577392578, + "learning_rate": 4.3693681318681326e-05, + "loss": 0.3401, + "step": 4591 + }, + { + "epoch": 12.615384615384615, + "grad_norm": 15.797384262084961, + "learning_rate": 4.3692307692307696e-05, + "loss": 0.9657, + "step": 4592 + }, + { + "epoch": 12.618131868131869, + "grad_norm": 16.991899490356445, + "learning_rate": 4.3690934065934066e-05, + "loss": 0.7077, + "step": 4593 + }, + { + "epoch": 12.62087912087912, + "grad_norm": 18.112201690673828, + "learning_rate": 4.368956043956044e-05, + "loss": 0.9492, + "step": 4594 + }, + { + "epoch": 12.623626373626374, + "grad_norm": 8.048134803771973, + "learning_rate": 4.368818681318681e-05, + "loss": 0.3008, + "step": 4595 + }, + { + "epoch": 12.626373626373626, + "grad_norm": 9.854085922241211, + "learning_rate": 4.368681318681319e-05, + "loss": 0.3357, + "step": 4596 + }, + { + "epoch": 12.62912087912088, + "grad_norm": 11.281682014465332, + "learning_rate": 4.368543956043956e-05, + "loss": 0.3086, + "step": 4597 + }, + { + "epoch": 12.631868131868131, + "grad_norm": 12.245624542236328, + "learning_rate": 4.368406593406594e-05, + "loss": 0.372, + "step": 4598 + }, + { + "epoch": 12.634615384615385, + "grad_norm": 14.416741371154785, + "learning_rate": 4.368269230769231e-05, + "loss": 0.7134, + "step": 4599 + }, + { + "epoch": 12.637362637362637, + "grad_norm": 9.962732315063477, + "learning_rate": 4.3681318681318683e-05, + "loss": 0.3057, + "step": 4600 + }, + { + "epoch": 12.64010989010989, + "grad_norm": 12.672574043273926, + "learning_rate": 4.367994505494506e-05, + "loss": 0.8953, + "step": 4601 + }, + { + "epoch": 12.642857142857142, + "grad_norm": 8.807940483093262, + "learning_rate": 4.367857142857143e-05, + "loss": 0.1552, + "step": 4602 + }, + { + "epoch": 12.645604395604396, + "grad_norm": 17.63615608215332, + "learning_rate": 4.367719780219781e-05, + "loss": 0.7302, + "step": 4603 + }, + { + "epoch": 12.648351648351648, + "grad_norm": 17.539865493774414, + "learning_rate": 4.367582417582418e-05, + "loss": 0.8235, + "step": 4604 + }, + { + "epoch": 12.651098901098901, + "grad_norm": 6.938848495483398, + "learning_rate": 4.3674450549450554e-05, + "loss": 0.2317, + "step": 4605 + }, + { + "epoch": 12.653846153846153, + "grad_norm": 12.695895195007324, + "learning_rate": 4.367307692307693e-05, + "loss": 0.3245, + "step": 4606 + }, + { + "epoch": 12.656593406593407, + "grad_norm": 11.425056457519531, + "learning_rate": 4.36717032967033e-05, + "loss": 0.328, + "step": 4607 + }, + { + "epoch": 12.659340659340659, + "grad_norm": 10.427978515625, + "learning_rate": 4.367032967032967e-05, + "loss": 0.3014, + "step": 4608 + }, + { + "epoch": 12.662087912087912, + "grad_norm": 14.76090145111084, + "learning_rate": 4.366895604395605e-05, + "loss": 0.7552, + "step": 4609 + }, + { + "epoch": 12.664835164835164, + "grad_norm": 12.928520202636719, + "learning_rate": 4.366758241758242e-05, + "loss": 0.549, + "step": 4610 + }, + { + "epoch": 12.667582417582418, + "grad_norm": 11.604528427124023, + "learning_rate": 4.3666208791208794e-05, + "loss": 0.2476, + "step": 4611 + }, + { + "epoch": 12.67032967032967, + "grad_norm": 9.226865768432617, + "learning_rate": 4.3664835164835164e-05, + "loss": 0.2808, + "step": 4612 + }, + { + "epoch": 12.673076923076923, + "grad_norm": 9.552837371826172, + "learning_rate": 4.366346153846154e-05, + "loss": 0.4054, + "step": 4613 + }, + { + "epoch": 12.675824175824175, + "grad_norm": 14.211810111999512, + "learning_rate": 4.366208791208791e-05, + "loss": 0.5918, + "step": 4614 + }, + { + "epoch": 12.678571428571429, + "grad_norm": 13.393390655517578, + "learning_rate": 4.366071428571429e-05, + "loss": 0.5582, + "step": 4615 + }, + { + "epoch": 12.68131868131868, + "grad_norm": 5.9344482421875, + "learning_rate": 4.3659340659340665e-05, + "loss": 0.189, + "step": 4616 + }, + { + "epoch": 12.684065934065934, + "grad_norm": 14.95859146118164, + "learning_rate": 4.3657967032967035e-05, + "loss": 0.5971, + "step": 4617 + }, + { + "epoch": 12.686813186813186, + "grad_norm": 14.947850227355957, + "learning_rate": 4.365659340659341e-05, + "loss": 0.5494, + "step": 4618 + }, + { + "epoch": 12.68956043956044, + "grad_norm": 6.537243843078613, + "learning_rate": 4.365521978021978e-05, + "loss": 0.194, + "step": 4619 + }, + { + "epoch": 12.692307692307692, + "grad_norm": 17.031055450439453, + "learning_rate": 4.365384615384616e-05, + "loss": 0.8408, + "step": 4620 + }, + { + "epoch": 12.695054945054945, + "grad_norm": 12.420122146606445, + "learning_rate": 4.3652472527472535e-05, + "loss": 0.6378, + "step": 4621 + }, + { + "epoch": 12.697802197802197, + "grad_norm": 6.608885765075684, + "learning_rate": 4.3651098901098905e-05, + "loss": 0.1808, + "step": 4622 + }, + { + "epoch": 12.70054945054945, + "grad_norm": 9.928104400634766, + "learning_rate": 4.3649725274725275e-05, + "loss": 0.3473, + "step": 4623 + }, + { + "epoch": 12.703296703296703, + "grad_norm": 13.141168594360352, + "learning_rate": 4.3648351648351645e-05, + "loss": 0.3321, + "step": 4624 + }, + { + "epoch": 12.706043956043956, + "grad_norm": 13.98568058013916, + "learning_rate": 4.364697802197802e-05, + "loss": 0.3965, + "step": 4625 + }, + { + "epoch": 12.708791208791208, + "grad_norm": 13.689091682434082, + "learning_rate": 4.36456043956044e-05, + "loss": 0.4657, + "step": 4626 + }, + { + "epoch": 12.711538461538462, + "grad_norm": 13.12035083770752, + "learning_rate": 4.364423076923077e-05, + "loss": 0.4394, + "step": 4627 + }, + { + "epoch": 12.714285714285714, + "grad_norm": 11.357205390930176, + "learning_rate": 4.3642857142857146e-05, + "loss": 0.4111, + "step": 4628 + }, + { + "epoch": 12.717032967032967, + "grad_norm": 12.853514671325684, + "learning_rate": 4.3641483516483516e-05, + "loss": 0.4498, + "step": 4629 + }, + { + "epoch": 12.719780219780219, + "grad_norm": 10.267383575439453, + "learning_rate": 4.364010989010989e-05, + "loss": 0.2369, + "step": 4630 + }, + { + "epoch": 12.722527472527473, + "grad_norm": 9.431170463562012, + "learning_rate": 4.363873626373627e-05, + "loss": 0.208, + "step": 4631 + }, + { + "epoch": 12.725274725274724, + "grad_norm": 19.419113159179688, + "learning_rate": 4.363736263736264e-05, + "loss": 0.6499, + "step": 4632 + }, + { + "epoch": 12.728021978021978, + "grad_norm": 15.830035209655762, + "learning_rate": 4.3635989010989016e-05, + "loss": 0.6702, + "step": 4633 + }, + { + "epoch": 12.73076923076923, + "grad_norm": 7.053138732910156, + "learning_rate": 4.3634615384615386e-05, + "loss": 0.2641, + "step": 4634 + }, + { + "epoch": 12.733516483516484, + "grad_norm": 10.513936996459961, + "learning_rate": 4.363324175824176e-05, + "loss": 0.4277, + "step": 4635 + }, + { + "epoch": 12.736263736263737, + "grad_norm": 10.822425842285156, + "learning_rate": 4.363186813186814e-05, + "loss": 0.4013, + "step": 4636 + }, + { + "epoch": 12.739010989010989, + "grad_norm": 13.693620681762695, + "learning_rate": 4.363049450549451e-05, + "loss": 0.3883, + "step": 4637 + }, + { + "epoch": 12.741758241758241, + "grad_norm": 15.830219268798828, + "learning_rate": 4.362912087912088e-05, + "loss": 0.6158, + "step": 4638 + }, + { + "epoch": 12.744505494505495, + "grad_norm": 18.80978012084961, + "learning_rate": 4.362774725274725e-05, + "loss": 0.9281, + "step": 4639 + }, + { + "epoch": 12.747252747252748, + "grad_norm": 13.459344863891602, + "learning_rate": 4.3626373626373626e-05, + "loss": 0.4522, + "step": 4640 + }, + { + "epoch": 12.75, + "grad_norm": 9.098426818847656, + "learning_rate": 4.3625e-05, + "loss": 0.292, + "step": 4641 + }, + { + "epoch": 12.752747252747252, + "grad_norm": 8.12074089050293, + "learning_rate": 4.362362637362637e-05, + "loss": 0.3991, + "step": 4642 + }, + { + "epoch": 12.755494505494505, + "grad_norm": 13.863814353942871, + "learning_rate": 4.362225274725275e-05, + "loss": 0.5791, + "step": 4643 + }, + { + "epoch": 12.758241758241759, + "grad_norm": 13.757081985473633, + "learning_rate": 4.362087912087912e-05, + "loss": 0.4799, + "step": 4644 + }, + { + "epoch": 12.760989010989011, + "grad_norm": 11.464592933654785, + "learning_rate": 4.36195054945055e-05, + "loss": 0.362, + "step": 4645 + }, + { + "epoch": 12.763736263736263, + "grad_norm": 9.96176528930664, + "learning_rate": 4.3618131868131874e-05, + "loss": 0.3295, + "step": 4646 + }, + { + "epoch": 12.766483516483516, + "grad_norm": 9.803329467773438, + "learning_rate": 4.3616758241758244e-05, + "loss": 0.4158, + "step": 4647 + }, + { + "epoch": 12.76923076923077, + "grad_norm": 17.36766815185547, + "learning_rate": 4.361538461538462e-05, + "loss": 0.5446, + "step": 4648 + }, + { + "epoch": 12.771978021978022, + "grad_norm": 16.327590942382812, + "learning_rate": 4.361401098901099e-05, + "loss": 0.7992, + "step": 4649 + }, + { + "epoch": 12.774725274725276, + "grad_norm": 9.826924324035645, + "learning_rate": 4.361263736263737e-05, + "loss": 0.2872, + "step": 4650 + }, + { + "epoch": 12.777472527472527, + "grad_norm": 16.704378128051758, + "learning_rate": 4.3611263736263744e-05, + "loss": 1.0533, + "step": 4651 + }, + { + "epoch": 12.780219780219781, + "grad_norm": 11.585872650146484, + "learning_rate": 4.3609890109890114e-05, + "loss": 0.2776, + "step": 4652 + }, + { + "epoch": 12.782967032967033, + "grad_norm": 10.063958168029785, + "learning_rate": 4.3608516483516484e-05, + "loss": 0.459, + "step": 4653 + }, + { + "epoch": 12.785714285714286, + "grad_norm": 13.828328132629395, + "learning_rate": 4.3607142857142854e-05, + "loss": 0.6857, + "step": 4654 + }, + { + "epoch": 12.788461538461538, + "grad_norm": 9.17297077178955, + "learning_rate": 4.360576923076923e-05, + "loss": 0.2298, + "step": 4655 + }, + { + "epoch": 12.791208791208792, + "grad_norm": 15.907722473144531, + "learning_rate": 4.360439560439561e-05, + "loss": 0.8531, + "step": 4656 + }, + { + "epoch": 12.793956043956044, + "grad_norm": 17.160247802734375, + "learning_rate": 4.360302197802198e-05, + "loss": 0.6864, + "step": 4657 + }, + { + "epoch": 12.796703296703297, + "grad_norm": 14.431551933288574, + "learning_rate": 4.3601648351648355e-05, + "loss": 0.3815, + "step": 4658 + }, + { + "epoch": 12.79945054945055, + "grad_norm": 11.341951370239258, + "learning_rate": 4.3600274725274725e-05, + "loss": 0.3357, + "step": 4659 + }, + { + "epoch": 12.802197802197803, + "grad_norm": 16.39647674560547, + "learning_rate": 4.35989010989011e-05, + "loss": 0.5821, + "step": 4660 + }, + { + "epoch": 12.804945054945055, + "grad_norm": 14.375459671020508, + "learning_rate": 4.359752747252748e-05, + "loss": 0.5606, + "step": 4661 + }, + { + "epoch": 12.807692307692308, + "grad_norm": 16.130504608154297, + "learning_rate": 4.359615384615385e-05, + "loss": 0.6688, + "step": 4662 + }, + { + "epoch": 12.81043956043956, + "grad_norm": 8.093069076538086, + "learning_rate": 4.3594780219780225e-05, + "loss": 0.1982, + "step": 4663 + }, + { + "epoch": 12.813186813186814, + "grad_norm": 15.523516654968262, + "learning_rate": 4.3593406593406595e-05, + "loss": 0.6089, + "step": 4664 + }, + { + "epoch": 12.815934065934066, + "grad_norm": 17.86176300048828, + "learning_rate": 4.359203296703297e-05, + "loss": 0.6704, + "step": 4665 + }, + { + "epoch": 12.81868131868132, + "grad_norm": 10.133831977844238, + "learning_rate": 4.359065934065934e-05, + "loss": 0.3135, + "step": 4666 + }, + { + "epoch": 12.821428571428571, + "grad_norm": 12.028388023376465, + "learning_rate": 4.358928571428572e-05, + "loss": 0.3979, + "step": 4667 + }, + { + "epoch": 12.824175824175825, + "grad_norm": 12.819512367248535, + "learning_rate": 4.358791208791209e-05, + "loss": 0.4798, + "step": 4668 + }, + { + "epoch": 12.826923076923077, + "grad_norm": 13.48633098602295, + "learning_rate": 4.358653846153846e-05, + "loss": 0.5124, + "step": 4669 + }, + { + "epoch": 12.82967032967033, + "grad_norm": 12.18091106414795, + "learning_rate": 4.3585164835164835e-05, + "loss": 0.4141, + "step": 4670 + }, + { + "epoch": 12.832417582417582, + "grad_norm": 15.94629192352295, + "learning_rate": 4.3583791208791205e-05, + "loss": 0.567, + "step": 4671 + }, + { + "epoch": 12.835164835164836, + "grad_norm": 6.276219367980957, + "learning_rate": 4.358241758241758e-05, + "loss": 0.1982, + "step": 4672 + }, + { + "epoch": 12.837912087912088, + "grad_norm": 11.017806053161621, + "learning_rate": 4.358104395604396e-05, + "loss": 0.36, + "step": 4673 + }, + { + "epoch": 12.840659340659341, + "grad_norm": 11.140483856201172, + "learning_rate": 4.357967032967033e-05, + "loss": 0.5478, + "step": 4674 + }, + { + "epoch": 12.843406593406593, + "grad_norm": 12.967521667480469, + "learning_rate": 4.3578296703296706e-05, + "loss": 0.4083, + "step": 4675 + }, + { + "epoch": 12.846153846153847, + "grad_norm": 14.108990669250488, + "learning_rate": 4.3576923076923076e-05, + "loss": 0.5394, + "step": 4676 + }, + { + "epoch": 12.848901098901099, + "grad_norm": 11.911495208740234, + "learning_rate": 4.357554945054945e-05, + "loss": 0.5192, + "step": 4677 + }, + { + "epoch": 12.851648351648352, + "grad_norm": 13.968186378479004, + "learning_rate": 4.357417582417583e-05, + "loss": 0.5065, + "step": 4678 + }, + { + "epoch": 12.854395604395604, + "grad_norm": 9.842879295349121, + "learning_rate": 4.35728021978022e-05, + "loss": 0.2835, + "step": 4679 + }, + { + "epoch": 12.857142857142858, + "grad_norm": 14.218655586242676, + "learning_rate": 4.3571428571428576e-05, + "loss": 0.5386, + "step": 4680 + }, + { + "epoch": 12.85989010989011, + "grad_norm": 15.622401237487793, + "learning_rate": 4.3570054945054946e-05, + "loss": 0.6088, + "step": 4681 + }, + { + "epoch": 12.862637362637363, + "grad_norm": 11.750727653503418, + "learning_rate": 4.356868131868132e-05, + "loss": 0.478, + "step": 4682 + }, + { + "epoch": 12.865384615384615, + "grad_norm": 13.598709106445312, + "learning_rate": 4.356730769230769e-05, + "loss": 0.7256, + "step": 4683 + }, + { + "epoch": 12.868131868131869, + "grad_norm": 17.034915924072266, + "learning_rate": 4.356593406593406e-05, + "loss": 0.6075, + "step": 4684 + }, + { + "epoch": 12.87087912087912, + "grad_norm": 8.799266815185547, + "learning_rate": 4.356456043956044e-05, + "loss": 0.2449, + "step": 4685 + }, + { + "epoch": 12.873626373626374, + "grad_norm": 14.531840324401855, + "learning_rate": 4.356318681318681e-05, + "loss": 0.4921, + "step": 4686 + }, + { + "epoch": 12.876373626373626, + "grad_norm": 10.575021743774414, + "learning_rate": 4.356181318681319e-05, + "loss": 0.2542, + "step": 4687 + }, + { + "epoch": 12.87912087912088, + "grad_norm": 17.166959762573242, + "learning_rate": 4.3560439560439564e-05, + "loss": 0.7877, + "step": 4688 + }, + { + "epoch": 12.881868131868131, + "grad_norm": 17.099828720092773, + "learning_rate": 4.3559065934065934e-05, + "loss": 0.4959, + "step": 4689 + }, + { + "epoch": 12.884615384615385, + "grad_norm": 4.399044990539551, + "learning_rate": 4.355769230769231e-05, + "loss": 0.1248, + "step": 4690 + }, + { + "epoch": 12.887362637362637, + "grad_norm": 9.197866439819336, + "learning_rate": 4.355631868131868e-05, + "loss": 0.2667, + "step": 4691 + }, + { + "epoch": 12.89010989010989, + "grad_norm": 17.194679260253906, + "learning_rate": 4.355494505494506e-05, + "loss": 0.575, + "step": 4692 + }, + { + "epoch": 12.892857142857142, + "grad_norm": 10.778600692749023, + "learning_rate": 4.3553571428571434e-05, + "loss": 0.3139, + "step": 4693 + }, + { + "epoch": 12.895604395604396, + "grad_norm": 16.4632568359375, + "learning_rate": 4.3552197802197804e-05, + "loss": 0.8613, + "step": 4694 + }, + { + "epoch": 12.898351648351648, + "grad_norm": 5.8957366943359375, + "learning_rate": 4.355082417582418e-05, + "loss": 0.2643, + "step": 4695 + }, + { + "epoch": 12.901098901098901, + "grad_norm": 9.54183292388916, + "learning_rate": 4.354945054945055e-05, + "loss": 0.2325, + "step": 4696 + }, + { + "epoch": 12.903846153846153, + "grad_norm": 10.838325500488281, + "learning_rate": 4.354807692307693e-05, + "loss": 0.3198, + "step": 4697 + }, + { + "epoch": 12.906593406593407, + "grad_norm": 10.427392959594727, + "learning_rate": 4.35467032967033e-05, + "loss": 0.3668, + "step": 4698 + }, + { + "epoch": 12.909340659340659, + "grad_norm": 8.66936206817627, + "learning_rate": 4.354532967032967e-05, + "loss": 0.2415, + "step": 4699 + }, + { + "epoch": 12.912087912087912, + "grad_norm": 10.611624717712402, + "learning_rate": 4.3543956043956044e-05, + "loss": 0.4671, + "step": 4700 + }, + { + "epoch": 12.914835164835164, + "grad_norm": 17.183958053588867, + "learning_rate": 4.3542582417582414e-05, + "loss": 0.744, + "step": 4701 + }, + { + "epoch": 12.917582417582418, + "grad_norm": 9.258489608764648, + "learning_rate": 4.354120879120879e-05, + "loss": 0.3545, + "step": 4702 + }, + { + "epoch": 12.92032967032967, + "grad_norm": 14.50532341003418, + "learning_rate": 4.353983516483517e-05, + "loss": 0.4837, + "step": 4703 + }, + { + "epoch": 12.923076923076923, + "grad_norm": 9.979281425476074, + "learning_rate": 4.353846153846154e-05, + "loss": 0.299, + "step": 4704 + }, + { + "epoch": 12.925824175824175, + "grad_norm": 17.09597396850586, + "learning_rate": 4.3537087912087915e-05, + "loss": 0.6411, + "step": 4705 + }, + { + "epoch": 12.928571428571429, + "grad_norm": 11.66775894165039, + "learning_rate": 4.3535714285714285e-05, + "loss": 0.3514, + "step": 4706 + }, + { + "epoch": 12.93131868131868, + "grad_norm": 16.400148391723633, + "learning_rate": 4.353434065934066e-05, + "loss": 0.6691, + "step": 4707 + }, + { + "epoch": 12.934065934065934, + "grad_norm": 13.919601440429688, + "learning_rate": 4.353296703296704e-05, + "loss": 0.6723, + "step": 4708 + }, + { + "epoch": 12.936813186813186, + "grad_norm": 7.849497318267822, + "learning_rate": 4.353159340659341e-05, + "loss": 0.2286, + "step": 4709 + }, + { + "epoch": 12.93956043956044, + "grad_norm": 16.573623657226562, + "learning_rate": 4.3530219780219785e-05, + "loss": 0.6194, + "step": 4710 + }, + { + "epoch": 12.942307692307692, + "grad_norm": 23.717269897460938, + "learning_rate": 4.3528846153846155e-05, + "loss": 0.8276, + "step": 4711 + }, + { + "epoch": 12.945054945054945, + "grad_norm": 13.428932189941406, + "learning_rate": 4.352747252747253e-05, + "loss": 0.4056, + "step": 4712 + }, + { + "epoch": 12.947802197802197, + "grad_norm": 19.95755958557129, + "learning_rate": 4.35260989010989e-05, + "loss": 0.9242, + "step": 4713 + }, + { + "epoch": 12.95054945054945, + "grad_norm": 16.504575729370117, + "learning_rate": 4.352472527472527e-05, + "loss": 0.5588, + "step": 4714 + }, + { + "epoch": 12.953296703296703, + "grad_norm": 12.129435539245605, + "learning_rate": 4.352335164835165e-05, + "loss": 0.3215, + "step": 4715 + }, + { + "epoch": 12.956043956043956, + "grad_norm": 17.034786224365234, + "learning_rate": 4.352197802197802e-05, + "loss": 0.499, + "step": 4716 + }, + { + "epoch": 12.958791208791208, + "grad_norm": 15.840636253356934, + "learning_rate": 4.3520604395604396e-05, + "loss": 1.0505, + "step": 4717 + }, + { + "epoch": 12.961538461538462, + "grad_norm": 14.865423202514648, + "learning_rate": 4.351923076923077e-05, + "loss": 0.4203, + "step": 4718 + }, + { + "epoch": 12.964285714285714, + "grad_norm": 16.348106384277344, + "learning_rate": 4.351785714285714e-05, + "loss": 0.9147, + "step": 4719 + }, + { + "epoch": 12.967032967032967, + "grad_norm": 18.926319122314453, + "learning_rate": 4.351648351648352e-05, + "loss": 0.6787, + "step": 4720 + }, + { + "epoch": 12.969780219780219, + "grad_norm": 10.312007904052734, + "learning_rate": 4.351510989010989e-05, + "loss": 0.3559, + "step": 4721 + }, + { + "epoch": 12.972527472527473, + "grad_norm": 17.720623016357422, + "learning_rate": 4.3513736263736266e-05, + "loss": 0.847, + "step": 4722 + }, + { + "epoch": 12.975274725274724, + "grad_norm": 5.646967887878418, + "learning_rate": 4.351236263736264e-05, + "loss": 0.1292, + "step": 4723 + }, + { + "epoch": 12.978021978021978, + "grad_norm": 11.820381164550781, + "learning_rate": 4.351098901098901e-05, + "loss": 0.4805, + "step": 4724 + }, + { + "epoch": 12.98076923076923, + "grad_norm": 20.792499542236328, + "learning_rate": 4.350961538461539e-05, + "loss": 0.7096, + "step": 4725 + }, + { + "epoch": 12.983516483516484, + "grad_norm": 15.554469108581543, + "learning_rate": 4.350824175824176e-05, + "loss": 0.6689, + "step": 4726 + }, + { + "epoch": 12.986263736263737, + "grad_norm": 20.289493560791016, + "learning_rate": 4.3506868131868137e-05, + "loss": 0.5514, + "step": 4727 + }, + { + "epoch": 12.989010989010989, + "grad_norm": 11.37121868133545, + "learning_rate": 4.350549450549451e-05, + "loss": 0.286, + "step": 4728 + }, + { + "epoch": 12.991758241758241, + "grad_norm": 10.01125431060791, + "learning_rate": 4.350412087912088e-05, + "loss": 0.448, + "step": 4729 + }, + { + "epoch": 12.994505494505495, + "grad_norm": 13.195698738098145, + "learning_rate": 4.3502747252747253e-05, + "loss": 0.3094, + "step": 4730 + }, + { + "epoch": 12.997252747252748, + "grad_norm": 11.99101448059082, + "learning_rate": 4.3501373626373623e-05, + "loss": 0.2789, + "step": 4731 + }, + { + "epoch": 13.0, + "grad_norm": 36.25969696044922, + "learning_rate": 4.35e-05, + "loss": 0.7657, + "step": 4732 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.709366391184573, + "eval_f1": 0.6832277800190176, + "eval_f1_DuraRiadoRio_64x64": 0.6939890710382514, + "eval_f1_Mole_64x64": 0.6727272727272727, + "eval_f1_Quebrado_64x64": 0.8742138364779874, + "eval_f1_RiadoRio_64x64": 0.40816326530612246, + "eval_f1_RioFechado_64x64": 0.7670454545454546, + "eval_loss": 1.291277289390564, + "eval_precision": 0.7793109818518167, + "eval_precision_DuraRiadoRio_64x64": 0.5720720720720721, + "eval_precision_Mole_64x64": 0.9736842105263158, + "eval_precision_Quebrado_64x64": 0.7988505747126436, + "eval_precision_RiadoRio_64x64": 0.9090909090909091, + "eval_precision_RioFechado_64x64": 0.6428571428571429, + "eval_recall": 0.7149946462400132, + "eval_recall_DuraRiadoRio_64x64": 0.8819444444444444, + "eval_recall_Mole_64x64": 0.5138888888888888, + "eval_recall_Quebrado_64x64": 0.9652777777777778, + "eval_recall_RiadoRio_64x64": 0.2631578947368421, + "eval_recall_RioFechado_64x64": 0.9507042253521126, + "eval_runtime": 1.7267, + "eval_samples_per_second": 420.451, + "eval_steps_per_second": 26.64, + "step": 4732 + }, + { + "epoch": 13.002747252747254, + "grad_norm": 10.723831176757812, + "learning_rate": 4.349862637362638e-05, + "loss": 0.3763, + "step": 4733 + }, + { + "epoch": 13.005494505494505, + "grad_norm": 8.906532287597656, + "learning_rate": 4.349725274725275e-05, + "loss": 0.2938, + "step": 4734 + }, + { + "epoch": 13.008241758241759, + "grad_norm": 5.9167585372924805, + "learning_rate": 4.3495879120879124e-05, + "loss": 0.1485, + "step": 4735 + }, + { + "epoch": 13.010989010989011, + "grad_norm": 16.011470794677734, + "learning_rate": 4.3494505494505494e-05, + "loss": 0.8321, + "step": 4736 + }, + { + "epoch": 13.013736263736265, + "grad_norm": 16.515172958374023, + "learning_rate": 4.349313186813187e-05, + "loss": 0.7298, + "step": 4737 + }, + { + "epoch": 13.016483516483516, + "grad_norm": 13.823661804199219, + "learning_rate": 4.349175824175825e-05, + "loss": 0.3358, + "step": 4738 + }, + { + "epoch": 13.01923076923077, + "grad_norm": 18.423429489135742, + "learning_rate": 4.349038461538462e-05, + "loss": 1.1508, + "step": 4739 + }, + { + "epoch": 13.021978021978022, + "grad_norm": 12.318816184997559, + "learning_rate": 4.3489010989010994e-05, + "loss": 0.3869, + "step": 4740 + }, + { + "epoch": 13.024725274725276, + "grad_norm": 9.270550727844238, + "learning_rate": 4.3487637362637364e-05, + "loss": 0.3143, + "step": 4741 + }, + { + "epoch": 13.027472527472527, + "grad_norm": 12.932236671447754, + "learning_rate": 4.348626373626374e-05, + "loss": 0.7148, + "step": 4742 + }, + { + "epoch": 13.030219780219781, + "grad_norm": 17.072046279907227, + "learning_rate": 4.348489010989011e-05, + "loss": 0.8789, + "step": 4743 + }, + { + "epoch": 13.032967032967033, + "grad_norm": 22.200529098510742, + "learning_rate": 4.348351648351648e-05, + "loss": 0.9412, + "step": 4744 + }, + { + "epoch": 13.035714285714286, + "grad_norm": 23.477771759033203, + "learning_rate": 4.348214285714286e-05, + "loss": 1.1155, + "step": 4745 + }, + { + "epoch": 13.038461538461538, + "grad_norm": 12.717408180236816, + "learning_rate": 4.348076923076923e-05, + "loss": 0.4798, + "step": 4746 + }, + { + "epoch": 13.041208791208792, + "grad_norm": 11.230759620666504, + "learning_rate": 4.3479395604395605e-05, + "loss": 0.5844, + "step": 4747 + }, + { + "epoch": 13.043956043956044, + "grad_norm": 16.877946853637695, + "learning_rate": 4.347802197802198e-05, + "loss": 0.7419, + "step": 4748 + }, + { + "epoch": 13.046703296703297, + "grad_norm": 7.106292724609375, + "learning_rate": 4.347664835164835e-05, + "loss": 0.2218, + "step": 4749 + }, + { + "epoch": 13.04945054945055, + "grad_norm": 10.920767784118652, + "learning_rate": 4.347527472527473e-05, + "loss": 0.3964, + "step": 4750 + }, + { + "epoch": 13.052197802197803, + "grad_norm": 10.694462776184082, + "learning_rate": 4.34739010989011e-05, + "loss": 0.376, + "step": 4751 + }, + { + "epoch": 13.054945054945055, + "grad_norm": 13.125815391540527, + "learning_rate": 4.3472527472527475e-05, + "loss": 0.536, + "step": 4752 + }, + { + "epoch": 13.057692307692308, + "grad_norm": 15.49527645111084, + "learning_rate": 4.347115384615385e-05, + "loss": 0.6486, + "step": 4753 + }, + { + "epoch": 13.06043956043956, + "grad_norm": 7.321831226348877, + "learning_rate": 4.346978021978022e-05, + "loss": 0.2338, + "step": 4754 + }, + { + "epoch": 13.063186813186814, + "grad_norm": 13.398125648498535, + "learning_rate": 4.34684065934066e-05, + "loss": 0.2357, + "step": 4755 + }, + { + "epoch": 13.065934065934066, + "grad_norm": 5.929818153381348, + "learning_rate": 4.346703296703297e-05, + "loss": 0.1757, + "step": 4756 + }, + { + "epoch": 13.06868131868132, + "grad_norm": 11.413277626037598, + "learning_rate": 4.3465659340659346e-05, + "loss": 0.41, + "step": 4757 + }, + { + "epoch": 13.071428571428571, + "grad_norm": 14.113419532775879, + "learning_rate": 4.3464285714285716e-05, + "loss": 0.502, + "step": 4758 + }, + { + "epoch": 13.074175824175825, + "grad_norm": 18.563493728637695, + "learning_rate": 4.3462912087912086e-05, + "loss": 0.8288, + "step": 4759 + }, + { + "epoch": 13.076923076923077, + "grad_norm": 9.454916954040527, + "learning_rate": 4.346153846153846e-05, + "loss": 0.3557, + "step": 4760 + }, + { + "epoch": 13.07967032967033, + "grad_norm": 10.83631420135498, + "learning_rate": 4.346016483516483e-05, + "loss": 0.3283, + "step": 4761 + }, + { + "epoch": 13.082417582417582, + "grad_norm": 6.088351249694824, + "learning_rate": 4.345879120879121e-05, + "loss": 0.1466, + "step": 4762 + }, + { + "epoch": 13.085164835164836, + "grad_norm": 11.992012977600098, + "learning_rate": 4.3457417582417586e-05, + "loss": 0.3464, + "step": 4763 + }, + { + "epoch": 13.087912087912088, + "grad_norm": 4.928199768066406, + "learning_rate": 4.3456043956043956e-05, + "loss": 0.1548, + "step": 4764 + }, + { + "epoch": 13.090659340659341, + "grad_norm": 16.414688110351562, + "learning_rate": 4.345467032967033e-05, + "loss": 0.7131, + "step": 4765 + }, + { + "epoch": 13.093406593406593, + "grad_norm": 9.589231491088867, + "learning_rate": 4.34532967032967e-05, + "loss": 0.3457, + "step": 4766 + }, + { + "epoch": 13.096153846153847, + "grad_norm": 22.140552520751953, + "learning_rate": 4.345192307692308e-05, + "loss": 0.8457, + "step": 4767 + }, + { + "epoch": 13.098901098901099, + "grad_norm": 14.606778144836426, + "learning_rate": 4.3450549450549456e-05, + "loss": 0.4892, + "step": 4768 + }, + { + "epoch": 13.101648351648352, + "grad_norm": 19.789838790893555, + "learning_rate": 4.3449175824175826e-05, + "loss": 0.8968, + "step": 4769 + }, + { + "epoch": 13.104395604395604, + "grad_norm": 15.659170150756836, + "learning_rate": 4.34478021978022e-05, + "loss": 0.471, + "step": 4770 + }, + { + "epoch": 13.107142857142858, + "grad_norm": 8.922869682312012, + "learning_rate": 4.344642857142857e-05, + "loss": 0.2304, + "step": 4771 + }, + { + "epoch": 13.10989010989011, + "grad_norm": 6.704702854156494, + "learning_rate": 4.344505494505495e-05, + "loss": 0.2319, + "step": 4772 + }, + { + "epoch": 13.112637362637363, + "grad_norm": 15.108819007873535, + "learning_rate": 4.344368131868132e-05, + "loss": 0.5286, + "step": 4773 + }, + { + "epoch": 13.115384615384615, + "grad_norm": 19.54309844970703, + "learning_rate": 4.344230769230769e-05, + "loss": 0.7168, + "step": 4774 + }, + { + "epoch": 13.118131868131869, + "grad_norm": 17.47512435913086, + "learning_rate": 4.344093406593407e-05, + "loss": 0.5106, + "step": 4775 + }, + { + "epoch": 13.12087912087912, + "grad_norm": 25.084108352661133, + "learning_rate": 4.343956043956044e-05, + "loss": 0.8966, + "step": 4776 + }, + { + "epoch": 13.123626373626374, + "grad_norm": 13.746874809265137, + "learning_rate": 4.3438186813186814e-05, + "loss": 0.4654, + "step": 4777 + }, + { + "epoch": 13.126373626373626, + "grad_norm": 21.672117233276367, + "learning_rate": 4.343681318681319e-05, + "loss": 0.6877, + "step": 4778 + }, + { + "epoch": 13.12912087912088, + "grad_norm": 15.802281379699707, + "learning_rate": 4.343543956043956e-05, + "loss": 1.0968, + "step": 4779 + }, + { + "epoch": 13.131868131868131, + "grad_norm": 12.657217979431152, + "learning_rate": 4.343406593406594e-05, + "loss": 0.3435, + "step": 4780 + }, + { + "epoch": 13.134615384615385, + "grad_norm": 13.14632511138916, + "learning_rate": 4.343269230769231e-05, + "loss": 0.449, + "step": 4781 + }, + { + "epoch": 13.137362637362637, + "grad_norm": 11.832037925720215, + "learning_rate": 4.3431318681318684e-05, + "loss": 0.3217, + "step": 4782 + }, + { + "epoch": 13.14010989010989, + "grad_norm": 10.734001159667969, + "learning_rate": 4.342994505494506e-05, + "loss": 0.358, + "step": 4783 + }, + { + "epoch": 13.142857142857142, + "grad_norm": 6.400766849517822, + "learning_rate": 4.342857142857143e-05, + "loss": 0.1809, + "step": 4784 + }, + { + "epoch": 13.145604395604396, + "grad_norm": 10.437433242797852, + "learning_rate": 4.342719780219781e-05, + "loss": 0.3874, + "step": 4785 + }, + { + "epoch": 13.148351648351648, + "grad_norm": 8.953082084655762, + "learning_rate": 4.342582417582418e-05, + "loss": 0.2632, + "step": 4786 + }, + { + "epoch": 13.151098901098901, + "grad_norm": 11.121992111206055, + "learning_rate": 4.3424450549450555e-05, + "loss": 0.3839, + "step": 4787 + }, + { + "epoch": 13.153846153846153, + "grad_norm": 16.764293670654297, + "learning_rate": 4.3423076923076925e-05, + "loss": 0.7721, + "step": 4788 + }, + { + "epoch": 13.156593406593407, + "grad_norm": 14.368106842041016, + "learning_rate": 4.3421703296703295e-05, + "loss": 0.4721, + "step": 4789 + }, + { + "epoch": 13.159340659340659, + "grad_norm": 14.276729583740234, + "learning_rate": 4.342032967032967e-05, + "loss": 0.5191, + "step": 4790 + }, + { + "epoch": 13.162087912087912, + "grad_norm": 14.474982261657715, + "learning_rate": 4.341895604395604e-05, + "loss": 0.592, + "step": 4791 + }, + { + "epoch": 13.164835164835164, + "grad_norm": 17.60384750366211, + "learning_rate": 4.341758241758242e-05, + "loss": 0.8353, + "step": 4792 + }, + { + "epoch": 13.167582417582418, + "grad_norm": 13.435150146484375, + "learning_rate": 4.3416208791208795e-05, + "loss": 0.5548, + "step": 4793 + }, + { + "epoch": 13.17032967032967, + "grad_norm": 10.680499076843262, + "learning_rate": 4.3414835164835165e-05, + "loss": 0.4001, + "step": 4794 + }, + { + "epoch": 13.173076923076923, + "grad_norm": 9.507210731506348, + "learning_rate": 4.341346153846154e-05, + "loss": 0.304, + "step": 4795 + }, + { + "epoch": 13.175824175824175, + "grad_norm": 7.98565673828125, + "learning_rate": 4.341208791208791e-05, + "loss": 0.3115, + "step": 4796 + }, + { + "epoch": 13.178571428571429, + "grad_norm": 17.50967025756836, + "learning_rate": 4.341071428571429e-05, + "loss": 0.8748, + "step": 4797 + }, + { + "epoch": 13.18131868131868, + "grad_norm": 11.161595344543457, + "learning_rate": 4.3409340659340665e-05, + "loss": 0.3872, + "step": 4798 + }, + { + "epoch": 13.184065934065934, + "grad_norm": 10.192102432250977, + "learning_rate": 4.3407967032967035e-05, + "loss": 0.3207, + "step": 4799 + }, + { + "epoch": 13.186813186813186, + "grad_norm": 14.199399948120117, + "learning_rate": 4.340659340659341e-05, + "loss": 0.6427, + "step": 4800 + }, + { + "epoch": 13.18956043956044, + "grad_norm": 14.833407402038574, + "learning_rate": 4.340521978021978e-05, + "loss": 0.5442, + "step": 4801 + }, + { + "epoch": 13.192307692307692, + "grad_norm": 14.734766006469727, + "learning_rate": 4.340384615384616e-05, + "loss": 0.5379, + "step": 4802 + }, + { + "epoch": 13.195054945054945, + "grad_norm": 18.13193702697754, + "learning_rate": 4.340247252747253e-05, + "loss": 0.5549, + "step": 4803 + }, + { + "epoch": 13.197802197802197, + "grad_norm": 23.473196029663086, + "learning_rate": 4.34010989010989e-05, + "loss": 1.2781, + "step": 4804 + }, + { + "epoch": 13.20054945054945, + "grad_norm": 13.16939926147461, + "learning_rate": 4.3399725274725276e-05, + "loss": 0.4963, + "step": 4805 + }, + { + "epoch": 13.203296703296703, + "grad_norm": 9.392404556274414, + "learning_rate": 4.3398351648351646e-05, + "loss": 0.2389, + "step": 4806 + }, + { + "epoch": 13.206043956043956, + "grad_norm": 8.328872680664062, + "learning_rate": 4.339697802197802e-05, + "loss": 0.3981, + "step": 4807 + }, + { + "epoch": 13.208791208791208, + "grad_norm": 9.112195014953613, + "learning_rate": 4.33956043956044e-05, + "loss": 0.3363, + "step": 4808 + }, + { + "epoch": 13.211538461538462, + "grad_norm": 13.631893157958984, + "learning_rate": 4.339423076923077e-05, + "loss": 0.5376, + "step": 4809 + }, + { + "epoch": 13.214285714285714, + "grad_norm": 11.273017883300781, + "learning_rate": 4.3392857142857146e-05, + "loss": 0.4147, + "step": 4810 + }, + { + "epoch": 13.217032967032967, + "grad_norm": 11.56130313873291, + "learning_rate": 4.3391483516483516e-05, + "loss": 0.607, + "step": 4811 + }, + { + "epoch": 13.219780219780219, + "grad_norm": 19.918880462646484, + "learning_rate": 4.339010989010989e-05, + "loss": 1.079, + "step": 4812 + }, + { + "epoch": 13.222527472527473, + "grad_norm": 10.515483856201172, + "learning_rate": 4.338873626373627e-05, + "loss": 0.3625, + "step": 4813 + }, + { + "epoch": 13.225274725274724, + "grad_norm": 15.45021915435791, + "learning_rate": 4.338736263736264e-05, + "loss": 0.4066, + "step": 4814 + }, + { + "epoch": 13.228021978021978, + "grad_norm": 10.487899780273438, + "learning_rate": 4.338598901098902e-05, + "loss": 0.4483, + "step": 4815 + }, + { + "epoch": 13.23076923076923, + "grad_norm": 4.570345401763916, + "learning_rate": 4.338461538461539e-05, + "loss": 0.1331, + "step": 4816 + }, + { + "epoch": 13.233516483516484, + "grad_norm": 10.443402290344238, + "learning_rate": 4.3383241758241764e-05, + "loss": 0.5372, + "step": 4817 + }, + { + "epoch": 13.236263736263735, + "grad_norm": 8.549178123474121, + "learning_rate": 4.3381868131868134e-05, + "loss": 0.2199, + "step": 4818 + }, + { + "epoch": 13.239010989010989, + "grad_norm": 9.637445449829102, + "learning_rate": 4.3380494505494504e-05, + "loss": 0.3234, + "step": 4819 + }, + { + "epoch": 13.241758241758241, + "grad_norm": 17.293840408325195, + "learning_rate": 4.337912087912088e-05, + "loss": 0.6175, + "step": 4820 + }, + { + "epoch": 13.244505494505495, + "grad_norm": 6.772944927215576, + "learning_rate": 4.337774725274725e-05, + "loss": 0.1707, + "step": 4821 + }, + { + "epoch": 13.247252747252748, + "grad_norm": 15.984644889831543, + "learning_rate": 4.337637362637363e-05, + "loss": 0.7538, + "step": 4822 + }, + { + "epoch": 13.25, + "grad_norm": 8.009336471557617, + "learning_rate": 4.3375000000000004e-05, + "loss": 0.216, + "step": 4823 + }, + { + "epoch": 13.252747252747252, + "grad_norm": 12.006604194641113, + "learning_rate": 4.3373626373626374e-05, + "loss": 0.5269, + "step": 4824 + }, + { + "epoch": 13.255494505494505, + "grad_norm": 9.732797622680664, + "learning_rate": 4.337225274725275e-05, + "loss": 0.3855, + "step": 4825 + }, + { + "epoch": 13.258241758241759, + "grad_norm": 15.441813468933105, + "learning_rate": 4.337087912087912e-05, + "loss": 0.603, + "step": 4826 + }, + { + "epoch": 13.260989010989011, + "grad_norm": 7.937861442565918, + "learning_rate": 4.33695054945055e-05, + "loss": 0.25, + "step": 4827 + }, + { + "epoch": 13.263736263736265, + "grad_norm": 16.480058670043945, + "learning_rate": 4.3368131868131874e-05, + "loss": 1.072, + "step": 4828 + }, + { + "epoch": 13.266483516483516, + "grad_norm": 11.839804649353027, + "learning_rate": 4.3366758241758244e-05, + "loss": 0.5044, + "step": 4829 + }, + { + "epoch": 13.26923076923077, + "grad_norm": 17.05419921875, + "learning_rate": 4.336538461538462e-05, + "loss": 0.8046, + "step": 4830 + }, + { + "epoch": 13.271978021978022, + "grad_norm": 10.748167991638184, + "learning_rate": 4.336401098901099e-05, + "loss": 0.2772, + "step": 4831 + }, + { + "epoch": 13.274725274725276, + "grad_norm": 9.33501148223877, + "learning_rate": 4.336263736263737e-05, + "loss": 0.4188, + "step": 4832 + }, + { + "epoch": 13.277472527472527, + "grad_norm": 15.225046157836914, + "learning_rate": 4.336126373626374e-05, + "loss": 0.8036, + "step": 4833 + }, + { + "epoch": 13.280219780219781, + "grad_norm": 13.157960891723633, + "learning_rate": 4.335989010989011e-05, + "loss": 0.2618, + "step": 4834 + }, + { + "epoch": 13.282967032967033, + "grad_norm": 16.839141845703125, + "learning_rate": 4.3358516483516485e-05, + "loss": 0.7384, + "step": 4835 + }, + { + "epoch": 13.285714285714286, + "grad_norm": 10.862621307373047, + "learning_rate": 4.3357142857142855e-05, + "loss": 0.2954, + "step": 4836 + }, + { + "epoch": 13.288461538461538, + "grad_norm": 19.76622200012207, + "learning_rate": 4.335576923076923e-05, + "loss": 0.8406, + "step": 4837 + }, + { + "epoch": 13.291208791208792, + "grad_norm": 11.832319259643555, + "learning_rate": 4.335439560439561e-05, + "loss": 0.4107, + "step": 4838 + }, + { + "epoch": 13.293956043956044, + "grad_norm": 14.012885093688965, + "learning_rate": 4.335302197802198e-05, + "loss": 0.6439, + "step": 4839 + }, + { + "epoch": 13.296703296703297, + "grad_norm": 9.847195625305176, + "learning_rate": 4.3351648351648355e-05, + "loss": 0.3321, + "step": 4840 + }, + { + "epoch": 13.29945054945055, + "grad_norm": 4.562719345092773, + "learning_rate": 4.3350274725274725e-05, + "loss": 0.1539, + "step": 4841 + }, + { + "epoch": 13.302197802197803, + "grad_norm": 17.267852783203125, + "learning_rate": 4.33489010989011e-05, + "loss": 0.8688, + "step": 4842 + }, + { + "epoch": 13.304945054945055, + "grad_norm": 10.097101211547852, + "learning_rate": 4.334752747252748e-05, + "loss": 0.4099, + "step": 4843 + }, + { + "epoch": 13.307692307692308, + "grad_norm": 16.731460571289062, + "learning_rate": 4.334615384615385e-05, + "loss": 0.7481, + "step": 4844 + }, + { + "epoch": 13.31043956043956, + "grad_norm": 17.61587905883789, + "learning_rate": 4.3344780219780226e-05, + "loss": 0.9972, + "step": 4845 + }, + { + "epoch": 13.313186813186814, + "grad_norm": 12.087013244628906, + "learning_rate": 4.3343406593406596e-05, + "loss": 0.4414, + "step": 4846 + }, + { + "epoch": 13.315934065934066, + "grad_norm": 10.063536643981934, + "learning_rate": 4.334203296703297e-05, + "loss": 0.4336, + "step": 4847 + }, + { + "epoch": 13.31868131868132, + "grad_norm": 19.26861000061035, + "learning_rate": 4.334065934065934e-05, + "loss": 0.5795, + "step": 4848 + }, + { + "epoch": 13.321428571428571, + "grad_norm": 11.355950355529785, + "learning_rate": 4.333928571428571e-05, + "loss": 0.4303, + "step": 4849 + }, + { + "epoch": 13.324175824175825, + "grad_norm": 15.334455490112305, + "learning_rate": 4.333791208791209e-05, + "loss": 0.554, + "step": 4850 + }, + { + "epoch": 13.326923076923077, + "grad_norm": 15.97062873840332, + "learning_rate": 4.333653846153846e-05, + "loss": 0.7476, + "step": 4851 + }, + { + "epoch": 13.32967032967033, + "grad_norm": 9.960127830505371, + "learning_rate": 4.3335164835164836e-05, + "loss": 0.3864, + "step": 4852 + }, + { + "epoch": 13.332417582417582, + "grad_norm": 13.813652992248535, + "learning_rate": 4.333379120879121e-05, + "loss": 0.5045, + "step": 4853 + }, + { + "epoch": 13.335164835164836, + "grad_norm": 4.633195400238037, + "learning_rate": 4.333241758241758e-05, + "loss": 0.1105, + "step": 4854 + }, + { + "epoch": 13.337912087912088, + "grad_norm": 19.452880859375, + "learning_rate": 4.333104395604396e-05, + "loss": 0.5828, + "step": 4855 + }, + { + "epoch": 13.340659340659341, + "grad_norm": 10.385274887084961, + "learning_rate": 4.332967032967033e-05, + "loss": 0.3108, + "step": 4856 + }, + { + "epoch": 13.343406593406593, + "grad_norm": 11.081195831298828, + "learning_rate": 4.3328296703296707e-05, + "loss": 0.3761, + "step": 4857 + }, + { + "epoch": 13.346153846153847, + "grad_norm": 8.625415802001953, + "learning_rate": 4.3326923076923083e-05, + "loss": 0.2697, + "step": 4858 + }, + { + "epoch": 13.348901098901099, + "grad_norm": 10.0980224609375, + "learning_rate": 4.3325549450549453e-05, + "loss": 0.386, + "step": 4859 + }, + { + "epoch": 13.351648351648352, + "grad_norm": 16.71953010559082, + "learning_rate": 4.332417582417583e-05, + "loss": 0.5016, + "step": 4860 + }, + { + "epoch": 13.354395604395604, + "grad_norm": 15.860106468200684, + "learning_rate": 4.33228021978022e-05, + "loss": 0.6526, + "step": 4861 + }, + { + "epoch": 13.357142857142858, + "grad_norm": 10.580629348754883, + "learning_rate": 4.332142857142858e-05, + "loss": 0.399, + "step": 4862 + }, + { + "epoch": 13.35989010989011, + "grad_norm": 20.257612228393555, + "learning_rate": 4.332005494505495e-05, + "loss": 0.6821, + "step": 4863 + }, + { + "epoch": 13.362637362637363, + "grad_norm": 10.786725997924805, + "learning_rate": 4.331868131868132e-05, + "loss": 0.3159, + "step": 4864 + }, + { + "epoch": 13.365384615384615, + "grad_norm": 9.060179710388184, + "learning_rate": 4.3317307692307694e-05, + "loss": 0.321, + "step": 4865 + }, + { + "epoch": 13.368131868131869, + "grad_norm": 21.78651237487793, + "learning_rate": 4.3315934065934064e-05, + "loss": 0.9308, + "step": 4866 + }, + { + "epoch": 13.37087912087912, + "grad_norm": 11.53005599975586, + "learning_rate": 4.331456043956044e-05, + "loss": 0.3319, + "step": 4867 + }, + { + "epoch": 13.373626373626374, + "grad_norm": 14.186485290527344, + "learning_rate": 4.331318681318682e-05, + "loss": 0.4045, + "step": 4868 + }, + { + "epoch": 13.376373626373626, + "grad_norm": 13.565457344055176, + "learning_rate": 4.331181318681319e-05, + "loss": 0.585, + "step": 4869 + }, + { + "epoch": 13.37912087912088, + "grad_norm": 14.309264183044434, + "learning_rate": 4.3310439560439564e-05, + "loss": 0.5633, + "step": 4870 + }, + { + "epoch": 13.381868131868131, + "grad_norm": 22.88578987121582, + "learning_rate": 4.3309065934065934e-05, + "loss": 0.8662, + "step": 4871 + }, + { + "epoch": 13.384615384615385, + "grad_norm": 9.893072128295898, + "learning_rate": 4.330769230769231e-05, + "loss": 0.2348, + "step": 4872 + }, + { + "epoch": 13.387362637362637, + "grad_norm": 16.076536178588867, + "learning_rate": 4.330631868131869e-05, + "loss": 0.6794, + "step": 4873 + }, + { + "epoch": 13.39010989010989, + "grad_norm": 10.861685752868652, + "learning_rate": 4.330494505494506e-05, + "loss": 0.4247, + "step": 4874 + }, + { + "epoch": 13.392857142857142, + "grad_norm": 10.335638999938965, + "learning_rate": 4.3303571428571435e-05, + "loss": 0.2586, + "step": 4875 + }, + { + "epoch": 13.395604395604396, + "grad_norm": 11.82959270477295, + "learning_rate": 4.3302197802197805e-05, + "loss": 0.4353, + "step": 4876 + }, + { + "epoch": 13.398351648351648, + "grad_norm": 21.141185760498047, + "learning_rate": 4.330082417582418e-05, + "loss": 0.4735, + "step": 4877 + }, + { + "epoch": 13.401098901098901, + "grad_norm": 15.696643829345703, + "learning_rate": 4.329945054945055e-05, + "loss": 0.5841, + "step": 4878 + }, + { + "epoch": 13.403846153846153, + "grad_norm": 13.396557807922363, + "learning_rate": 4.329807692307692e-05, + "loss": 0.6091, + "step": 4879 + }, + { + "epoch": 13.406593406593407, + "grad_norm": 17.037160873413086, + "learning_rate": 4.32967032967033e-05, + "loss": 0.4975, + "step": 4880 + }, + { + "epoch": 13.409340659340659, + "grad_norm": 17.07465362548828, + "learning_rate": 4.329532967032967e-05, + "loss": 0.8965, + "step": 4881 + }, + { + "epoch": 13.412087912087912, + "grad_norm": 7.445065498352051, + "learning_rate": 4.3293956043956045e-05, + "loss": 0.2439, + "step": 4882 + }, + { + "epoch": 13.414835164835164, + "grad_norm": 16.283708572387695, + "learning_rate": 4.329258241758242e-05, + "loss": 0.6807, + "step": 4883 + }, + { + "epoch": 13.417582417582418, + "grad_norm": 13.50178337097168, + "learning_rate": 4.329120879120879e-05, + "loss": 0.4538, + "step": 4884 + }, + { + "epoch": 13.42032967032967, + "grad_norm": 13.916069030761719, + "learning_rate": 4.328983516483517e-05, + "loss": 0.477, + "step": 4885 + }, + { + "epoch": 13.423076923076923, + "grad_norm": 9.826841354370117, + "learning_rate": 4.328846153846154e-05, + "loss": 0.2708, + "step": 4886 + }, + { + "epoch": 13.425824175824175, + "grad_norm": 7.813159465789795, + "learning_rate": 4.3287087912087916e-05, + "loss": 0.2044, + "step": 4887 + }, + { + "epoch": 13.428571428571429, + "grad_norm": 16.541603088378906, + "learning_rate": 4.328571428571429e-05, + "loss": 0.5897, + "step": 4888 + }, + { + "epoch": 13.43131868131868, + "grad_norm": 16.072717666625977, + "learning_rate": 4.328434065934066e-05, + "loss": 0.5543, + "step": 4889 + }, + { + "epoch": 13.434065934065934, + "grad_norm": 20.247756958007812, + "learning_rate": 4.328296703296704e-05, + "loss": 0.8847, + "step": 4890 + }, + { + "epoch": 13.436813186813186, + "grad_norm": 13.472789764404297, + "learning_rate": 4.328159340659341e-05, + "loss": 0.4218, + "step": 4891 + }, + { + "epoch": 13.43956043956044, + "grad_norm": 12.853904724121094, + "learning_rate": 4.3280219780219786e-05, + "loss": 0.633, + "step": 4892 + }, + { + "epoch": 13.442307692307692, + "grad_norm": 12.29749870300293, + "learning_rate": 4.3278846153846156e-05, + "loss": 0.4235, + "step": 4893 + }, + { + "epoch": 13.445054945054945, + "grad_norm": 12.858254432678223, + "learning_rate": 4.3277472527472526e-05, + "loss": 0.4718, + "step": 4894 + }, + { + "epoch": 13.447802197802197, + "grad_norm": 6.51045560836792, + "learning_rate": 4.32760989010989e-05, + "loss": 0.1773, + "step": 4895 + }, + { + "epoch": 13.45054945054945, + "grad_norm": 11.794196128845215, + "learning_rate": 4.327472527472527e-05, + "loss": 0.6321, + "step": 4896 + }, + { + "epoch": 13.453296703296703, + "grad_norm": 12.246779441833496, + "learning_rate": 4.327335164835165e-05, + "loss": 0.4709, + "step": 4897 + }, + { + "epoch": 13.456043956043956, + "grad_norm": 8.451399803161621, + "learning_rate": 4.327197802197802e-05, + "loss": 0.2904, + "step": 4898 + }, + { + "epoch": 13.458791208791208, + "grad_norm": 11.765411376953125, + "learning_rate": 4.3270604395604396e-05, + "loss": 0.4452, + "step": 4899 + }, + { + "epoch": 13.461538461538462, + "grad_norm": 10.12549114227295, + "learning_rate": 4.326923076923077e-05, + "loss": 0.3298, + "step": 4900 + }, + { + "epoch": 13.464285714285714, + "grad_norm": 13.376971244812012, + "learning_rate": 4.326785714285714e-05, + "loss": 0.4145, + "step": 4901 + }, + { + "epoch": 13.467032967032967, + "grad_norm": 14.39488697052002, + "learning_rate": 4.326648351648352e-05, + "loss": 0.5496, + "step": 4902 + }, + { + "epoch": 13.469780219780219, + "grad_norm": 7.9328813552856445, + "learning_rate": 4.326510989010989e-05, + "loss": 0.3107, + "step": 4903 + }, + { + "epoch": 13.472527472527473, + "grad_norm": 16.249290466308594, + "learning_rate": 4.326373626373627e-05, + "loss": 0.5693, + "step": 4904 + }, + { + "epoch": 13.475274725274724, + "grad_norm": 12.363578796386719, + "learning_rate": 4.3262362637362644e-05, + "loss": 0.2826, + "step": 4905 + }, + { + "epoch": 13.478021978021978, + "grad_norm": 15.738364219665527, + "learning_rate": 4.3260989010989014e-05, + "loss": 0.7077, + "step": 4906 + }, + { + "epoch": 13.48076923076923, + "grad_norm": 18.107791900634766, + "learning_rate": 4.325961538461539e-05, + "loss": 0.7006, + "step": 4907 + }, + { + "epoch": 13.483516483516484, + "grad_norm": 9.324466705322266, + "learning_rate": 4.325824175824176e-05, + "loss": 0.2913, + "step": 4908 + }, + { + "epoch": 13.486263736263735, + "grad_norm": 8.671869277954102, + "learning_rate": 4.325686813186813e-05, + "loss": 0.2986, + "step": 4909 + }, + { + "epoch": 13.489010989010989, + "grad_norm": 18.8564510345459, + "learning_rate": 4.325549450549451e-05, + "loss": 0.4134, + "step": 4910 + }, + { + "epoch": 13.491758241758241, + "grad_norm": 11.416878700256348, + "learning_rate": 4.325412087912088e-05, + "loss": 0.3296, + "step": 4911 + }, + { + "epoch": 13.494505494505495, + "grad_norm": 5.021029949188232, + "learning_rate": 4.3252747252747254e-05, + "loss": 0.1829, + "step": 4912 + }, + { + "epoch": 13.497252747252748, + "grad_norm": 14.616170883178711, + "learning_rate": 4.3251373626373624e-05, + "loss": 0.3455, + "step": 4913 + }, + { + "epoch": 13.5, + "grad_norm": 18.53137969970703, + "learning_rate": 4.325e-05, + "loss": 0.6827, + "step": 4914 + }, + { + "epoch": 13.502747252747252, + "grad_norm": 13.303974151611328, + "learning_rate": 4.324862637362638e-05, + "loss": 0.6032, + "step": 4915 + }, + { + "epoch": 13.505494505494505, + "grad_norm": 10.537565231323242, + "learning_rate": 4.324725274725275e-05, + "loss": 0.4607, + "step": 4916 + }, + { + "epoch": 13.508241758241759, + "grad_norm": 11.686141014099121, + "learning_rate": 4.3245879120879125e-05, + "loss": 0.4408, + "step": 4917 + }, + { + "epoch": 13.510989010989011, + "grad_norm": 13.68840503692627, + "learning_rate": 4.3244505494505495e-05, + "loss": 0.7106, + "step": 4918 + }, + { + "epoch": 13.513736263736263, + "grad_norm": 8.074808120727539, + "learning_rate": 4.324313186813187e-05, + "loss": 0.3492, + "step": 4919 + }, + { + "epoch": 13.516483516483516, + "grad_norm": 11.970963478088379, + "learning_rate": 4.324175824175825e-05, + "loss": 0.3851, + "step": 4920 + }, + { + "epoch": 13.51923076923077, + "grad_norm": 10.570959091186523, + "learning_rate": 4.324038461538462e-05, + "loss": 0.3245, + "step": 4921 + }, + { + "epoch": 13.521978021978022, + "grad_norm": 14.138797760009766, + "learning_rate": 4.3239010989010995e-05, + "loss": 0.9124, + "step": 4922 + }, + { + "epoch": 13.524725274725276, + "grad_norm": 4.463658332824707, + "learning_rate": 4.3237637362637365e-05, + "loss": 0.1138, + "step": 4923 + }, + { + "epoch": 13.527472527472527, + "grad_norm": 13.278937339782715, + "learning_rate": 4.3236263736263735e-05, + "loss": 0.3977, + "step": 4924 + }, + { + "epoch": 13.530219780219781, + "grad_norm": 7.590734958648682, + "learning_rate": 4.323489010989011e-05, + "loss": 0.1965, + "step": 4925 + }, + { + "epoch": 13.532967032967033, + "grad_norm": 6.498380184173584, + "learning_rate": 4.323351648351648e-05, + "loss": 0.1532, + "step": 4926 + }, + { + "epoch": 13.535714285714286, + "grad_norm": 14.298527717590332, + "learning_rate": 4.323214285714286e-05, + "loss": 0.5813, + "step": 4927 + }, + { + "epoch": 13.538461538461538, + "grad_norm": 13.67834758758545, + "learning_rate": 4.323076923076923e-05, + "loss": 0.4394, + "step": 4928 + }, + { + "epoch": 13.541208791208792, + "grad_norm": 8.608348846435547, + "learning_rate": 4.3229395604395605e-05, + "loss": 0.34, + "step": 4929 + }, + { + "epoch": 13.543956043956044, + "grad_norm": 12.136446952819824, + "learning_rate": 4.322802197802198e-05, + "loss": 0.3369, + "step": 4930 + }, + { + "epoch": 13.546703296703297, + "grad_norm": 7.331638336181641, + "learning_rate": 4.322664835164835e-05, + "loss": 0.2074, + "step": 4931 + }, + { + "epoch": 13.54945054945055, + "grad_norm": 14.564366340637207, + "learning_rate": 4.322527472527473e-05, + "loss": 0.5345, + "step": 4932 + }, + { + "epoch": 13.552197802197803, + "grad_norm": 12.278783798217773, + "learning_rate": 4.32239010989011e-05, + "loss": 0.4628, + "step": 4933 + }, + { + "epoch": 13.554945054945055, + "grad_norm": 15.76007080078125, + "learning_rate": 4.3222527472527476e-05, + "loss": 0.4651, + "step": 4934 + }, + { + "epoch": 13.557692307692308, + "grad_norm": 13.876718521118164, + "learning_rate": 4.322115384615385e-05, + "loss": 0.4018, + "step": 4935 + }, + { + "epoch": 13.56043956043956, + "grad_norm": 13.501043319702148, + "learning_rate": 4.321978021978022e-05, + "loss": 0.6237, + "step": 4936 + }, + { + "epoch": 13.563186813186814, + "grad_norm": 18.244722366333008, + "learning_rate": 4.32184065934066e-05, + "loss": 0.292, + "step": 4937 + }, + { + "epoch": 13.565934065934066, + "grad_norm": 15.678210258483887, + "learning_rate": 4.321703296703297e-05, + "loss": 0.6822, + "step": 4938 + }, + { + "epoch": 13.56868131868132, + "grad_norm": 11.422497749328613, + "learning_rate": 4.321565934065934e-05, + "loss": 0.3982, + "step": 4939 + }, + { + "epoch": 13.571428571428571, + "grad_norm": 10.555192947387695, + "learning_rate": 4.3214285714285716e-05, + "loss": 0.3584, + "step": 4940 + }, + { + "epoch": 13.574175824175825, + "grad_norm": 17.473526000976562, + "learning_rate": 4.3212912087912086e-05, + "loss": 0.624, + "step": 4941 + }, + { + "epoch": 13.576923076923077, + "grad_norm": 11.632640838623047, + "learning_rate": 4.321153846153846e-05, + "loss": 0.435, + "step": 4942 + }, + { + "epoch": 13.57967032967033, + "grad_norm": 15.039384841918945, + "learning_rate": 4.321016483516483e-05, + "loss": 0.6579, + "step": 4943 + }, + { + "epoch": 13.582417582417582, + "grad_norm": 14.814033508300781, + "learning_rate": 4.320879120879121e-05, + "loss": 0.562, + "step": 4944 + }, + { + "epoch": 13.585164835164836, + "grad_norm": 8.14275074005127, + "learning_rate": 4.320741758241759e-05, + "loss": 0.2007, + "step": 4945 + }, + { + "epoch": 13.587912087912088, + "grad_norm": 8.64939022064209, + "learning_rate": 4.320604395604396e-05, + "loss": 0.3626, + "step": 4946 + }, + { + "epoch": 13.590659340659341, + "grad_norm": 15.098531723022461, + "learning_rate": 4.3204670329670334e-05, + "loss": 0.4686, + "step": 4947 + }, + { + "epoch": 13.593406593406593, + "grad_norm": 21.014711380004883, + "learning_rate": 4.3203296703296704e-05, + "loss": 1.2144, + "step": 4948 + }, + { + "epoch": 13.596153846153847, + "grad_norm": 19.29338264465332, + "learning_rate": 4.320192307692308e-05, + "loss": 0.525, + "step": 4949 + }, + { + "epoch": 13.598901098901099, + "grad_norm": 17.32494354248047, + "learning_rate": 4.320054945054946e-05, + "loss": 0.4817, + "step": 4950 + }, + { + "epoch": 13.601648351648352, + "grad_norm": 18.813081741333008, + "learning_rate": 4.319917582417583e-05, + "loss": 0.9236, + "step": 4951 + }, + { + "epoch": 13.604395604395604, + "grad_norm": 10.920024871826172, + "learning_rate": 4.3197802197802204e-05, + "loss": 0.3953, + "step": 4952 + }, + { + "epoch": 13.607142857142858, + "grad_norm": 20.837236404418945, + "learning_rate": 4.3196428571428574e-05, + "loss": 0.9032, + "step": 4953 + }, + { + "epoch": 13.60989010989011, + "grad_norm": 8.994549751281738, + "learning_rate": 4.3195054945054944e-05, + "loss": 0.4219, + "step": 4954 + }, + { + "epoch": 13.612637362637363, + "grad_norm": 10.029094696044922, + "learning_rate": 4.319368131868132e-05, + "loss": 0.3462, + "step": 4955 + }, + { + "epoch": 13.615384615384615, + "grad_norm": 11.532449722290039, + "learning_rate": 4.319230769230769e-05, + "loss": 0.3237, + "step": 4956 + }, + { + "epoch": 13.618131868131869, + "grad_norm": 13.973567962646484, + "learning_rate": 4.319093406593407e-05, + "loss": 0.4256, + "step": 4957 + }, + { + "epoch": 13.62087912087912, + "grad_norm": 10.436758041381836, + "learning_rate": 4.318956043956044e-05, + "loss": 0.3691, + "step": 4958 + }, + { + "epoch": 13.623626373626374, + "grad_norm": 16.981000900268555, + "learning_rate": 4.3188186813186814e-05, + "loss": 0.4728, + "step": 4959 + }, + { + "epoch": 13.626373626373626, + "grad_norm": 16.343185424804688, + "learning_rate": 4.318681318681319e-05, + "loss": 0.84, + "step": 4960 + }, + { + "epoch": 13.62912087912088, + "grad_norm": 12.888956069946289, + "learning_rate": 4.318543956043956e-05, + "loss": 0.3982, + "step": 4961 + }, + { + "epoch": 13.631868131868131, + "grad_norm": 11.453998565673828, + "learning_rate": 4.318406593406594e-05, + "loss": 0.2596, + "step": 4962 + }, + { + "epoch": 13.634615384615385, + "grad_norm": 13.111404418945312, + "learning_rate": 4.318269230769231e-05, + "loss": 0.6478, + "step": 4963 + }, + { + "epoch": 13.637362637362637, + "grad_norm": 11.199974060058594, + "learning_rate": 4.3181318681318685e-05, + "loss": 0.467, + "step": 4964 + }, + { + "epoch": 13.64010989010989, + "grad_norm": 7.398059844970703, + "learning_rate": 4.317994505494506e-05, + "loss": 0.2228, + "step": 4965 + }, + { + "epoch": 13.642857142857142, + "grad_norm": 14.459675788879395, + "learning_rate": 4.317857142857143e-05, + "loss": 0.4758, + "step": 4966 + }, + { + "epoch": 13.645604395604396, + "grad_norm": 15.804776191711426, + "learning_rate": 4.317719780219781e-05, + "loss": 0.5334, + "step": 4967 + }, + { + "epoch": 13.648351648351648, + "grad_norm": 7.250674724578857, + "learning_rate": 4.317582417582418e-05, + "loss": 0.2424, + "step": 4968 + }, + { + "epoch": 13.651098901098901, + "grad_norm": 5.6093645095825195, + "learning_rate": 4.317445054945055e-05, + "loss": 0.1393, + "step": 4969 + }, + { + "epoch": 13.653846153846153, + "grad_norm": 18.87467384338379, + "learning_rate": 4.3173076923076925e-05, + "loss": 0.7761, + "step": 4970 + }, + { + "epoch": 13.656593406593407, + "grad_norm": 15.078128814697266, + "learning_rate": 4.3171703296703295e-05, + "loss": 0.5982, + "step": 4971 + }, + { + "epoch": 13.659340659340659, + "grad_norm": 11.271979331970215, + "learning_rate": 4.317032967032967e-05, + "loss": 0.3225, + "step": 4972 + }, + { + "epoch": 13.662087912087912, + "grad_norm": 25.672513961791992, + "learning_rate": 4.316895604395604e-05, + "loss": 1.1034, + "step": 4973 + }, + { + "epoch": 13.664835164835164, + "grad_norm": 13.573152542114258, + "learning_rate": 4.316758241758242e-05, + "loss": 0.5439, + "step": 4974 + }, + { + "epoch": 13.667582417582418, + "grad_norm": 12.495598793029785, + "learning_rate": 4.3166208791208796e-05, + "loss": 0.7417, + "step": 4975 + }, + { + "epoch": 13.67032967032967, + "grad_norm": 9.893359184265137, + "learning_rate": 4.3164835164835166e-05, + "loss": 0.3164, + "step": 4976 + }, + { + "epoch": 13.673076923076923, + "grad_norm": 13.4677095413208, + "learning_rate": 4.316346153846154e-05, + "loss": 0.3426, + "step": 4977 + }, + { + "epoch": 13.675824175824175, + "grad_norm": 14.510340690612793, + "learning_rate": 4.316208791208791e-05, + "loss": 0.6172, + "step": 4978 + }, + { + "epoch": 13.678571428571429, + "grad_norm": 14.240402221679688, + "learning_rate": 4.316071428571429e-05, + "loss": 0.4561, + "step": 4979 + }, + { + "epoch": 13.68131868131868, + "grad_norm": 13.451970100402832, + "learning_rate": 4.3159340659340666e-05, + "loss": 0.493, + "step": 4980 + }, + { + "epoch": 13.684065934065934, + "grad_norm": 8.471837997436523, + "learning_rate": 4.3157967032967036e-05, + "loss": 0.3784, + "step": 4981 + }, + { + "epoch": 13.686813186813186, + "grad_norm": 14.466461181640625, + "learning_rate": 4.315659340659341e-05, + "loss": 0.8415, + "step": 4982 + }, + { + "epoch": 13.68956043956044, + "grad_norm": 9.490971565246582, + "learning_rate": 4.315521978021978e-05, + "loss": 0.247, + "step": 4983 + }, + { + "epoch": 13.692307692307692, + "grad_norm": 10.686491012573242, + "learning_rate": 4.315384615384615e-05, + "loss": 0.2377, + "step": 4984 + }, + { + "epoch": 13.695054945054945, + "grad_norm": 14.62559986114502, + "learning_rate": 4.315247252747253e-05, + "loss": 0.5454, + "step": 4985 + }, + { + "epoch": 13.697802197802197, + "grad_norm": 19.732196807861328, + "learning_rate": 4.31510989010989e-05, + "loss": 0.9481, + "step": 4986 + }, + { + "epoch": 13.70054945054945, + "grad_norm": 12.248234748840332, + "learning_rate": 4.3149725274725277e-05, + "loss": 0.4182, + "step": 4987 + }, + { + "epoch": 13.703296703296703, + "grad_norm": 15.515449523925781, + "learning_rate": 4.314835164835165e-05, + "loss": 0.7333, + "step": 4988 + }, + { + "epoch": 13.706043956043956, + "grad_norm": 15.742258071899414, + "learning_rate": 4.3146978021978023e-05, + "loss": 0.6654, + "step": 4989 + }, + { + "epoch": 13.708791208791208, + "grad_norm": 12.907803535461426, + "learning_rate": 4.31456043956044e-05, + "loss": 0.4183, + "step": 4990 + }, + { + "epoch": 13.711538461538462, + "grad_norm": 18.13562774658203, + "learning_rate": 4.314423076923077e-05, + "loss": 0.4698, + "step": 4991 + }, + { + "epoch": 13.714285714285714, + "grad_norm": 15.696730613708496, + "learning_rate": 4.314285714285715e-05, + "loss": 0.5152, + "step": 4992 + }, + { + "epoch": 13.717032967032967, + "grad_norm": 9.076647758483887, + "learning_rate": 4.314148351648352e-05, + "loss": 0.3126, + "step": 4993 + }, + { + "epoch": 13.719780219780219, + "grad_norm": 10.565563201904297, + "learning_rate": 4.3140109890109894e-05, + "loss": 0.4373, + "step": 4994 + }, + { + "epoch": 13.722527472527473, + "grad_norm": 13.616942405700684, + "learning_rate": 4.313873626373627e-05, + "loss": 0.46, + "step": 4995 + }, + { + "epoch": 13.725274725274724, + "grad_norm": 12.838788032531738, + "learning_rate": 4.313736263736264e-05, + "loss": 0.4387, + "step": 4996 + }, + { + "epoch": 13.728021978021978, + "grad_norm": 9.623133659362793, + "learning_rate": 4.313598901098902e-05, + "loss": 0.3582, + "step": 4997 + }, + { + "epoch": 13.73076923076923, + "grad_norm": 16.446693420410156, + "learning_rate": 4.313461538461539e-05, + "loss": 0.8597, + "step": 4998 + }, + { + "epoch": 13.733516483516484, + "grad_norm": 17.490453720092773, + "learning_rate": 4.313324175824176e-05, + "loss": 0.6048, + "step": 4999 + }, + { + "epoch": 13.736263736263737, + "grad_norm": 21.155298233032227, + "learning_rate": 4.3131868131868134e-05, + "loss": 1.1804, + "step": 5000 + }, + { + "epoch": 13.739010989010989, + "grad_norm": 11.915645599365234, + "learning_rate": 4.3130494505494504e-05, + "loss": 0.3326, + "step": 5001 + }, + { + "epoch": 13.741758241758241, + "grad_norm": 16.64859962463379, + "learning_rate": 4.312912087912088e-05, + "loss": 0.5741, + "step": 5002 + }, + { + "epoch": 13.744505494505495, + "grad_norm": 10.610708236694336, + "learning_rate": 4.312774725274725e-05, + "loss": 0.2376, + "step": 5003 + }, + { + "epoch": 13.747252747252748, + "grad_norm": 7.684528350830078, + "learning_rate": 4.312637362637363e-05, + "loss": 0.3077, + "step": 5004 + }, + { + "epoch": 13.75, + "grad_norm": 13.307236671447754, + "learning_rate": 4.3125000000000005e-05, + "loss": 0.7702, + "step": 5005 + }, + { + "epoch": 13.752747252747252, + "grad_norm": 12.613136291503906, + "learning_rate": 4.3123626373626375e-05, + "loss": 0.3854, + "step": 5006 + }, + { + "epoch": 13.755494505494505, + "grad_norm": 16.847490310668945, + "learning_rate": 4.312225274725275e-05, + "loss": 0.8728, + "step": 5007 + }, + { + "epoch": 13.758241758241759, + "grad_norm": 13.256494522094727, + "learning_rate": 4.312087912087912e-05, + "loss": 0.4714, + "step": 5008 + }, + { + "epoch": 13.760989010989011, + "grad_norm": 5.79615592956543, + "learning_rate": 4.31195054945055e-05, + "loss": 0.2049, + "step": 5009 + }, + { + "epoch": 13.763736263736263, + "grad_norm": 23.539514541625977, + "learning_rate": 4.3118131868131875e-05, + "loss": 0.8789, + "step": 5010 + }, + { + "epoch": 13.766483516483516, + "grad_norm": 18.672840118408203, + "learning_rate": 4.3116758241758245e-05, + "loss": 0.7044, + "step": 5011 + }, + { + "epoch": 13.76923076923077, + "grad_norm": 15.09896469116211, + "learning_rate": 4.311538461538462e-05, + "loss": 0.6524, + "step": 5012 + }, + { + "epoch": 13.771978021978022, + "grad_norm": 13.15733814239502, + "learning_rate": 4.311401098901099e-05, + "loss": 0.332, + "step": 5013 + }, + { + "epoch": 13.774725274725276, + "grad_norm": 10.513467788696289, + "learning_rate": 4.311263736263736e-05, + "loss": 0.437, + "step": 5014 + }, + { + "epoch": 13.777472527472527, + "grad_norm": 14.269810676574707, + "learning_rate": 4.311126373626374e-05, + "loss": 0.3669, + "step": 5015 + }, + { + "epoch": 13.780219780219781, + "grad_norm": 8.868474960327148, + "learning_rate": 4.310989010989011e-05, + "loss": 0.4476, + "step": 5016 + }, + { + "epoch": 13.782967032967033, + "grad_norm": 8.740968704223633, + "learning_rate": 4.3108516483516486e-05, + "loss": 0.3518, + "step": 5017 + }, + { + "epoch": 13.785714285714286, + "grad_norm": 13.8154935836792, + "learning_rate": 4.3107142857142856e-05, + "loss": 0.4689, + "step": 5018 + }, + { + "epoch": 13.788461538461538, + "grad_norm": 16.423587799072266, + "learning_rate": 4.310576923076923e-05, + "loss": 0.7618, + "step": 5019 + }, + { + "epoch": 13.791208791208792, + "grad_norm": 16.914033889770508, + "learning_rate": 4.310439560439561e-05, + "loss": 0.6314, + "step": 5020 + }, + { + "epoch": 13.793956043956044, + "grad_norm": 11.70035171508789, + "learning_rate": 4.310302197802198e-05, + "loss": 0.4872, + "step": 5021 + }, + { + "epoch": 13.796703296703297, + "grad_norm": 13.21642017364502, + "learning_rate": 4.3101648351648356e-05, + "loss": 0.333, + "step": 5022 + }, + { + "epoch": 13.79945054945055, + "grad_norm": 14.303874015808105, + "learning_rate": 4.3100274725274726e-05, + "loss": 0.6818, + "step": 5023 + }, + { + "epoch": 13.802197802197803, + "grad_norm": 9.913787841796875, + "learning_rate": 4.30989010989011e-05, + "loss": 0.2827, + "step": 5024 + }, + { + "epoch": 13.804945054945055, + "grad_norm": 16.337387084960938, + "learning_rate": 4.309752747252748e-05, + "loss": 0.6201, + "step": 5025 + }, + { + "epoch": 13.807692307692308, + "grad_norm": 14.113114356994629, + "learning_rate": 4.309615384615385e-05, + "loss": 0.5163, + "step": 5026 + }, + { + "epoch": 13.81043956043956, + "grad_norm": 19.524227142333984, + "learning_rate": 4.3094780219780226e-05, + "loss": 0.6324, + "step": 5027 + }, + { + "epoch": 13.813186813186814, + "grad_norm": 9.316298484802246, + "learning_rate": 4.3093406593406596e-05, + "loss": 0.2758, + "step": 5028 + }, + { + "epoch": 13.815934065934066, + "grad_norm": 9.668830871582031, + "learning_rate": 4.3092032967032966e-05, + "loss": 0.3256, + "step": 5029 + }, + { + "epoch": 13.81868131868132, + "grad_norm": 10.128119468688965, + "learning_rate": 4.309065934065934e-05, + "loss": 0.4332, + "step": 5030 + }, + { + "epoch": 13.821428571428571, + "grad_norm": 16.551237106323242, + "learning_rate": 4.308928571428571e-05, + "loss": 0.5173, + "step": 5031 + }, + { + "epoch": 13.824175824175825, + "grad_norm": 9.223771095275879, + "learning_rate": 4.308791208791209e-05, + "loss": 0.2804, + "step": 5032 + }, + { + "epoch": 13.826923076923077, + "grad_norm": 12.356391906738281, + "learning_rate": 4.308653846153846e-05, + "loss": 0.377, + "step": 5033 + }, + { + "epoch": 13.82967032967033, + "grad_norm": 17.214256286621094, + "learning_rate": 4.308516483516484e-05, + "loss": 0.6833, + "step": 5034 + }, + { + "epoch": 13.832417582417582, + "grad_norm": 12.589115142822266, + "learning_rate": 4.3083791208791214e-05, + "loss": 0.5464, + "step": 5035 + }, + { + "epoch": 13.835164835164836, + "grad_norm": 16.144376754760742, + "learning_rate": 4.3082417582417584e-05, + "loss": 0.3935, + "step": 5036 + }, + { + "epoch": 13.837912087912088, + "grad_norm": 9.527585983276367, + "learning_rate": 4.308104395604396e-05, + "loss": 0.2652, + "step": 5037 + }, + { + "epoch": 13.840659340659341, + "grad_norm": 11.561222076416016, + "learning_rate": 4.307967032967033e-05, + "loss": 0.3253, + "step": 5038 + }, + { + "epoch": 13.843406593406593, + "grad_norm": 14.645570755004883, + "learning_rate": 4.307829670329671e-05, + "loss": 0.4304, + "step": 5039 + }, + { + "epoch": 13.846153846153847, + "grad_norm": 17.079954147338867, + "learning_rate": 4.3076923076923084e-05, + "loss": 0.4861, + "step": 5040 + }, + { + "epoch": 13.848901098901099, + "grad_norm": 9.857675552368164, + "learning_rate": 4.3075549450549454e-05, + "loss": 0.3048, + "step": 5041 + }, + { + "epoch": 13.851648351648352, + "grad_norm": 14.98028564453125, + "learning_rate": 4.307417582417583e-05, + "loss": 0.5789, + "step": 5042 + }, + { + "epoch": 13.854395604395604, + "grad_norm": 15.495904922485352, + "learning_rate": 4.30728021978022e-05, + "loss": 0.5127, + "step": 5043 + }, + { + "epoch": 13.857142857142858, + "grad_norm": 18.293033599853516, + "learning_rate": 4.307142857142857e-05, + "loss": 0.7309, + "step": 5044 + }, + { + "epoch": 13.85989010989011, + "grad_norm": 12.772912979125977, + "learning_rate": 4.307005494505495e-05, + "loss": 0.4026, + "step": 5045 + }, + { + "epoch": 13.862637362637363, + "grad_norm": 11.767938613891602, + "learning_rate": 4.306868131868132e-05, + "loss": 0.4623, + "step": 5046 + }, + { + "epoch": 13.865384615384615, + "grad_norm": 15.037487983703613, + "learning_rate": 4.3067307692307695e-05, + "loss": 0.4404, + "step": 5047 + }, + { + "epoch": 13.868131868131869, + "grad_norm": 15.348742485046387, + "learning_rate": 4.3065934065934065e-05, + "loss": 0.5521, + "step": 5048 + }, + { + "epoch": 13.87087912087912, + "grad_norm": 24.97662353515625, + "learning_rate": 4.306456043956044e-05, + "loss": 1.0495, + "step": 5049 + }, + { + "epoch": 13.873626373626374, + "grad_norm": 11.436151504516602, + "learning_rate": 4.306318681318682e-05, + "loss": 0.5398, + "step": 5050 + }, + { + "epoch": 13.876373626373626, + "grad_norm": 5.9257659912109375, + "learning_rate": 4.306181318681319e-05, + "loss": 0.1751, + "step": 5051 + }, + { + "epoch": 13.87912087912088, + "grad_norm": 13.108136177062988, + "learning_rate": 4.3060439560439565e-05, + "loss": 0.6194, + "step": 5052 + }, + { + "epoch": 13.881868131868131, + "grad_norm": 19.372547149658203, + "learning_rate": 4.3059065934065935e-05, + "loss": 0.7719, + "step": 5053 + }, + { + "epoch": 13.884615384615385, + "grad_norm": 10.292391777038574, + "learning_rate": 4.305769230769231e-05, + "loss": 0.3025, + "step": 5054 + }, + { + "epoch": 13.887362637362637, + "grad_norm": 8.131501197814941, + "learning_rate": 4.305631868131869e-05, + "loss": 0.2462, + "step": 5055 + }, + { + "epoch": 13.89010989010989, + "grad_norm": 7.066055774688721, + "learning_rate": 4.305494505494506e-05, + "loss": 0.1911, + "step": 5056 + }, + { + "epoch": 13.892857142857142, + "grad_norm": 16.93373680114746, + "learning_rate": 4.305357142857143e-05, + "loss": 0.4836, + "step": 5057 + }, + { + "epoch": 13.895604395604396, + "grad_norm": 11.204721450805664, + "learning_rate": 4.3052197802197805e-05, + "loss": 0.5882, + "step": 5058 + }, + { + "epoch": 13.898351648351648, + "grad_norm": 11.197220802307129, + "learning_rate": 4.3050824175824175e-05, + "loss": 0.3428, + "step": 5059 + }, + { + "epoch": 13.901098901098901, + "grad_norm": 14.688116073608398, + "learning_rate": 4.304945054945055e-05, + "loss": 0.4113, + "step": 5060 + }, + { + "epoch": 13.903846153846153, + "grad_norm": 10.852533340454102, + "learning_rate": 4.304807692307692e-05, + "loss": 0.4612, + "step": 5061 + }, + { + "epoch": 13.906593406593407, + "grad_norm": 14.408626556396484, + "learning_rate": 4.30467032967033e-05, + "loss": 0.52, + "step": 5062 + }, + { + "epoch": 13.909340659340659, + "grad_norm": 15.556806564331055, + "learning_rate": 4.304532967032967e-05, + "loss": 0.5339, + "step": 5063 + }, + { + "epoch": 13.912087912087912, + "grad_norm": 15.710969924926758, + "learning_rate": 4.3043956043956046e-05, + "loss": 0.7277, + "step": 5064 + }, + { + "epoch": 13.914835164835164, + "grad_norm": 14.84205436706543, + "learning_rate": 4.304258241758242e-05, + "loss": 0.6756, + "step": 5065 + }, + { + "epoch": 13.917582417582418, + "grad_norm": 14.013448715209961, + "learning_rate": 4.304120879120879e-05, + "loss": 0.6002, + "step": 5066 + }, + { + "epoch": 13.92032967032967, + "grad_norm": 12.243412017822266, + "learning_rate": 4.303983516483517e-05, + "loss": 0.2753, + "step": 5067 + }, + { + "epoch": 13.923076923076923, + "grad_norm": 17.497291564941406, + "learning_rate": 4.303846153846154e-05, + "loss": 0.6647, + "step": 5068 + }, + { + "epoch": 13.925824175824175, + "grad_norm": 11.499594688415527, + "learning_rate": 4.3037087912087916e-05, + "loss": 0.3593, + "step": 5069 + }, + { + "epoch": 13.928571428571429, + "grad_norm": 17.749658584594727, + "learning_rate": 4.303571428571429e-05, + "loss": 0.7187, + "step": 5070 + }, + { + "epoch": 13.93131868131868, + "grad_norm": 8.91269302368164, + "learning_rate": 4.303434065934066e-05, + "loss": 0.2907, + "step": 5071 + }, + { + "epoch": 13.934065934065934, + "grad_norm": 21.811201095581055, + "learning_rate": 4.303296703296703e-05, + "loss": 0.9125, + "step": 5072 + }, + { + "epoch": 13.936813186813186, + "grad_norm": 15.186220169067383, + "learning_rate": 4.303159340659341e-05, + "loss": 0.7358, + "step": 5073 + }, + { + "epoch": 13.93956043956044, + "grad_norm": 21.777748107910156, + "learning_rate": 4.303021978021978e-05, + "loss": 0.9475, + "step": 5074 + }, + { + "epoch": 13.942307692307692, + "grad_norm": 4.733185291290283, + "learning_rate": 4.302884615384616e-05, + "loss": 0.1361, + "step": 5075 + }, + { + "epoch": 13.945054945054945, + "grad_norm": 8.641536712646484, + "learning_rate": 4.302747252747253e-05, + "loss": 0.4711, + "step": 5076 + }, + { + "epoch": 13.947802197802197, + "grad_norm": 19.392745971679688, + "learning_rate": 4.3026098901098904e-05, + "loss": 0.8485, + "step": 5077 + }, + { + "epoch": 13.95054945054945, + "grad_norm": 9.1420316696167, + "learning_rate": 4.3024725274725274e-05, + "loss": 0.345, + "step": 5078 + }, + { + "epoch": 13.953296703296703, + "grad_norm": 9.958218574523926, + "learning_rate": 4.302335164835165e-05, + "loss": 0.179, + "step": 5079 + }, + { + "epoch": 13.956043956043956, + "grad_norm": 10.701087951660156, + "learning_rate": 4.302197802197803e-05, + "loss": 0.3598, + "step": 5080 + }, + { + "epoch": 13.958791208791208, + "grad_norm": 10.366738319396973, + "learning_rate": 4.30206043956044e-05, + "loss": 0.4828, + "step": 5081 + }, + { + "epoch": 13.961538461538462, + "grad_norm": 12.716300010681152, + "learning_rate": 4.3019230769230774e-05, + "loss": 0.5446, + "step": 5082 + }, + { + "epoch": 13.964285714285714, + "grad_norm": 11.108354568481445, + "learning_rate": 4.3017857142857144e-05, + "loss": 0.4024, + "step": 5083 + }, + { + "epoch": 13.967032967032967, + "grad_norm": 13.970499992370605, + "learning_rate": 4.301648351648352e-05, + "loss": 0.5452, + "step": 5084 + }, + { + "epoch": 13.969780219780219, + "grad_norm": 15.41087532043457, + "learning_rate": 4.30151098901099e-05, + "loss": 0.483, + "step": 5085 + }, + { + "epoch": 13.972527472527473, + "grad_norm": 4.890560150146484, + "learning_rate": 4.301373626373627e-05, + "loss": 0.1143, + "step": 5086 + }, + { + "epoch": 13.975274725274724, + "grad_norm": 8.541370391845703, + "learning_rate": 4.301236263736264e-05, + "loss": 0.2197, + "step": 5087 + }, + { + "epoch": 13.978021978021978, + "grad_norm": 8.056529998779297, + "learning_rate": 4.3010989010989014e-05, + "loss": 0.2077, + "step": 5088 + }, + { + "epoch": 13.98076923076923, + "grad_norm": 11.344796180725098, + "learning_rate": 4.3009615384615384e-05, + "loss": 0.3306, + "step": 5089 + }, + { + "epoch": 13.983516483516484, + "grad_norm": 6.330842971801758, + "learning_rate": 4.300824175824176e-05, + "loss": 0.13, + "step": 5090 + }, + { + "epoch": 13.986263736263737, + "grad_norm": 14.80716609954834, + "learning_rate": 4.300686813186813e-05, + "loss": 0.5624, + "step": 5091 + }, + { + "epoch": 13.989010989010989, + "grad_norm": 21.264663696289062, + "learning_rate": 4.300549450549451e-05, + "loss": 0.9102, + "step": 5092 + }, + { + "epoch": 13.991758241758241, + "grad_norm": 15.518328666687012, + "learning_rate": 4.300412087912088e-05, + "loss": 0.6199, + "step": 5093 + }, + { + "epoch": 13.994505494505495, + "grad_norm": 16.364112854003906, + "learning_rate": 4.3002747252747255e-05, + "loss": 0.5813, + "step": 5094 + }, + { + "epoch": 13.997252747252748, + "grad_norm": 21.096784591674805, + "learning_rate": 4.300137362637363e-05, + "loss": 0.7925, + "step": 5095 + }, + { + "epoch": 14.0, + "grad_norm": 51.79513168334961, + "learning_rate": 4.3e-05, + "loss": 1.7904, + "step": 5096 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.5771349862258953, + "eval_f1": 0.5883338561399837, + "eval_f1_DuraRiadoRio_64x64": 0.5789473684210527, + "eval_f1_Mole_64x64": 0.5077720207253886, + "eval_f1_Quebrado_64x64": 0.7649402390438247, + "eval_f1_RiadoRio_64x64": 0.5096525096525096, + "eval_f1_RioFechado_64x64": 0.5803571428571429, + "eval_loss": 1.5749942064285278, + "eval_precision": 0.7363364934400073, + "eval_precision_DuraRiadoRio_64x64": 0.6311475409836066, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.897196261682243, + "eval_precision_RiadoRio_64x64": 0.36065573770491804, + "eval_precision_RioFechado_64x64": 0.7926829268292683, + "eval_recall": 0.573566839634297, + "eval_recall_DuraRiadoRio_64x64": 0.5347222222222222, + "eval_recall_Mole_64x64": 0.3402777777777778, + "eval_recall_Quebrado_64x64": 0.6666666666666666, + "eval_recall_RiadoRio_64x64": 0.868421052631579, + "eval_recall_RioFechado_64x64": 0.45774647887323944, + "eval_runtime": 1.7121, + "eval_samples_per_second": 424.045, + "eval_steps_per_second": 26.868, + "step": 5096 + }, + { + "epoch": 14.002747252747254, + "grad_norm": 13.81182861328125, + "learning_rate": 4.299862637362638e-05, + "loss": 0.4443, + "step": 5097 + }, + { + "epoch": 14.005494505494505, + "grad_norm": 9.485939025878906, + "learning_rate": 4.299725274725275e-05, + "loss": 0.3265, + "step": 5098 + }, + { + "epoch": 14.008241758241759, + "grad_norm": 12.7100830078125, + "learning_rate": 4.2995879120879125e-05, + "loss": 0.3759, + "step": 5099 + }, + { + "epoch": 14.010989010989011, + "grad_norm": 9.863348960876465, + "learning_rate": 4.29945054945055e-05, + "loss": 0.2725, + "step": 5100 + }, + { + "epoch": 14.013736263736265, + "grad_norm": 13.76801872253418, + "learning_rate": 4.299313186813187e-05, + "loss": 0.5652, + "step": 5101 + }, + { + "epoch": 14.016483516483516, + "grad_norm": 9.627725601196289, + "learning_rate": 4.299175824175824e-05, + "loss": 0.2852, + "step": 5102 + }, + { + "epoch": 14.01923076923077, + "grad_norm": 8.781536102294922, + "learning_rate": 4.299038461538462e-05, + "loss": 0.2318, + "step": 5103 + }, + { + "epoch": 14.021978021978022, + "grad_norm": 16.420080184936523, + "learning_rate": 4.298901098901099e-05, + "loss": 0.6256, + "step": 5104 + }, + { + "epoch": 14.024725274725276, + "grad_norm": 8.964868545532227, + "learning_rate": 4.2987637362637366e-05, + "loss": 0.3635, + "step": 5105 + }, + { + "epoch": 14.027472527472527, + "grad_norm": 5.674344539642334, + "learning_rate": 4.2986263736263736e-05, + "loss": 0.184, + "step": 5106 + }, + { + "epoch": 14.030219780219781, + "grad_norm": 14.205597877502441, + "learning_rate": 4.298489010989011e-05, + "loss": 0.3838, + "step": 5107 + }, + { + "epoch": 14.032967032967033, + "grad_norm": 12.728413581848145, + "learning_rate": 4.298351648351648e-05, + "loss": 0.2856, + "step": 5108 + }, + { + "epoch": 14.035714285714286, + "grad_norm": 11.862096786499023, + "learning_rate": 4.298214285714286e-05, + "loss": 0.6359, + "step": 5109 + }, + { + "epoch": 14.038461538461538, + "grad_norm": 14.683948516845703, + "learning_rate": 4.2980769230769236e-05, + "loss": 0.58, + "step": 5110 + }, + { + "epoch": 14.041208791208792, + "grad_norm": 10.897865295410156, + "learning_rate": 4.2979395604395606e-05, + "loss": 0.2967, + "step": 5111 + }, + { + "epoch": 14.043956043956044, + "grad_norm": 7.602872371673584, + "learning_rate": 4.297802197802198e-05, + "loss": 0.2471, + "step": 5112 + }, + { + "epoch": 14.046703296703297, + "grad_norm": 8.805047988891602, + "learning_rate": 4.297664835164835e-05, + "loss": 0.2444, + "step": 5113 + }, + { + "epoch": 14.04945054945055, + "grad_norm": 15.637541770935059, + "learning_rate": 4.297527472527473e-05, + "loss": 0.6648, + "step": 5114 + }, + { + "epoch": 14.052197802197803, + "grad_norm": 7.347252368927002, + "learning_rate": 4.2973901098901107e-05, + "loss": 0.252, + "step": 5115 + }, + { + "epoch": 14.054945054945055, + "grad_norm": 11.287496566772461, + "learning_rate": 4.2972527472527477e-05, + "loss": 0.4043, + "step": 5116 + }, + { + "epoch": 14.057692307692308, + "grad_norm": 9.5869722366333, + "learning_rate": 4.2971153846153847e-05, + "loss": 0.283, + "step": 5117 + }, + { + "epoch": 14.06043956043956, + "grad_norm": 25.971628189086914, + "learning_rate": 4.296978021978022e-05, + "loss": 1.1063, + "step": 5118 + }, + { + "epoch": 14.063186813186814, + "grad_norm": 11.626262664794922, + "learning_rate": 4.2968406593406593e-05, + "loss": 0.3222, + "step": 5119 + }, + { + "epoch": 14.065934065934066, + "grad_norm": 10.277908325195312, + "learning_rate": 4.2967032967032963e-05, + "loss": 0.2955, + "step": 5120 + }, + { + "epoch": 14.06868131868132, + "grad_norm": 13.709663391113281, + "learning_rate": 4.296565934065934e-05, + "loss": 0.5761, + "step": 5121 + }, + { + "epoch": 14.071428571428571, + "grad_norm": 10.35136890411377, + "learning_rate": 4.296428571428572e-05, + "loss": 0.2635, + "step": 5122 + }, + { + "epoch": 14.074175824175825, + "grad_norm": 13.361552238464355, + "learning_rate": 4.296291208791209e-05, + "loss": 0.4547, + "step": 5123 + }, + { + "epoch": 14.076923076923077, + "grad_norm": 10.339520454406738, + "learning_rate": 4.2961538461538464e-05, + "loss": 0.3077, + "step": 5124 + }, + { + "epoch": 14.07967032967033, + "grad_norm": 13.332267761230469, + "learning_rate": 4.2960164835164834e-05, + "loss": 0.5058, + "step": 5125 + }, + { + "epoch": 14.082417582417582, + "grad_norm": 5.8296217918396, + "learning_rate": 4.295879120879121e-05, + "loss": 0.1883, + "step": 5126 + }, + { + "epoch": 14.085164835164836, + "grad_norm": 12.239119529724121, + "learning_rate": 4.295741758241759e-05, + "loss": 0.4312, + "step": 5127 + }, + { + "epoch": 14.087912087912088, + "grad_norm": 8.52624225616455, + "learning_rate": 4.295604395604396e-05, + "loss": 0.3197, + "step": 5128 + }, + { + "epoch": 14.090659340659341, + "grad_norm": 8.789243698120117, + "learning_rate": 4.2954670329670334e-05, + "loss": 0.3486, + "step": 5129 + }, + { + "epoch": 14.093406593406593, + "grad_norm": 12.240397453308105, + "learning_rate": 4.2953296703296704e-05, + "loss": 0.5735, + "step": 5130 + }, + { + "epoch": 14.096153846153847, + "grad_norm": 7.774289131164551, + "learning_rate": 4.295192307692308e-05, + "loss": 0.3379, + "step": 5131 + }, + { + "epoch": 14.098901098901099, + "grad_norm": 6.69738245010376, + "learning_rate": 4.295054945054945e-05, + "loss": 0.2071, + "step": 5132 + }, + { + "epoch": 14.101648351648352, + "grad_norm": 14.837197303771973, + "learning_rate": 4.294917582417583e-05, + "loss": 0.6092, + "step": 5133 + }, + { + "epoch": 14.104395604395604, + "grad_norm": 6.818667888641357, + "learning_rate": 4.29478021978022e-05, + "loss": 0.2412, + "step": 5134 + }, + { + "epoch": 14.107142857142858, + "grad_norm": 20.655433654785156, + "learning_rate": 4.294642857142857e-05, + "loss": 0.5924, + "step": 5135 + }, + { + "epoch": 14.10989010989011, + "grad_norm": 17.26772117614746, + "learning_rate": 4.2945054945054945e-05, + "loss": 0.7155, + "step": 5136 + }, + { + "epoch": 14.112637362637363, + "grad_norm": 6.062309265136719, + "learning_rate": 4.294368131868132e-05, + "loss": 0.2099, + "step": 5137 + }, + { + "epoch": 14.115384615384615, + "grad_norm": 18.806039810180664, + "learning_rate": 4.294230769230769e-05, + "loss": 0.986, + "step": 5138 + }, + { + "epoch": 14.118131868131869, + "grad_norm": 11.012650489807129, + "learning_rate": 4.294093406593407e-05, + "loss": 0.4937, + "step": 5139 + }, + { + "epoch": 14.12087912087912, + "grad_norm": 13.891215324401855, + "learning_rate": 4.293956043956044e-05, + "loss": 0.6309, + "step": 5140 + }, + { + "epoch": 14.123626373626374, + "grad_norm": 5.557957172393799, + "learning_rate": 4.2938186813186815e-05, + "loss": 0.1314, + "step": 5141 + }, + { + "epoch": 14.126373626373626, + "grad_norm": 11.751703262329102, + "learning_rate": 4.293681318681319e-05, + "loss": 0.3597, + "step": 5142 + }, + { + "epoch": 14.12912087912088, + "grad_norm": 7.976860046386719, + "learning_rate": 4.293543956043956e-05, + "loss": 0.1952, + "step": 5143 + }, + { + "epoch": 14.131868131868131, + "grad_norm": 21.917600631713867, + "learning_rate": 4.293406593406594e-05, + "loss": 0.7465, + "step": 5144 + }, + { + "epoch": 14.134615384615385, + "grad_norm": 15.735158920288086, + "learning_rate": 4.293269230769231e-05, + "loss": 0.4796, + "step": 5145 + }, + { + "epoch": 14.137362637362637, + "grad_norm": 22.018051147460938, + "learning_rate": 4.2931318681318686e-05, + "loss": 1.0862, + "step": 5146 + }, + { + "epoch": 14.14010989010989, + "grad_norm": 19.523963928222656, + "learning_rate": 4.2929945054945056e-05, + "loss": 0.9678, + "step": 5147 + }, + { + "epoch": 14.142857142857142, + "grad_norm": 6.07539701461792, + "learning_rate": 4.292857142857143e-05, + "loss": 0.1681, + "step": 5148 + }, + { + "epoch": 14.145604395604396, + "grad_norm": 17.051088333129883, + "learning_rate": 4.29271978021978e-05, + "loss": 0.5797, + "step": 5149 + }, + { + "epoch": 14.148351648351648, + "grad_norm": 8.269576072692871, + "learning_rate": 4.292582417582417e-05, + "loss": 0.2852, + "step": 5150 + }, + { + "epoch": 14.151098901098901, + "grad_norm": 10.28635025024414, + "learning_rate": 4.292445054945055e-05, + "loss": 0.3277, + "step": 5151 + }, + { + "epoch": 14.153846153846153, + "grad_norm": 9.501382827758789, + "learning_rate": 4.2923076923076926e-05, + "loss": 0.1944, + "step": 5152 + }, + { + "epoch": 14.156593406593407, + "grad_norm": 18.279727935791016, + "learning_rate": 4.2921703296703296e-05, + "loss": 0.7605, + "step": 5153 + }, + { + "epoch": 14.159340659340659, + "grad_norm": 11.629794120788574, + "learning_rate": 4.292032967032967e-05, + "loss": 0.3566, + "step": 5154 + }, + { + "epoch": 14.162087912087912, + "grad_norm": 12.450735092163086, + "learning_rate": 4.291895604395604e-05, + "loss": 0.3451, + "step": 5155 + }, + { + "epoch": 14.164835164835164, + "grad_norm": 16.027454376220703, + "learning_rate": 4.291758241758242e-05, + "loss": 0.6766, + "step": 5156 + }, + { + "epoch": 14.167582417582418, + "grad_norm": 11.361793518066406, + "learning_rate": 4.2916208791208796e-05, + "loss": 0.3491, + "step": 5157 + }, + { + "epoch": 14.17032967032967, + "grad_norm": 10.782907485961914, + "learning_rate": 4.2914835164835166e-05, + "loss": 0.4068, + "step": 5158 + }, + { + "epoch": 14.173076923076923, + "grad_norm": 17.29706382751465, + "learning_rate": 4.291346153846154e-05, + "loss": 0.7097, + "step": 5159 + }, + { + "epoch": 14.175824175824175, + "grad_norm": 6.6438398361206055, + "learning_rate": 4.291208791208791e-05, + "loss": 0.216, + "step": 5160 + }, + { + "epoch": 14.178571428571429, + "grad_norm": 16.266132354736328, + "learning_rate": 4.291071428571429e-05, + "loss": 0.6101, + "step": 5161 + }, + { + "epoch": 14.18131868131868, + "grad_norm": 13.290995597839355, + "learning_rate": 4.290934065934066e-05, + "loss": 0.4339, + "step": 5162 + }, + { + "epoch": 14.184065934065934, + "grad_norm": 15.0147123336792, + "learning_rate": 4.290796703296704e-05, + "loss": 0.6038, + "step": 5163 + }, + { + "epoch": 14.186813186813186, + "grad_norm": 5.335078716278076, + "learning_rate": 4.290659340659341e-05, + "loss": 0.1298, + "step": 5164 + }, + { + "epoch": 14.18956043956044, + "grad_norm": 14.284476280212402, + "learning_rate": 4.290521978021978e-05, + "loss": 0.5175, + "step": 5165 + }, + { + "epoch": 14.192307692307692, + "grad_norm": 12.441431045532227, + "learning_rate": 4.2903846153846154e-05, + "loss": 0.3098, + "step": 5166 + }, + { + "epoch": 14.195054945054945, + "grad_norm": 12.6087064743042, + "learning_rate": 4.290247252747253e-05, + "loss": 0.3864, + "step": 5167 + }, + { + "epoch": 14.197802197802197, + "grad_norm": 11.974607467651367, + "learning_rate": 4.29010989010989e-05, + "loss": 0.3379, + "step": 5168 + }, + { + "epoch": 14.20054945054945, + "grad_norm": 12.075024604797363, + "learning_rate": 4.289972527472528e-05, + "loss": 0.367, + "step": 5169 + }, + { + "epoch": 14.203296703296703, + "grad_norm": 15.139405250549316, + "learning_rate": 4.289835164835165e-05, + "loss": 0.4729, + "step": 5170 + }, + { + "epoch": 14.206043956043956, + "grad_norm": 16.70508575439453, + "learning_rate": 4.2896978021978024e-05, + "loss": 0.4283, + "step": 5171 + }, + { + "epoch": 14.208791208791208, + "grad_norm": 3.3026599884033203, + "learning_rate": 4.28956043956044e-05, + "loss": 0.08, + "step": 5172 + }, + { + "epoch": 14.211538461538462, + "grad_norm": 14.820829391479492, + "learning_rate": 4.289423076923077e-05, + "loss": 0.5866, + "step": 5173 + }, + { + "epoch": 14.214285714285714, + "grad_norm": 10.217903137207031, + "learning_rate": 4.289285714285715e-05, + "loss": 0.2634, + "step": 5174 + }, + { + "epoch": 14.217032967032967, + "grad_norm": 7.7921929359436035, + "learning_rate": 4.289148351648352e-05, + "loss": 0.1708, + "step": 5175 + }, + { + "epoch": 14.219780219780219, + "grad_norm": 14.372883796691895, + "learning_rate": 4.2890109890109895e-05, + "loss": 0.5768, + "step": 5176 + }, + { + "epoch": 14.222527472527473, + "grad_norm": 24.3051700592041, + "learning_rate": 4.2888736263736265e-05, + "loss": 0.6485, + "step": 5177 + }, + { + "epoch": 14.225274725274724, + "grad_norm": 15.82347297668457, + "learning_rate": 4.288736263736264e-05, + "loss": 0.6628, + "step": 5178 + }, + { + "epoch": 14.228021978021978, + "grad_norm": 18.019197463989258, + "learning_rate": 4.288598901098901e-05, + "loss": 0.3637, + "step": 5179 + }, + { + "epoch": 14.23076923076923, + "grad_norm": 18.334402084350586, + "learning_rate": 4.288461538461538e-05, + "loss": 0.8383, + "step": 5180 + }, + { + "epoch": 14.233516483516484, + "grad_norm": 18.564205169677734, + "learning_rate": 4.288324175824176e-05, + "loss": 0.7711, + "step": 5181 + }, + { + "epoch": 14.236263736263735, + "grad_norm": 16.497007369995117, + "learning_rate": 4.2881868131868135e-05, + "loss": 0.6562, + "step": 5182 + }, + { + "epoch": 14.239010989010989, + "grad_norm": 8.726202964782715, + "learning_rate": 4.2880494505494505e-05, + "loss": 0.2105, + "step": 5183 + }, + { + "epoch": 14.241758241758241, + "grad_norm": 18.861072540283203, + "learning_rate": 4.287912087912088e-05, + "loss": 0.7372, + "step": 5184 + }, + { + "epoch": 14.244505494505495, + "grad_norm": 12.580850601196289, + "learning_rate": 4.287774725274725e-05, + "loss": 0.4602, + "step": 5185 + }, + { + "epoch": 14.247252747252748, + "grad_norm": 13.684026718139648, + "learning_rate": 4.287637362637363e-05, + "loss": 0.6657, + "step": 5186 + }, + { + "epoch": 14.25, + "grad_norm": 9.867182731628418, + "learning_rate": 4.2875000000000005e-05, + "loss": 0.3571, + "step": 5187 + }, + { + "epoch": 14.252747252747252, + "grad_norm": 19.78746223449707, + "learning_rate": 4.2873626373626375e-05, + "loss": 0.7453, + "step": 5188 + }, + { + "epoch": 14.255494505494505, + "grad_norm": 15.855148315429688, + "learning_rate": 4.287225274725275e-05, + "loss": 0.577, + "step": 5189 + }, + { + "epoch": 14.258241758241759, + "grad_norm": 19.365131378173828, + "learning_rate": 4.287087912087912e-05, + "loss": 0.5511, + "step": 5190 + }, + { + "epoch": 14.260989010989011, + "grad_norm": 12.498108863830566, + "learning_rate": 4.28695054945055e-05, + "loss": 0.5298, + "step": 5191 + }, + { + "epoch": 14.263736263736265, + "grad_norm": 17.617149353027344, + "learning_rate": 4.286813186813187e-05, + "loss": 0.645, + "step": 5192 + }, + { + "epoch": 14.266483516483516, + "grad_norm": 7.483818054199219, + "learning_rate": 4.2866758241758246e-05, + "loss": 0.1705, + "step": 5193 + }, + { + "epoch": 14.26923076923077, + "grad_norm": 9.785093307495117, + "learning_rate": 4.2865384615384616e-05, + "loss": 0.2052, + "step": 5194 + }, + { + "epoch": 14.271978021978022, + "grad_norm": 13.059722900390625, + "learning_rate": 4.2864010989010986e-05, + "loss": 0.5462, + "step": 5195 + }, + { + "epoch": 14.274725274725276, + "grad_norm": 8.006590843200684, + "learning_rate": 4.286263736263736e-05, + "loss": 0.2213, + "step": 5196 + }, + { + "epoch": 14.277472527472527, + "grad_norm": 4.507295608520508, + "learning_rate": 4.286126373626374e-05, + "loss": 0.1131, + "step": 5197 + }, + { + "epoch": 14.280219780219781, + "grad_norm": 11.29996109008789, + "learning_rate": 4.285989010989011e-05, + "loss": 0.417, + "step": 5198 + }, + { + "epoch": 14.282967032967033, + "grad_norm": 9.551502227783203, + "learning_rate": 4.2858516483516486e-05, + "loss": 0.4946, + "step": 5199 + }, + { + "epoch": 14.285714285714286, + "grad_norm": 11.465909004211426, + "learning_rate": 4.2857142857142856e-05, + "loss": 0.2636, + "step": 5200 + }, + { + "epoch": 14.288461538461538, + "grad_norm": 14.320587158203125, + "learning_rate": 4.285576923076923e-05, + "loss": 0.6172, + "step": 5201 + }, + { + "epoch": 14.291208791208792, + "grad_norm": 17.058055877685547, + "learning_rate": 4.285439560439561e-05, + "loss": 0.9088, + "step": 5202 + }, + { + "epoch": 14.293956043956044, + "grad_norm": 18.84653091430664, + "learning_rate": 4.285302197802198e-05, + "loss": 0.7755, + "step": 5203 + }, + { + "epoch": 14.296703296703297, + "grad_norm": 17.333290100097656, + "learning_rate": 4.285164835164836e-05, + "loss": 0.8708, + "step": 5204 + }, + { + "epoch": 14.29945054945055, + "grad_norm": 16.288522720336914, + "learning_rate": 4.285027472527473e-05, + "loss": 0.356, + "step": 5205 + }, + { + "epoch": 14.302197802197803, + "grad_norm": 12.824731826782227, + "learning_rate": 4.2848901098901104e-05, + "loss": 0.3628, + "step": 5206 + }, + { + "epoch": 14.304945054945055, + "grad_norm": 12.848221778869629, + "learning_rate": 4.2847527472527474e-05, + "loss": 0.3386, + "step": 5207 + }, + { + "epoch": 14.307692307692308, + "grad_norm": 21.71632957458496, + "learning_rate": 4.284615384615385e-05, + "loss": 0.9136, + "step": 5208 + }, + { + "epoch": 14.31043956043956, + "grad_norm": 12.232616424560547, + "learning_rate": 4.284478021978022e-05, + "loss": 0.3459, + "step": 5209 + }, + { + "epoch": 14.313186813186814, + "grad_norm": 14.374411582946777, + "learning_rate": 4.284340659340659e-05, + "loss": 0.5497, + "step": 5210 + }, + { + "epoch": 14.315934065934066, + "grad_norm": 8.011957168579102, + "learning_rate": 4.284203296703297e-05, + "loss": 0.4141, + "step": 5211 + }, + { + "epoch": 14.31868131868132, + "grad_norm": 8.305990219116211, + "learning_rate": 4.2840659340659344e-05, + "loss": 0.2082, + "step": 5212 + }, + { + "epoch": 14.321428571428571, + "grad_norm": 14.431011199951172, + "learning_rate": 4.2839285714285714e-05, + "loss": 0.5142, + "step": 5213 + }, + { + "epoch": 14.324175824175825, + "grad_norm": 9.751762390136719, + "learning_rate": 4.283791208791209e-05, + "loss": 0.2597, + "step": 5214 + }, + { + "epoch": 14.326923076923077, + "grad_norm": 6.419500827789307, + "learning_rate": 4.283653846153846e-05, + "loss": 0.1423, + "step": 5215 + }, + { + "epoch": 14.32967032967033, + "grad_norm": 11.622011184692383, + "learning_rate": 4.283516483516484e-05, + "loss": 0.379, + "step": 5216 + }, + { + "epoch": 14.332417582417582, + "grad_norm": 13.108296394348145, + "learning_rate": 4.2833791208791214e-05, + "loss": 0.4065, + "step": 5217 + }, + { + "epoch": 14.335164835164836, + "grad_norm": 18.484922409057617, + "learning_rate": 4.2832417582417584e-05, + "loss": 0.4899, + "step": 5218 + }, + { + "epoch": 14.337912087912088, + "grad_norm": 16.527633666992188, + "learning_rate": 4.283104395604396e-05, + "loss": 0.5631, + "step": 5219 + }, + { + "epoch": 14.340659340659341, + "grad_norm": 12.788285255432129, + "learning_rate": 4.282967032967033e-05, + "loss": 0.4016, + "step": 5220 + }, + { + "epoch": 14.343406593406593, + "grad_norm": 17.37544059753418, + "learning_rate": 4.282829670329671e-05, + "loss": 0.6827, + "step": 5221 + }, + { + "epoch": 14.346153846153847, + "grad_norm": 14.311058044433594, + "learning_rate": 4.282692307692308e-05, + "loss": 0.6209, + "step": 5222 + }, + { + "epoch": 14.348901098901099, + "grad_norm": 15.693558692932129, + "learning_rate": 4.2825549450549455e-05, + "loss": 0.6569, + "step": 5223 + }, + { + "epoch": 14.351648351648352, + "grad_norm": 10.394285202026367, + "learning_rate": 4.2824175824175825e-05, + "loss": 0.3484, + "step": 5224 + }, + { + "epoch": 14.354395604395604, + "grad_norm": 8.568055152893066, + "learning_rate": 4.2822802197802195e-05, + "loss": 0.269, + "step": 5225 + }, + { + "epoch": 14.357142857142858, + "grad_norm": 18.65423011779785, + "learning_rate": 4.282142857142857e-05, + "loss": 0.6068, + "step": 5226 + }, + { + "epoch": 14.35989010989011, + "grad_norm": 15.834242820739746, + "learning_rate": 4.282005494505495e-05, + "loss": 0.619, + "step": 5227 + }, + { + "epoch": 14.362637362637363, + "grad_norm": 14.429057121276855, + "learning_rate": 4.281868131868132e-05, + "loss": 0.5553, + "step": 5228 + }, + { + "epoch": 14.365384615384615, + "grad_norm": 13.666606903076172, + "learning_rate": 4.2817307692307695e-05, + "loss": 0.3423, + "step": 5229 + }, + { + "epoch": 14.368131868131869, + "grad_norm": 12.817451477050781, + "learning_rate": 4.2815934065934065e-05, + "loss": 0.5955, + "step": 5230 + }, + { + "epoch": 14.37087912087912, + "grad_norm": 13.207125663757324, + "learning_rate": 4.281456043956044e-05, + "loss": 0.5442, + "step": 5231 + }, + { + "epoch": 14.373626373626374, + "grad_norm": 12.168498992919922, + "learning_rate": 4.281318681318682e-05, + "loss": 0.358, + "step": 5232 + }, + { + "epoch": 14.376373626373626, + "grad_norm": 16.23292350769043, + "learning_rate": 4.281181318681319e-05, + "loss": 0.5304, + "step": 5233 + }, + { + "epoch": 14.37912087912088, + "grad_norm": 16.695859909057617, + "learning_rate": 4.2810439560439566e-05, + "loss": 1.1626, + "step": 5234 + }, + { + "epoch": 14.381868131868131, + "grad_norm": 13.731607437133789, + "learning_rate": 4.2809065934065936e-05, + "loss": 0.6798, + "step": 5235 + }, + { + "epoch": 14.384615384615385, + "grad_norm": 9.585025787353516, + "learning_rate": 4.280769230769231e-05, + "loss": 0.2381, + "step": 5236 + }, + { + "epoch": 14.387362637362637, + "grad_norm": 18.642396926879883, + "learning_rate": 4.280631868131868e-05, + "loss": 0.7196, + "step": 5237 + }, + { + "epoch": 14.39010989010989, + "grad_norm": 17.727333068847656, + "learning_rate": 4.280494505494505e-05, + "loss": 0.7247, + "step": 5238 + }, + { + "epoch": 14.392857142857142, + "grad_norm": 9.430874824523926, + "learning_rate": 4.280357142857143e-05, + "loss": 0.2601, + "step": 5239 + }, + { + "epoch": 14.395604395604396, + "grad_norm": 15.21187686920166, + "learning_rate": 4.28021978021978e-05, + "loss": 0.6039, + "step": 5240 + }, + { + "epoch": 14.398351648351648, + "grad_norm": 11.726092338562012, + "learning_rate": 4.2800824175824176e-05, + "loss": 0.2967, + "step": 5241 + }, + { + "epoch": 14.401098901098901, + "grad_norm": 13.393171310424805, + "learning_rate": 4.279945054945055e-05, + "loss": 0.4019, + "step": 5242 + }, + { + "epoch": 14.403846153846153, + "grad_norm": 9.841984748840332, + "learning_rate": 4.279807692307692e-05, + "loss": 0.3425, + "step": 5243 + }, + { + "epoch": 14.406593406593407, + "grad_norm": 16.36267852783203, + "learning_rate": 4.27967032967033e-05, + "loss": 0.6339, + "step": 5244 + }, + { + "epoch": 14.409340659340659, + "grad_norm": 18.810443878173828, + "learning_rate": 4.279532967032967e-05, + "loss": 0.8187, + "step": 5245 + }, + { + "epoch": 14.412087912087912, + "grad_norm": 18.234716415405273, + "learning_rate": 4.2793956043956047e-05, + "loss": 0.8622, + "step": 5246 + }, + { + "epoch": 14.414835164835164, + "grad_norm": 5.996585369110107, + "learning_rate": 4.279258241758242e-05, + "loss": 0.1194, + "step": 5247 + }, + { + "epoch": 14.417582417582418, + "grad_norm": 13.025598526000977, + "learning_rate": 4.279120879120879e-05, + "loss": 0.512, + "step": 5248 + }, + { + "epoch": 14.42032967032967, + "grad_norm": 11.430262565612793, + "learning_rate": 4.278983516483517e-05, + "loss": 0.5583, + "step": 5249 + }, + { + "epoch": 14.423076923076923, + "grad_norm": 5.986560344696045, + "learning_rate": 4.278846153846154e-05, + "loss": 0.1239, + "step": 5250 + }, + { + "epoch": 14.425824175824175, + "grad_norm": 8.829070091247559, + "learning_rate": 4.278708791208792e-05, + "loss": 0.2673, + "step": 5251 + }, + { + "epoch": 14.428571428571429, + "grad_norm": 18.973817825317383, + "learning_rate": 4.278571428571429e-05, + "loss": 0.8536, + "step": 5252 + }, + { + "epoch": 14.43131868131868, + "grad_norm": 12.231196403503418, + "learning_rate": 4.278434065934066e-05, + "loss": 0.3901, + "step": 5253 + }, + { + "epoch": 14.434065934065934, + "grad_norm": 15.42395305633545, + "learning_rate": 4.2782967032967034e-05, + "loss": 0.5099, + "step": 5254 + }, + { + "epoch": 14.436813186813186, + "grad_norm": 13.308141708374023, + "learning_rate": 4.2781593406593404e-05, + "loss": 0.4983, + "step": 5255 + }, + { + "epoch": 14.43956043956044, + "grad_norm": 11.51955795288086, + "learning_rate": 4.278021978021978e-05, + "loss": 0.488, + "step": 5256 + }, + { + "epoch": 14.442307692307692, + "grad_norm": 14.539527893066406, + "learning_rate": 4.277884615384616e-05, + "loss": 0.6168, + "step": 5257 + }, + { + "epoch": 14.445054945054945, + "grad_norm": 8.88019847869873, + "learning_rate": 4.277747252747253e-05, + "loss": 0.1824, + "step": 5258 + }, + { + "epoch": 14.447802197802197, + "grad_norm": 12.216510772705078, + "learning_rate": 4.2776098901098904e-05, + "loss": 0.5176, + "step": 5259 + }, + { + "epoch": 14.45054945054945, + "grad_norm": 18.75323486328125, + "learning_rate": 4.2774725274725274e-05, + "loss": 0.4972, + "step": 5260 + }, + { + "epoch": 14.453296703296703, + "grad_norm": 21.666841506958008, + "learning_rate": 4.277335164835165e-05, + "loss": 0.9263, + "step": 5261 + }, + { + "epoch": 14.456043956043956, + "grad_norm": 15.449360847473145, + "learning_rate": 4.277197802197803e-05, + "loss": 0.7303, + "step": 5262 + }, + { + "epoch": 14.458791208791208, + "grad_norm": 12.127955436706543, + "learning_rate": 4.27706043956044e-05, + "loss": 0.5579, + "step": 5263 + }, + { + "epoch": 14.461538461538462, + "grad_norm": 14.378389358520508, + "learning_rate": 4.2769230769230775e-05, + "loss": 0.4939, + "step": 5264 + }, + { + "epoch": 14.464285714285714, + "grad_norm": 13.82028579711914, + "learning_rate": 4.2767857142857145e-05, + "loss": 0.4387, + "step": 5265 + }, + { + "epoch": 14.467032967032967, + "grad_norm": 16.9725399017334, + "learning_rate": 4.276648351648352e-05, + "loss": 0.5983, + "step": 5266 + }, + { + "epoch": 14.469780219780219, + "grad_norm": 8.848398208618164, + "learning_rate": 4.276510989010989e-05, + "loss": 0.323, + "step": 5267 + }, + { + "epoch": 14.472527472527473, + "grad_norm": 18.789592742919922, + "learning_rate": 4.276373626373626e-05, + "loss": 0.8026, + "step": 5268 + }, + { + "epoch": 14.475274725274724, + "grad_norm": 14.819063186645508, + "learning_rate": 4.276236263736264e-05, + "loss": 0.5241, + "step": 5269 + }, + { + "epoch": 14.478021978021978, + "grad_norm": 10.139039039611816, + "learning_rate": 4.276098901098901e-05, + "loss": 0.2638, + "step": 5270 + }, + { + "epoch": 14.48076923076923, + "grad_norm": 15.343926429748535, + "learning_rate": 4.2759615384615385e-05, + "loss": 0.5193, + "step": 5271 + }, + { + "epoch": 14.483516483516484, + "grad_norm": 10.437386512756348, + "learning_rate": 4.275824175824176e-05, + "loss": 0.3595, + "step": 5272 + }, + { + "epoch": 14.486263736263735, + "grad_norm": 16.100175857543945, + "learning_rate": 4.275686813186813e-05, + "loss": 0.286, + "step": 5273 + }, + { + "epoch": 14.489010989010989, + "grad_norm": 8.642009735107422, + "learning_rate": 4.275549450549451e-05, + "loss": 0.3714, + "step": 5274 + }, + { + "epoch": 14.491758241758241, + "grad_norm": 8.662147521972656, + "learning_rate": 4.275412087912088e-05, + "loss": 0.2799, + "step": 5275 + }, + { + "epoch": 14.494505494505495, + "grad_norm": 9.723097801208496, + "learning_rate": 4.2752747252747256e-05, + "loss": 0.4103, + "step": 5276 + }, + { + "epoch": 14.497252747252748, + "grad_norm": 12.285481452941895, + "learning_rate": 4.275137362637363e-05, + "loss": 0.3644, + "step": 5277 + }, + { + "epoch": 14.5, + "grad_norm": 8.211996078491211, + "learning_rate": 4.275e-05, + "loss": 0.2396, + "step": 5278 + }, + { + "epoch": 14.502747252747252, + "grad_norm": 15.005613327026367, + "learning_rate": 4.274862637362638e-05, + "loss": 0.579, + "step": 5279 + }, + { + "epoch": 14.505494505494505, + "grad_norm": 10.914105415344238, + "learning_rate": 4.274725274725275e-05, + "loss": 0.2961, + "step": 5280 + }, + { + "epoch": 14.508241758241759, + "grad_norm": 13.123945236206055, + "learning_rate": 4.2745879120879126e-05, + "loss": 0.5671, + "step": 5281 + }, + { + "epoch": 14.510989010989011, + "grad_norm": 8.028947830200195, + "learning_rate": 4.2744505494505496e-05, + "loss": 0.1717, + "step": 5282 + }, + { + "epoch": 14.513736263736263, + "grad_norm": 9.755265235900879, + "learning_rate": 4.2743131868131866e-05, + "loss": 0.3871, + "step": 5283 + }, + { + "epoch": 14.516483516483516, + "grad_norm": 21.12866973876953, + "learning_rate": 4.274175824175824e-05, + "loss": 0.8706, + "step": 5284 + }, + { + "epoch": 14.51923076923077, + "grad_norm": 18.468860626220703, + "learning_rate": 4.274038461538461e-05, + "loss": 0.6452, + "step": 5285 + }, + { + "epoch": 14.521978021978022, + "grad_norm": 15.991326332092285, + "learning_rate": 4.273901098901099e-05, + "loss": 1.0805, + "step": 5286 + }, + { + "epoch": 14.524725274725276, + "grad_norm": 11.057988166809082, + "learning_rate": 4.2737637362637366e-05, + "loss": 0.3429, + "step": 5287 + }, + { + "epoch": 14.527472527472527, + "grad_norm": 10.567100524902344, + "learning_rate": 4.2736263736263736e-05, + "loss": 0.2968, + "step": 5288 + }, + { + "epoch": 14.530219780219781, + "grad_norm": 9.10664176940918, + "learning_rate": 4.273489010989011e-05, + "loss": 0.3516, + "step": 5289 + }, + { + "epoch": 14.532967032967033, + "grad_norm": 8.671846389770508, + "learning_rate": 4.273351648351648e-05, + "loss": 0.3301, + "step": 5290 + }, + { + "epoch": 14.535714285714286, + "grad_norm": 8.82649040222168, + "learning_rate": 4.273214285714286e-05, + "loss": 0.2653, + "step": 5291 + }, + { + "epoch": 14.538461538461538, + "grad_norm": 18.31021499633789, + "learning_rate": 4.273076923076924e-05, + "loss": 0.6104, + "step": 5292 + }, + { + "epoch": 14.541208791208792, + "grad_norm": 15.319742202758789, + "learning_rate": 4.272939560439561e-05, + "loss": 0.671, + "step": 5293 + }, + { + "epoch": 14.543956043956044, + "grad_norm": 14.363277435302734, + "learning_rate": 4.2728021978021984e-05, + "loss": 0.5057, + "step": 5294 + }, + { + "epoch": 14.546703296703297, + "grad_norm": 16.84830093383789, + "learning_rate": 4.2726648351648354e-05, + "loss": 0.6559, + "step": 5295 + }, + { + "epoch": 14.54945054945055, + "grad_norm": 14.724018096923828, + "learning_rate": 4.272527472527473e-05, + "loss": 0.5501, + "step": 5296 + }, + { + "epoch": 14.552197802197803, + "grad_norm": 14.831514358520508, + "learning_rate": 4.27239010989011e-05, + "loss": 0.6169, + "step": 5297 + }, + { + "epoch": 14.554945054945055, + "grad_norm": 18.6199951171875, + "learning_rate": 4.272252747252747e-05, + "loss": 0.6264, + "step": 5298 + }, + { + "epoch": 14.557692307692308, + "grad_norm": 14.48404598236084, + "learning_rate": 4.272115384615385e-05, + "loss": 0.3621, + "step": 5299 + }, + { + "epoch": 14.56043956043956, + "grad_norm": 12.306714057922363, + "learning_rate": 4.271978021978022e-05, + "loss": 0.4962, + "step": 5300 + }, + { + "epoch": 14.563186813186814, + "grad_norm": 17.781042098999023, + "learning_rate": 4.2718406593406594e-05, + "loss": 0.9474, + "step": 5301 + }, + { + "epoch": 14.565934065934066, + "grad_norm": 12.291430473327637, + "learning_rate": 4.271703296703297e-05, + "loss": 0.4121, + "step": 5302 + }, + { + "epoch": 14.56868131868132, + "grad_norm": 8.505814552307129, + "learning_rate": 4.271565934065934e-05, + "loss": 0.3034, + "step": 5303 + }, + { + "epoch": 14.571428571428571, + "grad_norm": 16.8923282623291, + "learning_rate": 4.271428571428572e-05, + "loss": 0.7628, + "step": 5304 + }, + { + "epoch": 14.574175824175825, + "grad_norm": 6.280637264251709, + "learning_rate": 4.271291208791209e-05, + "loss": 0.1513, + "step": 5305 + }, + { + "epoch": 14.576923076923077, + "grad_norm": 9.110633850097656, + "learning_rate": 4.2711538461538465e-05, + "loss": 0.2134, + "step": 5306 + }, + { + "epoch": 14.57967032967033, + "grad_norm": 8.817941665649414, + "learning_rate": 4.271016483516484e-05, + "loss": 0.2909, + "step": 5307 + }, + { + "epoch": 14.582417582417582, + "grad_norm": 8.371278762817383, + "learning_rate": 4.270879120879121e-05, + "loss": 0.3139, + "step": 5308 + }, + { + "epoch": 14.585164835164836, + "grad_norm": 17.19437599182129, + "learning_rate": 4.270741758241759e-05, + "loss": 0.6183, + "step": 5309 + }, + { + "epoch": 14.587912087912088, + "grad_norm": 24.555599212646484, + "learning_rate": 4.270604395604396e-05, + "loss": 1.0128, + "step": 5310 + }, + { + "epoch": 14.590659340659341, + "grad_norm": 12.623367309570312, + "learning_rate": 4.2704670329670335e-05, + "loss": 0.6246, + "step": 5311 + }, + { + "epoch": 14.593406593406593, + "grad_norm": 11.356074333190918, + "learning_rate": 4.2703296703296705e-05, + "loss": 0.3582, + "step": 5312 + }, + { + "epoch": 14.596153846153847, + "grad_norm": 10.849884986877441, + "learning_rate": 4.2701923076923075e-05, + "loss": 0.4352, + "step": 5313 + }, + { + "epoch": 14.598901098901099, + "grad_norm": 17.51255989074707, + "learning_rate": 4.270054945054945e-05, + "loss": 0.7668, + "step": 5314 + }, + { + "epoch": 14.601648351648352, + "grad_norm": 8.874763488769531, + "learning_rate": 4.269917582417582e-05, + "loss": 0.2948, + "step": 5315 + }, + { + "epoch": 14.604395604395604, + "grad_norm": 12.8494291305542, + "learning_rate": 4.26978021978022e-05, + "loss": 0.2539, + "step": 5316 + }, + { + "epoch": 14.607142857142858, + "grad_norm": 6.212353229522705, + "learning_rate": 4.2696428571428575e-05, + "loss": 0.1501, + "step": 5317 + }, + { + "epoch": 14.60989010989011, + "grad_norm": 9.915999412536621, + "learning_rate": 4.2695054945054945e-05, + "loss": 0.2714, + "step": 5318 + }, + { + "epoch": 14.612637362637363, + "grad_norm": 11.38350772857666, + "learning_rate": 4.269368131868132e-05, + "loss": 0.4309, + "step": 5319 + }, + { + "epoch": 14.615384615384615, + "grad_norm": 28.257116317749023, + "learning_rate": 4.269230769230769e-05, + "loss": 0.8445, + "step": 5320 + }, + { + "epoch": 14.618131868131869, + "grad_norm": 8.211613655090332, + "learning_rate": 4.269093406593407e-05, + "loss": 0.2474, + "step": 5321 + }, + { + "epoch": 14.62087912087912, + "grad_norm": 11.677437782287598, + "learning_rate": 4.2689560439560446e-05, + "loss": 0.2898, + "step": 5322 + }, + { + "epoch": 14.623626373626374, + "grad_norm": 11.425764083862305, + "learning_rate": 4.2688186813186816e-05, + "loss": 0.3917, + "step": 5323 + }, + { + "epoch": 14.626373626373626, + "grad_norm": 15.59532356262207, + "learning_rate": 4.268681318681319e-05, + "loss": 0.4518, + "step": 5324 + }, + { + "epoch": 14.62912087912088, + "grad_norm": 9.54471492767334, + "learning_rate": 4.268543956043956e-05, + "loss": 0.2856, + "step": 5325 + }, + { + "epoch": 14.631868131868131, + "grad_norm": 9.117147445678711, + "learning_rate": 4.268406593406594e-05, + "loss": 0.3176, + "step": 5326 + }, + { + "epoch": 14.634615384615385, + "grad_norm": 7.994399070739746, + "learning_rate": 4.268269230769231e-05, + "loss": 0.2563, + "step": 5327 + }, + { + "epoch": 14.637362637362637, + "grad_norm": 16.138275146484375, + "learning_rate": 4.268131868131868e-05, + "loss": 0.5593, + "step": 5328 + }, + { + "epoch": 14.64010989010989, + "grad_norm": 18.32124900817871, + "learning_rate": 4.2679945054945056e-05, + "loss": 0.7191, + "step": 5329 + }, + { + "epoch": 14.642857142857142, + "grad_norm": 18.97287368774414, + "learning_rate": 4.2678571428571426e-05, + "loss": 0.6422, + "step": 5330 + }, + { + "epoch": 14.645604395604396, + "grad_norm": 24.430583953857422, + "learning_rate": 4.26771978021978e-05, + "loss": 1.2759, + "step": 5331 + }, + { + "epoch": 14.648351648351648, + "grad_norm": 13.518088340759277, + "learning_rate": 4.267582417582418e-05, + "loss": 0.4501, + "step": 5332 + }, + { + "epoch": 14.651098901098901, + "grad_norm": 11.14832878112793, + "learning_rate": 4.267445054945055e-05, + "loss": 0.2859, + "step": 5333 + }, + { + "epoch": 14.653846153846153, + "grad_norm": 8.108465194702148, + "learning_rate": 4.267307692307693e-05, + "loss": 0.307, + "step": 5334 + }, + { + "epoch": 14.656593406593407, + "grad_norm": 12.53339672088623, + "learning_rate": 4.26717032967033e-05, + "loss": 0.295, + "step": 5335 + }, + { + "epoch": 14.659340659340659, + "grad_norm": 7.884527206420898, + "learning_rate": 4.2670329670329674e-05, + "loss": 0.2128, + "step": 5336 + }, + { + "epoch": 14.662087912087912, + "grad_norm": 9.665164947509766, + "learning_rate": 4.266895604395605e-05, + "loss": 0.2216, + "step": 5337 + }, + { + "epoch": 14.664835164835164, + "grad_norm": 8.355396270751953, + "learning_rate": 4.266758241758242e-05, + "loss": 0.2681, + "step": 5338 + }, + { + "epoch": 14.667582417582418, + "grad_norm": 13.922929763793945, + "learning_rate": 4.26662087912088e-05, + "loss": 0.8696, + "step": 5339 + }, + { + "epoch": 14.67032967032967, + "grad_norm": 13.305988311767578, + "learning_rate": 4.266483516483517e-05, + "loss": 0.3868, + "step": 5340 + }, + { + "epoch": 14.673076923076923, + "grad_norm": 12.747992515563965, + "learning_rate": 4.2663461538461544e-05, + "loss": 0.4395, + "step": 5341 + }, + { + "epoch": 14.675824175824175, + "grad_norm": 9.571887969970703, + "learning_rate": 4.2662087912087914e-05, + "loss": 0.2411, + "step": 5342 + }, + { + "epoch": 14.678571428571429, + "grad_norm": 10.808222770690918, + "learning_rate": 4.2660714285714284e-05, + "loss": 0.3212, + "step": 5343 + }, + { + "epoch": 14.68131868131868, + "grad_norm": 9.102350234985352, + "learning_rate": 4.265934065934066e-05, + "loss": 0.2551, + "step": 5344 + }, + { + "epoch": 14.684065934065934, + "grad_norm": 17.7232608795166, + "learning_rate": 4.265796703296703e-05, + "loss": 0.7677, + "step": 5345 + }, + { + "epoch": 14.686813186813186, + "grad_norm": 14.40681266784668, + "learning_rate": 4.265659340659341e-05, + "loss": 0.5542, + "step": 5346 + }, + { + "epoch": 14.68956043956044, + "grad_norm": 9.866328239440918, + "learning_rate": 4.265521978021978e-05, + "loss": 0.4134, + "step": 5347 + }, + { + "epoch": 14.692307692307692, + "grad_norm": 10.628108024597168, + "learning_rate": 4.2653846153846154e-05, + "loss": 0.197, + "step": 5348 + }, + { + "epoch": 14.695054945054945, + "grad_norm": 12.806500434875488, + "learning_rate": 4.265247252747253e-05, + "loss": 0.3307, + "step": 5349 + }, + { + "epoch": 14.697802197802197, + "grad_norm": 13.618905067443848, + "learning_rate": 4.26510989010989e-05, + "loss": 0.3878, + "step": 5350 + }, + { + "epoch": 14.70054945054945, + "grad_norm": 13.450702667236328, + "learning_rate": 4.264972527472528e-05, + "loss": 0.7955, + "step": 5351 + }, + { + "epoch": 14.703296703296703, + "grad_norm": 18.774526596069336, + "learning_rate": 4.264835164835165e-05, + "loss": 0.8561, + "step": 5352 + }, + { + "epoch": 14.706043956043956, + "grad_norm": 8.977989196777344, + "learning_rate": 4.2646978021978025e-05, + "loss": 0.3578, + "step": 5353 + }, + { + "epoch": 14.708791208791208, + "grad_norm": 10.662592887878418, + "learning_rate": 4.26456043956044e-05, + "loss": 0.3413, + "step": 5354 + }, + { + "epoch": 14.711538461538462, + "grad_norm": 6.351759433746338, + "learning_rate": 4.264423076923077e-05, + "loss": 0.2346, + "step": 5355 + }, + { + "epoch": 14.714285714285714, + "grad_norm": 14.828826904296875, + "learning_rate": 4.264285714285715e-05, + "loss": 0.481, + "step": 5356 + }, + { + "epoch": 14.717032967032967, + "grad_norm": 19.019306182861328, + "learning_rate": 4.264148351648352e-05, + "loss": 1.0461, + "step": 5357 + }, + { + "epoch": 14.719780219780219, + "grad_norm": 11.782758712768555, + "learning_rate": 4.264010989010989e-05, + "loss": 0.3976, + "step": 5358 + }, + { + "epoch": 14.722527472527473, + "grad_norm": 8.158798217773438, + "learning_rate": 4.2638736263736265e-05, + "loss": 0.1467, + "step": 5359 + }, + { + "epoch": 14.725274725274724, + "grad_norm": 9.641575813293457, + "learning_rate": 4.2637362637362635e-05, + "loss": 0.4211, + "step": 5360 + }, + { + "epoch": 14.728021978021978, + "grad_norm": 10.462140083312988, + "learning_rate": 4.263598901098901e-05, + "loss": 0.2665, + "step": 5361 + }, + { + "epoch": 14.73076923076923, + "grad_norm": 9.296174049377441, + "learning_rate": 4.263461538461538e-05, + "loss": 0.4789, + "step": 5362 + }, + { + "epoch": 14.733516483516484, + "grad_norm": 15.627665519714355, + "learning_rate": 4.263324175824176e-05, + "loss": 0.9057, + "step": 5363 + }, + { + "epoch": 14.736263736263737, + "grad_norm": 15.793008804321289, + "learning_rate": 4.2631868131868136e-05, + "loss": 0.449, + "step": 5364 + }, + { + "epoch": 14.739010989010989, + "grad_norm": 17.956937789916992, + "learning_rate": 4.2630494505494506e-05, + "loss": 0.7467, + "step": 5365 + }, + { + "epoch": 14.741758241758241, + "grad_norm": 5.238981246948242, + "learning_rate": 4.262912087912088e-05, + "loss": 0.1631, + "step": 5366 + }, + { + "epoch": 14.744505494505495, + "grad_norm": 10.415566444396973, + "learning_rate": 4.262774725274725e-05, + "loss": 0.4991, + "step": 5367 + }, + { + "epoch": 14.747252747252748, + "grad_norm": 10.057366371154785, + "learning_rate": 4.262637362637363e-05, + "loss": 0.3908, + "step": 5368 + }, + { + "epoch": 14.75, + "grad_norm": 11.67117691040039, + "learning_rate": 4.2625000000000006e-05, + "loss": 0.6804, + "step": 5369 + }, + { + "epoch": 14.752747252747252, + "grad_norm": 8.786307334899902, + "learning_rate": 4.2623626373626376e-05, + "loss": 0.2029, + "step": 5370 + }, + { + "epoch": 14.755494505494505, + "grad_norm": 9.300015449523926, + "learning_rate": 4.262225274725275e-05, + "loss": 0.258, + "step": 5371 + }, + { + "epoch": 14.758241758241759, + "grad_norm": 15.789891242980957, + "learning_rate": 4.262087912087912e-05, + "loss": 0.5516, + "step": 5372 + }, + { + "epoch": 14.760989010989011, + "grad_norm": 10.708935737609863, + "learning_rate": 4.261950549450549e-05, + "loss": 0.3611, + "step": 5373 + }, + { + "epoch": 14.763736263736263, + "grad_norm": 12.66236400604248, + "learning_rate": 4.261813186813187e-05, + "loss": 0.7234, + "step": 5374 + }, + { + "epoch": 14.766483516483516, + "grad_norm": 14.764954566955566, + "learning_rate": 4.261675824175824e-05, + "loss": 0.6253, + "step": 5375 + }, + { + "epoch": 14.76923076923077, + "grad_norm": 9.38718318939209, + "learning_rate": 4.2615384615384617e-05, + "loss": 0.383, + "step": 5376 + }, + { + "epoch": 14.771978021978022, + "grad_norm": 10.318526268005371, + "learning_rate": 4.2614010989010987e-05, + "loss": 0.2505, + "step": 5377 + }, + { + "epoch": 14.774725274725276, + "grad_norm": 16.16214942932129, + "learning_rate": 4.261263736263736e-05, + "loss": 0.5714, + "step": 5378 + }, + { + "epoch": 14.777472527472527, + "grad_norm": 11.890682220458984, + "learning_rate": 4.261126373626374e-05, + "loss": 0.3439, + "step": 5379 + }, + { + "epoch": 14.780219780219781, + "grad_norm": 10.880393981933594, + "learning_rate": 4.260989010989011e-05, + "loss": 0.4301, + "step": 5380 + }, + { + "epoch": 14.782967032967033, + "grad_norm": 8.870414733886719, + "learning_rate": 4.260851648351649e-05, + "loss": 0.2157, + "step": 5381 + }, + { + "epoch": 14.785714285714286, + "grad_norm": 14.83718490600586, + "learning_rate": 4.260714285714286e-05, + "loss": 0.5593, + "step": 5382 + }, + { + "epoch": 14.788461538461538, + "grad_norm": 11.333806037902832, + "learning_rate": 4.2605769230769234e-05, + "loss": 0.5144, + "step": 5383 + }, + { + "epoch": 14.791208791208792, + "grad_norm": 12.850351333618164, + "learning_rate": 4.260439560439561e-05, + "loss": 0.431, + "step": 5384 + }, + { + "epoch": 14.793956043956044, + "grad_norm": 9.779860496520996, + "learning_rate": 4.260302197802198e-05, + "loss": 0.3083, + "step": 5385 + }, + { + "epoch": 14.796703296703297, + "grad_norm": 16.161169052124023, + "learning_rate": 4.260164835164836e-05, + "loss": 0.7193, + "step": 5386 + }, + { + "epoch": 14.79945054945055, + "grad_norm": 15.714470863342285, + "learning_rate": 4.260027472527473e-05, + "loss": 0.5306, + "step": 5387 + }, + { + "epoch": 14.802197802197803, + "grad_norm": 15.10595989227295, + "learning_rate": 4.25989010989011e-05, + "loss": 0.6577, + "step": 5388 + }, + { + "epoch": 14.804945054945055, + "grad_norm": 16.166227340698242, + "learning_rate": 4.2597527472527474e-05, + "loss": 0.6305, + "step": 5389 + }, + { + "epoch": 14.807692307692308, + "grad_norm": 10.900979995727539, + "learning_rate": 4.2596153846153844e-05, + "loss": 0.3374, + "step": 5390 + }, + { + "epoch": 14.81043956043956, + "grad_norm": 10.015094757080078, + "learning_rate": 4.259478021978022e-05, + "loss": 0.2689, + "step": 5391 + }, + { + "epoch": 14.813186813186814, + "grad_norm": 14.672850608825684, + "learning_rate": 4.259340659340659e-05, + "loss": 0.4684, + "step": 5392 + }, + { + "epoch": 14.815934065934066, + "grad_norm": 23.973901748657227, + "learning_rate": 4.259203296703297e-05, + "loss": 1.1794, + "step": 5393 + }, + { + "epoch": 14.81868131868132, + "grad_norm": 12.986330032348633, + "learning_rate": 4.2590659340659345e-05, + "loss": 0.2622, + "step": 5394 + }, + { + "epoch": 14.821428571428571, + "grad_norm": 11.610701560974121, + "learning_rate": 4.2589285714285715e-05, + "loss": 0.3302, + "step": 5395 + }, + { + "epoch": 14.824175824175825, + "grad_norm": 13.065506935119629, + "learning_rate": 4.258791208791209e-05, + "loss": 0.451, + "step": 5396 + }, + { + "epoch": 14.826923076923077, + "grad_norm": 17.537830352783203, + "learning_rate": 4.258653846153846e-05, + "loss": 0.7556, + "step": 5397 + }, + { + "epoch": 14.82967032967033, + "grad_norm": 9.418267250061035, + "learning_rate": 4.258516483516484e-05, + "loss": 0.4316, + "step": 5398 + }, + { + "epoch": 14.832417582417582, + "grad_norm": 15.262167930603027, + "learning_rate": 4.2583791208791215e-05, + "loss": 0.5168, + "step": 5399 + }, + { + "epoch": 14.835164835164836, + "grad_norm": 9.836102485656738, + "learning_rate": 4.2582417582417585e-05, + "loss": 0.4376, + "step": 5400 + }, + { + "epoch": 14.837912087912088, + "grad_norm": 17.500221252441406, + "learning_rate": 4.258104395604396e-05, + "loss": 0.8532, + "step": 5401 + }, + { + "epoch": 14.840659340659341, + "grad_norm": 8.524140357971191, + "learning_rate": 4.257967032967033e-05, + "loss": 0.3129, + "step": 5402 + }, + { + "epoch": 14.843406593406593, + "grad_norm": 8.793901443481445, + "learning_rate": 4.25782967032967e-05, + "loss": 0.4065, + "step": 5403 + }, + { + "epoch": 14.846153846153847, + "grad_norm": 11.703400611877441, + "learning_rate": 4.257692307692308e-05, + "loss": 0.6173, + "step": 5404 + }, + { + "epoch": 14.848901098901099, + "grad_norm": 18.53386878967285, + "learning_rate": 4.257554945054945e-05, + "loss": 0.5827, + "step": 5405 + }, + { + "epoch": 14.851648351648352, + "grad_norm": 11.9616060256958, + "learning_rate": 4.2574175824175826e-05, + "loss": 0.3763, + "step": 5406 + }, + { + "epoch": 14.854395604395604, + "grad_norm": 15.188549995422363, + "learning_rate": 4.2572802197802196e-05, + "loss": 0.6568, + "step": 5407 + }, + { + "epoch": 14.857142857142858, + "grad_norm": 17.037992477416992, + "learning_rate": 4.257142857142857e-05, + "loss": 0.5498, + "step": 5408 + }, + { + "epoch": 14.85989010989011, + "grad_norm": 15.540526390075684, + "learning_rate": 4.257005494505495e-05, + "loss": 0.5617, + "step": 5409 + }, + { + "epoch": 14.862637362637363, + "grad_norm": 12.274551391601562, + "learning_rate": 4.256868131868132e-05, + "loss": 0.4394, + "step": 5410 + }, + { + "epoch": 14.865384615384615, + "grad_norm": 9.8792085647583, + "learning_rate": 4.2567307692307696e-05, + "loss": 0.3159, + "step": 5411 + }, + { + "epoch": 14.868131868131869, + "grad_norm": 11.502272605895996, + "learning_rate": 4.2565934065934066e-05, + "loss": 0.4986, + "step": 5412 + }, + { + "epoch": 14.87087912087912, + "grad_norm": 14.867233276367188, + "learning_rate": 4.256456043956044e-05, + "loss": 0.6168, + "step": 5413 + }, + { + "epoch": 14.873626373626374, + "grad_norm": 4.689126968383789, + "learning_rate": 4.256318681318682e-05, + "loss": 0.1607, + "step": 5414 + }, + { + "epoch": 14.876373626373626, + "grad_norm": 6.241525173187256, + "learning_rate": 4.256181318681319e-05, + "loss": 0.2049, + "step": 5415 + }, + { + "epoch": 14.87912087912088, + "grad_norm": 18.628225326538086, + "learning_rate": 4.2560439560439566e-05, + "loss": 0.585, + "step": 5416 + }, + { + "epoch": 14.881868131868131, + "grad_norm": 9.076371192932129, + "learning_rate": 4.2559065934065936e-05, + "loss": 0.3329, + "step": 5417 + }, + { + "epoch": 14.884615384615385, + "grad_norm": 15.31352710723877, + "learning_rate": 4.2557692307692306e-05, + "loss": 0.6911, + "step": 5418 + }, + { + "epoch": 14.887362637362637, + "grad_norm": 13.520642280578613, + "learning_rate": 4.255631868131868e-05, + "loss": 0.5018, + "step": 5419 + }, + { + "epoch": 14.89010989010989, + "grad_norm": 15.499388694763184, + "learning_rate": 4.255494505494505e-05, + "loss": 0.6709, + "step": 5420 + }, + { + "epoch": 14.892857142857142, + "grad_norm": 13.358429908752441, + "learning_rate": 4.255357142857143e-05, + "loss": 0.4968, + "step": 5421 + }, + { + "epoch": 14.895604395604396, + "grad_norm": 13.439720153808594, + "learning_rate": 4.25521978021978e-05, + "loss": 0.3471, + "step": 5422 + }, + { + "epoch": 14.898351648351648, + "grad_norm": 16.049999237060547, + "learning_rate": 4.255082417582418e-05, + "loss": 0.4667, + "step": 5423 + }, + { + "epoch": 14.901098901098901, + "grad_norm": 12.426063537597656, + "learning_rate": 4.2549450549450554e-05, + "loss": 0.3577, + "step": 5424 + }, + { + "epoch": 14.903846153846153, + "grad_norm": 21.111942291259766, + "learning_rate": 4.2548076923076924e-05, + "loss": 0.8387, + "step": 5425 + }, + { + "epoch": 14.906593406593407, + "grad_norm": 13.336347579956055, + "learning_rate": 4.25467032967033e-05, + "loss": 0.5156, + "step": 5426 + }, + { + "epoch": 14.909340659340659, + "grad_norm": 17.676387786865234, + "learning_rate": 4.254532967032967e-05, + "loss": 0.6065, + "step": 5427 + }, + { + "epoch": 14.912087912087912, + "grad_norm": 17.04873275756836, + "learning_rate": 4.254395604395605e-05, + "loss": 0.5052, + "step": 5428 + }, + { + "epoch": 14.914835164835164, + "grad_norm": 13.1514310836792, + "learning_rate": 4.2542582417582424e-05, + "loss": 0.5189, + "step": 5429 + }, + { + "epoch": 14.917582417582418, + "grad_norm": 13.761882781982422, + "learning_rate": 4.2541208791208794e-05, + "loss": 0.6283, + "step": 5430 + }, + { + "epoch": 14.92032967032967, + "grad_norm": 10.274882316589355, + "learning_rate": 4.253983516483517e-05, + "loss": 0.2033, + "step": 5431 + }, + { + "epoch": 14.923076923076923, + "grad_norm": 20.79287338256836, + "learning_rate": 4.253846153846154e-05, + "loss": 0.942, + "step": 5432 + }, + { + "epoch": 14.925824175824175, + "grad_norm": 8.225625991821289, + "learning_rate": 4.253708791208791e-05, + "loss": 0.2539, + "step": 5433 + }, + { + "epoch": 14.928571428571429, + "grad_norm": 11.594156265258789, + "learning_rate": 4.253571428571429e-05, + "loss": 0.6883, + "step": 5434 + }, + { + "epoch": 14.93131868131868, + "grad_norm": 9.281265258789062, + "learning_rate": 4.253434065934066e-05, + "loss": 0.3352, + "step": 5435 + }, + { + "epoch": 14.934065934065934, + "grad_norm": 12.192341804504395, + "learning_rate": 4.2532967032967035e-05, + "loss": 0.4574, + "step": 5436 + }, + { + "epoch": 14.936813186813186, + "grad_norm": 19.225820541381836, + "learning_rate": 4.2531593406593405e-05, + "loss": 0.6897, + "step": 5437 + }, + { + "epoch": 14.93956043956044, + "grad_norm": 14.959096908569336, + "learning_rate": 4.253021978021978e-05, + "loss": 0.6066, + "step": 5438 + }, + { + "epoch": 14.942307692307692, + "grad_norm": 14.407695770263672, + "learning_rate": 4.252884615384616e-05, + "loss": 0.443, + "step": 5439 + }, + { + "epoch": 14.945054945054945, + "grad_norm": 14.741530418395996, + "learning_rate": 4.252747252747253e-05, + "loss": 0.6483, + "step": 5440 + }, + { + "epoch": 14.947802197802197, + "grad_norm": 5.97457218170166, + "learning_rate": 4.2526098901098905e-05, + "loss": 0.1267, + "step": 5441 + }, + { + "epoch": 14.95054945054945, + "grad_norm": 7.808045387268066, + "learning_rate": 4.2524725274725275e-05, + "loss": 0.2374, + "step": 5442 + }, + { + "epoch": 14.953296703296703, + "grad_norm": 12.6863374710083, + "learning_rate": 4.252335164835165e-05, + "loss": 0.4802, + "step": 5443 + }, + { + "epoch": 14.956043956043956, + "grad_norm": 11.783757209777832, + "learning_rate": 4.252197802197803e-05, + "loss": 0.3874, + "step": 5444 + }, + { + "epoch": 14.958791208791208, + "grad_norm": 12.676264762878418, + "learning_rate": 4.25206043956044e-05, + "loss": 0.3129, + "step": 5445 + }, + { + "epoch": 14.961538461538462, + "grad_norm": 12.807640075683594, + "learning_rate": 4.2519230769230775e-05, + "loss": 0.5589, + "step": 5446 + }, + { + "epoch": 14.964285714285714, + "grad_norm": 18.919130325317383, + "learning_rate": 4.2517857142857145e-05, + "loss": 0.6096, + "step": 5447 + }, + { + "epoch": 14.967032967032967, + "grad_norm": 10.772675514221191, + "learning_rate": 4.2516483516483515e-05, + "loss": 0.4613, + "step": 5448 + }, + { + "epoch": 14.969780219780219, + "grad_norm": 23.928789138793945, + "learning_rate": 4.251510989010989e-05, + "loss": 0.5653, + "step": 5449 + }, + { + "epoch": 14.972527472527473, + "grad_norm": 7.431549072265625, + "learning_rate": 4.251373626373626e-05, + "loss": 0.2835, + "step": 5450 + }, + { + "epoch": 14.975274725274724, + "grad_norm": 9.15684700012207, + "learning_rate": 4.251236263736264e-05, + "loss": 0.2057, + "step": 5451 + }, + { + "epoch": 14.978021978021978, + "grad_norm": 11.732134819030762, + "learning_rate": 4.251098901098901e-05, + "loss": 0.4, + "step": 5452 + }, + { + "epoch": 14.98076923076923, + "grad_norm": 11.597197532653809, + "learning_rate": 4.2509615384615386e-05, + "loss": 0.4409, + "step": 5453 + }, + { + "epoch": 14.983516483516484, + "grad_norm": 16.493696212768555, + "learning_rate": 4.250824175824176e-05, + "loss": 0.6415, + "step": 5454 + }, + { + "epoch": 14.986263736263737, + "grad_norm": 13.240839958190918, + "learning_rate": 4.250686813186813e-05, + "loss": 0.4405, + "step": 5455 + }, + { + "epoch": 14.989010989010989, + "grad_norm": 7.1965718269348145, + "learning_rate": 4.250549450549451e-05, + "loss": 0.1985, + "step": 5456 + }, + { + "epoch": 14.991758241758241, + "grad_norm": 19.151185989379883, + "learning_rate": 4.250412087912088e-05, + "loss": 0.9402, + "step": 5457 + }, + { + "epoch": 14.994505494505495, + "grad_norm": 6.897725582122803, + "learning_rate": 4.2502747252747256e-05, + "loss": 0.1916, + "step": 5458 + }, + { + "epoch": 14.997252747252748, + "grad_norm": 8.632184982299805, + "learning_rate": 4.250137362637363e-05, + "loss": 0.2908, + "step": 5459 + }, + { + "epoch": 15.0, + "grad_norm": 38.64149475097656, + "learning_rate": 4.25e-05, + "loss": 1.8144, + "step": 5460 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.7396694214876033, + "eval_f1": 0.7316436834320561, + "eval_f1_DuraRiadoRio_64x64": 0.7219251336898396, + "eval_f1_Mole_64x64": 0.8181818181818182, + "eval_f1_Quebrado_64x64": 0.8131868131868132, + "eval_f1_RiadoRio_64x64": 0.6865671641791045, + "eval_f1_RioFechado_64x64": 0.6183574879227053, + "eval_loss": 0.8439465761184692, + "eval_precision": 0.7901893845044701, + "eval_precision_DuraRiadoRio_64x64": 0.5869565217391305, + "eval_precision_Mole_64x64": 0.7258064516129032, + "eval_precision_Quebrado_64x64": 0.8604651162790697, + "eval_precision_RiadoRio_64x64": 0.7931034482758621, + "eval_precision_RioFechado_64x64": 0.9846153846153847, + "eval_recall": 0.7403601433160365, + "eval_recall_DuraRiadoRio_64x64": 0.9375, + "eval_recall_Mole_64x64": 0.9375, + "eval_recall_Quebrado_64x64": 0.7708333333333334, + "eval_recall_RiadoRio_64x64": 0.6052631578947368, + "eval_recall_RioFechado_64x64": 0.4507042253521127, + "eval_runtime": 1.7655, + "eval_samples_per_second": 411.217, + "eval_steps_per_second": 26.055, + "step": 5460 + }, + { + "epoch": 15.002747252747254, + "grad_norm": 8.041021347045898, + "learning_rate": 4.249862637362638e-05, + "loss": 0.2205, + "step": 5461 + }, + { + "epoch": 15.005494505494505, + "grad_norm": 20.55148696899414, + "learning_rate": 4.249725274725275e-05, + "loss": 0.8777, + "step": 5462 + }, + { + "epoch": 15.008241758241759, + "grad_norm": 13.860167503356934, + "learning_rate": 4.249587912087912e-05, + "loss": 0.7171, + "step": 5463 + }, + { + "epoch": 15.010989010989011, + "grad_norm": 17.94432258605957, + "learning_rate": 4.24945054945055e-05, + "loss": 0.7017, + "step": 5464 + }, + { + "epoch": 15.013736263736265, + "grad_norm": 6.191634178161621, + "learning_rate": 4.249313186813187e-05, + "loss": 0.1964, + "step": 5465 + }, + { + "epoch": 15.016483516483516, + "grad_norm": 15.759900093078613, + "learning_rate": 4.2491758241758243e-05, + "loss": 0.7997, + "step": 5466 + }, + { + "epoch": 15.01923076923077, + "grad_norm": 4.557618141174316, + "learning_rate": 4.2490384615384614e-05, + "loss": 0.2853, + "step": 5467 + }, + { + "epoch": 15.021978021978022, + "grad_norm": 5.556963920593262, + "learning_rate": 4.248901098901099e-05, + "loss": 0.1639, + "step": 5468 + }, + { + "epoch": 15.024725274725276, + "grad_norm": 9.393351554870605, + "learning_rate": 4.248763736263737e-05, + "loss": 0.3745, + "step": 5469 + }, + { + "epoch": 15.027472527472527, + "grad_norm": 16.282777786254883, + "learning_rate": 4.248626373626374e-05, + "loss": 0.7035, + "step": 5470 + }, + { + "epoch": 15.030219780219781, + "grad_norm": 15.248826026916504, + "learning_rate": 4.2484890109890114e-05, + "loss": 0.6793, + "step": 5471 + }, + { + "epoch": 15.032967032967033, + "grad_norm": 12.069225311279297, + "learning_rate": 4.2483516483516484e-05, + "loss": 0.3272, + "step": 5472 + }, + { + "epoch": 15.035714285714286, + "grad_norm": 10.838260650634766, + "learning_rate": 4.248214285714286e-05, + "loss": 0.3028, + "step": 5473 + }, + { + "epoch": 15.038461538461538, + "grad_norm": 12.929464340209961, + "learning_rate": 4.248076923076924e-05, + "loss": 0.4944, + "step": 5474 + }, + { + "epoch": 15.041208791208792, + "grad_norm": 10.658905029296875, + "learning_rate": 4.247939560439561e-05, + "loss": 0.3692, + "step": 5475 + }, + { + "epoch": 15.043956043956044, + "grad_norm": 21.699384689331055, + "learning_rate": 4.2478021978021984e-05, + "loss": 1.0527, + "step": 5476 + }, + { + "epoch": 15.046703296703297, + "grad_norm": 11.363911628723145, + "learning_rate": 4.2476648351648354e-05, + "loss": 0.3228, + "step": 5477 + }, + { + "epoch": 15.04945054945055, + "grad_norm": 15.018169403076172, + "learning_rate": 4.2475274725274724e-05, + "loss": 0.6618, + "step": 5478 + }, + { + "epoch": 15.052197802197803, + "grad_norm": 15.18624496459961, + "learning_rate": 4.24739010989011e-05, + "loss": 0.5047, + "step": 5479 + }, + { + "epoch": 15.054945054945055, + "grad_norm": 23.341224670410156, + "learning_rate": 4.247252747252747e-05, + "loss": 1.0166, + "step": 5480 + }, + { + "epoch": 15.057692307692308, + "grad_norm": 10.509374618530273, + "learning_rate": 4.247115384615385e-05, + "loss": 0.401, + "step": 5481 + }, + { + "epoch": 15.06043956043956, + "grad_norm": 13.697373390197754, + "learning_rate": 4.246978021978022e-05, + "loss": 0.3664, + "step": 5482 + }, + { + "epoch": 15.063186813186814, + "grad_norm": 9.341050148010254, + "learning_rate": 4.2468406593406595e-05, + "loss": 0.2206, + "step": 5483 + }, + { + "epoch": 15.065934065934066, + "grad_norm": 17.953039169311523, + "learning_rate": 4.246703296703297e-05, + "loss": 0.7911, + "step": 5484 + }, + { + "epoch": 15.06868131868132, + "grad_norm": 7.543127059936523, + "learning_rate": 4.246565934065934e-05, + "loss": 0.2619, + "step": 5485 + }, + { + "epoch": 15.071428571428571, + "grad_norm": 16.180152893066406, + "learning_rate": 4.246428571428572e-05, + "loss": 0.7277, + "step": 5486 + }, + { + "epoch": 15.074175824175825, + "grad_norm": 7.338883876800537, + "learning_rate": 4.246291208791209e-05, + "loss": 0.2634, + "step": 5487 + }, + { + "epoch": 15.076923076923077, + "grad_norm": 13.881560325622559, + "learning_rate": 4.2461538461538465e-05, + "loss": 0.3856, + "step": 5488 + }, + { + "epoch": 15.07967032967033, + "grad_norm": 14.427999496459961, + "learning_rate": 4.246016483516484e-05, + "loss": 0.4251, + "step": 5489 + }, + { + "epoch": 15.082417582417582, + "grad_norm": 9.05544662475586, + "learning_rate": 4.245879120879121e-05, + "loss": 0.2411, + "step": 5490 + }, + { + "epoch": 15.085164835164836, + "grad_norm": 15.465364456176758, + "learning_rate": 4.245741758241759e-05, + "loss": 0.4774, + "step": 5491 + }, + { + "epoch": 15.087912087912088, + "grad_norm": 15.036941528320312, + "learning_rate": 4.245604395604396e-05, + "loss": 0.4091, + "step": 5492 + }, + { + "epoch": 15.090659340659341, + "grad_norm": 15.389951705932617, + "learning_rate": 4.245467032967033e-05, + "loss": 0.7451, + "step": 5493 + }, + { + "epoch": 15.093406593406593, + "grad_norm": 8.958878517150879, + "learning_rate": 4.2453296703296706e-05, + "loss": 0.4069, + "step": 5494 + }, + { + "epoch": 15.096153846153847, + "grad_norm": 15.740049362182617, + "learning_rate": 4.2451923076923076e-05, + "loss": 0.5512, + "step": 5495 + }, + { + "epoch": 15.098901098901099, + "grad_norm": 12.4744873046875, + "learning_rate": 4.245054945054945e-05, + "loss": 0.3487, + "step": 5496 + }, + { + "epoch": 15.101648351648352, + "grad_norm": 12.597352981567383, + "learning_rate": 4.244917582417582e-05, + "loss": 0.4254, + "step": 5497 + }, + { + "epoch": 15.104395604395604, + "grad_norm": 14.367764472961426, + "learning_rate": 4.24478021978022e-05, + "loss": 0.4646, + "step": 5498 + }, + { + "epoch": 15.107142857142858, + "grad_norm": 8.36013126373291, + "learning_rate": 4.2446428571428576e-05, + "loss": 0.2393, + "step": 5499 + }, + { + "epoch": 15.10989010989011, + "grad_norm": 17.330331802368164, + "learning_rate": 4.2445054945054946e-05, + "loss": 0.6221, + "step": 5500 + }, + { + "epoch": 15.112637362637363, + "grad_norm": 4.828197479248047, + "learning_rate": 4.244368131868132e-05, + "loss": 0.1174, + "step": 5501 + }, + { + "epoch": 15.115384615384615, + "grad_norm": 8.994034767150879, + "learning_rate": 4.244230769230769e-05, + "loss": 0.2708, + "step": 5502 + }, + { + "epoch": 15.118131868131869, + "grad_norm": 12.399177551269531, + "learning_rate": 4.244093406593407e-05, + "loss": 0.5679, + "step": 5503 + }, + { + "epoch": 15.12087912087912, + "grad_norm": 16.157331466674805, + "learning_rate": 4.2439560439560447e-05, + "loss": 0.9146, + "step": 5504 + }, + { + "epoch": 15.123626373626374, + "grad_norm": 6.200267791748047, + "learning_rate": 4.2438186813186817e-05, + "loss": 0.1699, + "step": 5505 + }, + { + "epoch": 15.126373626373626, + "grad_norm": 12.797117233276367, + "learning_rate": 4.243681318681319e-05, + "loss": 0.4368, + "step": 5506 + }, + { + "epoch": 15.12912087912088, + "grad_norm": 16.872802734375, + "learning_rate": 4.243543956043956e-05, + "loss": 0.7715, + "step": 5507 + }, + { + "epoch": 15.131868131868131, + "grad_norm": 11.533100128173828, + "learning_rate": 4.243406593406593e-05, + "loss": 0.3468, + "step": 5508 + }, + { + "epoch": 15.134615384615385, + "grad_norm": 10.801642417907715, + "learning_rate": 4.243269230769231e-05, + "loss": 0.2829, + "step": 5509 + }, + { + "epoch": 15.137362637362637, + "grad_norm": 5.349408149719238, + "learning_rate": 4.243131868131868e-05, + "loss": 0.2024, + "step": 5510 + }, + { + "epoch": 15.14010989010989, + "grad_norm": 13.066055297851562, + "learning_rate": 4.242994505494506e-05, + "loss": 0.4107, + "step": 5511 + }, + { + "epoch": 15.142857142857142, + "grad_norm": 9.808083534240723, + "learning_rate": 4.242857142857143e-05, + "loss": 0.1989, + "step": 5512 + }, + { + "epoch": 15.145604395604396, + "grad_norm": 13.370216369628906, + "learning_rate": 4.2427197802197804e-05, + "loss": 0.4515, + "step": 5513 + }, + { + "epoch": 15.148351648351648, + "grad_norm": 12.879119873046875, + "learning_rate": 4.242582417582418e-05, + "loss": 0.4331, + "step": 5514 + }, + { + "epoch": 15.151098901098901, + "grad_norm": 11.955568313598633, + "learning_rate": 4.242445054945055e-05, + "loss": 0.2764, + "step": 5515 + }, + { + "epoch": 15.153846153846153, + "grad_norm": 13.46051025390625, + "learning_rate": 4.242307692307693e-05, + "loss": 0.673, + "step": 5516 + }, + { + "epoch": 15.156593406593407, + "grad_norm": 12.239970207214355, + "learning_rate": 4.24217032967033e-05, + "loss": 0.3885, + "step": 5517 + }, + { + "epoch": 15.159340659340659, + "grad_norm": 10.924010276794434, + "learning_rate": 4.2420329670329674e-05, + "loss": 0.4296, + "step": 5518 + }, + { + "epoch": 15.162087912087912, + "grad_norm": 14.930682182312012, + "learning_rate": 4.241895604395605e-05, + "loss": 0.6126, + "step": 5519 + }, + { + "epoch": 15.164835164835164, + "grad_norm": 13.282163619995117, + "learning_rate": 4.241758241758242e-05, + "loss": 0.4389, + "step": 5520 + }, + { + "epoch": 15.167582417582418, + "grad_norm": 12.421906471252441, + "learning_rate": 4.24162087912088e-05, + "loss": 0.6277, + "step": 5521 + }, + { + "epoch": 15.17032967032967, + "grad_norm": 14.222182273864746, + "learning_rate": 4.241483516483517e-05, + "loss": 0.3559, + "step": 5522 + }, + { + "epoch": 15.173076923076923, + "grad_norm": 12.393587112426758, + "learning_rate": 4.241346153846154e-05, + "loss": 0.4886, + "step": 5523 + }, + { + "epoch": 15.175824175824175, + "grad_norm": 14.875682830810547, + "learning_rate": 4.2412087912087915e-05, + "loss": 0.5676, + "step": 5524 + }, + { + "epoch": 15.178571428571429, + "grad_norm": 9.868363380432129, + "learning_rate": 4.2410714285714285e-05, + "loss": 0.2829, + "step": 5525 + }, + { + "epoch": 15.18131868131868, + "grad_norm": 8.390774726867676, + "learning_rate": 4.240934065934066e-05, + "loss": 0.2136, + "step": 5526 + }, + { + "epoch": 15.184065934065934, + "grad_norm": 15.834590911865234, + "learning_rate": 4.240796703296703e-05, + "loss": 0.7275, + "step": 5527 + }, + { + "epoch": 15.186813186813186, + "grad_norm": 5.191529273986816, + "learning_rate": 4.240659340659341e-05, + "loss": 0.1414, + "step": 5528 + }, + { + "epoch": 15.18956043956044, + "grad_norm": 16.490718841552734, + "learning_rate": 4.2405219780219785e-05, + "loss": 0.4113, + "step": 5529 + }, + { + "epoch": 15.192307692307692, + "grad_norm": 15.126343727111816, + "learning_rate": 4.2403846153846155e-05, + "loss": 0.5115, + "step": 5530 + }, + { + "epoch": 15.195054945054945, + "grad_norm": 14.764904022216797, + "learning_rate": 4.240247252747253e-05, + "loss": 0.5405, + "step": 5531 + }, + { + "epoch": 15.197802197802197, + "grad_norm": 5.886446952819824, + "learning_rate": 4.24010989010989e-05, + "loss": 0.1515, + "step": 5532 + }, + { + "epoch": 15.20054945054945, + "grad_norm": 13.107589721679688, + "learning_rate": 4.239972527472528e-05, + "loss": 0.459, + "step": 5533 + }, + { + "epoch": 15.203296703296703, + "grad_norm": 6.260650157928467, + "learning_rate": 4.2398351648351655e-05, + "loss": 0.1518, + "step": 5534 + }, + { + "epoch": 15.206043956043956, + "grad_norm": 7.215967178344727, + "learning_rate": 4.2396978021978026e-05, + "loss": 0.2639, + "step": 5535 + }, + { + "epoch": 15.208791208791208, + "grad_norm": 12.617860794067383, + "learning_rate": 4.23956043956044e-05, + "loss": 0.3575, + "step": 5536 + }, + { + "epoch": 15.211538461538462, + "grad_norm": 19.797504425048828, + "learning_rate": 4.239423076923077e-05, + "loss": 1.0316, + "step": 5537 + }, + { + "epoch": 15.214285714285714, + "grad_norm": 6.598066329956055, + "learning_rate": 4.239285714285714e-05, + "loss": 0.1894, + "step": 5538 + }, + { + "epoch": 15.217032967032967, + "grad_norm": 12.912496566772461, + "learning_rate": 4.239148351648352e-05, + "loss": 0.4465, + "step": 5539 + }, + { + "epoch": 15.219780219780219, + "grad_norm": 13.34255313873291, + "learning_rate": 4.239010989010989e-05, + "loss": 0.4185, + "step": 5540 + }, + { + "epoch": 15.222527472527473, + "grad_norm": 17.147064208984375, + "learning_rate": 4.2388736263736266e-05, + "loss": 0.6209, + "step": 5541 + }, + { + "epoch": 15.225274725274724, + "grad_norm": 11.52444076538086, + "learning_rate": 4.2387362637362636e-05, + "loss": 0.5357, + "step": 5542 + }, + { + "epoch": 15.228021978021978, + "grad_norm": 15.193800926208496, + "learning_rate": 4.238598901098901e-05, + "loss": 0.4407, + "step": 5543 + }, + { + "epoch": 15.23076923076923, + "grad_norm": 13.722149848937988, + "learning_rate": 4.238461538461539e-05, + "loss": 0.2996, + "step": 5544 + }, + { + "epoch": 15.233516483516484, + "grad_norm": 10.903084754943848, + "learning_rate": 4.238324175824176e-05, + "loss": 0.3446, + "step": 5545 + }, + { + "epoch": 15.236263736263735, + "grad_norm": 11.551902770996094, + "learning_rate": 4.2381868131868136e-05, + "loss": 0.4071, + "step": 5546 + }, + { + "epoch": 15.239010989010989, + "grad_norm": 14.508563041687012, + "learning_rate": 4.2380494505494506e-05, + "loss": 0.5171, + "step": 5547 + }, + { + "epoch": 15.241758241758241, + "grad_norm": 12.410033226013184, + "learning_rate": 4.237912087912088e-05, + "loss": 0.272, + "step": 5548 + }, + { + "epoch": 15.244505494505495, + "grad_norm": 9.999593734741211, + "learning_rate": 4.237774725274726e-05, + "loss": 0.2255, + "step": 5549 + }, + { + "epoch": 15.247252747252748, + "grad_norm": 9.293580055236816, + "learning_rate": 4.237637362637363e-05, + "loss": 0.3458, + "step": 5550 + }, + { + "epoch": 15.25, + "grad_norm": 14.622387886047363, + "learning_rate": 4.237500000000001e-05, + "loss": 0.4899, + "step": 5551 + }, + { + "epoch": 15.252747252747252, + "grad_norm": 15.479636192321777, + "learning_rate": 4.237362637362638e-05, + "loss": 0.9378, + "step": 5552 + }, + { + "epoch": 15.255494505494505, + "grad_norm": 12.515435218811035, + "learning_rate": 4.237225274725275e-05, + "loss": 0.5113, + "step": 5553 + }, + { + "epoch": 15.258241758241759, + "grad_norm": 14.525099754333496, + "learning_rate": 4.2370879120879124e-05, + "loss": 0.3506, + "step": 5554 + }, + { + "epoch": 15.260989010989011, + "grad_norm": 16.466983795166016, + "learning_rate": 4.2369505494505494e-05, + "loss": 0.7692, + "step": 5555 + }, + { + "epoch": 15.263736263736265, + "grad_norm": 12.618517875671387, + "learning_rate": 4.236813186813187e-05, + "loss": 0.3323, + "step": 5556 + }, + { + "epoch": 15.266483516483516, + "grad_norm": 5.853906631469727, + "learning_rate": 4.236675824175824e-05, + "loss": 0.1723, + "step": 5557 + }, + { + "epoch": 15.26923076923077, + "grad_norm": 11.757822036743164, + "learning_rate": 4.236538461538462e-05, + "loss": 0.3133, + "step": 5558 + }, + { + "epoch": 15.271978021978022, + "grad_norm": 17.68351936340332, + "learning_rate": 4.2364010989010994e-05, + "loss": 0.7038, + "step": 5559 + }, + { + "epoch": 15.274725274725276, + "grad_norm": 12.696239471435547, + "learning_rate": 4.2362637362637364e-05, + "loss": 0.4127, + "step": 5560 + }, + { + "epoch": 15.277472527472527, + "grad_norm": 14.430155754089355, + "learning_rate": 4.236126373626374e-05, + "loss": 0.6257, + "step": 5561 + }, + { + "epoch": 15.280219780219781, + "grad_norm": 18.466327667236328, + "learning_rate": 4.235989010989011e-05, + "loss": 0.614, + "step": 5562 + }, + { + "epoch": 15.282967032967033, + "grad_norm": 9.449464797973633, + "learning_rate": 4.235851648351649e-05, + "loss": 0.3372, + "step": 5563 + }, + { + "epoch": 15.285714285714286, + "grad_norm": 16.18244171142578, + "learning_rate": 4.2357142857142864e-05, + "loss": 0.5905, + "step": 5564 + }, + { + "epoch": 15.288461538461538, + "grad_norm": 10.214183807373047, + "learning_rate": 4.2355769230769234e-05, + "loss": 0.3093, + "step": 5565 + }, + { + "epoch": 15.291208791208792, + "grad_norm": 11.226512908935547, + "learning_rate": 4.235439560439561e-05, + "loss": 0.3974, + "step": 5566 + }, + { + "epoch": 15.293956043956044, + "grad_norm": 10.867313385009766, + "learning_rate": 4.235302197802198e-05, + "loss": 0.3368, + "step": 5567 + }, + { + "epoch": 15.296703296703297, + "grad_norm": 3.3748557567596436, + "learning_rate": 4.235164835164835e-05, + "loss": 0.0935, + "step": 5568 + }, + { + "epoch": 15.29945054945055, + "grad_norm": 19.553077697753906, + "learning_rate": 4.235027472527473e-05, + "loss": 0.7262, + "step": 5569 + }, + { + "epoch": 15.302197802197803, + "grad_norm": 11.458033561706543, + "learning_rate": 4.23489010989011e-05, + "loss": 0.2679, + "step": 5570 + }, + { + "epoch": 15.304945054945055, + "grad_norm": 12.30308723449707, + "learning_rate": 4.2347527472527475e-05, + "loss": 0.3007, + "step": 5571 + }, + { + "epoch": 15.307692307692308, + "grad_norm": 16.009084701538086, + "learning_rate": 4.2346153846153845e-05, + "loss": 0.5422, + "step": 5572 + }, + { + "epoch": 15.31043956043956, + "grad_norm": 9.976790428161621, + "learning_rate": 4.234478021978022e-05, + "loss": 0.2331, + "step": 5573 + }, + { + "epoch": 15.313186813186814, + "grad_norm": 16.12584686279297, + "learning_rate": 4.234340659340659e-05, + "loss": 0.5781, + "step": 5574 + }, + { + "epoch": 15.315934065934066, + "grad_norm": 12.487449645996094, + "learning_rate": 4.234203296703297e-05, + "loss": 0.322, + "step": 5575 + }, + { + "epoch": 15.31868131868132, + "grad_norm": 11.978874206542969, + "learning_rate": 4.2340659340659345e-05, + "loss": 0.5161, + "step": 5576 + }, + { + "epoch": 15.321428571428571, + "grad_norm": 13.58665657043457, + "learning_rate": 4.2339285714285715e-05, + "loss": 0.415, + "step": 5577 + }, + { + "epoch": 15.324175824175825, + "grad_norm": 9.834334373474121, + "learning_rate": 4.233791208791209e-05, + "loss": 0.3108, + "step": 5578 + }, + { + "epoch": 15.326923076923077, + "grad_norm": 13.544315338134766, + "learning_rate": 4.233653846153846e-05, + "loss": 0.3662, + "step": 5579 + }, + { + "epoch": 15.32967032967033, + "grad_norm": 12.066825866699219, + "learning_rate": 4.233516483516484e-05, + "loss": 0.3875, + "step": 5580 + }, + { + "epoch": 15.332417582417582, + "grad_norm": 9.739148139953613, + "learning_rate": 4.2333791208791216e-05, + "loss": 0.1847, + "step": 5581 + }, + { + "epoch": 15.335164835164836, + "grad_norm": 10.418888092041016, + "learning_rate": 4.2332417582417586e-05, + "loss": 0.3302, + "step": 5582 + }, + { + "epoch": 15.337912087912088, + "grad_norm": 10.018643379211426, + "learning_rate": 4.2331043956043956e-05, + "loss": 0.4338, + "step": 5583 + }, + { + "epoch": 15.340659340659341, + "grad_norm": 17.431127548217773, + "learning_rate": 4.2329670329670326e-05, + "loss": 0.7036, + "step": 5584 + }, + { + "epoch": 15.343406593406593, + "grad_norm": 7.566971302032471, + "learning_rate": 4.23282967032967e-05, + "loss": 0.3116, + "step": 5585 + }, + { + "epoch": 15.346153846153847, + "grad_norm": 11.037267684936523, + "learning_rate": 4.232692307692308e-05, + "loss": 0.3445, + "step": 5586 + }, + { + "epoch": 15.348901098901099, + "grad_norm": 14.54234790802002, + "learning_rate": 4.232554945054945e-05, + "loss": 0.4741, + "step": 5587 + }, + { + "epoch": 15.351648351648352, + "grad_norm": 10.892547607421875, + "learning_rate": 4.2324175824175826e-05, + "loss": 0.3263, + "step": 5588 + }, + { + "epoch": 15.354395604395604, + "grad_norm": 8.641361236572266, + "learning_rate": 4.2322802197802196e-05, + "loss": 0.3481, + "step": 5589 + }, + { + "epoch": 15.357142857142858, + "grad_norm": 11.809785842895508, + "learning_rate": 4.232142857142857e-05, + "loss": 0.4576, + "step": 5590 + }, + { + "epoch": 15.35989010989011, + "grad_norm": 15.418500900268555, + "learning_rate": 4.232005494505495e-05, + "loss": 0.6566, + "step": 5591 + }, + { + "epoch": 15.362637362637363, + "grad_norm": 13.765449523925781, + "learning_rate": 4.231868131868132e-05, + "loss": 0.5657, + "step": 5592 + }, + { + "epoch": 15.365384615384615, + "grad_norm": 5.272315502166748, + "learning_rate": 4.23173076923077e-05, + "loss": 0.1408, + "step": 5593 + }, + { + "epoch": 15.368131868131869, + "grad_norm": 15.055747985839844, + "learning_rate": 4.231593406593407e-05, + "loss": 0.5911, + "step": 5594 + }, + { + "epoch": 15.37087912087912, + "grad_norm": 16.88910675048828, + "learning_rate": 4.2314560439560443e-05, + "loss": 0.6055, + "step": 5595 + }, + { + "epoch": 15.373626373626374, + "grad_norm": 18.939348220825195, + "learning_rate": 4.231318681318682e-05, + "loss": 0.4267, + "step": 5596 + }, + { + "epoch": 15.376373626373626, + "grad_norm": 9.986907005310059, + "learning_rate": 4.231181318681319e-05, + "loss": 0.2539, + "step": 5597 + }, + { + "epoch": 15.37912087912088, + "grad_norm": 11.801994323730469, + "learning_rate": 4.231043956043956e-05, + "loss": 0.4004, + "step": 5598 + }, + { + "epoch": 15.381868131868131, + "grad_norm": 14.89265251159668, + "learning_rate": 4.230906593406593e-05, + "loss": 0.3639, + "step": 5599 + }, + { + "epoch": 15.384615384615385, + "grad_norm": 13.95502758026123, + "learning_rate": 4.230769230769231e-05, + "loss": 0.6019, + "step": 5600 + }, + { + "epoch": 15.387362637362637, + "grad_norm": 15.3554048538208, + "learning_rate": 4.2306318681318684e-05, + "loss": 0.8392, + "step": 5601 + }, + { + "epoch": 15.39010989010989, + "grad_norm": 7.7914323806762695, + "learning_rate": 4.2304945054945054e-05, + "loss": 0.1773, + "step": 5602 + }, + { + "epoch": 15.392857142857142, + "grad_norm": 9.146482467651367, + "learning_rate": 4.230357142857143e-05, + "loss": 0.2631, + "step": 5603 + }, + { + "epoch": 15.395604395604396, + "grad_norm": 16.76626205444336, + "learning_rate": 4.23021978021978e-05, + "loss": 0.6444, + "step": 5604 + }, + { + "epoch": 15.398351648351648, + "grad_norm": 15.692272186279297, + "learning_rate": 4.230082417582418e-05, + "loss": 0.775, + "step": 5605 + }, + { + "epoch": 15.401098901098901, + "grad_norm": 15.82702922821045, + "learning_rate": 4.2299450549450554e-05, + "loss": 0.5957, + "step": 5606 + }, + { + "epoch": 15.403846153846153, + "grad_norm": 14.486993789672852, + "learning_rate": 4.2298076923076924e-05, + "loss": 0.8769, + "step": 5607 + }, + { + "epoch": 15.406593406593407, + "grad_norm": 15.540741920471191, + "learning_rate": 4.22967032967033e-05, + "loss": 0.495, + "step": 5608 + }, + { + "epoch": 15.409340659340659, + "grad_norm": 19.589832305908203, + "learning_rate": 4.229532967032967e-05, + "loss": 0.8541, + "step": 5609 + }, + { + "epoch": 15.412087912087912, + "grad_norm": 10.06828498840332, + "learning_rate": 4.229395604395605e-05, + "loss": 0.3602, + "step": 5610 + }, + { + "epoch": 15.414835164835164, + "grad_norm": 14.706233978271484, + "learning_rate": 4.2292582417582425e-05, + "loss": 0.392, + "step": 5611 + }, + { + "epoch": 15.417582417582418, + "grad_norm": 17.22075843811035, + "learning_rate": 4.2291208791208795e-05, + "loss": 0.4981, + "step": 5612 + }, + { + "epoch": 15.42032967032967, + "grad_norm": 7.563056468963623, + "learning_rate": 4.2289835164835165e-05, + "loss": 0.1807, + "step": 5613 + }, + { + "epoch": 15.423076923076923, + "grad_norm": 16.718557357788086, + "learning_rate": 4.2288461538461535e-05, + "loss": 0.4771, + "step": 5614 + }, + { + "epoch": 15.425824175824175, + "grad_norm": 12.462922096252441, + "learning_rate": 4.228708791208791e-05, + "loss": 0.3702, + "step": 5615 + }, + { + "epoch": 15.428571428571429, + "grad_norm": 15.733048439025879, + "learning_rate": 4.228571428571429e-05, + "loss": 0.398, + "step": 5616 + }, + { + "epoch": 15.43131868131868, + "grad_norm": 8.074758529663086, + "learning_rate": 4.228434065934066e-05, + "loss": 0.367, + "step": 5617 + }, + { + "epoch": 15.434065934065934, + "grad_norm": 11.022522926330566, + "learning_rate": 4.2282967032967035e-05, + "loss": 0.3536, + "step": 5618 + }, + { + "epoch": 15.436813186813186, + "grad_norm": 18.73307991027832, + "learning_rate": 4.2281593406593405e-05, + "loss": 0.5898, + "step": 5619 + }, + { + "epoch": 15.43956043956044, + "grad_norm": 9.449438095092773, + "learning_rate": 4.228021978021978e-05, + "loss": 0.2629, + "step": 5620 + }, + { + "epoch": 15.442307692307692, + "grad_norm": 21.051218032836914, + "learning_rate": 4.227884615384616e-05, + "loss": 0.8472, + "step": 5621 + }, + { + "epoch": 15.445054945054945, + "grad_norm": 11.07641887664795, + "learning_rate": 4.227747252747253e-05, + "loss": 0.2467, + "step": 5622 + }, + { + "epoch": 15.447802197802197, + "grad_norm": 11.473258972167969, + "learning_rate": 4.2276098901098906e-05, + "loss": 0.4132, + "step": 5623 + }, + { + "epoch": 15.45054945054945, + "grad_norm": 13.442501068115234, + "learning_rate": 4.2274725274725276e-05, + "loss": 0.3375, + "step": 5624 + }, + { + "epoch": 15.453296703296703, + "grad_norm": 10.092564582824707, + "learning_rate": 4.227335164835165e-05, + "loss": 0.2617, + "step": 5625 + }, + { + "epoch": 15.456043956043956, + "grad_norm": 12.713462829589844, + "learning_rate": 4.227197802197803e-05, + "loss": 0.5513, + "step": 5626 + }, + { + "epoch": 15.458791208791208, + "grad_norm": 16.656593322753906, + "learning_rate": 4.22706043956044e-05, + "loss": 0.6292, + "step": 5627 + }, + { + "epoch": 15.461538461538462, + "grad_norm": 12.712469100952148, + "learning_rate": 4.226923076923077e-05, + "loss": 0.4792, + "step": 5628 + }, + { + "epoch": 15.464285714285714, + "grad_norm": 15.060297966003418, + "learning_rate": 4.226785714285714e-05, + "loss": 0.5731, + "step": 5629 + }, + { + "epoch": 15.467032967032967, + "grad_norm": 13.815094947814941, + "learning_rate": 4.2266483516483516e-05, + "loss": 0.331, + "step": 5630 + }, + { + "epoch": 15.469780219780219, + "grad_norm": 16.59674644470215, + "learning_rate": 4.226510989010989e-05, + "loss": 0.4949, + "step": 5631 + }, + { + "epoch": 15.472527472527473, + "grad_norm": 8.691229820251465, + "learning_rate": 4.226373626373626e-05, + "loss": 0.363, + "step": 5632 + }, + { + "epoch": 15.475274725274724, + "grad_norm": 10.455649375915527, + "learning_rate": 4.226236263736264e-05, + "loss": 0.3859, + "step": 5633 + }, + { + "epoch": 15.478021978021978, + "grad_norm": 18.39076042175293, + "learning_rate": 4.226098901098901e-05, + "loss": 0.4078, + "step": 5634 + }, + { + "epoch": 15.48076923076923, + "grad_norm": 8.366668701171875, + "learning_rate": 4.2259615384615387e-05, + "loss": 0.2451, + "step": 5635 + }, + { + "epoch": 15.483516483516484, + "grad_norm": 10.7598237991333, + "learning_rate": 4.225824175824176e-05, + "loss": 0.4563, + "step": 5636 + }, + { + "epoch": 15.486263736263735, + "grad_norm": 12.670727729797363, + "learning_rate": 4.225686813186813e-05, + "loss": 0.5847, + "step": 5637 + }, + { + "epoch": 15.489010989010989, + "grad_norm": 12.60062313079834, + "learning_rate": 4.225549450549451e-05, + "loss": 0.3338, + "step": 5638 + }, + { + "epoch": 15.491758241758241, + "grad_norm": 16.1190128326416, + "learning_rate": 4.225412087912088e-05, + "loss": 0.6224, + "step": 5639 + }, + { + "epoch": 15.494505494505495, + "grad_norm": 12.702201843261719, + "learning_rate": 4.225274725274726e-05, + "loss": 0.3356, + "step": 5640 + }, + { + "epoch": 15.497252747252748, + "grad_norm": 12.974874496459961, + "learning_rate": 4.2251373626373634e-05, + "loss": 0.4826, + "step": 5641 + }, + { + "epoch": 15.5, + "grad_norm": 9.722800254821777, + "learning_rate": 4.2250000000000004e-05, + "loss": 0.3012, + "step": 5642 + }, + { + "epoch": 15.502747252747252, + "grad_norm": 15.807145118713379, + "learning_rate": 4.2248626373626374e-05, + "loss": 0.5503, + "step": 5643 + }, + { + "epoch": 15.505494505494505, + "grad_norm": 22.802953720092773, + "learning_rate": 4.2247252747252744e-05, + "loss": 0.7389, + "step": 5644 + }, + { + "epoch": 15.508241758241759, + "grad_norm": 20.45420265197754, + "learning_rate": 4.224587912087912e-05, + "loss": 1.0012, + "step": 5645 + }, + { + "epoch": 15.510989010989011, + "grad_norm": 13.551674842834473, + "learning_rate": 4.22445054945055e-05, + "loss": 0.3418, + "step": 5646 + }, + { + "epoch": 15.513736263736263, + "grad_norm": 11.741430282592773, + "learning_rate": 4.224313186813187e-05, + "loss": 0.3154, + "step": 5647 + }, + { + "epoch": 15.516483516483516, + "grad_norm": 15.18171215057373, + "learning_rate": 4.2241758241758244e-05, + "loss": 0.4145, + "step": 5648 + }, + { + "epoch": 15.51923076923077, + "grad_norm": 15.373465538024902, + "learning_rate": 4.2240384615384614e-05, + "loss": 0.5516, + "step": 5649 + }, + { + "epoch": 15.521978021978022, + "grad_norm": 13.040771484375, + "learning_rate": 4.223901098901099e-05, + "loss": 0.632, + "step": 5650 + }, + { + "epoch": 15.524725274725276, + "grad_norm": 15.52379322052002, + "learning_rate": 4.223763736263737e-05, + "loss": 0.4079, + "step": 5651 + }, + { + "epoch": 15.527472527472527, + "grad_norm": 9.455058097839355, + "learning_rate": 4.223626373626374e-05, + "loss": 0.3495, + "step": 5652 + }, + { + "epoch": 15.530219780219781, + "grad_norm": 15.1013822555542, + "learning_rate": 4.2234890109890115e-05, + "loss": 0.5731, + "step": 5653 + }, + { + "epoch": 15.532967032967033, + "grad_norm": 12.85676097869873, + "learning_rate": 4.2233516483516485e-05, + "loss": 0.4127, + "step": 5654 + }, + { + "epoch": 15.535714285714286, + "grad_norm": 7.672653675079346, + "learning_rate": 4.223214285714286e-05, + "loss": 0.2874, + "step": 5655 + }, + { + "epoch": 15.538461538461538, + "grad_norm": 9.791374206542969, + "learning_rate": 4.223076923076924e-05, + "loss": 0.2812, + "step": 5656 + }, + { + "epoch": 15.541208791208792, + "grad_norm": 18.955469131469727, + "learning_rate": 4.222939560439561e-05, + "loss": 0.5719, + "step": 5657 + }, + { + "epoch": 15.543956043956044, + "grad_norm": 10.153504371643066, + "learning_rate": 4.222802197802198e-05, + "loss": 0.2565, + "step": 5658 + }, + { + "epoch": 15.546703296703297, + "grad_norm": 12.777569770812988, + "learning_rate": 4.222664835164835e-05, + "loss": 0.4495, + "step": 5659 + }, + { + "epoch": 15.54945054945055, + "grad_norm": 9.726179122924805, + "learning_rate": 4.2225274725274725e-05, + "loss": 0.3461, + "step": 5660 + }, + { + "epoch": 15.552197802197803, + "grad_norm": 10.565035820007324, + "learning_rate": 4.22239010989011e-05, + "loss": 0.2365, + "step": 5661 + }, + { + "epoch": 15.554945054945055, + "grad_norm": 10.566859245300293, + "learning_rate": 4.222252747252747e-05, + "loss": 0.3683, + "step": 5662 + }, + { + "epoch": 15.557692307692308, + "grad_norm": 6.939207553863525, + "learning_rate": 4.222115384615385e-05, + "loss": 0.2228, + "step": 5663 + }, + { + "epoch": 15.56043956043956, + "grad_norm": 16.515729904174805, + "learning_rate": 4.221978021978022e-05, + "loss": 0.4238, + "step": 5664 + }, + { + "epoch": 15.563186813186814, + "grad_norm": 17.1671085357666, + "learning_rate": 4.2218406593406595e-05, + "loss": 0.9272, + "step": 5665 + }, + { + "epoch": 15.565934065934066, + "grad_norm": 13.933879852294922, + "learning_rate": 4.221703296703297e-05, + "loss": 0.5395, + "step": 5666 + }, + { + "epoch": 15.56868131868132, + "grad_norm": 13.750540733337402, + "learning_rate": 4.221565934065934e-05, + "loss": 0.4546, + "step": 5667 + }, + { + "epoch": 15.571428571428571, + "grad_norm": 15.123147964477539, + "learning_rate": 4.221428571428572e-05, + "loss": 0.4668, + "step": 5668 + }, + { + "epoch": 15.574175824175825, + "grad_norm": 11.374589920043945, + "learning_rate": 4.221291208791209e-05, + "loss": 0.551, + "step": 5669 + }, + { + "epoch": 15.576923076923077, + "grad_norm": 10.208890914916992, + "learning_rate": 4.2211538461538466e-05, + "loss": 0.2179, + "step": 5670 + }, + { + "epoch": 15.57967032967033, + "grad_norm": 13.139711380004883, + "learning_rate": 4.221016483516484e-05, + "loss": 0.3675, + "step": 5671 + }, + { + "epoch": 15.582417582417582, + "grad_norm": 9.842178344726562, + "learning_rate": 4.220879120879121e-05, + "loss": 0.317, + "step": 5672 + }, + { + "epoch": 15.585164835164836, + "grad_norm": 19.06231117248535, + "learning_rate": 4.220741758241758e-05, + "loss": 0.8124, + "step": 5673 + }, + { + "epoch": 15.587912087912088, + "grad_norm": 16.904373168945312, + "learning_rate": 4.220604395604395e-05, + "loss": 0.5835, + "step": 5674 + }, + { + "epoch": 15.590659340659341, + "grad_norm": 14.229371070861816, + "learning_rate": 4.220467032967033e-05, + "loss": 0.5605, + "step": 5675 + }, + { + "epoch": 15.593406593406593, + "grad_norm": 12.438663482666016, + "learning_rate": 4.2203296703296706e-05, + "loss": 0.4778, + "step": 5676 + }, + { + "epoch": 15.596153846153847, + "grad_norm": 12.637934684753418, + "learning_rate": 4.2201923076923076e-05, + "loss": 0.362, + "step": 5677 + }, + { + "epoch": 15.598901098901099, + "grad_norm": 13.889505386352539, + "learning_rate": 4.220054945054945e-05, + "loss": 0.3986, + "step": 5678 + }, + { + "epoch": 15.601648351648352, + "grad_norm": 13.211251258850098, + "learning_rate": 4.219917582417582e-05, + "loss": 0.2951, + "step": 5679 + }, + { + "epoch": 15.604395604395604, + "grad_norm": 7.458400726318359, + "learning_rate": 4.21978021978022e-05, + "loss": 0.1947, + "step": 5680 + }, + { + "epoch": 15.607142857142858, + "grad_norm": 8.254290580749512, + "learning_rate": 4.219642857142858e-05, + "loss": 0.2275, + "step": 5681 + }, + { + "epoch": 15.60989010989011, + "grad_norm": 11.632840156555176, + "learning_rate": 4.219505494505495e-05, + "loss": 0.4898, + "step": 5682 + }, + { + "epoch": 15.612637362637363, + "grad_norm": 16.735715866088867, + "learning_rate": 4.2193681318681324e-05, + "loss": 0.6861, + "step": 5683 + }, + { + "epoch": 15.615384615384615, + "grad_norm": 10.21928596496582, + "learning_rate": 4.2192307692307694e-05, + "loss": 0.3752, + "step": 5684 + }, + { + "epoch": 15.618131868131869, + "grad_norm": 12.585310935974121, + "learning_rate": 4.219093406593407e-05, + "loss": 0.4722, + "step": 5685 + }, + { + "epoch": 15.62087912087912, + "grad_norm": 5.97208309173584, + "learning_rate": 4.218956043956044e-05, + "loss": 0.1926, + "step": 5686 + }, + { + "epoch": 15.623626373626374, + "grad_norm": 10.753588676452637, + "learning_rate": 4.218818681318682e-05, + "loss": 0.3702, + "step": 5687 + }, + { + "epoch": 15.626373626373626, + "grad_norm": 15.421585083007812, + "learning_rate": 4.218681318681319e-05, + "loss": 0.6286, + "step": 5688 + }, + { + "epoch": 15.62912087912088, + "grad_norm": 11.25973129272461, + "learning_rate": 4.218543956043956e-05, + "loss": 0.4488, + "step": 5689 + }, + { + "epoch": 15.631868131868131, + "grad_norm": 19.99650001525879, + "learning_rate": 4.2184065934065934e-05, + "loss": 0.9195, + "step": 5690 + }, + { + "epoch": 15.634615384615385, + "grad_norm": 12.932104110717773, + "learning_rate": 4.218269230769231e-05, + "loss": 0.5456, + "step": 5691 + }, + { + "epoch": 15.637362637362637, + "grad_norm": 13.580655097961426, + "learning_rate": 4.218131868131868e-05, + "loss": 0.5779, + "step": 5692 + }, + { + "epoch": 15.64010989010989, + "grad_norm": 15.480597496032715, + "learning_rate": 4.217994505494506e-05, + "loss": 0.3517, + "step": 5693 + }, + { + "epoch": 15.642857142857142, + "grad_norm": 15.630012512207031, + "learning_rate": 4.217857142857143e-05, + "loss": 0.533, + "step": 5694 + }, + { + "epoch": 15.645604395604396, + "grad_norm": 4.323631763458252, + "learning_rate": 4.2177197802197804e-05, + "loss": 0.1202, + "step": 5695 + }, + { + "epoch": 15.648351648351648, + "grad_norm": 4.849519729614258, + "learning_rate": 4.217582417582418e-05, + "loss": 0.1113, + "step": 5696 + }, + { + "epoch": 15.651098901098901, + "grad_norm": 20.45153045654297, + "learning_rate": 4.217445054945055e-05, + "loss": 0.6177, + "step": 5697 + }, + { + "epoch": 15.653846153846153, + "grad_norm": 13.538086891174316, + "learning_rate": 4.217307692307693e-05, + "loss": 0.5222, + "step": 5698 + }, + { + "epoch": 15.656593406593407, + "grad_norm": 13.940134048461914, + "learning_rate": 4.21717032967033e-05, + "loss": 0.3218, + "step": 5699 + }, + { + "epoch": 15.659340659340659, + "grad_norm": 13.27150821685791, + "learning_rate": 4.2170329670329675e-05, + "loss": 0.4421, + "step": 5700 + }, + { + "epoch": 15.662087912087912, + "grad_norm": 8.834127426147461, + "learning_rate": 4.2168956043956045e-05, + "loss": 0.1621, + "step": 5701 + }, + { + "epoch": 15.664835164835164, + "grad_norm": 12.429832458496094, + "learning_rate": 4.216758241758242e-05, + "loss": 0.3562, + "step": 5702 + }, + { + "epoch": 15.667582417582418, + "grad_norm": 19.551851272583008, + "learning_rate": 4.216620879120879e-05, + "loss": 0.8737, + "step": 5703 + }, + { + "epoch": 15.67032967032967, + "grad_norm": 15.807050704956055, + "learning_rate": 4.216483516483516e-05, + "loss": 0.6414, + "step": 5704 + }, + { + "epoch": 15.673076923076923, + "grad_norm": 20.053802490234375, + "learning_rate": 4.216346153846154e-05, + "loss": 0.8895, + "step": 5705 + }, + { + "epoch": 15.675824175824175, + "grad_norm": 16.128826141357422, + "learning_rate": 4.2162087912087915e-05, + "loss": 0.581, + "step": 5706 + }, + { + "epoch": 15.678571428571429, + "grad_norm": 12.79980754852295, + "learning_rate": 4.2160714285714285e-05, + "loss": 0.5081, + "step": 5707 + }, + { + "epoch": 15.68131868131868, + "grad_norm": 19.866125106811523, + "learning_rate": 4.215934065934066e-05, + "loss": 0.6011, + "step": 5708 + }, + { + "epoch": 15.684065934065934, + "grad_norm": 9.027164459228516, + "learning_rate": 4.215796703296703e-05, + "loss": 0.2162, + "step": 5709 + }, + { + "epoch": 15.686813186813186, + "grad_norm": 14.739897727966309, + "learning_rate": 4.215659340659341e-05, + "loss": 0.712, + "step": 5710 + }, + { + "epoch": 15.68956043956044, + "grad_norm": 16.798248291015625, + "learning_rate": 4.2155219780219786e-05, + "loss": 0.5117, + "step": 5711 + }, + { + "epoch": 15.692307692307692, + "grad_norm": 10.079811096191406, + "learning_rate": 4.2153846153846156e-05, + "loss": 0.3095, + "step": 5712 + }, + { + "epoch": 15.695054945054945, + "grad_norm": 12.652459144592285, + "learning_rate": 4.215247252747253e-05, + "loss": 0.2312, + "step": 5713 + }, + { + "epoch": 15.697802197802197, + "grad_norm": 14.494023323059082, + "learning_rate": 4.21510989010989e-05, + "loss": 0.3941, + "step": 5714 + }, + { + "epoch": 15.70054945054945, + "grad_norm": 10.380335807800293, + "learning_rate": 4.214972527472528e-05, + "loss": 0.2952, + "step": 5715 + }, + { + "epoch": 15.703296703296703, + "grad_norm": 8.256494522094727, + "learning_rate": 4.214835164835165e-05, + "loss": 0.2553, + "step": 5716 + }, + { + "epoch": 15.706043956043956, + "grad_norm": 14.993309020996094, + "learning_rate": 4.2146978021978026e-05, + "loss": 0.7177, + "step": 5717 + }, + { + "epoch": 15.708791208791208, + "grad_norm": 13.664067268371582, + "learning_rate": 4.2145604395604396e-05, + "loss": 0.4827, + "step": 5718 + }, + { + "epoch": 15.711538461538462, + "grad_norm": 14.35271167755127, + "learning_rate": 4.2144230769230766e-05, + "loss": 0.546, + "step": 5719 + }, + { + "epoch": 15.714285714285714, + "grad_norm": 13.241947174072266, + "learning_rate": 4.214285714285714e-05, + "loss": 0.5533, + "step": 5720 + }, + { + "epoch": 15.717032967032967, + "grad_norm": 18.238849639892578, + "learning_rate": 4.214148351648352e-05, + "loss": 0.7579, + "step": 5721 + }, + { + "epoch": 15.719780219780219, + "grad_norm": 12.77196216583252, + "learning_rate": 4.214010989010989e-05, + "loss": 0.2839, + "step": 5722 + }, + { + "epoch": 15.722527472527473, + "grad_norm": 16.93671226501465, + "learning_rate": 4.213873626373627e-05, + "loss": 0.5969, + "step": 5723 + }, + { + "epoch": 15.725274725274724, + "grad_norm": 12.058526039123535, + "learning_rate": 4.213736263736264e-05, + "loss": 0.5887, + "step": 5724 + }, + { + "epoch": 15.728021978021978, + "grad_norm": 21.410724639892578, + "learning_rate": 4.2135989010989013e-05, + "loss": 1.1799, + "step": 5725 + }, + { + "epoch": 15.73076923076923, + "grad_norm": 6.840317726135254, + "learning_rate": 4.213461538461539e-05, + "loss": 0.1471, + "step": 5726 + }, + { + "epoch": 15.733516483516484, + "grad_norm": 9.388249397277832, + "learning_rate": 4.213324175824176e-05, + "loss": 0.2706, + "step": 5727 + }, + { + "epoch": 15.736263736263737, + "grad_norm": 17.65314292907715, + "learning_rate": 4.213186813186814e-05, + "loss": 0.6987, + "step": 5728 + }, + { + "epoch": 15.739010989010989, + "grad_norm": 10.922762870788574, + "learning_rate": 4.213049450549451e-05, + "loss": 0.2102, + "step": 5729 + }, + { + "epoch": 15.741758241758241, + "grad_norm": 12.053487777709961, + "learning_rate": 4.2129120879120884e-05, + "loss": 0.2963, + "step": 5730 + }, + { + "epoch": 15.744505494505495, + "grad_norm": 15.218631744384766, + "learning_rate": 4.2127747252747254e-05, + "loss": 0.5716, + "step": 5731 + }, + { + "epoch": 15.747252747252748, + "grad_norm": 11.94682502746582, + "learning_rate": 4.212637362637363e-05, + "loss": 0.5956, + "step": 5732 + }, + { + "epoch": 15.75, + "grad_norm": 13.827544212341309, + "learning_rate": 4.2125e-05, + "loss": 0.3593, + "step": 5733 + }, + { + "epoch": 15.752747252747252, + "grad_norm": 9.952807426452637, + "learning_rate": 4.212362637362637e-05, + "loss": 0.2436, + "step": 5734 + }, + { + "epoch": 15.755494505494505, + "grad_norm": 10.324817657470703, + "learning_rate": 4.212225274725275e-05, + "loss": 0.3815, + "step": 5735 + }, + { + "epoch": 15.758241758241759, + "grad_norm": 20.379112243652344, + "learning_rate": 4.2120879120879124e-05, + "loss": 0.8092, + "step": 5736 + }, + { + "epoch": 15.760989010989011, + "grad_norm": 6.845232009887695, + "learning_rate": 4.2119505494505494e-05, + "loss": 0.2065, + "step": 5737 + }, + { + "epoch": 15.763736263736263, + "grad_norm": 13.128222465515137, + "learning_rate": 4.211813186813187e-05, + "loss": 0.5571, + "step": 5738 + }, + { + "epoch": 15.766483516483516, + "grad_norm": 20.98304557800293, + "learning_rate": 4.211675824175824e-05, + "loss": 1.2692, + "step": 5739 + }, + { + "epoch": 15.76923076923077, + "grad_norm": 6.048780918121338, + "learning_rate": 4.211538461538462e-05, + "loss": 0.1335, + "step": 5740 + }, + { + "epoch": 15.771978021978022, + "grad_norm": 19.52567481994629, + "learning_rate": 4.2114010989010995e-05, + "loss": 0.8758, + "step": 5741 + }, + { + "epoch": 15.774725274725276, + "grad_norm": 13.701390266418457, + "learning_rate": 4.2112637362637365e-05, + "loss": 0.3656, + "step": 5742 + }, + { + "epoch": 15.777472527472527, + "grad_norm": 10.650725364685059, + "learning_rate": 4.211126373626374e-05, + "loss": 0.3438, + "step": 5743 + }, + { + "epoch": 15.780219780219781, + "grad_norm": 13.4364595413208, + "learning_rate": 4.210989010989011e-05, + "loss": 0.5277, + "step": 5744 + }, + { + "epoch": 15.782967032967033, + "grad_norm": 11.155928611755371, + "learning_rate": 4.210851648351649e-05, + "loss": 0.5295, + "step": 5745 + }, + { + "epoch": 15.785714285714286, + "grad_norm": 5.794440746307373, + "learning_rate": 4.210714285714286e-05, + "loss": 0.1039, + "step": 5746 + }, + { + "epoch": 15.788461538461538, + "grad_norm": 15.641450881958008, + "learning_rate": 4.2105769230769235e-05, + "loss": 0.5273, + "step": 5747 + }, + { + "epoch": 15.791208791208792, + "grad_norm": 14.744270324707031, + "learning_rate": 4.2104395604395605e-05, + "loss": 0.4575, + "step": 5748 + }, + { + "epoch": 15.793956043956044, + "grad_norm": 11.89287281036377, + "learning_rate": 4.2103021978021975e-05, + "loss": 0.4353, + "step": 5749 + }, + { + "epoch": 15.796703296703297, + "grad_norm": 14.082525253295898, + "learning_rate": 4.210164835164835e-05, + "loss": 0.3656, + "step": 5750 + }, + { + "epoch": 15.79945054945055, + "grad_norm": 19.597192764282227, + "learning_rate": 4.210027472527473e-05, + "loss": 0.9019, + "step": 5751 + }, + { + "epoch": 15.802197802197803, + "grad_norm": 11.524432182312012, + "learning_rate": 4.20989010989011e-05, + "loss": 0.38, + "step": 5752 + }, + { + "epoch": 15.804945054945055, + "grad_norm": 10.388084411621094, + "learning_rate": 4.2097527472527476e-05, + "loss": 0.371, + "step": 5753 + }, + { + "epoch": 15.807692307692308, + "grad_norm": 16.039255142211914, + "learning_rate": 4.2096153846153846e-05, + "loss": 0.6378, + "step": 5754 + }, + { + "epoch": 15.81043956043956, + "grad_norm": 16.23798179626465, + "learning_rate": 4.209478021978022e-05, + "loss": 0.7291, + "step": 5755 + }, + { + "epoch": 15.813186813186814, + "grad_norm": 18.64546775817871, + "learning_rate": 4.20934065934066e-05, + "loss": 0.4514, + "step": 5756 + }, + { + "epoch": 15.815934065934066, + "grad_norm": 13.312969207763672, + "learning_rate": 4.209203296703297e-05, + "loss": 0.3007, + "step": 5757 + }, + { + "epoch": 15.81868131868132, + "grad_norm": 7.915477275848389, + "learning_rate": 4.2090659340659346e-05, + "loss": 0.2516, + "step": 5758 + }, + { + "epoch": 15.821428571428571, + "grad_norm": 7.975748062133789, + "learning_rate": 4.2089285714285716e-05, + "loss": 0.2257, + "step": 5759 + }, + { + "epoch": 15.824175824175825, + "grad_norm": 16.69498634338379, + "learning_rate": 4.208791208791209e-05, + "loss": 0.6657, + "step": 5760 + }, + { + "epoch": 15.826923076923077, + "grad_norm": 12.12575912475586, + "learning_rate": 4.208653846153846e-05, + "loss": 0.6083, + "step": 5761 + }, + { + "epoch": 15.82967032967033, + "grad_norm": 19.418092727661133, + "learning_rate": 4.208516483516484e-05, + "loss": 0.6725, + "step": 5762 + }, + { + "epoch": 15.832417582417582, + "grad_norm": 10.618709564208984, + "learning_rate": 4.208379120879121e-05, + "loss": 0.3498, + "step": 5763 + }, + { + "epoch": 15.835164835164836, + "grad_norm": 13.102225303649902, + "learning_rate": 4.208241758241758e-05, + "loss": 0.3944, + "step": 5764 + }, + { + "epoch": 15.837912087912088, + "grad_norm": 8.889737129211426, + "learning_rate": 4.2081043956043957e-05, + "loss": 0.2853, + "step": 5765 + }, + { + "epoch": 15.840659340659341, + "grad_norm": 14.24969482421875, + "learning_rate": 4.207967032967033e-05, + "loss": 0.4597, + "step": 5766 + }, + { + "epoch": 15.843406593406593, + "grad_norm": 18.146739959716797, + "learning_rate": 4.20782967032967e-05, + "loss": 0.8316, + "step": 5767 + }, + { + "epoch": 15.846153846153847, + "grad_norm": 10.23621654510498, + "learning_rate": 4.207692307692308e-05, + "loss": 0.3887, + "step": 5768 + }, + { + "epoch": 15.848901098901099, + "grad_norm": 9.153021812438965, + "learning_rate": 4.207554945054945e-05, + "loss": 0.2555, + "step": 5769 + }, + { + "epoch": 15.851648351648352, + "grad_norm": 9.402667999267578, + "learning_rate": 4.207417582417583e-05, + "loss": 0.2325, + "step": 5770 + }, + { + "epoch": 15.854395604395604, + "grad_norm": 8.759961128234863, + "learning_rate": 4.2072802197802204e-05, + "loss": 0.2972, + "step": 5771 + }, + { + "epoch": 15.857142857142858, + "grad_norm": 11.059054374694824, + "learning_rate": 4.2071428571428574e-05, + "loss": 0.2608, + "step": 5772 + }, + { + "epoch": 15.85989010989011, + "grad_norm": 13.723063468933105, + "learning_rate": 4.207005494505495e-05, + "loss": 0.4478, + "step": 5773 + }, + { + "epoch": 15.862637362637363, + "grad_norm": 12.827160835266113, + "learning_rate": 4.206868131868132e-05, + "loss": 0.4417, + "step": 5774 + }, + { + "epoch": 15.865384615384615, + "grad_norm": 8.152119636535645, + "learning_rate": 4.20673076923077e-05, + "loss": 0.2357, + "step": 5775 + }, + { + "epoch": 15.868131868131869, + "grad_norm": 16.55063819885254, + "learning_rate": 4.206593406593407e-05, + "loss": 0.5855, + "step": 5776 + }, + { + "epoch": 15.87087912087912, + "grad_norm": 12.137316703796387, + "learning_rate": 4.2064560439560444e-05, + "loss": 0.4485, + "step": 5777 + }, + { + "epoch": 15.873626373626374, + "grad_norm": 17.012012481689453, + "learning_rate": 4.2063186813186814e-05, + "loss": 0.7413, + "step": 5778 + }, + { + "epoch": 15.876373626373626, + "grad_norm": 8.237137794494629, + "learning_rate": 4.2061813186813184e-05, + "loss": 0.2839, + "step": 5779 + }, + { + "epoch": 15.87912087912088, + "grad_norm": 10.30688762664795, + "learning_rate": 4.206043956043956e-05, + "loss": 0.2714, + "step": 5780 + }, + { + "epoch": 15.881868131868131, + "grad_norm": 12.819817543029785, + "learning_rate": 4.205906593406594e-05, + "loss": 0.5085, + "step": 5781 + }, + { + "epoch": 15.884615384615385, + "grad_norm": 16.11441993713379, + "learning_rate": 4.205769230769231e-05, + "loss": 0.794, + "step": 5782 + }, + { + "epoch": 15.887362637362637, + "grad_norm": 16.161409378051758, + "learning_rate": 4.2056318681318685e-05, + "loss": 0.4831, + "step": 5783 + }, + { + "epoch": 15.89010989010989, + "grad_norm": 15.400493621826172, + "learning_rate": 4.2054945054945055e-05, + "loss": 0.5331, + "step": 5784 + }, + { + "epoch": 15.892857142857142, + "grad_norm": 18.987186431884766, + "learning_rate": 4.205357142857143e-05, + "loss": 1.0288, + "step": 5785 + }, + { + "epoch": 15.895604395604396, + "grad_norm": 14.598334312438965, + "learning_rate": 4.205219780219781e-05, + "loss": 0.6132, + "step": 5786 + }, + { + "epoch": 15.898351648351648, + "grad_norm": 7.181898593902588, + "learning_rate": 4.205082417582418e-05, + "loss": 0.22, + "step": 5787 + }, + { + "epoch": 15.901098901098901, + "grad_norm": 15.132073402404785, + "learning_rate": 4.2049450549450555e-05, + "loss": 0.6683, + "step": 5788 + }, + { + "epoch": 15.903846153846153, + "grad_norm": 14.935481071472168, + "learning_rate": 4.2048076923076925e-05, + "loss": 0.3611, + "step": 5789 + }, + { + "epoch": 15.906593406593407, + "grad_norm": 20.758037567138672, + "learning_rate": 4.20467032967033e-05, + "loss": 0.5733, + "step": 5790 + }, + { + "epoch": 15.909340659340659, + "grad_norm": 10.245701789855957, + "learning_rate": 4.204532967032967e-05, + "loss": 0.3053, + "step": 5791 + }, + { + "epoch": 15.912087912087912, + "grad_norm": 7.627615928649902, + "learning_rate": 4.204395604395605e-05, + "loss": 0.2047, + "step": 5792 + }, + { + "epoch": 15.914835164835164, + "grad_norm": 10.728974342346191, + "learning_rate": 4.204258241758242e-05, + "loss": 0.4563, + "step": 5793 + }, + { + "epoch": 15.917582417582418, + "grad_norm": 16.37685203552246, + "learning_rate": 4.204120879120879e-05, + "loss": 0.5137, + "step": 5794 + }, + { + "epoch": 15.92032967032967, + "grad_norm": 17.46309471130371, + "learning_rate": 4.2039835164835165e-05, + "loss": 0.841, + "step": 5795 + }, + { + "epoch": 15.923076923076923, + "grad_norm": 12.89869499206543, + "learning_rate": 4.203846153846154e-05, + "loss": 0.4037, + "step": 5796 + }, + { + "epoch": 15.925824175824175, + "grad_norm": 10.982382774353027, + "learning_rate": 4.203708791208791e-05, + "loss": 0.4438, + "step": 5797 + }, + { + "epoch": 15.928571428571429, + "grad_norm": 15.70253849029541, + "learning_rate": 4.203571428571429e-05, + "loss": 0.4864, + "step": 5798 + }, + { + "epoch": 15.93131868131868, + "grad_norm": 7.107475280761719, + "learning_rate": 4.203434065934066e-05, + "loss": 0.2676, + "step": 5799 + }, + { + "epoch": 15.934065934065934, + "grad_norm": 13.282637596130371, + "learning_rate": 4.2032967032967036e-05, + "loss": 0.3135, + "step": 5800 + }, + { + "epoch": 15.936813186813186, + "grad_norm": 12.503633499145508, + "learning_rate": 4.203159340659341e-05, + "loss": 0.4292, + "step": 5801 + }, + { + "epoch": 15.93956043956044, + "grad_norm": 13.045930862426758, + "learning_rate": 4.203021978021978e-05, + "loss": 0.4598, + "step": 5802 + }, + { + "epoch": 15.942307692307692, + "grad_norm": 9.003716468811035, + "learning_rate": 4.202884615384616e-05, + "loss": 0.2778, + "step": 5803 + }, + { + "epoch": 15.945054945054945, + "grad_norm": 16.639135360717773, + "learning_rate": 4.202747252747253e-05, + "loss": 0.6833, + "step": 5804 + }, + { + "epoch": 15.947802197802197, + "grad_norm": 16.269149780273438, + "learning_rate": 4.2026098901098906e-05, + "loss": 0.5501, + "step": 5805 + }, + { + "epoch": 15.95054945054945, + "grad_norm": 25.952856063842773, + "learning_rate": 4.2024725274725276e-05, + "loss": 1.0805, + "step": 5806 + }, + { + "epoch": 15.953296703296703, + "grad_norm": 9.721183776855469, + "learning_rate": 4.202335164835165e-05, + "loss": 0.3528, + "step": 5807 + }, + { + "epoch": 15.956043956043956, + "grad_norm": 8.667040824890137, + "learning_rate": 4.202197802197802e-05, + "loss": 0.2305, + "step": 5808 + }, + { + "epoch": 15.958791208791208, + "grad_norm": 4.729969024658203, + "learning_rate": 4.202060439560439e-05, + "loss": 0.1753, + "step": 5809 + }, + { + "epoch": 15.961538461538462, + "grad_norm": 8.621582984924316, + "learning_rate": 4.201923076923077e-05, + "loss": 0.2077, + "step": 5810 + }, + { + "epoch": 15.964285714285714, + "grad_norm": 15.093189239501953, + "learning_rate": 4.201785714285714e-05, + "loss": 0.372, + "step": 5811 + }, + { + "epoch": 15.967032967032967, + "grad_norm": 13.553594589233398, + "learning_rate": 4.201648351648352e-05, + "loss": 0.4677, + "step": 5812 + }, + { + "epoch": 15.969780219780219, + "grad_norm": 13.752274513244629, + "learning_rate": 4.2015109890109894e-05, + "loss": 0.543, + "step": 5813 + }, + { + "epoch": 15.972527472527473, + "grad_norm": 19.968238830566406, + "learning_rate": 4.2013736263736264e-05, + "loss": 0.8783, + "step": 5814 + }, + { + "epoch": 15.975274725274724, + "grad_norm": 16.869585037231445, + "learning_rate": 4.201236263736264e-05, + "loss": 0.5445, + "step": 5815 + }, + { + "epoch": 15.978021978021978, + "grad_norm": 14.249378204345703, + "learning_rate": 4.201098901098901e-05, + "loss": 0.509, + "step": 5816 + }, + { + "epoch": 15.98076923076923, + "grad_norm": 7.42507791519165, + "learning_rate": 4.200961538461539e-05, + "loss": 0.229, + "step": 5817 + }, + { + "epoch": 15.983516483516484, + "grad_norm": 13.50236701965332, + "learning_rate": 4.2008241758241764e-05, + "loss": 0.5109, + "step": 5818 + }, + { + "epoch": 15.986263736263737, + "grad_norm": 16.1909122467041, + "learning_rate": 4.2006868131868134e-05, + "loss": 0.736, + "step": 5819 + }, + { + "epoch": 15.989010989010989, + "grad_norm": 13.979779243469238, + "learning_rate": 4.200549450549451e-05, + "loss": 0.3397, + "step": 5820 + }, + { + "epoch": 15.991758241758241, + "grad_norm": 15.521087646484375, + "learning_rate": 4.200412087912088e-05, + "loss": 0.5516, + "step": 5821 + }, + { + "epoch": 15.994505494505495, + "grad_norm": 8.323432922363281, + "learning_rate": 4.200274725274726e-05, + "loss": 0.2345, + "step": 5822 + }, + { + "epoch": 15.997252747252748, + "grad_norm": 10.771656036376953, + "learning_rate": 4.200137362637363e-05, + "loss": 0.2682, + "step": 5823 + }, + { + "epoch": 16.0, + "grad_norm": 29.253494262695312, + "learning_rate": 4.2e-05, + "loss": 0.787, + "step": 5824 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.7851239669421488, + "eval_f1": 0.7813439978907047, + "eval_f1_DuraRiadoRio_64x64": 0.7193675889328063, + "eval_f1_Mole_64x64": 0.8250825082508251, + "eval_f1_Quebrado_64x64": 0.831081081081081, + "eval_f1_RiadoRio_64x64": 0.7127272727272728, + "eval_f1_RioFechado_64x64": 0.8184615384615385, + "eval_loss": 0.7087234258651733, + "eval_precision": 0.7907520714826595, + "eval_precision_DuraRiadoRio_64x64": 0.8348623853211009, + "eval_precision_Mole_64x64": 0.7861635220125787, + "eval_precision_Quebrado_64x64": 0.8092105263157895, + "eval_precision_RiadoRio_64x64": 0.7967479674796748, + "eval_precision_RioFechado_64x64": 0.726775956284153, + "eval_recall": 0.7871046454163577, + "eval_recall_DuraRiadoRio_64x64": 0.6319444444444444, + "eval_recall_Mole_64x64": 0.8680555555555556, + "eval_recall_Quebrado_64x64": 0.8541666666666666, + "eval_recall_RiadoRio_64x64": 0.6447368421052632, + "eval_recall_RioFechado_64x64": 0.9366197183098591, + "eval_runtime": 1.7896, + "eval_samples_per_second": 405.675, + "eval_steps_per_second": 25.704, + "step": 5824 + }, + { + "epoch": 16.002747252747252, + "grad_norm": 17.647598266601562, + "learning_rate": 4.1998626373626374e-05, + "loss": 0.6937, + "step": 5825 + }, + { + "epoch": 16.005494505494507, + "grad_norm": 10.381742477416992, + "learning_rate": 4.1997252747252744e-05, + "loss": 0.2444, + "step": 5826 + }, + { + "epoch": 16.00824175824176, + "grad_norm": 7.848204612731934, + "learning_rate": 4.199587912087912e-05, + "loss": 0.3641, + "step": 5827 + }, + { + "epoch": 16.01098901098901, + "grad_norm": 7.120391368865967, + "learning_rate": 4.19945054945055e-05, + "loss": 0.1787, + "step": 5828 + }, + { + "epoch": 16.013736263736263, + "grad_norm": 15.07051944732666, + "learning_rate": 4.199313186813187e-05, + "loss": 0.5153, + "step": 5829 + }, + { + "epoch": 16.016483516483518, + "grad_norm": 14.015637397766113, + "learning_rate": 4.1991758241758245e-05, + "loss": 0.5102, + "step": 5830 + }, + { + "epoch": 16.01923076923077, + "grad_norm": 8.256819725036621, + "learning_rate": 4.1990384615384615e-05, + "loss": 0.1977, + "step": 5831 + }, + { + "epoch": 16.021978021978022, + "grad_norm": 15.859237670898438, + "learning_rate": 4.198901098901099e-05, + "loss": 0.6583, + "step": 5832 + }, + { + "epoch": 16.024725274725274, + "grad_norm": 3.7443981170654297, + "learning_rate": 4.198763736263737e-05, + "loss": 0.1304, + "step": 5833 + }, + { + "epoch": 16.02747252747253, + "grad_norm": 13.35325813293457, + "learning_rate": 4.198626373626374e-05, + "loss": 0.4071, + "step": 5834 + }, + { + "epoch": 16.03021978021978, + "grad_norm": 18.979999542236328, + "learning_rate": 4.1984890109890115e-05, + "loss": 0.7148, + "step": 5835 + }, + { + "epoch": 16.032967032967033, + "grad_norm": 14.16212272644043, + "learning_rate": 4.1983516483516485e-05, + "loss": 0.606, + "step": 5836 + }, + { + "epoch": 16.035714285714285, + "grad_norm": 6.333917140960693, + "learning_rate": 4.198214285714286e-05, + "loss": 0.1406, + "step": 5837 + }, + { + "epoch": 16.03846153846154, + "grad_norm": 17.277719497680664, + "learning_rate": 4.198076923076923e-05, + "loss": 0.4735, + "step": 5838 + }, + { + "epoch": 16.041208791208792, + "grad_norm": 9.472940444946289, + "learning_rate": 4.19793956043956e-05, + "loss": 0.2676, + "step": 5839 + }, + { + "epoch": 16.043956043956044, + "grad_norm": 12.600552558898926, + "learning_rate": 4.197802197802198e-05, + "loss": 0.3047, + "step": 5840 + }, + { + "epoch": 16.046703296703296, + "grad_norm": 17.404998779296875, + "learning_rate": 4.197664835164835e-05, + "loss": 0.6911, + "step": 5841 + }, + { + "epoch": 16.04945054945055, + "grad_norm": 7.020667552947998, + "learning_rate": 4.1975274725274726e-05, + "loss": 0.2033, + "step": 5842 + }, + { + "epoch": 16.052197802197803, + "grad_norm": 6.083189487457275, + "learning_rate": 4.19739010989011e-05, + "loss": 0.1811, + "step": 5843 + }, + { + "epoch": 16.054945054945055, + "grad_norm": 10.969232559204102, + "learning_rate": 4.197252747252747e-05, + "loss": 0.2855, + "step": 5844 + }, + { + "epoch": 16.057692307692307, + "grad_norm": 12.764274597167969, + "learning_rate": 4.197115384615385e-05, + "loss": 0.3099, + "step": 5845 + }, + { + "epoch": 16.060439560439562, + "grad_norm": 12.414701461791992, + "learning_rate": 4.196978021978022e-05, + "loss": 0.4252, + "step": 5846 + }, + { + "epoch": 16.063186813186814, + "grad_norm": 4.412341117858887, + "learning_rate": 4.1968406593406596e-05, + "loss": 0.1086, + "step": 5847 + }, + { + "epoch": 16.065934065934066, + "grad_norm": 9.786989212036133, + "learning_rate": 4.196703296703297e-05, + "loss": 0.2412, + "step": 5848 + }, + { + "epoch": 16.068681318681318, + "grad_norm": 9.68526840209961, + "learning_rate": 4.196565934065934e-05, + "loss": 0.4123, + "step": 5849 + }, + { + "epoch": 16.071428571428573, + "grad_norm": 8.838065147399902, + "learning_rate": 4.196428571428572e-05, + "loss": 0.4082, + "step": 5850 + }, + { + "epoch": 16.074175824175825, + "grad_norm": 11.393126487731934, + "learning_rate": 4.196291208791209e-05, + "loss": 0.2357, + "step": 5851 + }, + { + "epoch": 16.076923076923077, + "grad_norm": 9.21130657196045, + "learning_rate": 4.196153846153847e-05, + "loss": 0.2452, + "step": 5852 + }, + { + "epoch": 16.07967032967033, + "grad_norm": 16.082443237304688, + "learning_rate": 4.196016483516484e-05, + "loss": 0.5748, + "step": 5853 + }, + { + "epoch": 16.082417582417584, + "grad_norm": 17.675960540771484, + "learning_rate": 4.195879120879121e-05, + "loss": 0.6882, + "step": 5854 + }, + { + "epoch": 16.085164835164836, + "grad_norm": 18.108476638793945, + "learning_rate": 4.1957417582417583e-05, + "loss": 0.7068, + "step": 5855 + }, + { + "epoch": 16.087912087912088, + "grad_norm": 16.695283889770508, + "learning_rate": 4.1956043956043953e-05, + "loss": 0.4255, + "step": 5856 + }, + { + "epoch": 16.09065934065934, + "grad_norm": 22.058998107910156, + "learning_rate": 4.195467032967033e-05, + "loss": 0.8685, + "step": 5857 + }, + { + "epoch": 16.093406593406595, + "grad_norm": 14.00169563293457, + "learning_rate": 4.195329670329671e-05, + "loss": 0.5488, + "step": 5858 + }, + { + "epoch": 16.096153846153847, + "grad_norm": 17.44659996032715, + "learning_rate": 4.195192307692308e-05, + "loss": 0.5566, + "step": 5859 + }, + { + "epoch": 16.0989010989011, + "grad_norm": 12.992405891418457, + "learning_rate": 4.1950549450549454e-05, + "loss": 0.3216, + "step": 5860 + }, + { + "epoch": 16.10164835164835, + "grad_norm": 13.017400741577148, + "learning_rate": 4.1949175824175824e-05, + "loss": 0.2005, + "step": 5861 + }, + { + "epoch": 16.104395604395606, + "grad_norm": 18.55842399597168, + "learning_rate": 4.19478021978022e-05, + "loss": 0.6889, + "step": 5862 + }, + { + "epoch": 16.107142857142858, + "grad_norm": 13.778949737548828, + "learning_rate": 4.194642857142858e-05, + "loss": 0.4673, + "step": 5863 + }, + { + "epoch": 16.10989010989011, + "grad_norm": 20.10319709777832, + "learning_rate": 4.194505494505495e-05, + "loss": 0.8734, + "step": 5864 + }, + { + "epoch": 16.11263736263736, + "grad_norm": 8.862082481384277, + "learning_rate": 4.1943681318681324e-05, + "loss": 0.2219, + "step": 5865 + }, + { + "epoch": 16.115384615384617, + "grad_norm": 19.378807067871094, + "learning_rate": 4.1942307692307694e-05, + "loss": 0.8659, + "step": 5866 + }, + { + "epoch": 16.11813186813187, + "grad_norm": 14.99181842803955, + "learning_rate": 4.1940934065934064e-05, + "loss": 0.5978, + "step": 5867 + }, + { + "epoch": 16.12087912087912, + "grad_norm": 15.31252384185791, + "learning_rate": 4.193956043956044e-05, + "loss": 0.6292, + "step": 5868 + }, + { + "epoch": 16.123626373626372, + "grad_norm": 11.870076179504395, + "learning_rate": 4.193818681318681e-05, + "loss": 0.6574, + "step": 5869 + }, + { + "epoch": 16.126373626373628, + "grad_norm": 6.926461696624756, + "learning_rate": 4.193681318681319e-05, + "loss": 0.1751, + "step": 5870 + }, + { + "epoch": 16.12912087912088, + "grad_norm": 13.05871868133545, + "learning_rate": 4.193543956043956e-05, + "loss": 0.5412, + "step": 5871 + }, + { + "epoch": 16.13186813186813, + "grad_norm": 10.853994369506836, + "learning_rate": 4.1934065934065935e-05, + "loss": 0.3345, + "step": 5872 + }, + { + "epoch": 16.134615384615383, + "grad_norm": 13.107965469360352, + "learning_rate": 4.193269230769231e-05, + "loss": 0.4031, + "step": 5873 + }, + { + "epoch": 16.13736263736264, + "grad_norm": 10.446701049804688, + "learning_rate": 4.193131868131868e-05, + "loss": 0.5306, + "step": 5874 + }, + { + "epoch": 16.14010989010989, + "grad_norm": 9.532068252563477, + "learning_rate": 4.192994505494506e-05, + "loss": 0.3075, + "step": 5875 + }, + { + "epoch": 16.142857142857142, + "grad_norm": 15.914753913879395, + "learning_rate": 4.192857142857143e-05, + "loss": 0.5459, + "step": 5876 + }, + { + "epoch": 16.145604395604394, + "grad_norm": 4.8428215980529785, + "learning_rate": 4.1927197802197805e-05, + "loss": 0.1246, + "step": 5877 + }, + { + "epoch": 16.14835164835165, + "grad_norm": 6.188720226287842, + "learning_rate": 4.192582417582418e-05, + "loss": 0.2212, + "step": 5878 + }, + { + "epoch": 16.1510989010989, + "grad_norm": 18.62676239013672, + "learning_rate": 4.192445054945055e-05, + "loss": 0.8585, + "step": 5879 + }, + { + "epoch": 16.153846153846153, + "grad_norm": 10.989882469177246, + "learning_rate": 4.192307692307693e-05, + "loss": 0.286, + "step": 5880 + }, + { + "epoch": 16.156593406593405, + "grad_norm": 9.614006042480469, + "learning_rate": 4.19217032967033e-05, + "loss": 0.3028, + "step": 5881 + }, + { + "epoch": 16.15934065934066, + "grad_norm": 13.250431060791016, + "learning_rate": 4.192032967032967e-05, + "loss": 0.4324, + "step": 5882 + }, + { + "epoch": 16.162087912087912, + "grad_norm": 12.720958709716797, + "learning_rate": 4.1918956043956046e-05, + "loss": 0.3822, + "step": 5883 + }, + { + "epoch": 16.164835164835164, + "grad_norm": 7.972259998321533, + "learning_rate": 4.1917582417582416e-05, + "loss": 0.2319, + "step": 5884 + }, + { + "epoch": 16.167582417582416, + "grad_norm": 12.992009162902832, + "learning_rate": 4.191620879120879e-05, + "loss": 0.563, + "step": 5885 + }, + { + "epoch": 16.17032967032967, + "grad_norm": 9.284134864807129, + "learning_rate": 4.191483516483516e-05, + "loss": 0.2766, + "step": 5886 + }, + { + "epoch": 16.173076923076923, + "grad_norm": 6.66955041885376, + "learning_rate": 4.191346153846154e-05, + "loss": 0.2253, + "step": 5887 + }, + { + "epoch": 16.175824175824175, + "grad_norm": 13.202445030212402, + "learning_rate": 4.1912087912087916e-05, + "loss": 0.3512, + "step": 5888 + }, + { + "epoch": 16.178571428571427, + "grad_norm": 9.086142539978027, + "learning_rate": 4.1910714285714286e-05, + "loss": 0.3641, + "step": 5889 + }, + { + "epoch": 16.181318681318682, + "grad_norm": 14.609058380126953, + "learning_rate": 4.190934065934066e-05, + "loss": 0.4245, + "step": 5890 + }, + { + "epoch": 16.184065934065934, + "grad_norm": 11.216815948486328, + "learning_rate": 4.190796703296703e-05, + "loss": 0.2936, + "step": 5891 + }, + { + "epoch": 16.186813186813186, + "grad_norm": 10.109306335449219, + "learning_rate": 4.190659340659341e-05, + "loss": 0.3282, + "step": 5892 + }, + { + "epoch": 16.189560439560438, + "grad_norm": 12.070108413696289, + "learning_rate": 4.1905219780219786e-05, + "loss": 0.4687, + "step": 5893 + }, + { + "epoch": 16.192307692307693, + "grad_norm": 9.108137130737305, + "learning_rate": 4.1903846153846156e-05, + "loss": 0.2419, + "step": 5894 + }, + { + "epoch": 16.195054945054945, + "grad_norm": 16.483325958251953, + "learning_rate": 4.190247252747253e-05, + "loss": 0.5574, + "step": 5895 + }, + { + "epoch": 16.197802197802197, + "grad_norm": 17.500728607177734, + "learning_rate": 4.19010989010989e-05, + "loss": 0.7824, + "step": 5896 + }, + { + "epoch": 16.20054945054945, + "grad_norm": 14.804428100585938, + "learning_rate": 4.189972527472527e-05, + "loss": 0.372, + "step": 5897 + }, + { + "epoch": 16.203296703296704, + "grad_norm": 12.84751033782959, + "learning_rate": 4.189835164835165e-05, + "loss": 0.4609, + "step": 5898 + }, + { + "epoch": 16.206043956043956, + "grad_norm": 9.644209861755371, + "learning_rate": 4.189697802197802e-05, + "loss": 0.2192, + "step": 5899 + }, + { + "epoch": 16.208791208791208, + "grad_norm": 4.983716011047363, + "learning_rate": 4.18956043956044e-05, + "loss": 0.1361, + "step": 5900 + }, + { + "epoch": 16.21153846153846, + "grad_norm": 6.4151082038879395, + "learning_rate": 4.189423076923077e-05, + "loss": 0.1806, + "step": 5901 + }, + { + "epoch": 16.214285714285715, + "grad_norm": 16.76649284362793, + "learning_rate": 4.1892857142857144e-05, + "loss": 0.4508, + "step": 5902 + }, + { + "epoch": 16.217032967032967, + "grad_norm": 10.941665649414062, + "learning_rate": 4.189148351648352e-05, + "loss": 0.2215, + "step": 5903 + }, + { + "epoch": 16.21978021978022, + "grad_norm": 12.702791213989258, + "learning_rate": 4.189010989010989e-05, + "loss": 0.3455, + "step": 5904 + }, + { + "epoch": 16.22252747252747, + "grad_norm": 11.305005073547363, + "learning_rate": 4.188873626373627e-05, + "loss": 0.3728, + "step": 5905 + }, + { + "epoch": 16.225274725274726, + "grad_norm": 19.586135864257812, + "learning_rate": 4.188736263736264e-05, + "loss": 0.6578, + "step": 5906 + }, + { + "epoch": 16.228021978021978, + "grad_norm": 18.959259033203125, + "learning_rate": 4.1885989010989014e-05, + "loss": 0.7316, + "step": 5907 + }, + { + "epoch": 16.23076923076923, + "grad_norm": 18.48542594909668, + "learning_rate": 4.188461538461539e-05, + "loss": 0.8493, + "step": 5908 + }, + { + "epoch": 16.233516483516482, + "grad_norm": 9.139892578125, + "learning_rate": 4.188324175824176e-05, + "loss": 0.3418, + "step": 5909 + }, + { + "epoch": 16.236263736263737, + "grad_norm": 4.970314979553223, + "learning_rate": 4.188186813186814e-05, + "loss": 0.1921, + "step": 5910 + }, + { + "epoch": 16.23901098901099, + "grad_norm": 11.465696334838867, + "learning_rate": 4.188049450549451e-05, + "loss": 0.418, + "step": 5911 + }, + { + "epoch": 16.24175824175824, + "grad_norm": 11.663296699523926, + "learning_rate": 4.187912087912088e-05, + "loss": 0.4788, + "step": 5912 + }, + { + "epoch": 16.244505494505493, + "grad_norm": 10.652912139892578, + "learning_rate": 4.1877747252747255e-05, + "loss": 0.285, + "step": 5913 + }, + { + "epoch": 16.247252747252748, + "grad_norm": 21.7883243560791, + "learning_rate": 4.1876373626373625e-05, + "loss": 0.8402, + "step": 5914 + }, + { + "epoch": 16.25, + "grad_norm": 13.354711532592773, + "learning_rate": 4.1875e-05, + "loss": 0.6005, + "step": 5915 + }, + { + "epoch": 16.252747252747252, + "grad_norm": 11.5228910446167, + "learning_rate": 4.187362637362637e-05, + "loss": 0.308, + "step": 5916 + }, + { + "epoch": 16.255494505494504, + "grad_norm": 7.203298091888428, + "learning_rate": 4.187225274725275e-05, + "loss": 0.1539, + "step": 5917 + }, + { + "epoch": 16.25824175824176, + "grad_norm": 10.320595741271973, + "learning_rate": 4.1870879120879125e-05, + "loss": 0.3219, + "step": 5918 + }, + { + "epoch": 16.26098901098901, + "grad_norm": 15.960814476013184, + "learning_rate": 4.1869505494505495e-05, + "loss": 0.7899, + "step": 5919 + }, + { + "epoch": 16.263736263736263, + "grad_norm": 13.312347412109375, + "learning_rate": 4.186813186813187e-05, + "loss": 0.4299, + "step": 5920 + }, + { + "epoch": 16.266483516483518, + "grad_norm": 6.500017166137695, + "learning_rate": 4.186675824175824e-05, + "loss": 0.1901, + "step": 5921 + }, + { + "epoch": 16.26923076923077, + "grad_norm": 15.377662658691406, + "learning_rate": 4.186538461538462e-05, + "loss": 0.4638, + "step": 5922 + }, + { + "epoch": 16.271978021978022, + "grad_norm": 15.524810791015625, + "learning_rate": 4.1864010989010995e-05, + "loss": 0.4518, + "step": 5923 + }, + { + "epoch": 16.274725274725274, + "grad_norm": 15.289154052734375, + "learning_rate": 4.1862637362637365e-05, + "loss": 0.4805, + "step": 5924 + }, + { + "epoch": 16.27747252747253, + "grad_norm": 12.60490894317627, + "learning_rate": 4.186126373626374e-05, + "loss": 0.578, + "step": 5925 + }, + { + "epoch": 16.28021978021978, + "grad_norm": 10.946321487426758, + "learning_rate": 4.185989010989011e-05, + "loss": 0.2747, + "step": 5926 + }, + { + "epoch": 16.282967032967033, + "grad_norm": 11.415889739990234, + "learning_rate": 4.185851648351648e-05, + "loss": 0.4398, + "step": 5927 + }, + { + "epoch": 16.285714285714285, + "grad_norm": 11.452018737792969, + "learning_rate": 4.185714285714286e-05, + "loss": 0.3576, + "step": 5928 + }, + { + "epoch": 16.28846153846154, + "grad_norm": 20.568161010742188, + "learning_rate": 4.185576923076923e-05, + "loss": 0.8743, + "step": 5929 + }, + { + "epoch": 16.291208791208792, + "grad_norm": 12.925742149353027, + "learning_rate": 4.1854395604395606e-05, + "loss": 0.4136, + "step": 5930 + }, + { + "epoch": 16.293956043956044, + "grad_norm": 10.597430229187012, + "learning_rate": 4.1853021978021976e-05, + "loss": 0.2421, + "step": 5931 + }, + { + "epoch": 16.296703296703296, + "grad_norm": 10.251138687133789, + "learning_rate": 4.185164835164835e-05, + "loss": 0.2527, + "step": 5932 + }, + { + "epoch": 16.29945054945055, + "grad_norm": 11.911768913269043, + "learning_rate": 4.185027472527473e-05, + "loss": 0.3239, + "step": 5933 + }, + { + "epoch": 16.302197802197803, + "grad_norm": 14.333101272583008, + "learning_rate": 4.18489010989011e-05, + "loss": 0.6269, + "step": 5934 + }, + { + "epoch": 16.304945054945055, + "grad_norm": 15.959571838378906, + "learning_rate": 4.1847527472527476e-05, + "loss": 0.4477, + "step": 5935 + }, + { + "epoch": 16.307692307692307, + "grad_norm": 11.723847389221191, + "learning_rate": 4.1846153846153846e-05, + "loss": 0.5096, + "step": 5936 + }, + { + "epoch": 16.310439560439562, + "grad_norm": 15.717673301696777, + "learning_rate": 4.184478021978022e-05, + "loss": 0.5985, + "step": 5937 + }, + { + "epoch": 16.313186813186814, + "grad_norm": 9.092733383178711, + "learning_rate": 4.18434065934066e-05, + "loss": 0.1999, + "step": 5938 + }, + { + "epoch": 16.315934065934066, + "grad_norm": 13.197003364562988, + "learning_rate": 4.184203296703297e-05, + "loss": 0.4188, + "step": 5939 + }, + { + "epoch": 16.318681318681318, + "grad_norm": 15.467935562133789, + "learning_rate": 4.184065934065935e-05, + "loss": 0.5519, + "step": 5940 + }, + { + "epoch": 16.321428571428573, + "grad_norm": 14.608283042907715, + "learning_rate": 4.183928571428572e-05, + "loss": 0.5548, + "step": 5941 + }, + { + "epoch": 16.324175824175825, + "grad_norm": 14.30632209777832, + "learning_rate": 4.183791208791209e-05, + "loss": 0.5692, + "step": 5942 + }, + { + "epoch": 16.326923076923077, + "grad_norm": 6.6083855628967285, + "learning_rate": 4.1836538461538464e-05, + "loss": 0.1509, + "step": 5943 + }, + { + "epoch": 16.32967032967033, + "grad_norm": 3.9511799812316895, + "learning_rate": 4.1835164835164834e-05, + "loss": 0.0737, + "step": 5944 + }, + { + "epoch": 16.332417582417584, + "grad_norm": 7.786353588104248, + "learning_rate": 4.183379120879121e-05, + "loss": 0.1776, + "step": 5945 + }, + { + "epoch": 16.335164835164836, + "grad_norm": 16.858882904052734, + "learning_rate": 4.183241758241758e-05, + "loss": 0.6776, + "step": 5946 + }, + { + "epoch": 16.337912087912088, + "grad_norm": 14.958073616027832, + "learning_rate": 4.183104395604396e-05, + "loss": 0.8781, + "step": 5947 + }, + { + "epoch": 16.34065934065934, + "grad_norm": 12.49674129486084, + "learning_rate": 4.1829670329670334e-05, + "loss": 0.4799, + "step": 5948 + }, + { + "epoch": 16.343406593406595, + "grad_norm": 15.983027458190918, + "learning_rate": 4.1828296703296704e-05, + "loss": 0.7745, + "step": 5949 + }, + { + "epoch": 16.346153846153847, + "grad_norm": 7.102716445922852, + "learning_rate": 4.182692307692308e-05, + "loss": 0.0931, + "step": 5950 + }, + { + "epoch": 16.3489010989011, + "grad_norm": 7.248047828674316, + "learning_rate": 4.182554945054945e-05, + "loss": 0.2129, + "step": 5951 + }, + { + "epoch": 16.35164835164835, + "grad_norm": 13.743600845336914, + "learning_rate": 4.182417582417583e-05, + "loss": 0.4164, + "step": 5952 + }, + { + "epoch": 16.354395604395606, + "grad_norm": 15.83687973022461, + "learning_rate": 4.1822802197802204e-05, + "loss": 0.4309, + "step": 5953 + }, + { + "epoch": 16.357142857142858, + "grad_norm": 13.834670066833496, + "learning_rate": 4.1821428571428574e-05, + "loss": 0.4611, + "step": 5954 + }, + { + "epoch": 16.35989010989011, + "grad_norm": 17.61319923400879, + "learning_rate": 4.182005494505495e-05, + "loss": 0.7117, + "step": 5955 + }, + { + "epoch": 16.36263736263736, + "grad_norm": 9.360028266906738, + "learning_rate": 4.181868131868132e-05, + "loss": 0.1835, + "step": 5956 + }, + { + "epoch": 16.365384615384617, + "grad_norm": 9.77888298034668, + "learning_rate": 4.181730769230769e-05, + "loss": 0.3159, + "step": 5957 + }, + { + "epoch": 16.36813186813187, + "grad_norm": 14.67101764678955, + "learning_rate": 4.181593406593407e-05, + "loss": 0.4408, + "step": 5958 + }, + { + "epoch": 16.37087912087912, + "grad_norm": 13.117050170898438, + "learning_rate": 4.181456043956044e-05, + "loss": 0.6798, + "step": 5959 + }, + { + "epoch": 16.373626373626372, + "grad_norm": 8.873431205749512, + "learning_rate": 4.1813186813186815e-05, + "loss": 0.2072, + "step": 5960 + }, + { + "epoch": 16.376373626373628, + "grad_norm": 14.268959045410156, + "learning_rate": 4.1811813186813185e-05, + "loss": 0.5416, + "step": 5961 + }, + { + "epoch": 16.37912087912088, + "grad_norm": 12.746513366699219, + "learning_rate": 4.181043956043956e-05, + "loss": 0.418, + "step": 5962 + }, + { + "epoch": 16.38186813186813, + "grad_norm": 17.235746383666992, + "learning_rate": 4.180906593406594e-05, + "loss": 0.4602, + "step": 5963 + }, + { + "epoch": 16.384615384615383, + "grad_norm": 13.766231536865234, + "learning_rate": 4.180769230769231e-05, + "loss": 0.583, + "step": 5964 + }, + { + "epoch": 16.38736263736264, + "grad_norm": 14.403331756591797, + "learning_rate": 4.1806318681318685e-05, + "loss": 0.6355, + "step": 5965 + }, + { + "epoch": 16.39010989010989, + "grad_norm": 15.676045417785645, + "learning_rate": 4.1804945054945055e-05, + "loss": 0.6432, + "step": 5966 + }, + { + "epoch": 16.392857142857142, + "grad_norm": 13.864505767822266, + "learning_rate": 4.180357142857143e-05, + "loss": 0.3573, + "step": 5967 + }, + { + "epoch": 16.395604395604394, + "grad_norm": 6.643021106719971, + "learning_rate": 4.180219780219781e-05, + "loss": 0.135, + "step": 5968 + }, + { + "epoch": 16.39835164835165, + "grad_norm": 8.94968318939209, + "learning_rate": 4.180082417582418e-05, + "loss": 0.3576, + "step": 5969 + }, + { + "epoch": 16.4010989010989, + "grad_norm": 15.742698669433594, + "learning_rate": 4.1799450549450556e-05, + "loss": 0.4443, + "step": 5970 + }, + { + "epoch": 16.403846153846153, + "grad_norm": 8.218108177185059, + "learning_rate": 4.1798076923076926e-05, + "loss": 0.2096, + "step": 5971 + }, + { + "epoch": 16.406593406593405, + "grad_norm": 17.21498680114746, + "learning_rate": 4.1796703296703296e-05, + "loss": 0.611, + "step": 5972 + }, + { + "epoch": 16.40934065934066, + "grad_norm": 11.650078773498535, + "learning_rate": 4.179532967032967e-05, + "loss": 0.4459, + "step": 5973 + }, + { + "epoch": 16.412087912087912, + "grad_norm": 9.692105293273926, + "learning_rate": 4.179395604395604e-05, + "loss": 0.2647, + "step": 5974 + }, + { + "epoch": 16.414835164835164, + "grad_norm": 12.087836265563965, + "learning_rate": 4.179258241758242e-05, + "loss": 0.5807, + "step": 5975 + }, + { + "epoch": 16.417582417582416, + "grad_norm": 19.21379280090332, + "learning_rate": 4.179120879120879e-05, + "loss": 0.7126, + "step": 5976 + }, + { + "epoch": 16.42032967032967, + "grad_norm": 10.557879447937012, + "learning_rate": 4.1789835164835166e-05, + "loss": 0.2382, + "step": 5977 + }, + { + "epoch": 16.423076923076923, + "grad_norm": 8.081121444702148, + "learning_rate": 4.178846153846154e-05, + "loss": 0.3766, + "step": 5978 + }, + { + "epoch": 16.425824175824175, + "grad_norm": 15.567048072814941, + "learning_rate": 4.178708791208791e-05, + "loss": 0.3531, + "step": 5979 + }, + { + "epoch": 16.428571428571427, + "grad_norm": 17.0651798248291, + "learning_rate": 4.178571428571429e-05, + "loss": 0.5778, + "step": 5980 + }, + { + "epoch": 16.431318681318682, + "grad_norm": 17.33906364440918, + "learning_rate": 4.178434065934066e-05, + "loss": 0.8227, + "step": 5981 + }, + { + "epoch": 16.434065934065934, + "grad_norm": 21.051054000854492, + "learning_rate": 4.1782967032967037e-05, + "loss": 1.0036, + "step": 5982 + }, + { + "epoch": 16.436813186813186, + "grad_norm": 10.292372703552246, + "learning_rate": 4.1781593406593413e-05, + "loss": 0.2129, + "step": 5983 + }, + { + "epoch": 16.439560439560438, + "grad_norm": 7.001614093780518, + "learning_rate": 4.1780219780219783e-05, + "loss": 0.1728, + "step": 5984 + }, + { + "epoch": 16.442307692307693, + "grad_norm": 7.096103191375732, + "learning_rate": 4.177884615384616e-05, + "loss": 0.1922, + "step": 5985 + }, + { + "epoch": 16.445054945054945, + "grad_norm": 7.100625991821289, + "learning_rate": 4.177747252747253e-05, + "loss": 0.18, + "step": 5986 + }, + { + "epoch": 16.447802197802197, + "grad_norm": 11.331148147583008, + "learning_rate": 4.17760989010989e-05, + "loss": 0.336, + "step": 5987 + }, + { + "epoch": 16.45054945054945, + "grad_norm": 17.985132217407227, + "learning_rate": 4.177472527472528e-05, + "loss": 0.6695, + "step": 5988 + }, + { + "epoch": 16.453296703296704, + "grad_norm": 15.338875770568848, + "learning_rate": 4.177335164835165e-05, + "loss": 0.5506, + "step": 5989 + }, + { + "epoch": 16.456043956043956, + "grad_norm": 12.702322006225586, + "learning_rate": 4.1771978021978024e-05, + "loss": 0.6056, + "step": 5990 + }, + { + "epoch": 16.458791208791208, + "grad_norm": 7.922532558441162, + "learning_rate": 4.1770604395604394e-05, + "loss": 0.212, + "step": 5991 + }, + { + "epoch": 16.46153846153846, + "grad_norm": 10.788924217224121, + "learning_rate": 4.176923076923077e-05, + "loss": 0.3567, + "step": 5992 + }, + { + "epoch": 16.464285714285715, + "grad_norm": 13.94335651397705, + "learning_rate": 4.176785714285715e-05, + "loss": 0.4113, + "step": 5993 + }, + { + "epoch": 16.467032967032967, + "grad_norm": 17.976490020751953, + "learning_rate": 4.176648351648352e-05, + "loss": 0.5256, + "step": 5994 + }, + { + "epoch": 16.46978021978022, + "grad_norm": 12.663262367248535, + "learning_rate": 4.1765109890109894e-05, + "loss": 0.5954, + "step": 5995 + }, + { + "epoch": 16.47252747252747, + "grad_norm": 16.880767822265625, + "learning_rate": 4.1763736263736264e-05, + "loss": 0.6787, + "step": 5996 + }, + { + "epoch": 16.475274725274726, + "grad_norm": 11.540855407714844, + "learning_rate": 4.176236263736264e-05, + "loss": 0.3207, + "step": 5997 + }, + { + "epoch": 16.478021978021978, + "grad_norm": 13.90312671661377, + "learning_rate": 4.176098901098902e-05, + "loss": 0.4991, + "step": 5998 + }, + { + "epoch": 16.48076923076923, + "grad_norm": 19.428054809570312, + "learning_rate": 4.175961538461539e-05, + "loss": 0.5064, + "step": 5999 + }, + { + "epoch": 16.483516483516482, + "grad_norm": 6.953804016113281, + "learning_rate": 4.1758241758241765e-05, + "loss": 0.1671, + "step": 6000 + }, + { + "epoch": 16.486263736263737, + "grad_norm": 15.738346099853516, + "learning_rate": 4.1756868131868135e-05, + "loss": 0.4109, + "step": 6001 + }, + { + "epoch": 16.48901098901099, + "grad_norm": 14.772850036621094, + "learning_rate": 4.1755494505494505e-05, + "loss": 0.5188, + "step": 6002 + }, + { + "epoch": 16.49175824175824, + "grad_norm": 10.839703559875488, + "learning_rate": 4.175412087912088e-05, + "loss": 0.4647, + "step": 6003 + }, + { + "epoch": 16.494505494505496, + "grad_norm": 10.42348861694336, + "learning_rate": 4.175274725274725e-05, + "loss": 0.3322, + "step": 6004 + }, + { + "epoch": 16.497252747252748, + "grad_norm": 10.381722450256348, + "learning_rate": 4.175137362637363e-05, + "loss": 0.2998, + "step": 6005 + }, + { + "epoch": 16.5, + "grad_norm": 11.471263885498047, + "learning_rate": 4.175e-05, + "loss": 0.3703, + "step": 6006 + }, + { + "epoch": 16.502747252747252, + "grad_norm": 15.621392250061035, + "learning_rate": 4.1748626373626375e-05, + "loss": 0.3859, + "step": 6007 + }, + { + "epoch": 16.505494505494504, + "grad_norm": 6.642473220825195, + "learning_rate": 4.174725274725275e-05, + "loss": 0.1552, + "step": 6008 + }, + { + "epoch": 16.50824175824176, + "grad_norm": 13.871715545654297, + "learning_rate": 4.174587912087912e-05, + "loss": 0.4504, + "step": 6009 + }, + { + "epoch": 16.51098901098901, + "grad_norm": 11.588047981262207, + "learning_rate": 4.17445054945055e-05, + "loss": 0.2867, + "step": 6010 + }, + { + "epoch": 16.513736263736263, + "grad_norm": 11.434952735900879, + "learning_rate": 4.174313186813187e-05, + "loss": 0.3833, + "step": 6011 + }, + { + "epoch": 16.516483516483518, + "grad_norm": 4.967479705810547, + "learning_rate": 4.1741758241758246e-05, + "loss": 0.1332, + "step": 6012 + }, + { + "epoch": 16.51923076923077, + "grad_norm": 18.013832092285156, + "learning_rate": 4.174038461538462e-05, + "loss": 0.5424, + "step": 6013 + }, + { + "epoch": 16.521978021978022, + "grad_norm": 9.657384872436523, + "learning_rate": 4.173901098901099e-05, + "loss": 0.2065, + "step": 6014 + }, + { + "epoch": 16.524725274725274, + "grad_norm": 15.291208267211914, + "learning_rate": 4.173763736263737e-05, + "loss": 0.6847, + "step": 6015 + }, + { + "epoch": 16.52747252747253, + "grad_norm": 10.137816429138184, + "learning_rate": 4.173626373626374e-05, + "loss": 0.2367, + "step": 6016 + }, + { + "epoch": 16.53021978021978, + "grad_norm": 17.3769474029541, + "learning_rate": 4.173489010989011e-05, + "loss": 0.9254, + "step": 6017 + }, + { + "epoch": 16.532967032967033, + "grad_norm": 10.630552291870117, + "learning_rate": 4.1733516483516486e-05, + "loss": 0.2908, + "step": 6018 + }, + { + "epoch": 16.535714285714285, + "grad_norm": 11.597318649291992, + "learning_rate": 4.1732142857142856e-05, + "loss": 0.3342, + "step": 6019 + }, + { + "epoch": 16.53846153846154, + "grad_norm": 14.185966491699219, + "learning_rate": 4.173076923076923e-05, + "loss": 0.6814, + "step": 6020 + }, + { + "epoch": 16.541208791208792, + "grad_norm": 8.683733940124512, + "learning_rate": 4.17293956043956e-05, + "loss": 0.1506, + "step": 6021 + }, + { + "epoch": 16.543956043956044, + "grad_norm": 6.998571395874023, + "learning_rate": 4.172802197802198e-05, + "loss": 0.2422, + "step": 6022 + }, + { + "epoch": 16.546703296703296, + "grad_norm": 12.999043464660645, + "learning_rate": 4.1726648351648356e-05, + "loss": 0.3119, + "step": 6023 + }, + { + "epoch": 16.54945054945055, + "grad_norm": 9.582528114318848, + "learning_rate": 4.1725274725274726e-05, + "loss": 0.2277, + "step": 6024 + }, + { + "epoch": 16.552197802197803, + "grad_norm": 12.027161598205566, + "learning_rate": 4.17239010989011e-05, + "loss": 0.2702, + "step": 6025 + }, + { + "epoch": 16.554945054945055, + "grad_norm": 9.75332260131836, + "learning_rate": 4.172252747252747e-05, + "loss": 0.2018, + "step": 6026 + }, + { + "epoch": 16.557692307692307, + "grad_norm": 10.934866905212402, + "learning_rate": 4.172115384615385e-05, + "loss": 0.3167, + "step": 6027 + }, + { + "epoch": 16.560439560439562, + "grad_norm": 9.320786476135254, + "learning_rate": 4.171978021978023e-05, + "loss": 0.3432, + "step": 6028 + }, + { + "epoch": 16.563186813186814, + "grad_norm": 13.228446960449219, + "learning_rate": 4.17184065934066e-05, + "loss": 0.3765, + "step": 6029 + }, + { + "epoch": 16.565934065934066, + "grad_norm": 15.364874839782715, + "learning_rate": 4.1717032967032974e-05, + "loss": 0.4507, + "step": 6030 + }, + { + "epoch": 16.568681318681318, + "grad_norm": 16.63745880126953, + "learning_rate": 4.1715659340659344e-05, + "loss": 0.7687, + "step": 6031 + }, + { + "epoch": 16.571428571428573, + "grad_norm": 9.19870662689209, + "learning_rate": 4.1714285714285714e-05, + "loss": 0.2219, + "step": 6032 + }, + { + "epoch": 16.574175824175825, + "grad_norm": 18.625770568847656, + "learning_rate": 4.171291208791209e-05, + "loss": 0.7206, + "step": 6033 + }, + { + "epoch": 16.576923076923077, + "grad_norm": 21.231184005737305, + "learning_rate": 4.171153846153846e-05, + "loss": 0.7901, + "step": 6034 + }, + { + "epoch": 16.57967032967033, + "grad_norm": 14.19990348815918, + "learning_rate": 4.171016483516484e-05, + "loss": 0.3045, + "step": 6035 + }, + { + "epoch": 16.582417582417584, + "grad_norm": 15.786093711853027, + "learning_rate": 4.170879120879121e-05, + "loss": 0.6506, + "step": 6036 + }, + { + "epoch": 16.585164835164836, + "grad_norm": 9.417191505432129, + "learning_rate": 4.1707417582417584e-05, + "loss": 0.2691, + "step": 6037 + }, + { + "epoch": 16.587912087912088, + "grad_norm": 8.62007999420166, + "learning_rate": 4.1706043956043954e-05, + "loss": 0.3175, + "step": 6038 + }, + { + "epoch": 16.59065934065934, + "grad_norm": 22.265188217163086, + "learning_rate": 4.170467032967033e-05, + "loss": 1.0892, + "step": 6039 + }, + { + "epoch": 16.593406593406595, + "grad_norm": 12.632755279541016, + "learning_rate": 4.170329670329671e-05, + "loss": 0.6606, + "step": 6040 + }, + { + "epoch": 16.596153846153847, + "grad_norm": 20.155467987060547, + "learning_rate": 4.170192307692308e-05, + "loss": 0.6755, + "step": 6041 + }, + { + "epoch": 16.5989010989011, + "grad_norm": 6.676864147186279, + "learning_rate": 4.1700549450549455e-05, + "loss": 0.1801, + "step": 6042 + }, + { + "epoch": 16.60164835164835, + "grad_norm": 21.615476608276367, + "learning_rate": 4.1699175824175825e-05, + "loss": 0.9897, + "step": 6043 + }, + { + "epoch": 16.604395604395606, + "grad_norm": 8.437335014343262, + "learning_rate": 4.16978021978022e-05, + "loss": 0.4505, + "step": 6044 + }, + { + "epoch": 16.607142857142858, + "grad_norm": 9.185654640197754, + "learning_rate": 4.169642857142858e-05, + "loss": 0.3889, + "step": 6045 + }, + { + "epoch": 16.60989010989011, + "grad_norm": 14.93599796295166, + "learning_rate": 4.169505494505495e-05, + "loss": 0.4866, + "step": 6046 + }, + { + "epoch": 16.61263736263736, + "grad_norm": 10.355416297912598, + "learning_rate": 4.169368131868132e-05, + "loss": 0.3759, + "step": 6047 + }, + { + "epoch": 16.615384615384617, + "grad_norm": 8.893628120422363, + "learning_rate": 4.169230769230769e-05, + "loss": 0.221, + "step": 6048 + }, + { + "epoch": 16.61813186813187, + "grad_norm": 18.626243591308594, + "learning_rate": 4.1690934065934065e-05, + "loss": 0.4872, + "step": 6049 + }, + { + "epoch": 16.62087912087912, + "grad_norm": 9.141685485839844, + "learning_rate": 4.168956043956044e-05, + "loss": 0.2953, + "step": 6050 + }, + { + "epoch": 16.623626373626372, + "grad_norm": 11.473231315612793, + "learning_rate": 4.168818681318681e-05, + "loss": 0.3431, + "step": 6051 + }, + { + "epoch": 16.626373626373628, + "grad_norm": 11.686304092407227, + "learning_rate": 4.168681318681319e-05, + "loss": 0.2402, + "step": 6052 + }, + { + "epoch": 16.62912087912088, + "grad_norm": 7.638981342315674, + "learning_rate": 4.168543956043956e-05, + "loss": 0.3143, + "step": 6053 + }, + { + "epoch": 16.63186813186813, + "grad_norm": 18.980472564697266, + "learning_rate": 4.1684065934065935e-05, + "loss": 0.6958, + "step": 6054 + }, + { + "epoch": 16.634615384615383, + "grad_norm": 16.556682586669922, + "learning_rate": 4.168269230769231e-05, + "loss": 0.5958, + "step": 6055 + }, + { + "epoch": 16.63736263736264, + "grad_norm": 14.896933555603027, + "learning_rate": 4.168131868131868e-05, + "loss": 0.6224, + "step": 6056 + }, + { + "epoch": 16.64010989010989, + "grad_norm": 9.151537895202637, + "learning_rate": 4.167994505494506e-05, + "loss": 0.3184, + "step": 6057 + }, + { + "epoch": 16.642857142857142, + "grad_norm": 8.57437801361084, + "learning_rate": 4.167857142857143e-05, + "loss": 0.2311, + "step": 6058 + }, + { + "epoch": 16.645604395604394, + "grad_norm": 4.997485160827637, + "learning_rate": 4.1677197802197806e-05, + "loss": 0.1136, + "step": 6059 + }, + { + "epoch": 16.64835164835165, + "grad_norm": 21.559829711914062, + "learning_rate": 4.167582417582418e-05, + "loss": 1.0115, + "step": 6060 + }, + { + "epoch": 16.6510989010989, + "grad_norm": 7.273709297180176, + "learning_rate": 4.167445054945055e-05, + "loss": 0.3022, + "step": 6061 + }, + { + "epoch": 16.653846153846153, + "grad_norm": 11.55477237701416, + "learning_rate": 4.167307692307692e-05, + "loss": 0.4808, + "step": 6062 + }, + { + "epoch": 16.656593406593405, + "grad_norm": 10.971224784851074, + "learning_rate": 4.167170329670329e-05, + "loss": 0.3361, + "step": 6063 + }, + { + "epoch": 16.65934065934066, + "grad_norm": 16.575496673583984, + "learning_rate": 4.167032967032967e-05, + "loss": 0.4117, + "step": 6064 + }, + { + "epoch": 16.662087912087912, + "grad_norm": 15.607986450195312, + "learning_rate": 4.1668956043956046e-05, + "loss": 0.6151, + "step": 6065 + }, + { + "epoch": 16.664835164835164, + "grad_norm": 8.498894691467285, + "learning_rate": 4.1667582417582416e-05, + "loss": 0.2473, + "step": 6066 + }, + { + "epoch": 16.667582417582416, + "grad_norm": 8.243958473205566, + "learning_rate": 4.166620879120879e-05, + "loss": 0.1749, + "step": 6067 + }, + { + "epoch": 16.67032967032967, + "grad_norm": 14.109729766845703, + "learning_rate": 4.166483516483516e-05, + "loss": 0.305, + "step": 6068 + }, + { + "epoch": 16.673076923076923, + "grad_norm": 21.938396453857422, + "learning_rate": 4.166346153846154e-05, + "loss": 1.0345, + "step": 6069 + }, + { + "epoch": 16.675824175824175, + "grad_norm": 12.053964614868164, + "learning_rate": 4.166208791208792e-05, + "loss": 0.44, + "step": 6070 + }, + { + "epoch": 16.678571428571427, + "grad_norm": 13.908894538879395, + "learning_rate": 4.166071428571429e-05, + "loss": 0.357, + "step": 6071 + }, + { + "epoch": 16.681318681318682, + "grad_norm": 18.488567352294922, + "learning_rate": 4.1659340659340664e-05, + "loss": 0.5629, + "step": 6072 + }, + { + "epoch": 16.684065934065934, + "grad_norm": 13.390796661376953, + "learning_rate": 4.1657967032967034e-05, + "loss": 0.3601, + "step": 6073 + }, + { + "epoch": 16.686813186813186, + "grad_norm": 11.63930892944336, + "learning_rate": 4.165659340659341e-05, + "loss": 0.3382, + "step": 6074 + }, + { + "epoch": 16.689560439560438, + "grad_norm": 28.892854690551758, + "learning_rate": 4.165521978021979e-05, + "loss": 1.189, + "step": 6075 + }, + { + "epoch": 16.692307692307693, + "grad_norm": 11.072627067565918, + "learning_rate": 4.165384615384616e-05, + "loss": 0.2022, + "step": 6076 + }, + { + "epoch": 16.695054945054945, + "grad_norm": 13.610271453857422, + "learning_rate": 4.165247252747253e-05, + "loss": 0.4825, + "step": 6077 + }, + { + "epoch": 16.697802197802197, + "grad_norm": 11.023140907287598, + "learning_rate": 4.16510989010989e-05, + "loss": 0.2653, + "step": 6078 + }, + { + "epoch": 16.70054945054945, + "grad_norm": 5.875030040740967, + "learning_rate": 4.1649725274725274e-05, + "loss": 0.1401, + "step": 6079 + }, + { + "epoch": 16.703296703296704, + "grad_norm": 10.52977180480957, + "learning_rate": 4.164835164835165e-05, + "loss": 0.3837, + "step": 6080 + }, + { + "epoch": 16.706043956043956, + "grad_norm": 10.741825103759766, + "learning_rate": 4.164697802197802e-05, + "loss": 0.2288, + "step": 6081 + }, + { + "epoch": 16.708791208791208, + "grad_norm": 13.40241813659668, + "learning_rate": 4.16456043956044e-05, + "loss": 0.3473, + "step": 6082 + }, + { + "epoch": 16.71153846153846, + "grad_norm": 9.96292495727539, + "learning_rate": 4.164423076923077e-05, + "loss": 0.3077, + "step": 6083 + }, + { + "epoch": 16.714285714285715, + "grad_norm": 17.19739532470703, + "learning_rate": 4.1642857142857144e-05, + "loss": 0.8239, + "step": 6084 + }, + { + "epoch": 16.717032967032967, + "grad_norm": 13.487592697143555, + "learning_rate": 4.164148351648352e-05, + "loss": 0.7269, + "step": 6085 + }, + { + "epoch": 16.71978021978022, + "grad_norm": 7.700150966644287, + "learning_rate": 4.164010989010989e-05, + "loss": 0.1833, + "step": 6086 + }, + { + "epoch": 16.72252747252747, + "grad_norm": 12.342798233032227, + "learning_rate": 4.163873626373627e-05, + "loss": 0.4124, + "step": 6087 + }, + { + "epoch": 16.725274725274726, + "grad_norm": 9.900517463684082, + "learning_rate": 4.163736263736264e-05, + "loss": 0.2304, + "step": 6088 + }, + { + "epoch": 16.728021978021978, + "grad_norm": 11.634698867797852, + "learning_rate": 4.1635989010989015e-05, + "loss": 0.3243, + "step": 6089 + }, + { + "epoch": 16.73076923076923, + "grad_norm": 11.266351699829102, + "learning_rate": 4.163461538461539e-05, + "loss": 0.4332, + "step": 6090 + }, + { + "epoch": 16.733516483516482, + "grad_norm": 9.958905220031738, + "learning_rate": 4.163324175824176e-05, + "loss": 0.3964, + "step": 6091 + }, + { + "epoch": 16.736263736263737, + "grad_norm": 14.948006629943848, + "learning_rate": 4.163186813186813e-05, + "loss": 0.5742, + "step": 6092 + }, + { + "epoch": 16.73901098901099, + "grad_norm": 14.484781265258789, + "learning_rate": 4.16304945054945e-05, + "loss": 0.4234, + "step": 6093 + }, + { + "epoch": 16.74175824175824, + "grad_norm": 12.9591064453125, + "learning_rate": 4.162912087912088e-05, + "loss": 0.3723, + "step": 6094 + }, + { + "epoch": 16.744505494505496, + "grad_norm": 10.661615371704102, + "learning_rate": 4.1627747252747255e-05, + "loss": 0.2249, + "step": 6095 + }, + { + "epoch": 16.747252747252748, + "grad_norm": 11.666711807250977, + "learning_rate": 4.1626373626373625e-05, + "loss": 0.3264, + "step": 6096 + }, + { + "epoch": 16.75, + "grad_norm": 14.22657299041748, + "learning_rate": 4.1625e-05, + "loss": 0.7219, + "step": 6097 + }, + { + "epoch": 16.752747252747252, + "grad_norm": 19.182369232177734, + "learning_rate": 4.162362637362637e-05, + "loss": 0.967, + "step": 6098 + }, + { + "epoch": 16.755494505494504, + "grad_norm": 10.48308277130127, + "learning_rate": 4.162225274725275e-05, + "loss": 0.4525, + "step": 6099 + }, + { + "epoch": 16.75824175824176, + "grad_norm": 22.639814376831055, + "learning_rate": 4.1620879120879126e-05, + "loss": 0.8603, + "step": 6100 + }, + { + "epoch": 16.76098901098901, + "grad_norm": 12.514262199401855, + "learning_rate": 4.1619505494505496e-05, + "loss": 0.3903, + "step": 6101 + }, + { + "epoch": 16.763736263736263, + "grad_norm": 13.821334838867188, + "learning_rate": 4.161813186813187e-05, + "loss": 0.4224, + "step": 6102 + }, + { + "epoch": 16.766483516483518, + "grad_norm": 8.99378776550293, + "learning_rate": 4.161675824175824e-05, + "loss": 0.2053, + "step": 6103 + }, + { + "epoch": 16.76923076923077, + "grad_norm": 7.961583614349365, + "learning_rate": 4.161538461538462e-05, + "loss": 0.2523, + "step": 6104 + }, + { + "epoch": 16.771978021978022, + "grad_norm": 18.162324905395508, + "learning_rate": 4.1614010989010996e-05, + "loss": 0.5845, + "step": 6105 + }, + { + "epoch": 16.774725274725274, + "grad_norm": 13.109371185302734, + "learning_rate": 4.1612637362637366e-05, + "loss": 0.5335, + "step": 6106 + }, + { + "epoch": 16.77747252747253, + "grad_norm": 7.784365653991699, + "learning_rate": 4.1611263736263736e-05, + "loss": 0.2114, + "step": 6107 + }, + { + "epoch": 16.78021978021978, + "grad_norm": 17.833181381225586, + "learning_rate": 4.1609890109890106e-05, + "loss": 0.8973, + "step": 6108 + }, + { + "epoch": 16.782967032967033, + "grad_norm": 9.295458793640137, + "learning_rate": 4.160851648351648e-05, + "loss": 0.3671, + "step": 6109 + }, + { + "epoch": 16.785714285714285, + "grad_norm": 15.854897499084473, + "learning_rate": 4.160714285714286e-05, + "loss": 0.4641, + "step": 6110 + }, + { + "epoch": 16.78846153846154, + "grad_norm": 12.63198184967041, + "learning_rate": 4.160576923076923e-05, + "loss": 0.5407, + "step": 6111 + }, + { + "epoch": 16.791208791208792, + "grad_norm": 11.661181449890137, + "learning_rate": 4.1604395604395607e-05, + "loss": 0.4005, + "step": 6112 + }, + { + "epoch": 16.793956043956044, + "grad_norm": 9.187493324279785, + "learning_rate": 4.160302197802198e-05, + "loss": 0.3828, + "step": 6113 + }, + { + "epoch": 16.796703296703296, + "grad_norm": 8.86398696899414, + "learning_rate": 4.1601648351648353e-05, + "loss": 0.2445, + "step": 6114 + }, + { + "epoch": 16.79945054945055, + "grad_norm": 9.044426918029785, + "learning_rate": 4.160027472527473e-05, + "loss": 0.3214, + "step": 6115 + }, + { + "epoch": 16.802197802197803, + "grad_norm": 21.271453857421875, + "learning_rate": 4.15989010989011e-05, + "loss": 0.8398, + "step": 6116 + }, + { + "epoch": 16.804945054945055, + "grad_norm": 15.569075584411621, + "learning_rate": 4.159752747252748e-05, + "loss": 0.4055, + "step": 6117 + }, + { + "epoch": 16.807692307692307, + "grad_norm": 14.342781066894531, + "learning_rate": 4.159615384615385e-05, + "loss": 0.5315, + "step": 6118 + }, + { + "epoch": 16.810439560439562, + "grad_norm": 6.508975982666016, + "learning_rate": 4.1594780219780224e-05, + "loss": 0.1354, + "step": 6119 + }, + { + "epoch": 16.813186813186814, + "grad_norm": 10.649849891662598, + "learning_rate": 4.15934065934066e-05, + "loss": 0.2935, + "step": 6120 + }, + { + "epoch": 16.815934065934066, + "grad_norm": 12.326497077941895, + "learning_rate": 4.159203296703297e-05, + "loss": 0.5629, + "step": 6121 + }, + { + "epoch": 16.818681318681318, + "grad_norm": 10.040440559387207, + "learning_rate": 4.159065934065934e-05, + "loss": 0.2475, + "step": 6122 + }, + { + "epoch": 16.821428571428573, + "grad_norm": 19.115093231201172, + "learning_rate": 4.158928571428571e-05, + "loss": 0.7959, + "step": 6123 + }, + { + "epoch": 16.824175824175825, + "grad_norm": 20.204723358154297, + "learning_rate": 4.158791208791209e-05, + "loss": 1.096, + "step": 6124 + }, + { + "epoch": 16.826923076923077, + "grad_norm": 15.638474464416504, + "learning_rate": 4.1586538461538464e-05, + "loss": 0.6065, + "step": 6125 + }, + { + "epoch": 16.82967032967033, + "grad_norm": 25.455448150634766, + "learning_rate": 4.1585164835164834e-05, + "loss": 0.7977, + "step": 6126 + }, + { + "epoch": 16.832417582417584, + "grad_norm": 15.16103458404541, + "learning_rate": 4.158379120879121e-05, + "loss": 0.6269, + "step": 6127 + }, + { + "epoch": 16.835164835164836, + "grad_norm": 8.137001991271973, + "learning_rate": 4.158241758241758e-05, + "loss": 0.202, + "step": 6128 + }, + { + "epoch": 16.837912087912088, + "grad_norm": 8.298991203308105, + "learning_rate": 4.158104395604396e-05, + "loss": 0.318, + "step": 6129 + }, + { + "epoch": 16.84065934065934, + "grad_norm": 22.275421142578125, + "learning_rate": 4.1579670329670335e-05, + "loss": 0.7558, + "step": 6130 + }, + { + "epoch": 16.843406593406595, + "grad_norm": 15.799697875976562, + "learning_rate": 4.1578296703296705e-05, + "loss": 0.516, + "step": 6131 + }, + { + "epoch": 16.846153846153847, + "grad_norm": 17.147964477539062, + "learning_rate": 4.157692307692308e-05, + "loss": 0.5546, + "step": 6132 + }, + { + "epoch": 16.8489010989011, + "grad_norm": 15.309202194213867, + "learning_rate": 4.157554945054945e-05, + "loss": 0.4259, + "step": 6133 + }, + { + "epoch": 16.85164835164835, + "grad_norm": 10.7102689743042, + "learning_rate": 4.157417582417583e-05, + "loss": 0.3134, + "step": 6134 + }, + { + "epoch": 16.854395604395606, + "grad_norm": 11.64493465423584, + "learning_rate": 4.1572802197802205e-05, + "loss": 0.4811, + "step": 6135 + }, + { + "epoch": 16.857142857142858, + "grad_norm": 10.963376998901367, + "learning_rate": 4.1571428571428575e-05, + "loss": 0.2284, + "step": 6136 + }, + { + "epoch": 16.85989010989011, + "grad_norm": 8.428474426269531, + "learning_rate": 4.1570054945054945e-05, + "loss": 0.3167, + "step": 6137 + }, + { + "epoch": 16.86263736263736, + "grad_norm": 14.400456428527832, + "learning_rate": 4.1568681318681315e-05, + "loss": 0.5466, + "step": 6138 + }, + { + "epoch": 16.865384615384617, + "grad_norm": 10.256641387939453, + "learning_rate": 4.156730769230769e-05, + "loss": 0.5485, + "step": 6139 + }, + { + "epoch": 16.86813186813187, + "grad_norm": 11.934725761413574, + "learning_rate": 4.156593406593407e-05, + "loss": 0.3742, + "step": 6140 + }, + { + "epoch": 16.87087912087912, + "grad_norm": 10.942394256591797, + "learning_rate": 4.156456043956044e-05, + "loss": 0.3873, + "step": 6141 + }, + { + "epoch": 16.873626373626372, + "grad_norm": 7.510147571563721, + "learning_rate": 4.1563186813186816e-05, + "loss": 0.1685, + "step": 6142 + }, + { + "epoch": 16.876373626373628, + "grad_norm": 19.672988891601562, + "learning_rate": 4.1561813186813186e-05, + "loss": 0.825, + "step": 6143 + }, + { + "epoch": 16.87912087912088, + "grad_norm": 11.642427444458008, + "learning_rate": 4.156043956043956e-05, + "loss": 0.6398, + "step": 6144 + }, + { + "epoch": 16.88186813186813, + "grad_norm": 9.347244262695312, + "learning_rate": 4.155906593406594e-05, + "loss": 0.3186, + "step": 6145 + }, + { + "epoch": 16.884615384615383, + "grad_norm": 13.017897605895996, + "learning_rate": 4.155769230769231e-05, + "loss": 0.2759, + "step": 6146 + }, + { + "epoch": 16.88736263736264, + "grad_norm": 14.167412757873535, + "learning_rate": 4.1556318681318686e-05, + "loss": 0.5866, + "step": 6147 + }, + { + "epoch": 16.89010989010989, + "grad_norm": 14.379473686218262, + "learning_rate": 4.1554945054945056e-05, + "loss": 0.8405, + "step": 6148 + }, + { + "epoch": 16.892857142857142, + "grad_norm": 20.902353286743164, + "learning_rate": 4.155357142857143e-05, + "loss": 0.6759, + "step": 6149 + }, + { + "epoch": 16.895604395604394, + "grad_norm": 12.6608247756958, + "learning_rate": 4.155219780219781e-05, + "loss": 0.3534, + "step": 6150 + }, + { + "epoch": 16.89835164835165, + "grad_norm": 18.78713035583496, + "learning_rate": 4.155082417582418e-05, + "loss": 0.58, + "step": 6151 + }, + { + "epoch": 16.9010989010989, + "grad_norm": 9.120288848876953, + "learning_rate": 4.154945054945055e-05, + "loss": 0.2526, + "step": 6152 + }, + { + "epoch": 16.903846153846153, + "grad_norm": 16.863574981689453, + "learning_rate": 4.154807692307692e-05, + "loss": 0.4176, + "step": 6153 + }, + { + "epoch": 16.906593406593405, + "grad_norm": 23.123281478881836, + "learning_rate": 4.1546703296703296e-05, + "loss": 0.857, + "step": 6154 + }, + { + "epoch": 16.90934065934066, + "grad_norm": 12.072732925415039, + "learning_rate": 4.154532967032967e-05, + "loss": 0.6456, + "step": 6155 + }, + { + "epoch": 16.912087912087912, + "grad_norm": 6.701681137084961, + "learning_rate": 4.154395604395604e-05, + "loss": 0.1543, + "step": 6156 + }, + { + "epoch": 16.914835164835164, + "grad_norm": 18.090463638305664, + "learning_rate": 4.154258241758242e-05, + "loss": 0.6572, + "step": 6157 + }, + { + "epoch": 16.917582417582416, + "grad_norm": 19.25204849243164, + "learning_rate": 4.154120879120879e-05, + "loss": 0.8459, + "step": 6158 + }, + { + "epoch": 16.92032967032967, + "grad_norm": 23.00725746154785, + "learning_rate": 4.153983516483517e-05, + "loss": 0.6297, + "step": 6159 + }, + { + "epoch": 16.923076923076923, + "grad_norm": 13.42536735534668, + "learning_rate": 4.1538461538461544e-05, + "loss": 0.5866, + "step": 6160 + }, + { + "epoch": 16.925824175824175, + "grad_norm": 13.782551765441895, + "learning_rate": 4.1537087912087914e-05, + "loss": 0.3618, + "step": 6161 + }, + { + "epoch": 16.928571428571427, + "grad_norm": 9.672910690307617, + "learning_rate": 4.153571428571429e-05, + "loss": 0.4078, + "step": 6162 + }, + { + "epoch": 16.931318681318682, + "grad_norm": 20.649110794067383, + "learning_rate": 4.153434065934066e-05, + "loss": 0.7188, + "step": 6163 + }, + { + "epoch": 16.934065934065934, + "grad_norm": 10.989949226379395, + "learning_rate": 4.153296703296704e-05, + "loss": 0.2402, + "step": 6164 + }, + { + "epoch": 16.936813186813186, + "grad_norm": 10.118631362915039, + "learning_rate": 4.1531593406593414e-05, + "loss": 0.2334, + "step": 6165 + }, + { + "epoch": 16.939560439560438, + "grad_norm": 16.99169158935547, + "learning_rate": 4.1530219780219784e-05, + "loss": 0.5762, + "step": 6166 + }, + { + "epoch": 16.942307692307693, + "grad_norm": 13.325267791748047, + "learning_rate": 4.1528846153846154e-05, + "loss": 0.4861, + "step": 6167 + }, + { + "epoch": 16.945054945054945, + "grad_norm": 18.44881820678711, + "learning_rate": 4.1527472527472524e-05, + "loss": 0.6544, + "step": 6168 + }, + { + "epoch": 16.947802197802197, + "grad_norm": 9.332114219665527, + "learning_rate": 4.15260989010989e-05, + "loss": 0.2896, + "step": 6169 + }, + { + "epoch": 16.95054945054945, + "grad_norm": 14.961565017700195, + "learning_rate": 4.152472527472528e-05, + "loss": 0.5562, + "step": 6170 + }, + { + "epoch": 16.953296703296704, + "grad_norm": 13.558509826660156, + "learning_rate": 4.152335164835165e-05, + "loss": 0.3342, + "step": 6171 + }, + { + "epoch": 16.956043956043956, + "grad_norm": 4.914913654327393, + "learning_rate": 4.1521978021978025e-05, + "loss": 0.1134, + "step": 6172 + }, + { + "epoch": 16.958791208791208, + "grad_norm": 14.561174392700195, + "learning_rate": 4.1520604395604395e-05, + "loss": 0.4072, + "step": 6173 + }, + { + "epoch": 16.96153846153846, + "grad_norm": 16.025901794433594, + "learning_rate": 4.151923076923077e-05, + "loss": 0.3983, + "step": 6174 + }, + { + "epoch": 16.964285714285715, + "grad_norm": 7.760944843292236, + "learning_rate": 4.151785714285715e-05, + "loss": 0.2049, + "step": 6175 + }, + { + "epoch": 16.967032967032967, + "grad_norm": 8.356590270996094, + "learning_rate": 4.151648351648352e-05, + "loss": 0.2009, + "step": 6176 + }, + { + "epoch": 16.96978021978022, + "grad_norm": 18.974103927612305, + "learning_rate": 4.1515109890109895e-05, + "loss": 0.7609, + "step": 6177 + }, + { + "epoch": 16.97252747252747, + "grad_norm": 15.935303688049316, + "learning_rate": 4.1513736263736265e-05, + "loss": 0.4541, + "step": 6178 + }, + { + "epoch": 16.975274725274726, + "grad_norm": 15.323057174682617, + "learning_rate": 4.151236263736264e-05, + "loss": 0.898, + "step": 6179 + }, + { + "epoch": 16.978021978021978, + "grad_norm": 18.8624210357666, + "learning_rate": 4.151098901098902e-05, + "loss": 0.7546, + "step": 6180 + }, + { + "epoch": 16.98076923076923, + "grad_norm": 4.582338809967041, + "learning_rate": 4.150961538461539e-05, + "loss": 0.1346, + "step": 6181 + }, + { + "epoch": 16.983516483516482, + "grad_norm": 15.139116287231445, + "learning_rate": 4.150824175824176e-05, + "loss": 0.3827, + "step": 6182 + }, + { + "epoch": 16.986263736263737, + "grad_norm": 6.584336757659912, + "learning_rate": 4.150686813186813e-05, + "loss": 0.2281, + "step": 6183 + }, + { + "epoch": 16.98901098901099, + "grad_norm": 13.199065208435059, + "learning_rate": 4.1505494505494505e-05, + "loss": 0.5901, + "step": 6184 + }, + { + "epoch": 16.99175824175824, + "grad_norm": 17.776958465576172, + "learning_rate": 4.150412087912088e-05, + "loss": 0.4234, + "step": 6185 + }, + { + "epoch": 16.994505494505496, + "grad_norm": 14.97443962097168, + "learning_rate": 4.150274725274725e-05, + "loss": 0.8618, + "step": 6186 + }, + { + "epoch": 16.997252747252748, + "grad_norm": 17.038740158081055, + "learning_rate": 4.150137362637363e-05, + "loss": 0.6614, + "step": 6187 + }, + { + "epoch": 17.0, + "grad_norm": 45.17449188232422, + "learning_rate": 4.15e-05, + "loss": 1.6885, + "step": 6188 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.6666666666666666, + "eval_f1": 0.6495402423261061, + "eval_f1_DuraRiadoRio_64x64": 0.3352601156069364, + "eval_f1_Mole_64x64": 0.7586206896551724, + "eval_f1_Quebrado_64x64": 0.8112676056338028, + "eval_f1_RiadoRio_64x64": 0.5822222222222222, + "eval_f1_RioFechado_64x64": 0.7603305785123967, + "eval_loss": 1.1906168460845947, + "eval_precision": 0.808412354082509, + "eval_precision_DuraRiadoRio_64x64": 1.0, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.6824644549763034, + "eval_precision_RiadoRio_64x64": 0.4395973154362416, + "eval_precision_RioFechado_64x64": 0.92, + "eval_recall": 0.664445885841364, + "eval_recall_DuraRiadoRio_64x64": 0.2013888888888889, + "eval_recall_Mole_64x64": 0.6111111111111112, + "eval_recall_Quebrado_64x64": 1.0, + "eval_recall_RiadoRio_64x64": 0.8618421052631579, + "eval_recall_RioFechado_64x64": 0.647887323943662, + "eval_runtime": 1.715, + "eval_samples_per_second": 423.328, + "eval_steps_per_second": 26.822, + "step": 6188 + }, + { + "epoch": 17.002747252747252, + "grad_norm": 14.487593650817871, + "learning_rate": 4.1498626373626376e-05, + "loss": 0.4081, + "step": 6189 + }, + { + "epoch": 17.005494505494507, + "grad_norm": 12.506811141967773, + "learning_rate": 4.149725274725275e-05, + "loss": 0.4599, + "step": 6190 + }, + { + "epoch": 17.00824175824176, + "grad_norm": 15.113593101501465, + "learning_rate": 4.149587912087912e-05, + "loss": 0.7355, + "step": 6191 + }, + { + "epoch": 17.01098901098901, + "grad_norm": 13.927034378051758, + "learning_rate": 4.14945054945055e-05, + "loss": 0.3934, + "step": 6192 + }, + { + "epoch": 17.013736263736263, + "grad_norm": 17.331863403320312, + "learning_rate": 4.149313186813187e-05, + "loss": 0.4877, + "step": 6193 + }, + { + "epoch": 17.016483516483518, + "grad_norm": 11.917930603027344, + "learning_rate": 4.1491758241758246e-05, + "loss": 0.2737, + "step": 6194 + }, + { + "epoch": 17.01923076923077, + "grad_norm": 8.33717155456543, + "learning_rate": 4.149038461538462e-05, + "loss": 0.2572, + "step": 6195 + }, + { + "epoch": 17.021978021978022, + "grad_norm": 12.504807472229004, + "learning_rate": 4.148901098901099e-05, + "loss": 0.5242, + "step": 6196 + }, + { + "epoch": 17.024725274725274, + "grad_norm": 4.851657390594482, + "learning_rate": 4.148763736263736e-05, + "loss": 0.1224, + "step": 6197 + }, + { + "epoch": 17.02747252747253, + "grad_norm": 8.249452590942383, + "learning_rate": 4.148626373626373e-05, + "loss": 0.4029, + "step": 6198 + }, + { + "epoch": 17.03021978021978, + "grad_norm": 13.780360221862793, + "learning_rate": 4.148489010989011e-05, + "loss": 0.5683, + "step": 6199 + }, + { + "epoch": 17.032967032967033, + "grad_norm": 13.69520378112793, + "learning_rate": 4.148351648351649e-05, + "loss": 0.663, + "step": 6200 + }, + { + "epoch": 17.035714285714285, + "grad_norm": 12.03925609588623, + "learning_rate": 4.148214285714286e-05, + "loss": 0.2889, + "step": 6201 + }, + { + "epoch": 17.03846153846154, + "grad_norm": 14.01942253112793, + "learning_rate": 4.1480769230769234e-05, + "loss": 0.2777, + "step": 6202 + }, + { + "epoch": 17.041208791208792, + "grad_norm": 13.86361312866211, + "learning_rate": 4.1479395604395604e-05, + "loss": 0.3486, + "step": 6203 + }, + { + "epoch": 17.043956043956044, + "grad_norm": 8.947507858276367, + "learning_rate": 4.147802197802198e-05, + "loss": 0.2378, + "step": 6204 + }, + { + "epoch": 17.046703296703296, + "grad_norm": 8.695796966552734, + "learning_rate": 4.147664835164836e-05, + "loss": 0.1661, + "step": 6205 + }, + { + "epoch": 17.04945054945055, + "grad_norm": 18.443540573120117, + "learning_rate": 4.147527472527473e-05, + "loss": 0.5305, + "step": 6206 + }, + { + "epoch": 17.052197802197803, + "grad_norm": 18.125940322875977, + "learning_rate": 4.1473901098901104e-05, + "loss": 0.4929, + "step": 6207 + }, + { + "epoch": 17.054945054945055, + "grad_norm": 12.634420394897461, + "learning_rate": 4.1472527472527474e-05, + "loss": 0.3708, + "step": 6208 + }, + { + "epoch": 17.057692307692307, + "grad_norm": 16.056676864624023, + "learning_rate": 4.147115384615385e-05, + "loss": 0.7295, + "step": 6209 + }, + { + "epoch": 17.060439560439562, + "grad_norm": 9.348967552185059, + "learning_rate": 4.146978021978023e-05, + "loss": 0.1632, + "step": 6210 + }, + { + "epoch": 17.063186813186814, + "grad_norm": 11.462554931640625, + "learning_rate": 4.14684065934066e-05, + "loss": 0.2437, + "step": 6211 + }, + { + "epoch": 17.065934065934066, + "grad_norm": 17.265655517578125, + "learning_rate": 4.146703296703297e-05, + "loss": 0.5319, + "step": 6212 + }, + { + "epoch": 17.068681318681318, + "grad_norm": 14.316194534301758, + "learning_rate": 4.146565934065934e-05, + "loss": 0.3652, + "step": 6213 + }, + { + "epoch": 17.071428571428573, + "grad_norm": 12.915121078491211, + "learning_rate": 4.1464285714285714e-05, + "loss": 0.53, + "step": 6214 + }, + { + "epoch": 17.074175824175825, + "grad_norm": 16.552989959716797, + "learning_rate": 4.146291208791209e-05, + "loss": 0.6513, + "step": 6215 + }, + { + "epoch": 17.076923076923077, + "grad_norm": 14.907559394836426, + "learning_rate": 4.146153846153846e-05, + "loss": 0.4812, + "step": 6216 + }, + { + "epoch": 17.07967032967033, + "grad_norm": 12.031340599060059, + "learning_rate": 4.146016483516484e-05, + "loss": 0.5367, + "step": 6217 + }, + { + "epoch": 17.082417582417584, + "grad_norm": 8.5762357711792, + "learning_rate": 4.145879120879121e-05, + "loss": 0.2036, + "step": 6218 + }, + { + "epoch": 17.085164835164836, + "grad_norm": 12.849506378173828, + "learning_rate": 4.1457417582417585e-05, + "loss": 0.2457, + "step": 6219 + }, + { + "epoch": 17.087912087912088, + "grad_norm": 13.229537010192871, + "learning_rate": 4.145604395604396e-05, + "loss": 0.5566, + "step": 6220 + }, + { + "epoch": 17.09065934065934, + "grad_norm": 14.587250709533691, + "learning_rate": 4.145467032967033e-05, + "loss": 0.475, + "step": 6221 + }, + { + "epoch": 17.093406593406595, + "grad_norm": 3.8411900997161865, + "learning_rate": 4.145329670329671e-05, + "loss": 0.0935, + "step": 6222 + }, + { + "epoch": 17.096153846153847, + "grad_norm": 6.0730438232421875, + "learning_rate": 4.145192307692308e-05, + "loss": 0.1541, + "step": 6223 + }, + { + "epoch": 17.0989010989011, + "grad_norm": 14.716182708740234, + "learning_rate": 4.1450549450549455e-05, + "loss": 0.6475, + "step": 6224 + }, + { + "epoch": 17.10164835164835, + "grad_norm": 12.280799865722656, + "learning_rate": 4.144917582417583e-05, + "loss": 0.3138, + "step": 6225 + }, + { + "epoch": 17.104395604395606, + "grad_norm": 15.729974746704102, + "learning_rate": 4.14478021978022e-05, + "loss": 0.7047, + "step": 6226 + }, + { + "epoch": 17.107142857142858, + "grad_norm": 8.008561134338379, + "learning_rate": 4.144642857142857e-05, + "loss": 0.1675, + "step": 6227 + }, + { + "epoch": 17.10989010989011, + "grad_norm": 11.24770736694336, + "learning_rate": 4.144505494505494e-05, + "loss": 0.3647, + "step": 6228 + }, + { + "epoch": 17.11263736263736, + "grad_norm": 10.756522178649902, + "learning_rate": 4.144368131868132e-05, + "loss": 0.3534, + "step": 6229 + }, + { + "epoch": 17.115384615384617, + "grad_norm": 13.595144271850586, + "learning_rate": 4.1442307692307696e-05, + "loss": 0.4802, + "step": 6230 + }, + { + "epoch": 17.11813186813187, + "grad_norm": 11.947257041931152, + "learning_rate": 4.1440934065934066e-05, + "loss": 0.3147, + "step": 6231 + }, + { + "epoch": 17.12087912087912, + "grad_norm": 9.867071151733398, + "learning_rate": 4.143956043956044e-05, + "loss": 0.452, + "step": 6232 + }, + { + "epoch": 17.123626373626372, + "grad_norm": 6.544808864593506, + "learning_rate": 4.143818681318681e-05, + "loss": 0.1915, + "step": 6233 + }, + { + "epoch": 17.126373626373628, + "grad_norm": 12.768381118774414, + "learning_rate": 4.143681318681319e-05, + "loss": 0.3226, + "step": 6234 + }, + { + "epoch": 17.12912087912088, + "grad_norm": 7.308452606201172, + "learning_rate": 4.1435439560439566e-05, + "loss": 0.2365, + "step": 6235 + }, + { + "epoch": 17.13186813186813, + "grad_norm": 11.703985214233398, + "learning_rate": 4.1434065934065936e-05, + "loss": 0.3549, + "step": 6236 + }, + { + "epoch": 17.134615384615383, + "grad_norm": 13.863771438598633, + "learning_rate": 4.143269230769231e-05, + "loss": 0.6291, + "step": 6237 + }, + { + "epoch": 17.13736263736264, + "grad_norm": 18.095561981201172, + "learning_rate": 4.143131868131868e-05, + "loss": 0.7089, + "step": 6238 + }, + { + "epoch": 17.14010989010989, + "grad_norm": 6.572982311248779, + "learning_rate": 4.142994505494506e-05, + "loss": 0.1892, + "step": 6239 + }, + { + "epoch": 17.142857142857142, + "grad_norm": 11.115100860595703, + "learning_rate": 4.1428571428571437e-05, + "loss": 0.474, + "step": 6240 + }, + { + "epoch": 17.145604395604394, + "grad_norm": 18.30523109436035, + "learning_rate": 4.1427197802197807e-05, + "loss": 0.7867, + "step": 6241 + }, + { + "epoch": 17.14835164835165, + "grad_norm": 10.699790000915527, + "learning_rate": 4.1425824175824177e-05, + "loss": 0.3278, + "step": 6242 + }, + { + "epoch": 17.1510989010989, + "grad_norm": 10.38434886932373, + "learning_rate": 4.142445054945055e-05, + "loss": 0.3272, + "step": 6243 + }, + { + "epoch": 17.153846153846153, + "grad_norm": 18.04276466369629, + "learning_rate": 4.1423076923076923e-05, + "loss": 0.5091, + "step": 6244 + }, + { + "epoch": 17.156593406593405, + "grad_norm": 6.969692707061768, + "learning_rate": 4.14217032967033e-05, + "loss": 0.1629, + "step": 6245 + }, + { + "epoch": 17.15934065934066, + "grad_norm": 18.270837783813477, + "learning_rate": 4.142032967032967e-05, + "loss": 0.5989, + "step": 6246 + }, + { + "epoch": 17.162087912087912, + "grad_norm": 14.685286521911621, + "learning_rate": 4.141895604395605e-05, + "loss": 0.3611, + "step": 6247 + }, + { + "epoch": 17.164835164835164, + "grad_norm": 9.281500816345215, + "learning_rate": 4.141758241758242e-05, + "loss": 0.1926, + "step": 6248 + }, + { + "epoch": 17.167582417582416, + "grad_norm": 10.201518058776855, + "learning_rate": 4.1416208791208794e-05, + "loss": 0.438, + "step": 6249 + }, + { + "epoch": 17.17032967032967, + "grad_norm": 8.559322357177734, + "learning_rate": 4.141483516483517e-05, + "loss": 0.2333, + "step": 6250 + }, + { + "epoch": 17.173076923076923, + "grad_norm": 16.13735580444336, + "learning_rate": 4.141346153846154e-05, + "loss": 0.5839, + "step": 6251 + }, + { + "epoch": 17.175824175824175, + "grad_norm": 10.182190895080566, + "learning_rate": 4.141208791208792e-05, + "loss": 0.3063, + "step": 6252 + }, + { + "epoch": 17.178571428571427, + "grad_norm": 9.004852294921875, + "learning_rate": 4.141071428571429e-05, + "loss": 0.1265, + "step": 6253 + }, + { + "epoch": 17.181318681318682, + "grad_norm": 13.507658004760742, + "learning_rate": 4.1409340659340664e-05, + "loss": 0.3303, + "step": 6254 + }, + { + "epoch": 17.184065934065934, + "grad_norm": 12.201615333557129, + "learning_rate": 4.140796703296704e-05, + "loss": 0.3128, + "step": 6255 + }, + { + "epoch": 17.186813186813186, + "grad_norm": 12.31782341003418, + "learning_rate": 4.140659340659341e-05, + "loss": 0.2489, + "step": 6256 + }, + { + "epoch": 17.189560439560438, + "grad_norm": 13.48625373840332, + "learning_rate": 4.140521978021978e-05, + "loss": 0.5185, + "step": 6257 + }, + { + "epoch": 17.192307692307693, + "grad_norm": 16.8026180267334, + "learning_rate": 4.140384615384615e-05, + "loss": 0.5202, + "step": 6258 + }, + { + "epoch": 17.195054945054945, + "grad_norm": 18.425840377807617, + "learning_rate": 4.140247252747253e-05, + "loss": 0.5458, + "step": 6259 + }, + { + "epoch": 17.197802197802197, + "grad_norm": 15.639365196228027, + "learning_rate": 4.14010989010989e-05, + "loss": 0.3697, + "step": 6260 + }, + { + "epoch": 17.20054945054945, + "grad_norm": 14.970843315124512, + "learning_rate": 4.1399725274725275e-05, + "loss": 0.3541, + "step": 6261 + }, + { + "epoch": 17.203296703296704, + "grad_norm": 10.944805145263672, + "learning_rate": 4.139835164835165e-05, + "loss": 0.4392, + "step": 6262 + }, + { + "epoch": 17.206043956043956, + "grad_norm": 6.214494705200195, + "learning_rate": 4.139697802197802e-05, + "loss": 0.1605, + "step": 6263 + }, + { + "epoch": 17.208791208791208, + "grad_norm": 13.805779457092285, + "learning_rate": 4.13956043956044e-05, + "loss": 0.477, + "step": 6264 + }, + { + "epoch": 17.21153846153846, + "grad_norm": 17.519866943359375, + "learning_rate": 4.139423076923077e-05, + "loss": 0.6982, + "step": 6265 + }, + { + "epoch": 17.214285714285715, + "grad_norm": 9.169163703918457, + "learning_rate": 4.1392857142857145e-05, + "loss": 0.4339, + "step": 6266 + }, + { + "epoch": 17.217032967032967, + "grad_norm": 8.842750549316406, + "learning_rate": 4.139148351648352e-05, + "loss": 0.2943, + "step": 6267 + }, + { + "epoch": 17.21978021978022, + "grad_norm": 14.11835765838623, + "learning_rate": 4.139010989010989e-05, + "loss": 0.4066, + "step": 6268 + }, + { + "epoch": 17.22252747252747, + "grad_norm": 15.509007453918457, + "learning_rate": 4.138873626373627e-05, + "loss": 0.4683, + "step": 6269 + }, + { + "epoch": 17.225274725274726, + "grad_norm": 14.445374488830566, + "learning_rate": 4.138736263736264e-05, + "loss": 0.488, + "step": 6270 + }, + { + "epoch": 17.228021978021978, + "grad_norm": 11.28947925567627, + "learning_rate": 4.1385989010989016e-05, + "loss": 0.2671, + "step": 6271 + }, + { + "epoch": 17.23076923076923, + "grad_norm": 8.728035926818848, + "learning_rate": 4.1384615384615386e-05, + "loss": 0.2989, + "step": 6272 + }, + { + "epoch": 17.233516483516482, + "grad_norm": 10.414810180664062, + "learning_rate": 4.1383241758241756e-05, + "loss": 0.3679, + "step": 6273 + }, + { + "epoch": 17.236263736263737, + "grad_norm": 11.564909934997559, + "learning_rate": 4.138186813186813e-05, + "loss": 0.4789, + "step": 6274 + }, + { + "epoch": 17.23901098901099, + "grad_norm": 7.88808012008667, + "learning_rate": 4.13804945054945e-05, + "loss": 0.2107, + "step": 6275 + }, + { + "epoch": 17.24175824175824, + "grad_norm": 10.91695785522461, + "learning_rate": 4.137912087912088e-05, + "loss": 0.551, + "step": 6276 + }, + { + "epoch": 17.244505494505493, + "grad_norm": 12.00015926361084, + "learning_rate": 4.1377747252747256e-05, + "loss": 0.3541, + "step": 6277 + }, + { + "epoch": 17.247252747252748, + "grad_norm": 18.611940383911133, + "learning_rate": 4.1376373626373626e-05, + "loss": 0.7907, + "step": 6278 + }, + { + "epoch": 17.25, + "grad_norm": 20.96906089782715, + "learning_rate": 4.1375e-05, + "loss": 0.7558, + "step": 6279 + }, + { + "epoch": 17.252747252747252, + "grad_norm": 5.131430149078369, + "learning_rate": 4.137362637362637e-05, + "loss": 0.1557, + "step": 6280 + }, + { + "epoch": 17.255494505494504, + "grad_norm": 8.474939346313477, + "learning_rate": 4.137225274725275e-05, + "loss": 0.2511, + "step": 6281 + }, + { + "epoch": 17.25824175824176, + "grad_norm": 16.665233612060547, + "learning_rate": 4.1370879120879126e-05, + "loss": 0.5615, + "step": 6282 + }, + { + "epoch": 17.26098901098901, + "grad_norm": 5.90110445022583, + "learning_rate": 4.1369505494505496e-05, + "loss": 0.1442, + "step": 6283 + }, + { + "epoch": 17.263736263736263, + "grad_norm": 12.39127254486084, + "learning_rate": 4.136813186813187e-05, + "loss": 0.3631, + "step": 6284 + }, + { + "epoch": 17.266483516483518, + "grad_norm": 14.94949722290039, + "learning_rate": 4.136675824175824e-05, + "loss": 0.7258, + "step": 6285 + }, + { + "epoch": 17.26923076923077, + "grad_norm": 15.154142379760742, + "learning_rate": 4.136538461538462e-05, + "loss": 0.6875, + "step": 6286 + }, + { + "epoch": 17.271978021978022, + "grad_norm": 14.962540626525879, + "learning_rate": 4.136401098901099e-05, + "loss": 0.3616, + "step": 6287 + }, + { + "epoch": 17.274725274725274, + "grad_norm": 14.7079439163208, + "learning_rate": 4.136263736263736e-05, + "loss": 0.4715, + "step": 6288 + }, + { + "epoch": 17.27747252747253, + "grad_norm": 16.143178939819336, + "learning_rate": 4.136126373626374e-05, + "loss": 0.6783, + "step": 6289 + }, + { + "epoch": 17.28021978021978, + "grad_norm": 16.757699966430664, + "learning_rate": 4.135989010989011e-05, + "loss": 0.2832, + "step": 6290 + }, + { + "epoch": 17.282967032967033, + "grad_norm": 11.93613338470459, + "learning_rate": 4.1358516483516484e-05, + "loss": 0.4817, + "step": 6291 + }, + { + "epoch": 17.285714285714285, + "grad_norm": 12.143254280090332, + "learning_rate": 4.135714285714286e-05, + "loss": 0.5096, + "step": 6292 + }, + { + "epoch": 17.28846153846154, + "grad_norm": 17.943145751953125, + "learning_rate": 4.135576923076923e-05, + "loss": 0.5573, + "step": 6293 + }, + { + "epoch": 17.291208791208792, + "grad_norm": 9.675633430480957, + "learning_rate": 4.135439560439561e-05, + "loss": 0.3295, + "step": 6294 + }, + { + "epoch": 17.293956043956044, + "grad_norm": 11.659461975097656, + "learning_rate": 4.135302197802198e-05, + "loss": 0.4432, + "step": 6295 + }, + { + "epoch": 17.296703296703296, + "grad_norm": 16.628921508789062, + "learning_rate": 4.1351648351648354e-05, + "loss": 0.5522, + "step": 6296 + }, + { + "epoch": 17.29945054945055, + "grad_norm": 11.652006149291992, + "learning_rate": 4.135027472527473e-05, + "loss": 0.3153, + "step": 6297 + }, + { + "epoch": 17.302197802197803, + "grad_norm": 15.359655380249023, + "learning_rate": 4.13489010989011e-05, + "loss": 0.537, + "step": 6298 + }, + { + "epoch": 17.304945054945055, + "grad_norm": 13.912713050842285, + "learning_rate": 4.134752747252748e-05, + "loss": 0.2576, + "step": 6299 + }, + { + "epoch": 17.307692307692307, + "grad_norm": 19.046573638916016, + "learning_rate": 4.134615384615385e-05, + "loss": 0.7224, + "step": 6300 + }, + { + "epoch": 17.310439560439562, + "grad_norm": 15.844698905944824, + "learning_rate": 4.1344780219780225e-05, + "loss": 0.455, + "step": 6301 + }, + { + "epoch": 17.313186813186814, + "grad_norm": 14.837043762207031, + "learning_rate": 4.1343406593406595e-05, + "loss": 0.7014, + "step": 6302 + }, + { + "epoch": 17.315934065934066, + "grad_norm": 21.25314712524414, + "learning_rate": 4.1342032967032965e-05, + "loss": 0.816, + "step": 6303 + }, + { + "epoch": 17.318681318681318, + "grad_norm": 12.272734642028809, + "learning_rate": 4.134065934065934e-05, + "loss": 0.3749, + "step": 6304 + }, + { + "epoch": 17.321428571428573, + "grad_norm": 20.47638511657715, + "learning_rate": 4.133928571428571e-05, + "loss": 0.5004, + "step": 6305 + }, + { + "epoch": 17.324175824175825, + "grad_norm": 18.08555030822754, + "learning_rate": 4.133791208791209e-05, + "loss": 0.399, + "step": 6306 + }, + { + "epoch": 17.326923076923077, + "grad_norm": 11.817310333251953, + "learning_rate": 4.1336538461538465e-05, + "loss": 0.5166, + "step": 6307 + }, + { + "epoch": 17.32967032967033, + "grad_norm": 14.93368148803711, + "learning_rate": 4.1335164835164835e-05, + "loss": 0.5753, + "step": 6308 + }, + { + "epoch": 17.332417582417584, + "grad_norm": 16.753074645996094, + "learning_rate": 4.133379120879121e-05, + "loss": 0.691, + "step": 6309 + }, + { + "epoch": 17.335164835164836, + "grad_norm": 10.174989700317383, + "learning_rate": 4.133241758241758e-05, + "loss": 0.3642, + "step": 6310 + }, + { + "epoch": 17.337912087912088, + "grad_norm": 10.016265869140625, + "learning_rate": 4.133104395604396e-05, + "loss": 0.2032, + "step": 6311 + }, + { + "epoch": 17.34065934065934, + "grad_norm": 7.134219646453857, + "learning_rate": 4.1329670329670335e-05, + "loss": 0.194, + "step": 6312 + }, + { + "epoch": 17.343406593406595, + "grad_norm": 10.607952117919922, + "learning_rate": 4.1328296703296705e-05, + "loss": 0.3662, + "step": 6313 + }, + { + "epoch": 17.346153846153847, + "grad_norm": 8.818632125854492, + "learning_rate": 4.132692307692308e-05, + "loss": 0.208, + "step": 6314 + }, + { + "epoch": 17.3489010989011, + "grad_norm": 20.674253463745117, + "learning_rate": 4.132554945054945e-05, + "loss": 0.892, + "step": 6315 + }, + { + "epoch": 17.35164835164835, + "grad_norm": 11.389036178588867, + "learning_rate": 4.132417582417583e-05, + "loss": 0.2615, + "step": 6316 + }, + { + "epoch": 17.354395604395606, + "grad_norm": 9.059064865112305, + "learning_rate": 4.13228021978022e-05, + "loss": 0.4623, + "step": 6317 + }, + { + "epoch": 17.357142857142858, + "grad_norm": 14.954682350158691, + "learning_rate": 4.132142857142857e-05, + "loss": 0.5567, + "step": 6318 + }, + { + "epoch": 17.35989010989011, + "grad_norm": 17.638214111328125, + "learning_rate": 4.1320054945054946e-05, + "loss": 0.445, + "step": 6319 + }, + { + "epoch": 17.36263736263736, + "grad_norm": 11.099918365478516, + "learning_rate": 4.1318681318681316e-05, + "loss": 0.3559, + "step": 6320 + }, + { + "epoch": 17.365384615384617, + "grad_norm": 17.3588924407959, + "learning_rate": 4.131730769230769e-05, + "loss": 0.6684, + "step": 6321 + }, + { + "epoch": 17.36813186813187, + "grad_norm": 7.483561038970947, + "learning_rate": 4.131593406593407e-05, + "loss": 0.3206, + "step": 6322 + }, + { + "epoch": 17.37087912087912, + "grad_norm": 10.170816421508789, + "learning_rate": 4.131456043956044e-05, + "loss": 0.3387, + "step": 6323 + }, + { + "epoch": 17.373626373626372, + "grad_norm": 10.843228340148926, + "learning_rate": 4.1313186813186816e-05, + "loss": 0.4251, + "step": 6324 + }, + { + "epoch": 17.376373626373628, + "grad_norm": 6.022737979888916, + "learning_rate": 4.1311813186813186e-05, + "loss": 0.1518, + "step": 6325 + }, + { + "epoch": 17.37912087912088, + "grad_norm": 11.765899658203125, + "learning_rate": 4.131043956043956e-05, + "loss": 0.4187, + "step": 6326 + }, + { + "epoch": 17.38186813186813, + "grad_norm": 13.728609085083008, + "learning_rate": 4.130906593406594e-05, + "loss": 0.3885, + "step": 6327 + }, + { + "epoch": 17.384615384615383, + "grad_norm": 14.949813842773438, + "learning_rate": 4.130769230769231e-05, + "loss": 0.5082, + "step": 6328 + }, + { + "epoch": 17.38736263736264, + "grad_norm": 7.299574375152588, + "learning_rate": 4.130631868131869e-05, + "loss": 0.2311, + "step": 6329 + }, + { + "epoch": 17.39010989010989, + "grad_norm": 16.38318634033203, + "learning_rate": 4.130494505494506e-05, + "loss": 0.5066, + "step": 6330 + }, + { + "epoch": 17.392857142857142, + "grad_norm": 11.201486587524414, + "learning_rate": 4.1303571428571434e-05, + "loss": 0.2729, + "step": 6331 + }, + { + "epoch": 17.395604395604394, + "grad_norm": 10.809460639953613, + "learning_rate": 4.1302197802197804e-05, + "loss": 0.2795, + "step": 6332 + }, + { + "epoch": 17.39835164835165, + "grad_norm": 13.764679908752441, + "learning_rate": 4.1300824175824174e-05, + "loss": 0.5847, + "step": 6333 + }, + { + "epoch": 17.4010989010989, + "grad_norm": 15.867295265197754, + "learning_rate": 4.129945054945055e-05, + "loss": 0.6395, + "step": 6334 + }, + { + "epoch": 17.403846153846153, + "grad_norm": 14.454540252685547, + "learning_rate": 4.129807692307692e-05, + "loss": 0.4023, + "step": 6335 + }, + { + "epoch": 17.406593406593405, + "grad_norm": 9.862451553344727, + "learning_rate": 4.12967032967033e-05, + "loss": 0.5115, + "step": 6336 + }, + { + "epoch": 17.40934065934066, + "grad_norm": 17.593372344970703, + "learning_rate": 4.1295329670329674e-05, + "loss": 0.5059, + "step": 6337 + }, + { + "epoch": 17.412087912087912, + "grad_norm": 15.519306182861328, + "learning_rate": 4.1293956043956044e-05, + "loss": 0.4265, + "step": 6338 + }, + { + "epoch": 17.414835164835164, + "grad_norm": 15.151277542114258, + "learning_rate": 4.129258241758242e-05, + "loss": 0.4978, + "step": 6339 + }, + { + "epoch": 17.417582417582416, + "grad_norm": 11.60322380065918, + "learning_rate": 4.129120879120879e-05, + "loss": 0.3568, + "step": 6340 + }, + { + "epoch": 17.42032967032967, + "grad_norm": 14.423542022705078, + "learning_rate": 4.128983516483517e-05, + "loss": 0.496, + "step": 6341 + }, + { + "epoch": 17.423076923076923, + "grad_norm": 13.277395248413086, + "learning_rate": 4.1288461538461544e-05, + "loss": 0.4089, + "step": 6342 + }, + { + "epoch": 17.425824175824175, + "grad_norm": 9.830857276916504, + "learning_rate": 4.1287087912087914e-05, + "loss": 0.4254, + "step": 6343 + }, + { + "epoch": 17.428571428571427, + "grad_norm": 18.382638931274414, + "learning_rate": 4.128571428571429e-05, + "loss": 0.5809, + "step": 6344 + }, + { + "epoch": 17.431318681318682, + "grad_norm": 18.002412796020508, + "learning_rate": 4.128434065934066e-05, + "loss": 0.6133, + "step": 6345 + }, + { + "epoch": 17.434065934065934, + "grad_norm": 15.054315567016602, + "learning_rate": 4.128296703296704e-05, + "loss": 0.3651, + "step": 6346 + }, + { + "epoch": 17.436813186813186, + "grad_norm": 14.544858932495117, + "learning_rate": 4.128159340659341e-05, + "loss": 0.4742, + "step": 6347 + }, + { + "epoch": 17.439560439560438, + "grad_norm": 14.589920043945312, + "learning_rate": 4.128021978021978e-05, + "loss": 0.3749, + "step": 6348 + }, + { + "epoch": 17.442307692307693, + "grad_norm": 20.072296142578125, + "learning_rate": 4.1278846153846155e-05, + "loss": 0.6596, + "step": 6349 + }, + { + "epoch": 17.445054945054945, + "grad_norm": 10.666169166564941, + "learning_rate": 4.1277472527472525e-05, + "loss": 0.4547, + "step": 6350 + }, + { + "epoch": 17.447802197802197, + "grad_norm": 7.710601329803467, + "learning_rate": 4.12760989010989e-05, + "loss": 0.2527, + "step": 6351 + }, + { + "epoch": 17.45054945054945, + "grad_norm": 12.889838218688965, + "learning_rate": 4.127472527472528e-05, + "loss": 0.4682, + "step": 6352 + }, + { + "epoch": 17.453296703296704, + "grad_norm": 8.652366638183594, + "learning_rate": 4.127335164835165e-05, + "loss": 0.2472, + "step": 6353 + }, + { + "epoch": 17.456043956043956, + "grad_norm": 11.816415786743164, + "learning_rate": 4.1271978021978025e-05, + "loss": 0.4487, + "step": 6354 + }, + { + "epoch": 17.458791208791208, + "grad_norm": 18.02031135559082, + "learning_rate": 4.1270604395604395e-05, + "loss": 0.552, + "step": 6355 + }, + { + "epoch": 17.46153846153846, + "grad_norm": 11.837550163269043, + "learning_rate": 4.126923076923077e-05, + "loss": 0.3515, + "step": 6356 + }, + { + "epoch": 17.464285714285715, + "grad_norm": 12.216712951660156, + "learning_rate": 4.126785714285715e-05, + "loss": 0.5282, + "step": 6357 + }, + { + "epoch": 17.467032967032967, + "grad_norm": 11.543730735778809, + "learning_rate": 4.126648351648352e-05, + "loss": 0.2388, + "step": 6358 + }, + { + "epoch": 17.46978021978022, + "grad_norm": 9.013507843017578, + "learning_rate": 4.1265109890109896e-05, + "loss": 0.342, + "step": 6359 + }, + { + "epoch": 17.47252747252747, + "grad_norm": 8.874127388000488, + "learning_rate": 4.1263736263736266e-05, + "loss": 0.2486, + "step": 6360 + }, + { + "epoch": 17.475274725274726, + "grad_norm": 20.066957473754883, + "learning_rate": 4.126236263736264e-05, + "loss": 0.7318, + "step": 6361 + }, + { + "epoch": 17.478021978021978, + "grad_norm": 18.113094329833984, + "learning_rate": 4.126098901098901e-05, + "loss": 0.6188, + "step": 6362 + }, + { + "epoch": 17.48076923076923, + "grad_norm": 15.929001808166504, + "learning_rate": 4.125961538461538e-05, + "loss": 0.4398, + "step": 6363 + }, + { + "epoch": 17.483516483516482, + "grad_norm": 13.373180389404297, + "learning_rate": 4.125824175824176e-05, + "loss": 0.4414, + "step": 6364 + }, + { + "epoch": 17.486263736263737, + "grad_norm": 13.500988006591797, + "learning_rate": 4.125686813186813e-05, + "loss": 0.5141, + "step": 6365 + }, + { + "epoch": 17.48901098901099, + "grad_norm": 16.248632431030273, + "learning_rate": 4.1255494505494506e-05, + "loss": 0.3937, + "step": 6366 + }, + { + "epoch": 17.49175824175824, + "grad_norm": 15.139240264892578, + "learning_rate": 4.125412087912088e-05, + "loss": 0.6731, + "step": 6367 + }, + { + "epoch": 17.494505494505496, + "grad_norm": 13.429319381713867, + "learning_rate": 4.125274725274725e-05, + "loss": 0.4869, + "step": 6368 + }, + { + "epoch": 17.497252747252748, + "grad_norm": 10.825124740600586, + "learning_rate": 4.125137362637363e-05, + "loss": 0.3244, + "step": 6369 + }, + { + "epoch": 17.5, + "grad_norm": 13.201373100280762, + "learning_rate": 4.125e-05, + "loss": 0.3475, + "step": 6370 + }, + { + "epoch": 17.502747252747252, + "grad_norm": 10.170852661132812, + "learning_rate": 4.1248626373626377e-05, + "loss": 0.3141, + "step": 6371 + }, + { + "epoch": 17.505494505494504, + "grad_norm": 12.671960830688477, + "learning_rate": 4.124725274725275e-05, + "loss": 0.3734, + "step": 6372 + }, + { + "epoch": 17.50824175824176, + "grad_norm": 8.002625465393066, + "learning_rate": 4.124587912087912e-05, + "loss": 0.225, + "step": 6373 + }, + { + "epoch": 17.51098901098901, + "grad_norm": 14.038962364196777, + "learning_rate": 4.12445054945055e-05, + "loss": 0.5021, + "step": 6374 + }, + { + "epoch": 17.513736263736263, + "grad_norm": 10.116547584533691, + "learning_rate": 4.124313186813187e-05, + "loss": 0.2057, + "step": 6375 + }, + { + "epoch": 17.516483516483518, + "grad_norm": 8.0348482131958, + "learning_rate": 4.124175824175825e-05, + "loss": 0.1719, + "step": 6376 + }, + { + "epoch": 17.51923076923077, + "grad_norm": 10.59771728515625, + "learning_rate": 4.124038461538462e-05, + "loss": 0.2964, + "step": 6377 + }, + { + "epoch": 17.521978021978022, + "grad_norm": 11.734062194824219, + "learning_rate": 4.123901098901099e-05, + "loss": 0.2845, + "step": 6378 + }, + { + "epoch": 17.524725274725274, + "grad_norm": 12.011924743652344, + "learning_rate": 4.1237637362637364e-05, + "loss": 0.2486, + "step": 6379 + }, + { + "epoch": 17.52747252747253, + "grad_norm": 8.639259338378906, + "learning_rate": 4.1236263736263734e-05, + "loss": 0.2089, + "step": 6380 + }, + { + "epoch": 17.53021978021978, + "grad_norm": 17.852245330810547, + "learning_rate": 4.123489010989011e-05, + "loss": 0.6521, + "step": 6381 + }, + { + "epoch": 17.532967032967033, + "grad_norm": 9.129315376281738, + "learning_rate": 4.123351648351649e-05, + "loss": 0.2131, + "step": 6382 + }, + { + "epoch": 17.535714285714285, + "grad_norm": 20.296031951904297, + "learning_rate": 4.123214285714286e-05, + "loss": 1.0649, + "step": 6383 + }, + { + "epoch": 17.53846153846154, + "grad_norm": 14.912147521972656, + "learning_rate": 4.1230769230769234e-05, + "loss": 0.3515, + "step": 6384 + }, + { + "epoch": 17.541208791208792, + "grad_norm": 18.446657180786133, + "learning_rate": 4.1229395604395604e-05, + "loss": 0.8422, + "step": 6385 + }, + { + "epoch": 17.543956043956044, + "grad_norm": 12.884865760803223, + "learning_rate": 4.122802197802198e-05, + "loss": 0.3594, + "step": 6386 + }, + { + "epoch": 17.546703296703296, + "grad_norm": 17.3142147064209, + "learning_rate": 4.122664835164836e-05, + "loss": 0.4431, + "step": 6387 + }, + { + "epoch": 17.54945054945055, + "grad_norm": 12.601593017578125, + "learning_rate": 4.122527472527473e-05, + "loss": 0.3086, + "step": 6388 + }, + { + "epoch": 17.552197802197803, + "grad_norm": 20.419261932373047, + "learning_rate": 4.1223901098901105e-05, + "loss": 0.7982, + "step": 6389 + }, + { + "epoch": 17.554945054945055, + "grad_norm": 18.9777774810791, + "learning_rate": 4.1222527472527475e-05, + "loss": 0.5986, + "step": 6390 + }, + { + "epoch": 17.557692307692307, + "grad_norm": 10.676227569580078, + "learning_rate": 4.122115384615385e-05, + "loss": 0.3186, + "step": 6391 + }, + { + "epoch": 17.560439560439562, + "grad_norm": 11.754008293151855, + "learning_rate": 4.121978021978022e-05, + "loss": 0.391, + "step": 6392 + }, + { + "epoch": 17.563186813186814, + "grad_norm": 9.97059154510498, + "learning_rate": 4.121840659340659e-05, + "loss": 0.2286, + "step": 6393 + }, + { + "epoch": 17.565934065934066, + "grad_norm": 16.295333862304688, + "learning_rate": 4.121703296703297e-05, + "loss": 0.7173, + "step": 6394 + }, + { + "epoch": 17.568681318681318, + "grad_norm": 8.095362663269043, + "learning_rate": 4.121565934065934e-05, + "loss": 0.1802, + "step": 6395 + }, + { + "epoch": 17.571428571428573, + "grad_norm": 13.242950439453125, + "learning_rate": 4.1214285714285715e-05, + "loss": 0.6633, + "step": 6396 + }, + { + "epoch": 17.574175824175825, + "grad_norm": 10.072051048278809, + "learning_rate": 4.121291208791209e-05, + "loss": 0.3022, + "step": 6397 + }, + { + "epoch": 17.576923076923077, + "grad_norm": 12.847372055053711, + "learning_rate": 4.121153846153846e-05, + "loss": 0.3236, + "step": 6398 + }, + { + "epoch": 17.57967032967033, + "grad_norm": 12.673837661743164, + "learning_rate": 4.121016483516484e-05, + "loss": 0.5385, + "step": 6399 + }, + { + "epoch": 17.582417582417584, + "grad_norm": 16.194801330566406, + "learning_rate": 4.120879120879121e-05, + "loss": 0.5043, + "step": 6400 + }, + { + "epoch": 17.585164835164836, + "grad_norm": 10.462986946105957, + "learning_rate": 4.1207417582417586e-05, + "loss": 0.3144, + "step": 6401 + }, + { + "epoch": 17.587912087912088, + "grad_norm": 17.625303268432617, + "learning_rate": 4.120604395604396e-05, + "loss": 0.7454, + "step": 6402 + }, + { + "epoch": 17.59065934065934, + "grad_norm": 9.273622512817383, + "learning_rate": 4.120467032967033e-05, + "loss": 0.2708, + "step": 6403 + }, + { + "epoch": 17.593406593406595, + "grad_norm": 18.1446590423584, + "learning_rate": 4.120329670329671e-05, + "loss": 0.7811, + "step": 6404 + }, + { + "epoch": 17.596153846153847, + "grad_norm": 8.976174354553223, + "learning_rate": 4.120192307692308e-05, + "loss": 0.2546, + "step": 6405 + }, + { + "epoch": 17.5989010989011, + "grad_norm": 24.977954864501953, + "learning_rate": 4.1200549450549456e-05, + "loss": 0.9375, + "step": 6406 + }, + { + "epoch": 17.60164835164835, + "grad_norm": 14.050237655639648, + "learning_rate": 4.1199175824175826e-05, + "loss": 0.379, + "step": 6407 + }, + { + "epoch": 17.604395604395606, + "grad_norm": 9.363144874572754, + "learning_rate": 4.1197802197802196e-05, + "loss": 0.2183, + "step": 6408 + }, + { + "epoch": 17.607142857142858, + "grad_norm": 6.02243185043335, + "learning_rate": 4.119642857142857e-05, + "loss": 0.1337, + "step": 6409 + }, + { + "epoch": 17.60989010989011, + "grad_norm": 9.288419723510742, + "learning_rate": 4.119505494505494e-05, + "loss": 0.4097, + "step": 6410 + }, + { + "epoch": 17.61263736263736, + "grad_norm": 8.503987312316895, + "learning_rate": 4.119368131868132e-05, + "loss": 0.3265, + "step": 6411 + }, + { + "epoch": 17.615384615384617, + "grad_norm": 5.315973281860352, + "learning_rate": 4.1192307692307696e-05, + "loss": 0.2003, + "step": 6412 + }, + { + "epoch": 17.61813186813187, + "grad_norm": 8.849947929382324, + "learning_rate": 4.1190934065934066e-05, + "loss": 0.2344, + "step": 6413 + }, + { + "epoch": 17.62087912087912, + "grad_norm": 15.471561431884766, + "learning_rate": 4.118956043956044e-05, + "loss": 0.5385, + "step": 6414 + }, + { + "epoch": 17.623626373626372, + "grad_norm": 18.859477996826172, + "learning_rate": 4.118818681318681e-05, + "loss": 0.6178, + "step": 6415 + }, + { + "epoch": 17.626373626373628, + "grad_norm": 13.612900733947754, + "learning_rate": 4.118681318681319e-05, + "loss": 0.7612, + "step": 6416 + }, + { + "epoch": 17.62912087912088, + "grad_norm": 8.46008586883545, + "learning_rate": 4.118543956043957e-05, + "loss": 0.2416, + "step": 6417 + }, + { + "epoch": 17.63186813186813, + "grad_norm": 15.234652519226074, + "learning_rate": 4.118406593406594e-05, + "loss": 0.8315, + "step": 6418 + }, + { + "epoch": 17.634615384615383, + "grad_norm": 15.031200408935547, + "learning_rate": 4.1182692307692314e-05, + "loss": 0.6833, + "step": 6419 + }, + { + "epoch": 17.63736263736264, + "grad_norm": 18.813528060913086, + "learning_rate": 4.1181318681318684e-05, + "loss": 0.6793, + "step": 6420 + }, + { + "epoch": 17.64010989010989, + "grad_norm": 11.231038093566895, + "learning_rate": 4.117994505494506e-05, + "loss": 0.3546, + "step": 6421 + }, + { + "epoch": 17.642857142857142, + "grad_norm": 8.781229972839355, + "learning_rate": 4.117857142857143e-05, + "loss": 0.347, + "step": 6422 + }, + { + "epoch": 17.645604395604394, + "grad_norm": 15.020986557006836, + "learning_rate": 4.11771978021978e-05, + "loss": 0.5459, + "step": 6423 + }, + { + "epoch": 17.64835164835165, + "grad_norm": 10.967090606689453, + "learning_rate": 4.117582417582418e-05, + "loss": 0.3416, + "step": 6424 + }, + { + "epoch": 17.6510989010989, + "grad_norm": 14.245430946350098, + "learning_rate": 4.117445054945055e-05, + "loss": 0.7273, + "step": 6425 + }, + { + "epoch": 17.653846153846153, + "grad_norm": 12.766895294189453, + "learning_rate": 4.1173076923076924e-05, + "loss": 0.328, + "step": 6426 + }, + { + "epoch": 17.656593406593405, + "grad_norm": 9.251570701599121, + "learning_rate": 4.11717032967033e-05, + "loss": 0.3653, + "step": 6427 + }, + { + "epoch": 17.65934065934066, + "grad_norm": 8.813742637634277, + "learning_rate": 4.117032967032967e-05, + "loss": 0.2021, + "step": 6428 + }, + { + "epoch": 17.662087912087912, + "grad_norm": 18.864479064941406, + "learning_rate": 4.116895604395605e-05, + "loss": 0.8447, + "step": 6429 + }, + { + "epoch": 17.664835164835164, + "grad_norm": 9.045172691345215, + "learning_rate": 4.116758241758242e-05, + "loss": 0.2871, + "step": 6430 + }, + { + "epoch": 17.667582417582416, + "grad_norm": 13.692267417907715, + "learning_rate": 4.1166208791208795e-05, + "loss": 0.3416, + "step": 6431 + }, + { + "epoch": 17.67032967032967, + "grad_norm": 17.178287506103516, + "learning_rate": 4.116483516483517e-05, + "loss": 0.7018, + "step": 6432 + }, + { + "epoch": 17.673076923076923, + "grad_norm": 15.027490615844727, + "learning_rate": 4.116346153846154e-05, + "loss": 0.5146, + "step": 6433 + }, + { + "epoch": 17.675824175824175, + "grad_norm": 11.945121765136719, + "learning_rate": 4.116208791208792e-05, + "loss": 0.3058, + "step": 6434 + }, + { + "epoch": 17.678571428571427, + "grad_norm": 16.902780532836914, + "learning_rate": 4.116071428571429e-05, + "loss": 0.5323, + "step": 6435 + }, + { + "epoch": 17.681318681318682, + "grad_norm": 18.58658218383789, + "learning_rate": 4.1159340659340665e-05, + "loss": 0.4852, + "step": 6436 + }, + { + "epoch": 17.684065934065934, + "grad_norm": 16.9692325592041, + "learning_rate": 4.1157967032967035e-05, + "loss": 0.646, + "step": 6437 + }, + { + "epoch": 17.686813186813186, + "grad_norm": 10.147733688354492, + "learning_rate": 4.1156593406593405e-05, + "loss": 0.2599, + "step": 6438 + }, + { + "epoch": 17.689560439560438, + "grad_norm": 8.596922874450684, + "learning_rate": 4.115521978021978e-05, + "loss": 0.3079, + "step": 6439 + }, + { + "epoch": 17.692307692307693, + "grad_norm": 17.380922317504883, + "learning_rate": 4.115384615384615e-05, + "loss": 0.948, + "step": 6440 + }, + { + "epoch": 17.695054945054945, + "grad_norm": 7.684097766876221, + "learning_rate": 4.115247252747253e-05, + "loss": 0.19, + "step": 6441 + }, + { + "epoch": 17.697802197802197, + "grad_norm": 19.69759750366211, + "learning_rate": 4.1151098901098905e-05, + "loss": 0.8221, + "step": 6442 + }, + { + "epoch": 17.70054945054945, + "grad_norm": 18.49336814880371, + "learning_rate": 4.1149725274725275e-05, + "loss": 1.2929, + "step": 6443 + }, + { + "epoch": 17.703296703296704, + "grad_norm": 15.736310958862305, + "learning_rate": 4.114835164835165e-05, + "loss": 0.4311, + "step": 6444 + }, + { + "epoch": 17.706043956043956, + "grad_norm": 10.290641784667969, + "learning_rate": 4.114697802197802e-05, + "loss": 0.2433, + "step": 6445 + }, + { + "epoch": 17.708791208791208, + "grad_norm": 11.048901557922363, + "learning_rate": 4.11456043956044e-05, + "loss": 0.244, + "step": 6446 + }, + { + "epoch": 17.71153846153846, + "grad_norm": 8.966540336608887, + "learning_rate": 4.1144230769230776e-05, + "loss": 0.2328, + "step": 6447 + }, + { + "epoch": 17.714285714285715, + "grad_norm": 10.520952224731445, + "learning_rate": 4.1142857142857146e-05, + "loss": 0.4535, + "step": 6448 + }, + { + "epoch": 17.717032967032967, + "grad_norm": 10.182903289794922, + "learning_rate": 4.114148351648352e-05, + "loss": 0.334, + "step": 6449 + }, + { + "epoch": 17.71978021978022, + "grad_norm": 13.038180351257324, + "learning_rate": 4.114010989010989e-05, + "loss": 0.4414, + "step": 6450 + }, + { + "epoch": 17.72252747252747, + "grad_norm": 10.609359741210938, + "learning_rate": 4.113873626373627e-05, + "loss": 0.311, + "step": 6451 + }, + { + "epoch": 17.725274725274726, + "grad_norm": 14.726232528686523, + "learning_rate": 4.113736263736264e-05, + "loss": 0.3769, + "step": 6452 + }, + { + "epoch": 17.728021978021978, + "grad_norm": 13.337776184082031, + "learning_rate": 4.113598901098901e-05, + "loss": 0.541, + "step": 6453 + }, + { + "epoch": 17.73076923076923, + "grad_norm": 9.479643821716309, + "learning_rate": 4.1134615384615386e-05, + "loss": 0.3051, + "step": 6454 + }, + { + "epoch": 17.733516483516482, + "grad_norm": 11.924602508544922, + "learning_rate": 4.1133241758241756e-05, + "loss": 0.4054, + "step": 6455 + }, + { + "epoch": 17.736263736263737, + "grad_norm": 11.724189758300781, + "learning_rate": 4.113186813186813e-05, + "loss": 0.4573, + "step": 6456 + }, + { + "epoch": 17.73901098901099, + "grad_norm": 19.02130889892578, + "learning_rate": 4.113049450549451e-05, + "loss": 0.502, + "step": 6457 + }, + { + "epoch": 17.74175824175824, + "grad_norm": 14.643916130065918, + "learning_rate": 4.112912087912088e-05, + "loss": 0.4528, + "step": 6458 + }, + { + "epoch": 17.744505494505496, + "grad_norm": 8.261459350585938, + "learning_rate": 4.112774725274726e-05, + "loss": 0.2246, + "step": 6459 + }, + { + "epoch": 17.747252747252748, + "grad_norm": 16.888599395751953, + "learning_rate": 4.112637362637363e-05, + "loss": 0.7483, + "step": 6460 + }, + { + "epoch": 17.75, + "grad_norm": 16.370445251464844, + "learning_rate": 4.1125000000000004e-05, + "loss": 0.4451, + "step": 6461 + }, + { + "epoch": 17.752747252747252, + "grad_norm": 11.800590515136719, + "learning_rate": 4.112362637362638e-05, + "loss": 0.4158, + "step": 6462 + }, + { + "epoch": 17.755494505494504, + "grad_norm": 9.583211898803711, + "learning_rate": 4.112225274725275e-05, + "loss": 0.2595, + "step": 6463 + }, + { + "epoch": 17.75824175824176, + "grad_norm": 5.461537837982178, + "learning_rate": 4.112087912087913e-05, + "loss": 0.1543, + "step": 6464 + }, + { + "epoch": 17.76098901098901, + "grad_norm": 14.212482452392578, + "learning_rate": 4.11195054945055e-05, + "loss": 0.6152, + "step": 6465 + }, + { + "epoch": 17.763736263736263, + "grad_norm": 3.5316433906555176, + "learning_rate": 4.1118131868131874e-05, + "loss": 0.0812, + "step": 6466 + }, + { + "epoch": 17.766483516483518, + "grad_norm": 10.698714256286621, + "learning_rate": 4.1116758241758244e-05, + "loss": 0.3493, + "step": 6467 + }, + { + "epoch": 17.76923076923077, + "grad_norm": 18.418331146240234, + "learning_rate": 4.1115384615384614e-05, + "loss": 0.5539, + "step": 6468 + }, + { + "epoch": 17.771978021978022, + "grad_norm": 12.53420352935791, + "learning_rate": 4.111401098901099e-05, + "loss": 0.37, + "step": 6469 + }, + { + "epoch": 17.774725274725274, + "grad_norm": 19.583145141601562, + "learning_rate": 4.111263736263736e-05, + "loss": 0.6915, + "step": 6470 + }, + { + "epoch": 17.77747252747253, + "grad_norm": 16.9623966217041, + "learning_rate": 4.111126373626374e-05, + "loss": 0.5451, + "step": 6471 + }, + { + "epoch": 17.78021978021978, + "grad_norm": 4.9043707847595215, + "learning_rate": 4.1109890109890114e-05, + "loss": 0.1001, + "step": 6472 + }, + { + "epoch": 17.782967032967033, + "grad_norm": 7.692430019378662, + "learning_rate": 4.1108516483516484e-05, + "loss": 0.122, + "step": 6473 + }, + { + "epoch": 17.785714285714285, + "grad_norm": 17.74235725402832, + "learning_rate": 4.110714285714286e-05, + "loss": 0.5849, + "step": 6474 + }, + { + "epoch": 17.78846153846154, + "grad_norm": 13.67541790008545, + "learning_rate": 4.110576923076923e-05, + "loss": 0.4172, + "step": 6475 + }, + { + "epoch": 17.791208791208792, + "grad_norm": 13.810647964477539, + "learning_rate": 4.110439560439561e-05, + "loss": 0.3721, + "step": 6476 + }, + { + "epoch": 17.793956043956044, + "grad_norm": 3.807950973510742, + "learning_rate": 4.1103021978021985e-05, + "loss": 0.0725, + "step": 6477 + }, + { + "epoch": 17.796703296703296, + "grad_norm": 14.082236289978027, + "learning_rate": 4.1101648351648355e-05, + "loss": 0.4728, + "step": 6478 + }, + { + "epoch": 17.79945054945055, + "grad_norm": 11.421531677246094, + "learning_rate": 4.110027472527473e-05, + "loss": 0.3902, + "step": 6479 + }, + { + "epoch": 17.802197802197803, + "grad_norm": 17.07058334350586, + "learning_rate": 4.10989010989011e-05, + "loss": 0.8106, + "step": 6480 + }, + { + "epoch": 17.804945054945055, + "grad_norm": 18.735843658447266, + "learning_rate": 4.109752747252748e-05, + "loss": 0.8859, + "step": 6481 + }, + { + "epoch": 17.807692307692307, + "grad_norm": 10.760663986206055, + "learning_rate": 4.109615384615385e-05, + "loss": 0.3341, + "step": 6482 + }, + { + "epoch": 17.810439560439562, + "grad_norm": 17.995670318603516, + "learning_rate": 4.109478021978022e-05, + "loss": 0.6671, + "step": 6483 + }, + { + "epoch": 17.813186813186814, + "grad_norm": 9.666443824768066, + "learning_rate": 4.1093406593406595e-05, + "loss": 0.2238, + "step": 6484 + }, + { + "epoch": 17.815934065934066, + "grad_norm": 8.992522239685059, + "learning_rate": 4.1092032967032965e-05, + "loss": 0.3261, + "step": 6485 + }, + { + "epoch": 17.818681318681318, + "grad_norm": 22.87196922302246, + "learning_rate": 4.109065934065934e-05, + "loss": 0.8702, + "step": 6486 + }, + { + "epoch": 17.821428571428573, + "grad_norm": 10.947626113891602, + "learning_rate": 4.108928571428571e-05, + "loss": 0.3628, + "step": 6487 + }, + { + "epoch": 17.824175824175825, + "grad_norm": 9.262643814086914, + "learning_rate": 4.108791208791209e-05, + "loss": 0.331, + "step": 6488 + }, + { + "epoch": 17.826923076923077, + "grad_norm": 11.72070598602295, + "learning_rate": 4.1086538461538466e-05, + "loss": 0.2969, + "step": 6489 + }, + { + "epoch": 17.82967032967033, + "grad_norm": 8.439034461975098, + "learning_rate": 4.1085164835164836e-05, + "loss": 0.2469, + "step": 6490 + }, + { + "epoch": 17.832417582417584, + "grad_norm": 10.944647789001465, + "learning_rate": 4.108379120879121e-05, + "loss": 0.2694, + "step": 6491 + }, + { + "epoch": 17.835164835164836, + "grad_norm": 19.207557678222656, + "learning_rate": 4.108241758241758e-05, + "loss": 0.6129, + "step": 6492 + }, + { + "epoch": 17.837912087912088, + "grad_norm": 9.18821907043457, + "learning_rate": 4.108104395604396e-05, + "loss": 0.344, + "step": 6493 + }, + { + "epoch": 17.84065934065934, + "grad_norm": 12.873556137084961, + "learning_rate": 4.1079670329670336e-05, + "loss": 0.2218, + "step": 6494 + }, + { + "epoch": 17.843406593406595, + "grad_norm": 10.664111137390137, + "learning_rate": 4.1078296703296706e-05, + "loss": 0.2824, + "step": 6495 + }, + { + "epoch": 17.846153846153847, + "grad_norm": 6.981533050537109, + "learning_rate": 4.1076923076923076e-05, + "loss": 0.1608, + "step": 6496 + }, + { + "epoch": 17.8489010989011, + "grad_norm": 7.76682186126709, + "learning_rate": 4.107554945054945e-05, + "loss": 0.2837, + "step": 6497 + }, + { + "epoch": 17.85164835164835, + "grad_norm": 11.282777786254883, + "learning_rate": 4.107417582417582e-05, + "loss": 0.3159, + "step": 6498 + }, + { + "epoch": 17.854395604395606, + "grad_norm": 9.54216480255127, + "learning_rate": 4.10728021978022e-05, + "loss": 0.2093, + "step": 6499 + }, + { + "epoch": 17.857142857142858, + "grad_norm": 15.986505508422852, + "learning_rate": 4.107142857142857e-05, + "loss": 0.4152, + "step": 6500 + }, + { + "epoch": 17.85989010989011, + "grad_norm": 12.420957565307617, + "learning_rate": 4.1070054945054947e-05, + "loss": 0.3583, + "step": 6501 + }, + { + "epoch": 17.86263736263736, + "grad_norm": 21.522197723388672, + "learning_rate": 4.1068681318681317e-05, + "loss": 0.9696, + "step": 6502 + }, + { + "epoch": 17.865384615384617, + "grad_norm": 4.218857765197754, + "learning_rate": 4.106730769230769e-05, + "loss": 0.1049, + "step": 6503 + }, + { + "epoch": 17.86813186813187, + "grad_norm": 11.993911743164062, + "learning_rate": 4.106593406593407e-05, + "loss": 0.3116, + "step": 6504 + }, + { + "epoch": 17.87087912087912, + "grad_norm": 12.971229553222656, + "learning_rate": 4.106456043956044e-05, + "loss": 0.4052, + "step": 6505 + }, + { + "epoch": 17.873626373626372, + "grad_norm": 17.107885360717773, + "learning_rate": 4.106318681318682e-05, + "loss": 0.6005, + "step": 6506 + }, + { + "epoch": 17.876373626373628, + "grad_norm": 5.961361408233643, + "learning_rate": 4.106181318681319e-05, + "loss": 0.1867, + "step": 6507 + }, + { + "epoch": 17.87912087912088, + "grad_norm": 8.522360801696777, + "learning_rate": 4.1060439560439564e-05, + "loss": 0.3295, + "step": 6508 + }, + { + "epoch": 17.88186813186813, + "grad_norm": 7.06940221786499, + "learning_rate": 4.105906593406594e-05, + "loss": 0.1871, + "step": 6509 + }, + { + "epoch": 17.884615384615383, + "grad_norm": 19.106449127197266, + "learning_rate": 4.105769230769231e-05, + "loss": 0.71, + "step": 6510 + }, + { + "epoch": 17.88736263736264, + "grad_norm": 9.829886436462402, + "learning_rate": 4.105631868131868e-05, + "loss": 0.4384, + "step": 6511 + }, + { + "epoch": 17.89010989010989, + "grad_norm": 9.888697624206543, + "learning_rate": 4.105494505494506e-05, + "loss": 0.2238, + "step": 6512 + }, + { + "epoch": 17.892857142857142, + "grad_norm": 13.403907775878906, + "learning_rate": 4.105357142857143e-05, + "loss": 0.4368, + "step": 6513 + }, + { + "epoch": 17.895604395604394, + "grad_norm": 11.052061080932617, + "learning_rate": 4.1052197802197804e-05, + "loss": 0.3507, + "step": 6514 + }, + { + "epoch": 17.89835164835165, + "grad_norm": 15.58244514465332, + "learning_rate": 4.1050824175824174e-05, + "loss": 0.7019, + "step": 6515 + }, + { + "epoch": 17.9010989010989, + "grad_norm": 12.753094673156738, + "learning_rate": 4.104945054945055e-05, + "loss": 0.3224, + "step": 6516 + }, + { + "epoch": 17.903846153846153, + "grad_norm": 13.920419692993164, + "learning_rate": 4.104807692307692e-05, + "loss": 0.543, + "step": 6517 + }, + { + "epoch": 17.906593406593405, + "grad_norm": 9.421914100646973, + "learning_rate": 4.10467032967033e-05, + "loss": 0.2413, + "step": 6518 + }, + { + "epoch": 17.90934065934066, + "grad_norm": 10.442683219909668, + "learning_rate": 4.1045329670329675e-05, + "loss": 0.3572, + "step": 6519 + }, + { + "epoch": 17.912087912087912, + "grad_norm": 12.765339851379395, + "learning_rate": 4.1043956043956045e-05, + "loss": 0.3383, + "step": 6520 + }, + { + "epoch": 17.914835164835164, + "grad_norm": 4.309356689453125, + "learning_rate": 4.104258241758242e-05, + "loss": 0.1075, + "step": 6521 + }, + { + "epoch": 17.917582417582416, + "grad_norm": 8.316768646240234, + "learning_rate": 4.104120879120879e-05, + "loss": 0.2116, + "step": 6522 + }, + { + "epoch": 17.92032967032967, + "grad_norm": 19.386455535888672, + "learning_rate": 4.103983516483517e-05, + "loss": 0.7365, + "step": 6523 + }, + { + "epoch": 17.923076923076923, + "grad_norm": 15.688206672668457, + "learning_rate": 4.1038461538461545e-05, + "loss": 0.4879, + "step": 6524 + }, + { + "epoch": 17.925824175824175, + "grad_norm": 16.752538681030273, + "learning_rate": 4.1037087912087915e-05, + "loss": 0.9534, + "step": 6525 + }, + { + "epoch": 17.928571428571427, + "grad_norm": 10.151394844055176, + "learning_rate": 4.1035714285714285e-05, + "loss": 0.2861, + "step": 6526 + }, + { + "epoch": 17.931318681318682, + "grad_norm": 21.606565475463867, + "learning_rate": 4.103434065934066e-05, + "loss": 0.8553, + "step": 6527 + }, + { + "epoch": 17.934065934065934, + "grad_norm": 13.737750053405762, + "learning_rate": 4.103296703296703e-05, + "loss": 0.3843, + "step": 6528 + }, + { + "epoch": 17.936813186813186, + "grad_norm": 13.678561210632324, + "learning_rate": 4.103159340659341e-05, + "loss": 0.4249, + "step": 6529 + }, + { + "epoch": 17.939560439560438, + "grad_norm": 11.85893726348877, + "learning_rate": 4.103021978021978e-05, + "loss": 0.3547, + "step": 6530 + }, + { + "epoch": 17.942307692307693, + "grad_norm": 10.115303993225098, + "learning_rate": 4.1028846153846156e-05, + "loss": 0.2207, + "step": 6531 + }, + { + "epoch": 17.945054945054945, + "grad_norm": 14.56618881225586, + "learning_rate": 4.1027472527472526e-05, + "loss": 0.4403, + "step": 6532 + }, + { + "epoch": 17.947802197802197, + "grad_norm": 17.3442440032959, + "learning_rate": 4.10260989010989e-05, + "loss": 0.9978, + "step": 6533 + }, + { + "epoch": 17.95054945054945, + "grad_norm": 16.273317337036133, + "learning_rate": 4.102472527472528e-05, + "loss": 0.7891, + "step": 6534 + }, + { + "epoch": 17.953296703296704, + "grad_norm": 13.787393569946289, + "learning_rate": 4.102335164835165e-05, + "loss": 0.4291, + "step": 6535 + }, + { + "epoch": 17.956043956043956, + "grad_norm": 23.148460388183594, + "learning_rate": 4.1021978021978026e-05, + "loss": 0.6673, + "step": 6536 + }, + { + "epoch": 17.958791208791208, + "grad_norm": 16.66389274597168, + "learning_rate": 4.1020604395604396e-05, + "loss": 0.7025, + "step": 6537 + }, + { + "epoch": 17.96153846153846, + "grad_norm": 10.731378555297852, + "learning_rate": 4.101923076923077e-05, + "loss": 0.368, + "step": 6538 + }, + { + "epoch": 17.964285714285715, + "grad_norm": 16.270544052124023, + "learning_rate": 4.101785714285715e-05, + "loss": 0.6248, + "step": 6539 + }, + { + "epoch": 17.967032967032967, + "grad_norm": 17.687349319458008, + "learning_rate": 4.101648351648352e-05, + "loss": 0.5895, + "step": 6540 + }, + { + "epoch": 17.96978021978022, + "grad_norm": 8.268208503723145, + "learning_rate": 4.101510989010989e-05, + "loss": 0.1663, + "step": 6541 + }, + { + "epoch": 17.97252747252747, + "grad_norm": 7.47742223739624, + "learning_rate": 4.1013736263736266e-05, + "loss": 0.1485, + "step": 6542 + }, + { + "epoch": 17.975274725274726, + "grad_norm": 20.9819278717041, + "learning_rate": 4.1012362637362636e-05, + "loss": 1.1637, + "step": 6543 + }, + { + "epoch": 17.978021978021978, + "grad_norm": 12.572525978088379, + "learning_rate": 4.101098901098901e-05, + "loss": 0.6355, + "step": 6544 + }, + { + "epoch": 17.98076923076923, + "grad_norm": 8.203313827514648, + "learning_rate": 4.100961538461538e-05, + "loss": 0.3071, + "step": 6545 + }, + { + "epoch": 17.983516483516482, + "grad_norm": 10.477799415588379, + "learning_rate": 4.100824175824176e-05, + "loss": 0.4422, + "step": 6546 + }, + { + "epoch": 17.986263736263737, + "grad_norm": 9.207043647766113, + "learning_rate": 4.100686813186813e-05, + "loss": 0.2893, + "step": 6547 + }, + { + "epoch": 17.98901098901099, + "grad_norm": 20.24212646484375, + "learning_rate": 4.100549450549451e-05, + "loss": 0.96, + "step": 6548 + }, + { + "epoch": 17.99175824175824, + "grad_norm": 16.06568717956543, + "learning_rate": 4.1004120879120884e-05, + "loss": 0.652, + "step": 6549 + }, + { + "epoch": 17.994505494505496, + "grad_norm": 9.499350547790527, + "learning_rate": 4.1002747252747254e-05, + "loss": 0.2143, + "step": 6550 + }, + { + "epoch": 17.997252747252748, + "grad_norm": 13.466588973999023, + "learning_rate": 4.100137362637363e-05, + "loss": 0.5315, + "step": 6551 + }, + { + "epoch": 18.0, + "grad_norm": 35.26015090942383, + "learning_rate": 4.1e-05, + "loss": 0.7317, + "step": 6552 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.7341597796143251, + "eval_f1": 0.7421167068973242, + "eval_f1_DuraRiadoRio_64x64": 0.743421052631579, + "eval_f1_Mole_64x64": 0.6607929515418502, + "eval_f1_Quebrado_64x64": 0.852112676056338, + "eval_f1_RiadoRio_64x64": 0.612987012987013, + "eval_f1_RioFechado_64x64": 0.8412698412698413, + "eval_loss": 0.9376833438873291, + "eval_precision": 0.7888448607987494, + "eval_precision_DuraRiadoRio_64x64": 0.70625, + "eval_precision_Mole_64x64": 0.9036144578313253, + "eval_precision_Quebrado_64x64": 0.8642857142857143, + "eval_precision_RiadoRio_64x64": 0.5064377682403434, + "eval_precision_RioFechado_64x64": 0.9636363636363636, + "eval_recall": 0.7337255992092908, + "eval_recall_DuraRiadoRio_64x64": 0.7847222222222222, + "eval_recall_Mole_64x64": 0.5208333333333334, + "eval_recall_Quebrado_64x64": 0.8402777777777778, + "eval_recall_RiadoRio_64x64": 0.7763157894736842, + "eval_recall_RioFechado_64x64": 0.7464788732394366, + "eval_runtime": 1.7357, + "eval_samples_per_second": 418.285, + "eval_steps_per_second": 26.503, + "step": 6552 + }, + { + "epoch": 18.002747252747252, + "grad_norm": 13.401869773864746, + "learning_rate": 4.099862637362638e-05, + "loss": 0.4541, + "step": 6553 + }, + { + "epoch": 18.005494505494507, + "grad_norm": 18.52347755432129, + "learning_rate": 4.0997252747252754e-05, + "loss": 0.9484, + "step": 6554 + }, + { + "epoch": 18.00824175824176, + "grad_norm": 16.455581665039062, + "learning_rate": 4.0995879120879124e-05, + "loss": 0.558, + "step": 6555 + }, + { + "epoch": 18.01098901098901, + "grad_norm": 12.59964656829834, + "learning_rate": 4.0994505494505494e-05, + "loss": 0.3271, + "step": 6556 + }, + { + "epoch": 18.013736263736263, + "grad_norm": 7.785242080688477, + "learning_rate": 4.099313186813187e-05, + "loss": 0.2717, + "step": 6557 + }, + { + "epoch": 18.016483516483518, + "grad_norm": 11.4080171585083, + "learning_rate": 4.099175824175824e-05, + "loss": 0.3269, + "step": 6558 + }, + { + "epoch": 18.01923076923077, + "grad_norm": 10.00847339630127, + "learning_rate": 4.099038461538462e-05, + "loss": 0.1709, + "step": 6559 + }, + { + "epoch": 18.021978021978022, + "grad_norm": 15.015392303466797, + "learning_rate": 4.098901098901099e-05, + "loss": 0.4488, + "step": 6560 + }, + { + "epoch": 18.024725274725274, + "grad_norm": 19.58930778503418, + "learning_rate": 4.0987637362637365e-05, + "loss": 0.8217, + "step": 6561 + }, + { + "epoch": 18.02747252747253, + "grad_norm": 10.699392318725586, + "learning_rate": 4.0986263736263735e-05, + "loss": 0.2032, + "step": 6562 + }, + { + "epoch": 18.03021978021978, + "grad_norm": 13.377606391906738, + "learning_rate": 4.098489010989011e-05, + "loss": 0.4614, + "step": 6563 + }, + { + "epoch": 18.032967032967033, + "grad_norm": 14.296445846557617, + "learning_rate": 4.098351648351649e-05, + "loss": 0.4998, + "step": 6564 + }, + { + "epoch": 18.035714285714285, + "grad_norm": 13.884092330932617, + "learning_rate": 4.098214285714286e-05, + "loss": 0.52, + "step": 6565 + }, + { + "epoch": 18.03846153846154, + "grad_norm": 12.463939666748047, + "learning_rate": 4.0980769230769235e-05, + "loss": 0.2694, + "step": 6566 + }, + { + "epoch": 18.041208791208792, + "grad_norm": 6.327002048492432, + "learning_rate": 4.0979395604395605e-05, + "loss": 0.137, + "step": 6567 + }, + { + "epoch": 18.043956043956044, + "grad_norm": 4.710035800933838, + "learning_rate": 4.097802197802198e-05, + "loss": 0.1561, + "step": 6568 + }, + { + "epoch": 18.046703296703296, + "grad_norm": 15.673011779785156, + "learning_rate": 4.097664835164836e-05, + "loss": 0.8413, + "step": 6569 + }, + { + "epoch": 18.04945054945055, + "grad_norm": 16.94874382019043, + "learning_rate": 4.097527472527473e-05, + "loss": 0.8115, + "step": 6570 + }, + { + "epoch": 18.052197802197803, + "grad_norm": 15.41296672821045, + "learning_rate": 4.09739010989011e-05, + "loss": 0.4844, + "step": 6571 + }, + { + "epoch": 18.054945054945055, + "grad_norm": 9.51594066619873, + "learning_rate": 4.0972527472527475e-05, + "loss": 0.2865, + "step": 6572 + }, + { + "epoch": 18.057692307692307, + "grad_norm": 15.540059089660645, + "learning_rate": 4.0971153846153845e-05, + "loss": 0.6119, + "step": 6573 + }, + { + "epoch": 18.060439560439562, + "grad_norm": 7.6818623542785645, + "learning_rate": 4.096978021978022e-05, + "loss": 0.2789, + "step": 6574 + }, + { + "epoch": 18.063186813186814, + "grad_norm": 14.539088249206543, + "learning_rate": 4.096840659340659e-05, + "loss": 0.5081, + "step": 6575 + }, + { + "epoch": 18.065934065934066, + "grad_norm": 10.114639282226562, + "learning_rate": 4.096703296703297e-05, + "loss": 0.3839, + "step": 6576 + }, + { + "epoch": 18.068681318681318, + "grad_norm": 17.138561248779297, + "learning_rate": 4.096565934065934e-05, + "loss": 0.5748, + "step": 6577 + }, + { + "epoch": 18.071428571428573, + "grad_norm": 11.598466873168945, + "learning_rate": 4.0964285714285716e-05, + "loss": 0.4124, + "step": 6578 + }, + { + "epoch": 18.074175824175825, + "grad_norm": 20.526546478271484, + "learning_rate": 4.096291208791209e-05, + "loss": 0.8307, + "step": 6579 + }, + { + "epoch": 18.076923076923077, + "grad_norm": 12.62765884399414, + "learning_rate": 4.096153846153846e-05, + "loss": 0.426, + "step": 6580 + }, + { + "epoch": 18.07967032967033, + "grad_norm": 3.02024507522583, + "learning_rate": 4.096016483516484e-05, + "loss": 0.061, + "step": 6581 + }, + { + "epoch": 18.082417582417584, + "grad_norm": 10.951004981994629, + "learning_rate": 4.095879120879121e-05, + "loss": 0.3898, + "step": 6582 + }, + { + "epoch": 18.085164835164836, + "grad_norm": 16.266714096069336, + "learning_rate": 4.0957417582417586e-05, + "loss": 0.5639, + "step": 6583 + }, + { + "epoch": 18.087912087912088, + "grad_norm": 8.340124130249023, + "learning_rate": 4.095604395604396e-05, + "loss": 0.2263, + "step": 6584 + }, + { + "epoch": 18.09065934065934, + "grad_norm": 8.470468521118164, + "learning_rate": 4.095467032967033e-05, + "loss": 0.1705, + "step": 6585 + }, + { + "epoch": 18.093406593406595, + "grad_norm": 19.035364151000977, + "learning_rate": 4.09532967032967e-05, + "loss": 0.7159, + "step": 6586 + }, + { + "epoch": 18.096153846153847, + "grad_norm": 8.030405044555664, + "learning_rate": 4.095192307692308e-05, + "loss": 0.1649, + "step": 6587 + }, + { + "epoch": 18.0989010989011, + "grad_norm": 5.4265360832214355, + "learning_rate": 4.095054945054945e-05, + "loss": 0.1999, + "step": 6588 + }, + { + "epoch": 18.10164835164835, + "grad_norm": 13.620123863220215, + "learning_rate": 4.094917582417583e-05, + "loss": 0.3755, + "step": 6589 + }, + { + "epoch": 18.104395604395606, + "grad_norm": 9.474682807922363, + "learning_rate": 4.09478021978022e-05, + "loss": 0.3967, + "step": 6590 + }, + { + "epoch": 18.107142857142858, + "grad_norm": 11.380550384521484, + "learning_rate": 4.0946428571428574e-05, + "loss": 0.3468, + "step": 6591 + }, + { + "epoch": 18.10989010989011, + "grad_norm": 8.540322303771973, + "learning_rate": 4.0945054945054944e-05, + "loss": 0.3594, + "step": 6592 + }, + { + "epoch": 18.11263736263736, + "grad_norm": 10.71751594543457, + "learning_rate": 4.094368131868132e-05, + "loss": 0.3778, + "step": 6593 + }, + { + "epoch": 18.115384615384617, + "grad_norm": 9.163987159729004, + "learning_rate": 4.09423076923077e-05, + "loss": 0.282, + "step": 6594 + }, + { + "epoch": 18.11813186813187, + "grad_norm": 4.124557971954346, + "learning_rate": 4.094093406593407e-05, + "loss": 0.1057, + "step": 6595 + }, + { + "epoch": 18.12087912087912, + "grad_norm": 9.947619438171387, + "learning_rate": 4.0939560439560444e-05, + "loss": 0.2444, + "step": 6596 + }, + { + "epoch": 18.123626373626372, + "grad_norm": 12.65004825592041, + "learning_rate": 4.0938186813186814e-05, + "loss": 0.5119, + "step": 6597 + }, + { + "epoch": 18.126373626373628, + "grad_norm": 20.572921752929688, + "learning_rate": 4.093681318681319e-05, + "loss": 0.8831, + "step": 6598 + }, + { + "epoch": 18.12912087912088, + "grad_norm": 15.07052230834961, + "learning_rate": 4.093543956043957e-05, + "loss": 0.5863, + "step": 6599 + }, + { + "epoch": 18.13186813186813, + "grad_norm": 5.338656902313232, + "learning_rate": 4.093406593406594e-05, + "loss": 0.1455, + "step": 6600 + }, + { + "epoch": 18.134615384615383, + "grad_norm": 12.583786964416504, + "learning_rate": 4.093269230769231e-05, + "loss": 0.6133, + "step": 6601 + }, + { + "epoch": 18.13736263736264, + "grad_norm": 10.969428062438965, + "learning_rate": 4.0931318681318684e-05, + "loss": 0.3456, + "step": 6602 + }, + { + "epoch": 18.14010989010989, + "grad_norm": 17.968059539794922, + "learning_rate": 4.0929945054945054e-05, + "loss": 0.7381, + "step": 6603 + }, + { + "epoch": 18.142857142857142, + "grad_norm": 11.527155876159668, + "learning_rate": 4.092857142857143e-05, + "loss": 0.2698, + "step": 6604 + }, + { + "epoch": 18.145604395604394, + "grad_norm": 16.75048828125, + "learning_rate": 4.09271978021978e-05, + "loss": 0.8153, + "step": 6605 + }, + { + "epoch": 18.14835164835165, + "grad_norm": 14.20851993560791, + "learning_rate": 4.092582417582418e-05, + "loss": 0.4463, + "step": 6606 + }, + { + "epoch": 18.1510989010989, + "grad_norm": 17.137439727783203, + "learning_rate": 4.092445054945055e-05, + "loss": 0.3528, + "step": 6607 + }, + { + "epoch": 18.153846153846153, + "grad_norm": 16.558813095092773, + "learning_rate": 4.0923076923076925e-05, + "loss": 0.541, + "step": 6608 + }, + { + "epoch": 18.156593406593405, + "grad_norm": 8.323894500732422, + "learning_rate": 4.09217032967033e-05, + "loss": 0.3862, + "step": 6609 + }, + { + "epoch": 18.15934065934066, + "grad_norm": 13.16943073272705, + "learning_rate": 4.092032967032967e-05, + "loss": 0.6213, + "step": 6610 + }, + { + "epoch": 18.162087912087912, + "grad_norm": 10.547895431518555, + "learning_rate": 4.091895604395605e-05, + "loss": 0.2553, + "step": 6611 + }, + { + "epoch": 18.164835164835164, + "grad_norm": 19.45762825012207, + "learning_rate": 4.091758241758242e-05, + "loss": 0.6126, + "step": 6612 + }, + { + "epoch": 18.167582417582416, + "grad_norm": 13.089165687561035, + "learning_rate": 4.0916208791208795e-05, + "loss": 0.403, + "step": 6613 + }, + { + "epoch": 18.17032967032967, + "grad_norm": 17.460285186767578, + "learning_rate": 4.091483516483517e-05, + "loss": 0.8745, + "step": 6614 + }, + { + "epoch": 18.173076923076923, + "grad_norm": 16.72990608215332, + "learning_rate": 4.091346153846154e-05, + "loss": 0.683, + "step": 6615 + }, + { + "epoch": 18.175824175824175, + "grad_norm": 12.360166549682617, + "learning_rate": 4.091208791208791e-05, + "loss": 0.479, + "step": 6616 + }, + { + "epoch": 18.178571428571427, + "grad_norm": 12.17430591583252, + "learning_rate": 4.091071428571429e-05, + "loss": 0.2386, + "step": 6617 + }, + { + "epoch": 18.181318681318682, + "grad_norm": 15.227357864379883, + "learning_rate": 4.090934065934066e-05, + "loss": 0.4404, + "step": 6618 + }, + { + "epoch": 18.184065934065934, + "grad_norm": 9.978826522827148, + "learning_rate": 4.0907967032967036e-05, + "loss": 0.2889, + "step": 6619 + }, + { + "epoch": 18.186813186813186, + "grad_norm": 16.9860782623291, + "learning_rate": 4.0906593406593406e-05, + "loss": 0.6612, + "step": 6620 + }, + { + "epoch": 18.189560439560438, + "grad_norm": 15.457613945007324, + "learning_rate": 4.090521978021978e-05, + "loss": 0.497, + "step": 6621 + }, + { + "epoch": 18.192307692307693, + "grad_norm": 16.454267501831055, + "learning_rate": 4.090384615384615e-05, + "loss": 0.8228, + "step": 6622 + }, + { + "epoch": 18.195054945054945, + "grad_norm": 13.534896850585938, + "learning_rate": 4.090247252747253e-05, + "loss": 0.4219, + "step": 6623 + }, + { + "epoch": 18.197802197802197, + "grad_norm": 11.939118385314941, + "learning_rate": 4.0901098901098906e-05, + "loss": 0.2936, + "step": 6624 + }, + { + "epoch": 18.20054945054945, + "grad_norm": 14.258889198303223, + "learning_rate": 4.0899725274725276e-05, + "loss": 0.4019, + "step": 6625 + }, + { + "epoch": 18.203296703296704, + "grad_norm": 12.677217483520508, + "learning_rate": 4.089835164835165e-05, + "loss": 0.3578, + "step": 6626 + }, + { + "epoch": 18.206043956043956, + "grad_norm": 14.807519912719727, + "learning_rate": 4.089697802197802e-05, + "loss": 0.6304, + "step": 6627 + }, + { + "epoch": 18.208791208791208, + "grad_norm": 10.026910781860352, + "learning_rate": 4.08956043956044e-05, + "loss": 0.2357, + "step": 6628 + }, + { + "epoch": 18.21153846153846, + "grad_norm": 9.449548721313477, + "learning_rate": 4.0894230769230777e-05, + "loss": 0.2481, + "step": 6629 + }, + { + "epoch": 18.214285714285715, + "grad_norm": 18.428110122680664, + "learning_rate": 4.0892857142857147e-05, + "loss": 0.7967, + "step": 6630 + }, + { + "epoch": 18.217032967032967, + "grad_norm": 19.281070709228516, + "learning_rate": 4.0891483516483517e-05, + "loss": 0.6078, + "step": 6631 + }, + { + "epoch": 18.21978021978022, + "grad_norm": 14.4254150390625, + "learning_rate": 4.089010989010989e-05, + "loss": 0.4957, + "step": 6632 + }, + { + "epoch": 18.22252747252747, + "grad_norm": 12.377348899841309, + "learning_rate": 4.088873626373626e-05, + "loss": 0.3326, + "step": 6633 + }, + { + "epoch": 18.225274725274726, + "grad_norm": 17.347108840942383, + "learning_rate": 4.088736263736264e-05, + "loss": 0.5439, + "step": 6634 + }, + { + "epoch": 18.228021978021978, + "grad_norm": 8.514055252075195, + "learning_rate": 4.088598901098901e-05, + "loss": 0.1933, + "step": 6635 + }, + { + "epoch": 18.23076923076923, + "grad_norm": 16.269121170043945, + "learning_rate": 4.088461538461539e-05, + "loss": 0.5892, + "step": 6636 + }, + { + "epoch": 18.233516483516482, + "grad_norm": 13.374417304992676, + "learning_rate": 4.088324175824176e-05, + "loss": 0.5197, + "step": 6637 + }, + { + "epoch": 18.236263736263737, + "grad_norm": 13.55257511138916, + "learning_rate": 4.0881868131868134e-05, + "loss": 0.4601, + "step": 6638 + }, + { + "epoch": 18.23901098901099, + "grad_norm": 18.68012809753418, + "learning_rate": 4.088049450549451e-05, + "loss": 0.5314, + "step": 6639 + }, + { + "epoch": 18.24175824175824, + "grad_norm": 13.245396614074707, + "learning_rate": 4.087912087912088e-05, + "loss": 0.3618, + "step": 6640 + }, + { + "epoch": 18.244505494505493, + "grad_norm": 20.33782386779785, + "learning_rate": 4.087774725274726e-05, + "loss": 0.4652, + "step": 6641 + }, + { + "epoch": 18.247252747252748, + "grad_norm": 13.81729793548584, + "learning_rate": 4.087637362637363e-05, + "loss": 0.4449, + "step": 6642 + }, + { + "epoch": 18.25, + "grad_norm": 12.693230628967285, + "learning_rate": 4.0875000000000004e-05, + "loss": 0.4303, + "step": 6643 + }, + { + "epoch": 18.252747252747252, + "grad_norm": 15.090543746948242, + "learning_rate": 4.087362637362638e-05, + "loss": 0.3101, + "step": 6644 + }, + { + "epoch": 18.255494505494504, + "grad_norm": 10.027409553527832, + "learning_rate": 4.087225274725275e-05, + "loss": 0.2719, + "step": 6645 + }, + { + "epoch": 18.25824175824176, + "grad_norm": 20.24063491821289, + "learning_rate": 4.087087912087912e-05, + "loss": 0.7868, + "step": 6646 + }, + { + "epoch": 18.26098901098901, + "grad_norm": 15.441455841064453, + "learning_rate": 4.08695054945055e-05, + "loss": 0.7724, + "step": 6647 + }, + { + "epoch": 18.263736263736263, + "grad_norm": 12.694588661193848, + "learning_rate": 4.086813186813187e-05, + "loss": 0.4346, + "step": 6648 + }, + { + "epoch": 18.266483516483518, + "grad_norm": 13.77758502960205, + "learning_rate": 4.0866758241758245e-05, + "loss": 0.4642, + "step": 6649 + }, + { + "epoch": 18.26923076923077, + "grad_norm": 7.218052387237549, + "learning_rate": 4.0865384615384615e-05, + "loss": 0.2185, + "step": 6650 + }, + { + "epoch": 18.271978021978022, + "grad_norm": 8.0472412109375, + "learning_rate": 4.086401098901099e-05, + "loss": 0.1824, + "step": 6651 + }, + { + "epoch": 18.274725274725274, + "grad_norm": 11.786468505859375, + "learning_rate": 4.086263736263736e-05, + "loss": 0.4626, + "step": 6652 + }, + { + "epoch": 18.27747252747253, + "grad_norm": 12.194053649902344, + "learning_rate": 4.086126373626374e-05, + "loss": 0.5193, + "step": 6653 + }, + { + "epoch": 18.28021978021978, + "grad_norm": 12.78073501586914, + "learning_rate": 4.0859890109890115e-05, + "loss": 0.3232, + "step": 6654 + }, + { + "epoch": 18.282967032967033, + "grad_norm": 7.337818145751953, + "learning_rate": 4.0858516483516485e-05, + "loss": 0.1965, + "step": 6655 + }, + { + "epoch": 18.285714285714285, + "grad_norm": 18.476951599121094, + "learning_rate": 4.085714285714286e-05, + "loss": 0.6166, + "step": 6656 + }, + { + "epoch": 18.28846153846154, + "grad_norm": 12.392934799194336, + "learning_rate": 4.085576923076923e-05, + "loss": 0.5795, + "step": 6657 + }, + { + "epoch": 18.291208791208792, + "grad_norm": 6.5212178230285645, + "learning_rate": 4.085439560439561e-05, + "loss": 0.2071, + "step": 6658 + }, + { + "epoch": 18.293956043956044, + "grad_norm": 14.417549133300781, + "learning_rate": 4.0853021978021985e-05, + "loss": 0.4975, + "step": 6659 + }, + { + "epoch": 18.296703296703296, + "grad_norm": 13.519658088684082, + "learning_rate": 4.0851648351648356e-05, + "loss": 0.5282, + "step": 6660 + }, + { + "epoch": 18.29945054945055, + "grad_norm": 12.454890251159668, + "learning_rate": 4.0850274725274726e-05, + "loss": 0.4668, + "step": 6661 + }, + { + "epoch": 18.302197802197803, + "grad_norm": 8.646952629089355, + "learning_rate": 4.08489010989011e-05, + "loss": 0.2814, + "step": 6662 + }, + { + "epoch": 18.304945054945055, + "grad_norm": 7.8804755210876465, + "learning_rate": 4.084752747252747e-05, + "loss": 0.2655, + "step": 6663 + }, + { + "epoch": 18.307692307692307, + "grad_norm": 9.562962532043457, + "learning_rate": 4.084615384615385e-05, + "loss": 0.2481, + "step": 6664 + }, + { + "epoch": 18.310439560439562, + "grad_norm": 14.008150100708008, + "learning_rate": 4.084478021978022e-05, + "loss": 0.5218, + "step": 6665 + }, + { + "epoch": 18.313186813186814, + "grad_norm": 12.103066444396973, + "learning_rate": 4.0843406593406596e-05, + "loss": 0.3083, + "step": 6666 + }, + { + "epoch": 18.315934065934066, + "grad_norm": 11.329785346984863, + "learning_rate": 4.0842032967032966e-05, + "loss": 0.3209, + "step": 6667 + }, + { + "epoch": 18.318681318681318, + "grad_norm": 19.432811737060547, + "learning_rate": 4.084065934065934e-05, + "loss": 0.7303, + "step": 6668 + }, + { + "epoch": 18.321428571428573, + "grad_norm": 9.672209739685059, + "learning_rate": 4.083928571428572e-05, + "loss": 0.2398, + "step": 6669 + }, + { + "epoch": 18.324175824175825, + "grad_norm": 7.435473918914795, + "learning_rate": 4.083791208791209e-05, + "loss": 0.3073, + "step": 6670 + }, + { + "epoch": 18.326923076923077, + "grad_norm": 6.336196422576904, + "learning_rate": 4.0836538461538466e-05, + "loss": 0.2467, + "step": 6671 + }, + { + "epoch": 18.32967032967033, + "grad_norm": 7.040702819824219, + "learning_rate": 4.0835164835164836e-05, + "loss": 0.1848, + "step": 6672 + }, + { + "epoch": 18.332417582417584, + "grad_norm": 15.635703086853027, + "learning_rate": 4.083379120879121e-05, + "loss": 0.6919, + "step": 6673 + }, + { + "epoch": 18.335164835164836, + "grad_norm": 11.75327205657959, + "learning_rate": 4.083241758241759e-05, + "loss": 0.3711, + "step": 6674 + }, + { + "epoch": 18.337912087912088, + "grad_norm": 16.845293045043945, + "learning_rate": 4.083104395604396e-05, + "loss": 0.6881, + "step": 6675 + }, + { + "epoch": 18.34065934065934, + "grad_norm": 12.70947551727295, + "learning_rate": 4.082967032967033e-05, + "loss": 0.4214, + "step": 6676 + }, + { + "epoch": 18.343406593406595, + "grad_norm": 7.033535480499268, + "learning_rate": 4.08282967032967e-05, + "loss": 0.2458, + "step": 6677 + }, + { + "epoch": 18.346153846153847, + "grad_norm": 11.237932205200195, + "learning_rate": 4.082692307692308e-05, + "loss": 0.3706, + "step": 6678 + }, + { + "epoch": 18.3489010989011, + "grad_norm": 13.008477210998535, + "learning_rate": 4.0825549450549454e-05, + "loss": 0.3854, + "step": 6679 + }, + { + "epoch": 18.35164835164835, + "grad_norm": 8.508703231811523, + "learning_rate": 4.0824175824175824e-05, + "loss": 0.1748, + "step": 6680 + }, + { + "epoch": 18.354395604395606, + "grad_norm": 8.726119041442871, + "learning_rate": 4.08228021978022e-05, + "loss": 0.2187, + "step": 6681 + }, + { + "epoch": 18.357142857142858, + "grad_norm": 5.039693355560303, + "learning_rate": 4.082142857142857e-05, + "loss": 0.1438, + "step": 6682 + }, + { + "epoch": 18.35989010989011, + "grad_norm": 6.563214302062988, + "learning_rate": 4.082005494505495e-05, + "loss": 0.1803, + "step": 6683 + }, + { + "epoch": 18.36263736263736, + "grad_norm": 4.3899617195129395, + "learning_rate": 4.0818681318681324e-05, + "loss": 0.1042, + "step": 6684 + }, + { + "epoch": 18.365384615384617, + "grad_norm": 14.146652221679688, + "learning_rate": 4.0817307692307694e-05, + "loss": 0.5273, + "step": 6685 + }, + { + "epoch": 18.36813186813187, + "grad_norm": 20.24327278137207, + "learning_rate": 4.081593406593407e-05, + "loss": 0.6457, + "step": 6686 + }, + { + "epoch": 18.37087912087912, + "grad_norm": 13.536376953125, + "learning_rate": 4.081456043956044e-05, + "loss": 0.4945, + "step": 6687 + }, + { + "epoch": 18.373626373626372, + "grad_norm": 13.855438232421875, + "learning_rate": 4.081318681318682e-05, + "loss": 0.5641, + "step": 6688 + }, + { + "epoch": 18.376373626373628, + "grad_norm": 12.738161087036133, + "learning_rate": 4.0811813186813194e-05, + "loss": 0.382, + "step": 6689 + }, + { + "epoch": 18.37912087912088, + "grad_norm": 8.34372615814209, + "learning_rate": 4.0810439560439564e-05, + "loss": 0.1732, + "step": 6690 + }, + { + "epoch": 18.38186813186813, + "grad_norm": 15.075684547424316, + "learning_rate": 4.0809065934065935e-05, + "loss": 0.4629, + "step": 6691 + }, + { + "epoch": 18.384615384615383, + "grad_norm": 13.71504020690918, + "learning_rate": 4.0807692307692305e-05, + "loss": 0.2925, + "step": 6692 + }, + { + "epoch": 18.38736263736264, + "grad_norm": 19.433513641357422, + "learning_rate": 4.080631868131868e-05, + "loss": 0.7537, + "step": 6693 + }, + { + "epoch": 18.39010989010989, + "grad_norm": 8.997506141662598, + "learning_rate": 4.080494505494506e-05, + "loss": 0.2962, + "step": 6694 + }, + { + "epoch": 18.392857142857142, + "grad_norm": 10.091825485229492, + "learning_rate": 4.080357142857143e-05, + "loss": 0.3987, + "step": 6695 + }, + { + "epoch": 18.395604395604394, + "grad_norm": 11.017231941223145, + "learning_rate": 4.0802197802197805e-05, + "loss": 0.3457, + "step": 6696 + }, + { + "epoch": 18.39835164835165, + "grad_norm": 17.747833251953125, + "learning_rate": 4.0800824175824175e-05, + "loss": 0.5748, + "step": 6697 + }, + { + "epoch": 18.4010989010989, + "grad_norm": 13.549720764160156, + "learning_rate": 4.079945054945055e-05, + "loss": 0.395, + "step": 6698 + }, + { + "epoch": 18.403846153846153, + "grad_norm": 9.529152870178223, + "learning_rate": 4.079807692307693e-05, + "loss": 0.2556, + "step": 6699 + }, + { + "epoch": 18.406593406593405, + "grad_norm": 16.301151275634766, + "learning_rate": 4.07967032967033e-05, + "loss": 0.8354, + "step": 6700 + }, + { + "epoch": 18.40934065934066, + "grad_norm": 9.192469596862793, + "learning_rate": 4.0795329670329675e-05, + "loss": 0.3527, + "step": 6701 + }, + { + "epoch": 18.412087912087912, + "grad_norm": 15.932689666748047, + "learning_rate": 4.0793956043956045e-05, + "loss": 0.5684, + "step": 6702 + }, + { + "epoch": 18.414835164835164, + "grad_norm": 9.797750473022461, + "learning_rate": 4.079258241758242e-05, + "loss": 0.2927, + "step": 6703 + }, + { + "epoch": 18.417582417582416, + "grad_norm": 15.343177795410156, + "learning_rate": 4.07912087912088e-05, + "loss": 0.4813, + "step": 6704 + }, + { + "epoch": 18.42032967032967, + "grad_norm": 11.000707626342773, + "learning_rate": 4.078983516483517e-05, + "loss": 0.2958, + "step": 6705 + }, + { + "epoch": 18.423076923076923, + "grad_norm": 16.637798309326172, + "learning_rate": 4.078846153846154e-05, + "loss": 0.5146, + "step": 6706 + }, + { + "epoch": 18.425824175824175, + "grad_norm": 13.67520523071289, + "learning_rate": 4.078708791208791e-05, + "loss": 0.4063, + "step": 6707 + }, + { + "epoch": 18.428571428571427, + "grad_norm": 17.53256607055664, + "learning_rate": 4.0785714285714286e-05, + "loss": 0.5897, + "step": 6708 + }, + { + "epoch": 18.431318681318682, + "grad_norm": 9.213679313659668, + "learning_rate": 4.078434065934066e-05, + "loss": 0.3304, + "step": 6709 + }, + { + "epoch": 18.434065934065934, + "grad_norm": 10.59893798828125, + "learning_rate": 4.078296703296703e-05, + "loss": 0.2862, + "step": 6710 + }, + { + "epoch": 18.436813186813186, + "grad_norm": 9.573556900024414, + "learning_rate": 4.078159340659341e-05, + "loss": 0.3324, + "step": 6711 + }, + { + "epoch": 18.439560439560438, + "grad_norm": 12.13176441192627, + "learning_rate": 4.078021978021978e-05, + "loss": 0.3408, + "step": 6712 + }, + { + "epoch": 18.442307692307693, + "grad_norm": 15.870308876037598, + "learning_rate": 4.0778846153846156e-05, + "loss": 0.385, + "step": 6713 + }, + { + "epoch": 18.445054945054945, + "grad_norm": 18.65706443786621, + "learning_rate": 4.0777472527472526e-05, + "loss": 0.8503, + "step": 6714 + }, + { + "epoch": 18.447802197802197, + "grad_norm": 12.95778751373291, + "learning_rate": 4.07760989010989e-05, + "loss": 0.3831, + "step": 6715 + }, + { + "epoch": 18.45054945054945, + "grad_norm": 11.189443588256836, + "learning_rate": 4.077472527472528e-05, + "loss": 0.3697, + "step": 6716 + }, + { + "epoch": 18.453296703296704, + "grad_norm": 15.905070304870605, + "learning_rate": 4.077335164835165e-05, + "loss": 0.5366, + "step": 6717 + }, + { + "epoch": 18.456043956043956, + "grad_norm": 6.258213043212891, + "learning_rate": 4.077197802197803e-05, + "loss": 0.1798, + "step": 6718 + }, + { + "epoch": 18.458791208791208, + "grad_norm": 16.6025333404541, + "learning_rate": 4.07706043956044e-05, + "loss": 0.5846, + "step": 6719 + }, + { + "epoch": 18.46153846153846, + "grad_norm": 4.212619304656982, + "learning_rate": 4.0769230769230773e-05, + "loss": 0.1261, + "step": 6720 + }, + { + "epoch": 18.464285714285715, + "grad_norm": 20.502288818359375, + "learning_rate": 4.0767857142857143e-05, + "loss": 0.7505, + "step": 6721 + }, + { + "epoch": 18.467032967032967, + "grad_norm": 13.014527320861816, + "learning_rate": 4.0766483516483514e-05, + "loss": 0.3325, + "step": 6722 + }, + { + "epoch": 18.46978021978022, + "grad_norm": 11.282859802246094, + "learning_rate": 4.076510989010989e-05, + "loss": 0.3832, + "step": 6723 + }, + { + "epoch": 18.47252747252747, + "grad_norm": 5.1267619132995605, + "learning_rate": 4.076373626373626e-05, + "loss": 0.1209, + "step": 6724 + }, + { + "epoch": 18.475274725274726, + "grad_norm": 7.484025478363037, + "learning_rate": 4.076236263736264e-05, + "loss": 0.1779, + "step": 6725 + }, + { + "epoch": 18.478021978021978, + "grad_norm": 12.7611665725708, + "learning_rate": 4.0760989010989014e-05, + "loss": 0.5387, + "step": 6726 + }, + { + "epoch": 18.48076923076923, + "grad_norm": 14.655779838562012, + "learning_rate": 4.0759615384615384e-05, + "loss": 0.472, + "step": 6727 + }, + { + "epoch": 18.483516483516482, + "grad_norm": 15.226805686950684, + "learning_rate": 4.075824175824176e-05, + "loss": 0.4879, + "step": 6728 + }, + { + "epoch": 18.486263736263737, + "grad_norm": 4.448369979858398, + "learning_rate": 4.075686813186813e-05, + "loss": 0.071, + "step": 6729 + }, + { + "epoch": 18.48901098901099, + "grad_norm": 14.651583671569824, + "learning_rate": 4.075549450549451e-05, + "loss": 0.4774, + "step": 6730 + }, + { + "epoch": 18.49175824175824, + "grad_norm": 12.912549018859863, + "learning_rate": 4.0754120879120884e-05, + "loss": 0.394, + "step": 6731 + }, + { + "epoch": 18.494505494505496, + "grad_norm": 11.394492149353027, + "learning_rate": 4.0752747252747254e-05, + "loss": 0.3295, + "step": 6732 + }, + { + "epoch": 18.497252747252748, + "grad_norm": 18.822620391845703, + "learning_rate": 4.075137362637363e-05, + "loss": 0.6618, + "step": 6733 + }, + { + "epoch": 18.5, + "grad_norm": 8.857860565185547, + "learning_rate": 4.075e-05, + "loss": 0.2714, + "step": 6734 + }, + { + "epoch": 18.502747252747252, + "grad_norm": 16.446807861328125, + "learning_rate": 4.074862637362638e-05, + "loss": 0.3741, + "step": 6735 + }, + { + "epoch": 18.505494505494504, + "grad_norm": 15.205599784851074, + "learning_rate": 4.074725274725275e-05, + "loss": 0.5243, + "step": 6736 + }, + { + "epoch": 18.50824175824176, + "grad_norm": 8.864509582519531, + "learning_rate": 4.074587912087912e-05, + "loss": 0.2035, + "step": 6737 + }, + { + "epoch": 18.51098901098901, + "grad_norm": 13.699999809265137, + "learning_rate": 4.0744505494505495e-05, + "loss": 0.325, + "step": 6738 + }, + { + "epoch": 18.513736263736263, + "grad_norm": 13.33934211730957, + "learning_rate": 4.0743131868131865e-05, + "loss": 0.4703, + "step": 6739 + }, + { + "epoch": 18.516483516483518, + "grad_norm": 9.61860466003418, + "learning_rate": 4.074175824175824e-05, + "loss": 0.2271, + "step": 6740 + }, + { + "epoch": 18.51923076923077, + "grad_norm": 9.262579917907715, + "learning_rate": 4.074038461538462e-05, + "loss": 0.2537, + "step": 6741 + }, + { + "epoch": 18.521978021978022, + "grad_norm": 18.08061408996582, + "learning_rate": 4.073901098901099e-05, + "loss": 0.8462, + "step": 6742 + }, + { + "epoch": 18.524725274725274, + "grad_norm": 14.847686767578125, + "learning_rate": 4.0737637362637365e-05, + "loss": 0.4227, + "step": 6743 + }, + { + "epoch": 18.52747252747253, + "grad_norm": 11.347742080688477, + "learning_rate": 4.0736263736263735e-05, + "loss": 0.3739, + "step": 6744 + }, + { + "epoch": 18.53021978021978, + "grad_norm": 8.657550811767578, + "learning_rate": 4.073489010989011e-05, + "loss": 0.2109, + "step": 6745 + }, + { + "epoch": 18.532967032967033, + "grad_norm": 11.307381629943848, + "learning_rate": 4.073351648351649e-05, + "loss": 0.3524, + "step": 6746 + }, + { + "epoch": 18.535714285714285, + "grad_norm": 12.37407112121582, + "learning_rate": 4.073214285714286e-05, + "loss": 0.5715, + "step": 6747 + }, + { + "epoch": 18.53846153846154, + "grad_norm": 10.73703384399414, + "learning_rate": 4.0730769230769236e-05, + "loss": 0.3176, + "step": 6748 + }, + { + "epoch": 18.541208791208792, + "grad_norm": 20.4503173828125, + "learning_rate": 4.0729395604395606e-05, + "loss": 0.9328, + "step": 6749 + }, + { + "epoch": 18.543956043956044, + "grad_norm": 13.410175323486328, + "learning_rate": 4.072802197802198e-05, + "loss": 0.4913, + "step": 6750 + }, + { + "epoch": 18.546703296703296, + "grad_norm": 7.561065673828125, + "learning_rate": 4.072664835164835e-05, + "loss": 0.1758, + "step": 6751 + }, + { + "epoch": 18.54945054945055, + "grad_norm": 15.15070629119873, + "learning_rate": 4.072527472527472e-05, + "loss": 0.4505, + "step": 6752 + }, + { + "epoch": 18.552197802197803, + "grad_norm": 12.301416397094727, + "learning_rate": 4.07239010989011e-05, + "loss": 0.2305, + "step": 6753 + }, + { + "epoch": 18.554945054945055, + "grad_norm": 28.676910400390625, + "learning_rate": 4.072252747252747e-05, + "loss": 0.9979, + "step": 6754 + }, + { + "epoch": 18.557692307692307, + "grad_norm": 5.557855129241943, + "learning_rate": 4.0721153846153846e-05, + "loss": 0.1268, + "step": 6755 + }, + { + "epoch": 18.560439560439562, + "grad_norm": 12.55672836303711, + "learning_rate": 4.071978021978022e-05, + "loss": 0.4068, + "step": 6756 + }, + { + "epoch": 18.563186813186814, + "grad_norm": 13.294766426086426, + "learning_rate": 4.071840659340659e-05, + "loss": 0.3699, + "step": 6757 + }, + { + "epoch": 18.565934065934066, + "grad_norm": 14.868728637695312, + "learning_rate": 4.071703296703297e-05, + "loss": 0.6054, + "step": 6758 + }, + { + "epoch": 18.568681318681318, + "grad_norm": 8.006075859069824, + "learning_rate": 4.071565934065934e-05, + "loss": 0.3674, + "step": 6759 + }, + { + "epoch": 18.571428571428573, + "grad_norm": 22.088762283325195, + "learning_rate": 4.0714285714285717e-05, + "loss": 0.9778, + "step": 6760 + }, + { + "epoch": 18.574175824175825, + "grad_norm": 8.445390701293945, + "learning_rate": 4.071291208791209e-05, + "loss": 0.1955, + "step": 6761 + }, + { + "epoch": 18.576923076923077, + "grad_norm": 9.364397048950195, + "learning_rate": 4.071153846153846e-05, + "loss": 0.3739, + "step": 6762 + }, + { + "epoch": 18.57967032967033, + "grad_norm": 9.433045387268066, + "learning_rate": 4.071016483516484e-05, + "loss": 0.3599, + "step": 6763 + }, + { + "epoch": 18.582417582417584, + "grad_norm": 18.237030029296875, + "learning_rate": 4.070879120879121e-05, + "loss": 0.8452, + "step": 6764 + }, + { + "epoch": 18.585164835164836, + "grad_norm": 11.139121055603027, + "learning_rate": 4.070741758241759e-05, + "loss": 0.2814, + "step": 6765 + }, + { + "epoch": 18.587912087912088, + "grad_norm": 17.25691032409668, + "learning_rate": 4.070604395604396e-05, + "loss": 0.5728, + "step": 6766 + }, + { + "epoch": 18.59065934065934, + "grad_norm": 6.232461929321289, + "learning_rate": 4.070467032967033e-05, + "loss": 0.201, + "step": 6767 + }, + { + "epoch": 18.593406593406595, + "grad_norm": 9.932036399841309, + "learning_rate": 4.0703296703296704e-05, + "loss": 0.2691, + "step": 6768 + }, + { + "epoch": 18.596153846153847, + "grad_norm": 13.781249046325684, + "learning_rate": 4.0701923076923074e-05, + "loss": 0.4532, + "step": 6769 + }, + { + "epoch": 18.5989010989011, + "grad_norm": 19.94683837890625, + "learning_rate": 4.070054945054945e-05, + "loss": 0.8968, + "step": 6770 + }, + { + "epoch": 18.60164835164835, + "grad_norm": 11.743661880493164, + "learning_rate": 4.069917582417583e-05, + "loss": 0.2684, + "step": 6771 + }, + { + "epoch": 18.604395604395606, + "grad_norm": 11.272674560546875, + "learning_rate": 4.06978021978022e-05, + "loss": 0.3798, + "step": 6772 + }, + { + "epoch": 18.607142857142858, + "grad_norm": 16.654706954956055, + "learning_rate": 4.0696428571428574e-05, + "loss": 0.4914, + "step": 6773 + }, + { + "epoch": 18.60989010989011, + "grad_norm": 11.885549545288086, + "learning_rate": 4.0695054945054944e-05, + "loss": 0.3541, + "step": 6774 + }, + { + "epoch": 18.61263736263736, + "grad_norm": 9.275858879089355, + "learning_rate": 4.069368131868132e-05, + "loss": 0.2469, + "step": 6775 + }, + { + "epoch": 18.615384615384617, + "grad_norm": 8.342236518859863, + "learning_rate": 4.06923076923077e-05, + "loss": 0.2921, + "step": 6776 + }, + { + "epoch": 18.61813186813187, + "grad_norm": 12.703060150146484, + "learning_rate": 4.069093406593407e-05, + "loss": 0.4635, + "step": 6777 + }, + { + "epoch": 18.62087912087912, + "grad_norm": 16.883697509765625, + "learning_rate": 4.0689560439560445e-05, + "loss": 0.5533, + "step": 6778 + }, + { + "epoch": 18.623626373626372, + "grad_norm": 13.956048011779785, + "learning_rate": 4.0688186813186815e-05, + "loss": 0.4511, + "step": 6779 + }, + { + "epoch": 18.626373626373628, + "grad_norm": 19.06551170349121, + "learning_rate": 4.068681318681319e-05, + "loss": 0.7607, + "step": 6780 + }, + { + "epoch": 18.62912087912088, + "grad_norm": 14.228803634643555, + "learning_rate": 4.068543956043956e-05, + "loss": 0.7143, + "step": 6781 + }, + { + "epoch": 18.63186813186813, + "grad_norm": 9.040056228637695, + "learning_rate": 4.068406593406593e-05, + "loss": 0.2963, + "step": 6782 + }, + { + "epoch": 18.634615384615383, + "grad_norm": 21.087448120117188, + "learning_rate": 4.068269230769231e-05, + "loss": 0.6116, + "step": 6783 + }, + { + "epoch": 18.63736263736264, + "grad_norm": 11.415996551513672, + "learning_rate": 4.068131868131868e-05, + "loss": 0.2106, + "step": 6784 + }, + { + "epoch": 18.64010989010989, + "grad_norm": 19.974994659423828, + "learning_rate": 4.0679945054945055e-05, + "loss": 0.3789, + "step": 6785 + }, + { + "epoch": 18.642857142857142, + "grad_norm": 11.478302955627441, + "learning_rate": 4.067857142857143e-05, + "loss": 0.3712, + "step": 6786 + }, + { + "epoch": 18.645604395604394, + "grad_norm": 12.152091979980469, + "learning_rate": 4.06771978021978e-05, + "loss": 0.4375, + "step": 6787 + }, + { + "epoch": 18.64835164835165, + "grad_norm": 13.95510482788086, + "learning_rate": 4.067582417582418e-05, + "loss": 0.4226, + "step": 6788 + }, + { + "epoch": 18.6510989010989, + "grad_norm": 3.3865506649017334, + "learning_rate": 4.067445054945055e-05, + "loss": 0.1075, + "step": 6789 + }, + { + "epoch": 18.653846153846153, + "grad_norm": 15.612456321716309, + "learning_rate": 4.0673076923076926e-05, + "loss": 0.4225, + "step": 6790 + }, + { + "epoch": 18.656593406593405, + "grad_norm": 9.901491165161133, + "learning_rate": 4.06717032967033e-05, + "loss": 0.2443, + "step": 6791 + }, + { + "epoch": 18.65934065934066, + "grad_norm": 20.794103622436523, + "learning_rate": 4.067032967032967e-05, + "loss": 0.7992, + "step": 6792 + }, + { + "epoch": 18.662087912087912, + "grad_norm": 10.424991607666016, + "learning_rate": 4.066895604395605e-05, + "loss": 0.3174, + "step": 6793 + }, + { + "epoch": 18.664835164835164, + "grad_norm": 17.031890869140625, + "learning_rate": 4.066758241758242e-05, + "loss": 0.4693, + "step": 6794 + }, + { + "epoch": 18.667582417582416, + "grad_norm": 11.148852348327637, + "learning_rate": 4.0666208791208796e-05, + "loss": 0.2667, + "step": 6795 + }, + { + "epoch": 18.67032967032967, + "grad_norm": 20.06100082397461, + "learning_rate": 4.0664835164835166e-05, + "loss": 0.5236, + "step": 6796 + }, + { + "epoch": 18.673076923076923, + "grad_norm": 15.807194709777832, + "learning_rate": 4.0663461538461536e-05, + "loss": 0.4575, + "step": 6797 + }, + { + "epoch": 18.675824175824175, + "grad_norm": 27.057668685913086, + "learning_rate": 4.066208791208791e-05, + "loss": 1.4378, + "step": 6798 + }, + { + "epoch": 18.678571428571427, + "grad_norm": 7.412449836730957, + "learning_rate": 4.066071428571428e-05, + "loss": 0.1335, + "step": 6799 + }, + { + "epoch": 18.681318681318682, + "grad_norm": 10.571999549865723, + "learning_rate": 4.065934065934066e-05, + "loss": 0.4579, + "step": 6800 + }, + { + "epoch": 18.684065934065934, + "grad_norm": 7.435108661651611, + "learning_rate": 4.0657967032967036e-05, + "loss": 0.1835, + "step": 6801 + }, + { + "epoch": 18.686813186813186, + "grad_norm": 9.334844589233398, + "learning_rate": 4.0656593406593406e-05, + "loss": 0.2673, + "step": 6802 + }, + { + "epoch": 18.689560439560438, + "grad_norm": 19.07665252685547, + "learning_rate": 4.065521978021978e-05, + "loss": 0.6797, + "step": 6803 + }, + { + "epoch": 18.692307692307693, + "grad_norm": 6.806623935699463, + "learning_rate": 4.065384615384615e-05, + "loss": 0.2258, + "step": 6804 + }, + { + "epoch": 18.695054945054945, + "grad_norm": 7.783562660217285, + "learning_rate": 4.065247252747253e-05, + "loss": 0.1946, + "step": 6805 + }, + { + "epoch": 18.697802197802197, + "grad_norm": 8.78884506225586, + "learning_rate": 4.065109890109891e-05, + "loss": 0.2426, + "step": 6806 + }, + { + "epoch": 18.70054945054945, + "grad_norm": 15.267644882202148, + "learning_rate": 4.064972527472528e-05, + "loss": 0.4708, + "step": 6807 + }, + { + "epoch": 18.703296703296704, + "grad_norm": 10.086282730102539, + "learning_rate": 4.0648351648351654e-05, + "loss": 0.401, + "step": 6808 + }, + { + "epoch": 18.706043956043956, + "grad_norm": 15.490350723266602, + "learning_rate": 4.0646978021978024e-05, + "loss": 0.4194, + "step": 6809 + }, + { + "epoch": 18.708791208791208, + "grad_norm": 9.703792572021484, + "learning_rate": 4.06456043956044e-05, + "loss": 0.314, + "step": 6810 + }, + { + "epoch": 18.71153846153846, + "grad_norm": 6.912189960479736, + "learning_rate": 4.064423076923077e-05, + "loss": 0.1656, + "step": 6811 + }, + { + "epoch": 18.714285714285715, + "grad_norm": 13.577672004699707, + "learning_rate": 4.064285714285714e-05, + "loss": 0.5125, + "step": 6812 + }, + { + "epoch": 18.717032967032967, + "grad_norm": 17.145183563232422, + "learning_rate": 4.064148351648352e-05, + "loss": 0.7798, + "step": 6813 + }, + { + "epoch": 18.71978021978022, + "grad_norm": 13.629937171936035, + "learning_rate": 4.064010989010989e-05, + "loss": 0.3472, + "step": 6814 + }, + { + "epoch": 18.72252747252747, + "grad_norm": 14.32968521118164, + "learning_rate": 4.0638736263736264e-05, + "loss": 0.6059, + "step": 6815 + }, + { + "epoch": 18.725274725274726, + "grad_norm": 13.910097122192383, + "learning_rate": 4.063736263736264e-05, + "loss": 0.48, + "step": 6816 + }, + { + "epoch": 18.728021978021978, + "grad_norm": 9.083317756652832, + "learning_rate": 4.063598901098901e-05, + "loss": 0.2048, + "step": 6817 + }, + { + "epoch": 18.73076923076923, + "grad_norm": 8.812725067138672, + "learning_rate": 4.063461538461539e-05, + "loss": 0.1518, + "step": 6818 + }, + { + "epoch": 18.733516483516482, + "grad_norm": 10.732921600341797, + "learning_rate": 4.063324175824176e-05, + "loss": 0.3873, + "step": 6819 + }, + { + "epoch": 18.736263736263737, + "grad_norm": 10.304919242858887, + "learning_rate": 4.0631868131868134e-05, + "loss": 0.2876, + "step": 6820 + }, + { + "epoch": 18.73901098901099, + "grad_norm": 7.745866298675537, + "learning_rate": 4.063049450549451e-05, + "loss": 0.2311, + "step": 6821 + }, + { + "epoch": 18.74175824175824, + "grad_norm": 8.949213027954102, + "learning_rate": 4.062912087912088e-05, + "loss": 0.2275, + "step": 6822 + }, + { + "epoch": 18.744505494505496, + "grad_norm": 19.877838134765625, + "learning_rate": 4.062774725274726e-05, + "loss": 0.5573, + "step": 6823 + }, + { + "epoch": 18.747252747252748, + "grad_norm": 8.217915534973145, + "learning_rate": 4.062637362637363e-05, + "loss": 0.296, + "step": 6824 + }, + { + "epoch": 18.75, + "grad_norm": 9.402615547180176, + "learning_rate": 4.0625000000000005e-05, + "loss": 0.3662, + "step": 6825 + }, + { + "epoch": 18.752747252747252, + "grad_norm": 11.21933650970459, + "learning_rate": 4.0623626373626375e-05, + "loss": 0.399, + "step": 6826 + }, + { + "epoch": 18.755494505494504, + "grad_norm": 11.002741813659668, + "learning_rate": 4.0622252747252745e-05, + "loss": 0.2818, + "step": 6827 + }, + { + "epoch": 18.75824175824176, + "grad_norm": 13.47640609741211, + "learning_rate": 4.062087912087912e-05, + "loss": 0.4088, + "step": 6828 + }, + { + "epoch": 18.76098901098901, + "grad_norm": 10.126293182373047, + "learning_rate": 4.061950549450549e-05, + "loss": 0.2717, + "step": 6829 + }, + { + "epoch": 18.763736263736263, + "grad_norm": 13.81099796295166, + "learning_rate": 4.061813186813187e-05, + "loss": 0.6215, + "step": 6830 + }, + { + "epoch": 18.766483516483518, + "grad_norm": 9.16664981842041, + "learning_rate": 4.0616758241758245e-05, + "loss": 0.2342, + "step": 6831 + }, + { + "epoch": 18.76923076923077, + "grad_norm": 14.968103408813477, + "learning_rate": 4.0615384615384615e-05, + "loss": 0.5237, + "step": 6832 + }, + { + "epoch": 18.771978021978022, + "grad_norm": 9.578859329223633, + "learning_rate": 4.061401098901099e-05, + "loss": 0.3531, + "step": 6833 + }, + { + "epoch": 18.774725274725274, + "grad_norm": 13.230916023254395, + "learning_rate": 4.061263736263736e-05, + "loss": 0.2518, + "step": 6834 + }, + { + "epoch": 18.77747252747253, + "grad_norm": 5.269249439239502, + "learning_rate": 4.061126373626374e-05, + "loss": 0.1213, + "step": 6835 + }, + { + "epoch": 18.78021978021978, + "grad_norm": 12.488652229309082, + "learning_rate": 4.0609890109890116e-05, + "loss": 0.4448, + "step": 6836 + }, + { + "epoch": 18.782967032967033, + "grad_norm": 7.693845748901367, + "learning_rate": 4.0608516483516486e-05, + "loss": 0.1888, + "step": 6837 + }, + { + "epoch": 18.785714285714285, + "grad_norm": 17.403589248657227, + "learning_rate": 4.060714285714286e-05, + "loss": 0.6774, + "step": 6838 + }, + { + "epoch": 18.78846153846154, + "grad_norm": 17.507850646972656, + "learning_rate": 4.060576923076923e-05, + "loss": 0.5691, + "step": 6839 + }, + { + "epoch": 18.791208791208792, + "grad_norm": 10.719854354858398, + "learning_rate": 4.060439560439561e-05, + "loss": 0.3561, + "step": 6840 + }, + { + "epoch": 18.793956043956044, + "grad_norm": 14.365032196044922, + "learning_rate": 4.060302197802198e-05, + "loss": 0.4948, + "step": 6841 + }, + { + "epoch": 18.796703296703296, + "grad_norm": 8.366076469421387, + "learning_rate": 4.060164835164835e-05, + "loss": 0.2974, + "step": 6842 + }, + { + "epoch": 18.79945054945055, + "grad_norm": 20.945941925048828, + "learning_rate": 4.0600274725274726e-05, + "loss": 0.9926, + "step": 6843 + }, + { + "epoch": 18.802197802197803, + "grad_norm": 11.247264862060547, + "learning_rate": 4.0598901098901096e-05, + "loss": 0.2154, + "step": 6844 + }, + { + "epoch": 18.804945054945055, + "grad_norm": 17.344820022583008, + "learning_rate": 4.059752747252747e-05, + "loss": 0.6184, + "step": 6845 + }, + { + "epoch": 18.807692307692307, + "grad_norm": 8.51474380493164, + "learning_rate": 4.059615384615385e-05, + "loss": 0.2372, + "step": 6846 + }, + { + "epoch": 18.810439560439562, + "grad_norm": 4.479297161102295, + "learning_rate": 4.059478021978022e-05, + "loss": 0.1073, + "step": 6847 + }, + { + "epoch": 18.813186813186814, + "grad_norm": 10.429299354553223, + "learning_rate": 4.05934065934066e-05, + "loss": 0.3288, + "step": 6848 + }, + { + "epoch": 18.815934065934066, + "grad_norm": 5.238014221191406, + "learning_rate": 4.059203296703297e-05, + "loss": 0.113, + "step": 6849 + }, + { + "epoch": 18.818681318681318, + "grad_norm": 13.80600643157959, + "learning_rate": 4.0590659340659343e-05, + "loss": 0.4234, + "step": 6850 + }, + { + "epoch": 18.821428571428573, + "grad_norm": 10.39022445678711, + "learning_rate": 4.058928571428572e-05, + "loss": 0.3788, + "step": 6851 + }, + { + "epoch": 18.824175824175825, + "grad_norm": 13.263279914855957, + "learning_rate": 4.058791208791209e-05, + "loss": 0.5004, + "step": 6852 + }, + { + "epoch": 18.826923076923077, + "grad_norm": 8.165156364440918, + "learning_rate": 4.058653846153847e-05, + "loss": 0.1951, + "step": 6853 + }, + { + "epoch": 18.82967032967033, + "grad_norm": 9.820059776306152, + "learning_rate": 4.058516483516484e-05, + "loss": 0.4185, + "step": 6854 + }, + { + "epoch": 18.832417582417584, + "grad_norm": 13.392374992370605, + "learning_rate": 4.0583791208791214e-05, + "loss": 0.7178, + "step": 6855 + }, + { + "epoch": 18.835164835164836, + "grad_norm": 15.691166877746582, + "learning_rate": 4.0582417582417584e-05, + "loss": 0.6967, + "step": 6856 + }, + { + "epoch": 18.837912087912088, + "grad_norm": 14.061055183410645, + "learning_rate": 4.0581043956043954e-05, + "loss": 0.4654, + "step": 6857 + }, + { + "epoch": 18.84065934065934, + "grad_norm": 17.73890495300293, + "learning_rate": 4.057967032967033e-05, + "loss": 0.6077, + "step": 6858 + }, + { + "epoch": 18.843406593406595, + "grad_norm": 13.549565315246582, + "learning_rate": 4.05782967032967e-05, + "loss": 0.4761, + "step": 6859 + }, + { + "epoch": 18.846153846153847, + "grad_norm": 14.934684753417969, + "learning_rate": 4.057692307692308e-05, + "loss": 0.3614, + "step": 6860 + }, + { + "epoch": 18.8489010989011, + "grad_norm": 12.957789421081543, + "learning_rate": 4.0575549450549454e-05, + "loss": 0.3821, + "step": 6861 + }, + { + "epoch": 18.85164835164835, + "grad_norm": 21.04880714416504, + "learning_rate": 4.0574175824175824e-05, + "loss": 0.7521, + "step": 6862 + }, + { + "epoch": 18.854395604395606, + "grad_norm": 12.286931991577148, + "learning_rate": 4.05728021978022e-05, + "loss": 0.4147, + "step": 6863 + }, + { + "epoch": 18.857142857142858, + "grad_norm": 9.745198249816895, + "learning_rate": 4.057142857142857e-05, + "loss": 0.2165, + "step": 6864 + }, + { + "epoch": 18.85989010989011, + "grad_norm": 10.000843048095703, + "learning_rate": 4.057005494505495e-05, + "loss": 0.3112, + "step": 6865 + }, + { + "epoch": 18.86263736263736, + "grad_norm": 10.79208755493164, + "learning_rate": 4.0568681318681325e-05, + "loss": 0.3525, + "step": 6866 + }, + { + "epoch": 18.865384615384617, + "grad_norm": 3.9191579818725586, + "learning_rate": 4.0567307692307695e-05, + "loss": 0.1103, + "step": 6867 + }, + { + "epoch": 18.86813186813187, + "grad_norm": 4.318774223327637, + "learning_rate": 4.056593406593407e-05, + "loss": 0.1174, + "step": 6868 + }, + { + "epoch": 18.87087912087912, + "grad_norm": 11.250327110290527, + "learning_rate": 4.056456043956044e-05, + "loss": 0.388, + "step": 6869 + }, + { + "epoch": 18.873626373626372, + "grad_norm": 14.538435935974121, + "learning_rate": 4.056318681318682e-05, + "loss": 0.4437, + "step": 6870 + }, + { + "epoch": 18.876373626373628, + "grad_norm": 11.8911771774292, + "learning_rate": 4.056181318681319e-05, + "loss": 0.3492, + "step": 6871 + }, + { + "epoch": 18.87912087912088, + "grad_norm": 12.969342231750488, + "learning_rate": 4.056043956043956e-05, + "loss": 0.3111, + "step": 6872 + }, + { + "epoch": 18.88186813186813, + "grad_norm": 5.54830265045166, + "learning_rate": 4.0559065934065935e-05, + "loss": 0.1125, + "step": 6873 + }, + { + "epoch": 18.884615384615383, + "grad_norm": 14.372328758239746, + "learning_rate": 4.0557692307692305e-05, + "loss": 0.5593, + "step": 6874 + }, + { + "epoch": 18.88736263736264, + "grad_norm": 12.541982650756836, + "learning_rate": 4.055631868131868e-05, + "loss": 0.5496, + "step": 6875 + }, + { + "epoch": 18.89010989010989, + "grad_norm": 6.243578910827637, + "learning_rate": 4.055494505494506e-05, + "loss": 0.2739, + "step": 6876 + }, + { + "epoch": 18.892857142857142, + "grad_norm": 9.937167167663574, + "learning_rate": 4.055357142857143e-05, + "loss": 0.498, + "step": 6877 + }, + { + "epoch": 18.895604395604394, + "grad_norm": 11.234950065612793, + "learning_rate": 4.0552197802197806e-05, + "loss": 0.2932, + "step": 6878 + }, + { + "epoch": 18.89835164835165, + "grad_norm": 14.366880416870117, + "learning_rate": 4.0550824175824176e-05, + "loss": 0.4328, + "step": 6879 + }, + { + "epoch": 18.9010989010989, + "grad_norm": 15.154485702514648, + "learning_rate": 4.054945054945055e-05, + "loss": 0.5549, + "step": 6880 + }, + { + "epoch": 18.903846153846153, + "grad_norm": 10.355493545532227, + "learning_rate": 4.054807692307693e-05, + "loss": 0.3498, + "step": 6881 + }, + { + "epoch": 18.906593406593405, + "grad_norm": 19.19426155090332, + "learning_rate": 4.05467032967033e-05, + "loss": 0.7059, + "step": 6882 + }, + { + "epoch": 18.90934065934066, + "grad_norm": 13.41904067993164, + "learning_rate": 4.0545329670329676e-05, + "loss": 0.4181, + "step": 6883 + }, + { + "epoch": 18.912087912087912, + "grad_norm": 14.254697799682617, + "learning_rate": 4.0543956043956046e-05, + "loss": 0.5543, + "step": 6884 + }, + { + "epoch": 18.914835164835164, + "grad_norm": 7.83069372177124, + "learning_rate": 4.054258241758242e-05, + "loss": 0.1395, + "step": 6885 + }, + { + "epoch": 18.917582417582416, + "grad_norm": 9.6536865234375, + "learning_rate": 4.054120879120879e-05, + "loss": 0.2741, + "step": 6886 + }, + { + "epoch": 18.92032967032967, + "grad_norm": 17.928560256958008, + "learning_rate": 4.053983516483516e-05, + "loss": 0.4749, + "step": 6887 + }, + { + "epoch": 18.923076923076923, + "grad_norm": 15.263189315795898, + "learning_rate": 4.053846153846154e-05, + "loss": 0.3403, + "step": 6888 + }, + { + "epoch": 18.925824175824175, + "grad_norm": 21.748807907104492, + "learning_rate": 4.053708791208791e-05, + "loss": 0.852, + "step": 6889 + }, + { + "epoch": 18.928571428571427, + "grad_norm": 13.36639404296875, + "learning_rate": 4.0535714285714287e-05, + "loss": 0.6726, + "step": 6890 + }, + { + "epoch": 18.931318681318682, + "grad_norm": 11.208495140075684, + "learning_rate": 4.053434065934066e-05, + "loss": 0.2281, + "step": 6891 + }, + { + "epoch": 18.934065934065934, + "grad_norm": 8.925859451293945, + "learning_rate": 4.053296703296703e-05, + "loss": 0.2982, + "step": 6892 + }, + { + "epoch": 18.936813186813186, + "grad_norm": 14.02806568145752, + "learning_rate": 4.053159340659341e-05, + "loss": 0.4399, + "step": 6893 + }, + { + "epoch": 18.939560439560438, + "grad_norm": 13.847806930541992, + "learning_rate": 4.053021978021978e-05, + "loss": 0.4445, + "step": 6894 + }, + { + "epoch": 18.942307692307693, + "grad_norm": 7.091760158538818, + "learning_rate": 4.052884615384616e-05, + "loss": 0.1272, + "step": 6895 + }, + { + "epoch": 18.945054945054945, + "grad_norm": 19.257587432861328, + "learning_rate": 4.0527472527472534e-05, + "loss": 0.6928, + "step": 6896 + }, + { + "epoch": 18.947802197802197, + "grad_norm": 13.799538612365723, + "learning_rate": 4.0526098901098904e-05, + "loss": 0.514, + "step": 6897 + }, + { + "epoch": 18.95054945054945, + "grad_norm": 6.2221879959106445, + "learning_rate": 4.052472527472528e-05, + "loss": 0.2305, + "step": 6898 + }, + { + "epoch": 18.953296703296704, + "grad_norm": 6.386800289154053, + "learning_rate": 4.052335164835165e-05, + "loss": 0.1308, + "step": 6899 + }, + { + "epoch": 18.956043956043956, + "grad_norm": 9.665105819702148, + "learning_rate": 4.052197802197803e-05, + "loss": 0.2656, + "step": 6900 + }, + { + "epoch": 18.958791208791208, + "grad_norm": 9.285029411315918, + "learning_rate": 4.05206043956044e-05, + "loss": 0.1379, + "step": 6901 + }, + { + "epoch": 18.96153846153846, + "grad_norm": 11.34583854675293, + "learning_rate": 4.051923076923077e-05, + "loss": 0.4872, + "step": 6902 + }, + { + "epoch": 18.964285714285715, + "grad_norm": 13.379741668701172, + "learning_rate": 4.0517857142857144e-05, + "loss": 0.3231, + "step": 6903 + }, + { + "epoch": 18.967032967032967, + "grad_norm": 17.116329193115234, + "learning_rate": 4.0516483516483514e-05, + "loss": 0.51, + "step": 6904 + }, + { + "epoch": 18.96978021978022, + "grad_norm": 12.332602500915527, + "learning_rate": 4.051510989010989e-05, + "loss": 0.4449, + "step": 6905 + }, + { + "epoch": 18.97252747252747, + "grad_norm": 10.25604248046875, + "learning_rate": 4.051373626373627e-05, + "loss": 0.4095, + "step": 6906 + }, + { + "epoch": 18.975274725274726, + "grad_norm": 6.990005016326904, + "learning_rate": 4.051236263736264e-05, + "loss": 0.1727, + "step": 6907 + }, + { + "epoch": 18.978021978021978, + "grad_norm": 13.316627502441406, + "learning_rate": 4.0510989010989015e-05, + "loss": 0.456, + "step": 6908 + }, + { + "epoch": 18.98076923076923, + "grad_norm": 13.795771598815918, + "learning_rate": 4.0509615384615385e-05, + "loss": 0.4703, + "step": 6909 + }, + { + "epoch": 18.983516483516482, + "grad_norm": 24.61060905456543, + "learning_rate": 4.050824175824176e-05, + "loss": 0.8981, + "step": 6910 + }, + { + "epoch": 18.986263736263737, + "grad_norm": 16.105504989624023, + "learning_rate": 4.050686813186814e-05, + "loss": 0.534, + "step": 6911 + }, + { + "epoch": 18.98901098901099, + "grad_norm": 6.91457462310791, + "learning_rate": 4.050549450549451e-05, + "loss": 0.1358, + "step": 6912 + }, + { + "epoch": 18.99175824175824, + "grad_norm": 8.87353801727295, + "learning_rate": 4.0504120879120885e-05, + "loss": 0.2052, + "step": 6913 + }, + { + "epoch": 18.994505494505496, + "grad_norm": 6.920859336853027, + "learning_rate": 4.0502747252747255e-05, + "loss": 0.1332, + "step": 6914 + }, + { + "epoch": 18.997252747252748, + "grad_norm": 14.256680488586426, + "learning_rate": 4.050137362637363e-05, + "loss": 0.728, + "step": 6915 + }, + { + "epoch": 19.0, + "grad_norm": 53.31938171386719, + "learning_rate": 4.05e-05, + "loss": 1.1095, + "step": 6916 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.7548209366391184, + "eval_f1": 0.7375029307481191, + "eval_f1_DuraRiadoRio_64x64": 0.5025125628140703, + "eval_f1_Mole_64x64": 0.8150943396226416, + "eval_f1_Quebrado_64x64": 0.890282131661442, + "eval_f1_RiadoRio_64x64": 0.6934984520123839, + "eval_f1_RioFechado_64x64": 0.7861271676300579, + "eval_loss": 1.2330628633499146, + "eval_precision": 0.7869437781782278, + "eval_precision_DuraRiadoRio_64x64": 0.9090909090909091, + "eval_precision_Mole_64x64": 0.8925619834710744, + "eval_precision_Quebrado_64x64": 0.8114285714285714, + "eval_precision_RiadoRio_64x64": 0.6549707602339181, + "eval_precision_RioFechado_64x64": 0.6666666666666666, + "eval_recall": 0.7555843834939462, + "eval_recall_DuraRiadoRio_64x64": 0.3472222222222222, + "eval_recall_Mole_64x64": 0.75, + "eval_recall_Quebrado_64x64": 0.9861111111111112, + "eval_recall_RiadoRio_64x64": 0.7368421052631579, + "eval_recall_RioFechado_64x64": 0.9577464788732394, + "eval_runtime": 1.8429, + "eval_samples_per_second": 393.936, + "eval_steps_per_second": 24.96, + "step": 6916 + }, + { + "epoch": 19.002747252747252, + "grad_norm": 4.401243209838867, + "learning_rate": 4.049862637362637e-05, + "loss": 0.0946, + "step": 6917 + }, + { + "epoch": 19.005494505494507, + "grad_norm": 14.851733207702637, + "learning_rate": 4.049725274725275e-05, + "loss": 0.451, + "step": 6918 + }, + { + "epoch": 19.00824175824176, + "grad_norm": 4.221693515777588, + "learning_rate": 4.049587912087912e-05, + "loss": 0.1221, + "step": 6919 + }, + { + "epoch": 19.01098901098901, + "grad_norm": 12.841436386108398, + "learning_rate": 4.0494505494505496e-05, + "loss": 0.5946, + "step": 6920 + }, + { + "epoch": 19.013736263736263, + "grad_norm": 12.415454864501953, + "learning_rate": 4.049313186813187e-05, + "loss": 0.2787, + "step": 6921 + }, + { + "epoch": 19.016483516483518, + "grad_norm": 14.27585506439209, + "learning_rate": 4.049175824175824e-05, + "loss": 0.3051, + "step": 6922 + }, + { + "epoch": 19.01923076923077, + "grad_norm": 12.402995109558105, + "learning_rate": 4.049038461538462e-05, + "loss": 0.2055, + "step": 6923 + }, + { + "epoch": 19.021978021978022, + "grad_norm": 12.354409217834473, + "learning_rate": 4.048901098901099e-05, + "loss": 0.4621, + "step": 6924 + }, + { + "epoch": 19.024725274725274, + "grad_norm": 9.39338493347168, + "learning_rate": 4.0487637362637366e-05, + "loss": 0.2239, + "step": 6925 + }, + { + "epoch": 19.02747252747253, + "grad_norm": 11.237970352172852, + "learning_rate": 4.048626373626374e-05, + "loss": 0.2535, + "step": 6926 + }, + { + "epoch": 19.03021978021978, + "grad_norm": 17.55658531188965, + "learning_rate": 4.048489010989011e-05, + "loss": 0.6975, + "step": 6927 + }, + { + "epoch": 19.032967032967033, + "grad_norm": 10.791068077087402, + "learning_rate": 4.048351648351649e-05, + "loss": 0.4359, + "step": 6928 + }, + { + "epoch": 19.035714285714285, + "grad_norm": 21.975801467895508, + "learning_rate": 4.048214285714286e-05, + "loss": 0.9269, + "step": 6929 + }, + { + "epoch": 19.03846153846154, + "grad_norm": 16.660795211791992, + "learning_rate": 4.0480769230769236e-05, + "loss": 0.5052, + "step": 6930 + }, + { + "epoch": 19.041208791208792, + "grad_norm": 9.128256797790527, + "learning_rate": 4.0479395604395606e-05, + "loss": 0.3695, + "step": 6931 + }, + { + "epoch": 19.043956043956044, + "grad_norm": 11.897278785705566, + "learning_rate": 4.0478021978021976e-05, + "loss": 0.283, + "step": 6932 + }, + { + "epoch": 19.046703296703296, + "grad_norm": 14.861307144165039, + "learning_rate": 4.047664835164835e-05, + "loss": 0.6537, + "step": 6933 + }, + { + "epoch": 19.04945054945055, + "grad_norm": 21.180635452270508, + "learning_rate": 4.047527472527472e-05, + "loss": 0.5371, + "step": 6934 + }, + { + "epoch": 19.052197802197803, + "grad_norm": 12.172987937927246, + "learning_rate": 4.04739010989011e-05, + "loss": 0.32, + "step": 6935 + }, + { + "epoch": 19.054945054945055, + "grad_norm": 18.249210357666016, + "learning_rate": 4.047252747252748e-05, + "loss": 0.7204, + "step": 6936 + }, + { + "epoch": 19.057692307692307, + "grad_norm": 8.585695266723633, + "learning_rate": 4.047115384615385e-05, + "loss": 0.2706, + "step": 6937 + }, + { + "epoch": 19.060439560439562, + "grad_norm": 11.528693199157715, + "learning_rate": 4.0469780219780224e-05, + "loss": 0.2956, + "step": 6938 + }, + { + "epoch": 19.063186813186814, + "grad_norm": 15.207686424255371, + "learning_rate": 4.0468406593406594e-05, + "loss": 0.4982, + "step": 6939 + }, + { + "epoch": 19.065934065934066, + "grad_norm": 19.716033935546875, + "learning_rate": 4.046703296703297e-05, + "loss": 0.632, + "step": 6940 + }, + { + "epoch": 19.068681318681318, + "grad_norm": 15.427635192871094, + "learning_rate": 4.046565934065934e-05, + "loss": 0.6132, + "step": 6941 + }, + { + "epoch": 19.071428571428573, + "grad_norm": 18.591562271118164, + "learning_rate": 4.046428571428572e-05, + "loss": 0.4846, + "step": 6942 + }, + { + "epoch": 19.074175824175825, + "grad_norm": 9.880825996398926, + "learning_rate": 4.0462912087912094e-05, + "loss": 0.3087, + "step": 6943 + }, + { + "epoch": 19.076923076923077, + "grad_norm": 9.428200721740723, + "learning_rate": 4.0461538461538464e-05, + "loss": 0.2192, + "step": 6944 + }, + { + "epoch": 19.07967032967033, + "grad_norm": 14.769866943359375, + "learning_rate": 4.046016483516484e-05, + "loss": 0.4229, + "step": 6945 + }, + { + "epoch": 19.082417582417584, + "grad_norm": 19.539554595947266, + "learning_rate": 4.045879120879121e-05, + "loss": 0.6547, + "step": 6946 + }, + { + "epoch": 19.085164835164836, + "grad_norm": 13.842180252075195, + "learning_rate": 4.045741758241758e-05, + "loss": 0.3844, + "step": 6947 + }, + { + "epoch": 19.087912087912088, + "grad_norm": 13.651077270507812, + "learning_rate": 4.045604395604396e-05, + "loss": 0.3608, + "step": 6948 + }, + { + "epoch": 19.09065934065934, + "grad_norm": 22.960901260375977, + "learning_rate": 4.045467032967033e-05, + "loss": 0.8397, + "step": 6949 + }, + { + "epoch": 19.093406593406595, + "grad_norm": 11.716349601745605, + "learning_rate": 4.0453296703296704e-05, + "loss": 0.397, + "step": 6950 + }, + { + "epoch": 19.096153846153847, + "grad_norm": 11.497152328491211, + "learning_rate": 4.0451923076923075e-05, + "loss": 0.3148, + "step": 6951 + }, + { + "epoch": 19.0989010989011, + "grad_norm": 17.202913284301758, + "learning_rate": 4.045054945054945e-05, + "loss": 0.4142, + "step": 6952 + }, + { + "epoch": 19.10164835164835, + "grad_norm": 9.145445823669434, + "learning_rate": 4.044917582417583e-05, + "loss": 0.2268, + "step": 6953 + }, + { + "epoch": 19.104395604395606, + "grad_norm": 9.932782173156738, + "learning_rate": 4.04478021978022e-05, + "loss": 0.2091, + "step": 6954 + }, + { + "epoch": 19.107142857142858, + "grad_norm": 10.973907470703125, + "learning_rate": 4.0446428571428575e-05, + "loss": 0.2675, + "step": 6955 + }, + { + "epoch": 19.10989010989011, + "grad_norm": 11.445404052734375, + "learning_rate": 4.0445054945054945e-05, + "loss": 0.317, + "step": 6956 + }, + { + "epoch": 19.11263736263736, + "grad_norm": 11.58174991607666, + "learning_rate": 4.044368131868132e-05, + "loss": 0.2867, + "step": 6957 + }, + { + "epoch": 19.115384615384617, + "grad_norm": 7.21368932723999, + "learning_rate": 4.04423076923077e-05, + "loss": 0.2206, + "step": 6958 + }, + { + "epoch": 19.11813186813187, + "grad_norm": 17.448951721191406, + "learning_rate": 4.044093406593407e-05, + "loss": 0.653, + "step": 6959 + }, + { + "epoch": 19.12087912087912, + "grad_norm": 10.339183807373047, + "learning_rate": 4.0439560439560445e-05, + "loss": 0.2821, + "step": 6960 + }, + { + "epoch": 19.123626373626372, + "grad_norm": 16.3574275970459, + "learning_rate": 4.0438186813186815e-05, + "loss": 0.6651, + "step": 6961 + }, + { + "epoch": 19.126373626373628, + "grad_norm": 9.184050559997559, + "learning_rate": 4.0436813186813185e-05, + "loss": 0.2438, + "step": 6962 + }, + { + "epoch": 19.12912087912088, + "grad_norm": 6.703480243682861, + "learning_rate": 4.043543956043956e-05, + "loss": 0.1809, + "step": 6963 + }, + { + "epoch": 19.13186813186813, + "grad_norm": 10.261198043823242, + "learning_rate": 4.043406593406593e-05, + "loss": 0.1836, + "step": 6964 + }, + { + "epoch": 19.134615384615383, + "grad_norm": 15.591870307922363, + "learning_rate": 4.043269230769231e-05, + "loss": 0.3983, + "step": 6965 + }, + { + "epoch": 19.13736263736264, + "grad_norm": 12.466153144836426, + "learning_rate": 4.043131868131868e-05, + "loss": 0.2669, + "step": 6966 + }, + { + "epoch": 19.14010989010989, + "grad_norm": 10.874460220336914, + "learning_rate": 4.0429945054945056e-05, + "loss": 0.383, + "step": 6967 + }, + { + "epoch": 19.142857142857142, + "grad_norm": 25.864686965942383, + "learning_rate": 4.042857142857143e-05, + "loss": 0.9775, + "step": 6968 + }, + { + "epoch": 19.145604395604394, + "grad_norm": 13.151211738586426, + "learning_rate": 4.04271978021978e-05, + "loss": 0.311, + "step": 6969 + }, + { + "epoch": 19.14835164835165, + "grad_norm": 6.667947292327881, + "learning_rate": 4.042582417582418e-05, + "loss": 0.172, + "step": 6970 + }, + { + "epoch": 19.1510989010989, + "grad_norm": 16.86350440979004, + "learning_rate": 4.042445054945055e-05, + "loss": 0.4519, + "step": 6971 + }, + { + "epoch": 19.153846153846153, + "grad_norm": 19.70250701904297, + "learning_rate": 4.0423076923076926e-05, + "loss": 0.6875, + "step": 6972 + }, + { + "epoch": 19.156593406593405, + "grad_norm": 10.352231979370117, + "learning_rate": 4.04217032967033e-05, + "loss": 0.2573, + "step": 6973 + }, + { + "epoch": 19.15934065934066, + "grad_norm": 15.870438575744629, + "learning_rate": 4.042032967032967e-05, + "loss": 0.3685, + "step": 6974 + }, + { + "epoch": 19.162087912087912, + "grad_norm": 13.52342414855957, + "learning_rate": 4.041895604395605e-05, + "loss": 0.4409, + "step": 6975 + }, + { + "epoch": 19.164835164835164, + "grad_norm": 13.996910095214844, + "learning_rate": 4.041758241758242e-05, + "loss": 0.4744, + "step": 6976 + }, + { + "epoch": 19.167582417582416, + "grad_norm": 11.397170066833496, + "learning_rate": 4.041620879120879e-05, + "loss": 0.2436, + "step": 6977 + }, + { + "epoch": 19.17032967032967, + "grad_norm": 20.487916946411133, + "learning_rate": 4.041483516483517e-05, + "loss": 0.5316, + "step": 6978 + }, + { + "epoch": 19.173076923076923, + "grad_norm": 5.228979587554932, + "learning_rate": 4.041346153846154e-05, + "loss": 0.1194, + "step": 6979 + }, + { + "epoch": 19.175824175824175, + "grad_norm": 12.33879566192627, + "learning_rate": 4.0412087912087913e-05, + "loss": 0.3693, + "step": 6980 + }, + { + "epoch": 19.178571428571427, + "grad_norm": 20.83277702331543, + "learning_rate": 4.0410714285714283e-05, + "loss": 0.8785, + "step": 6981 + }, + { + "epoch": 19.181318681318682, + "grad_norm": 14.804715156555176, + "learning_rate": 4.040934065934066e-05, + "loss": 0.4587, + "step": 6982 + }, + { + "epoch": 19.184065934065934, + "grad_norm": 18.45142364501953, + "learning_rate": 4.040796703296704e-05, + "loss": 1.0449, + "step": 6983 + }, + { + "epoch": 19.186813186813186, + "grad_norm": 7.143518924713135, + "learning_rate": 4.040659340659341e-05, + "loss": 0.2267, + "step": 6984 + }, + { + "epoch": 19.189560439560438, + "grad_norm": 14.721936225891113, + "learning_rate": 4.0405219780219784e-05, + "loss": 0.4404, + "step": 6985 + }, + { + "epoch": 19.192307692307693, + "grad_norm": 16.982698440551758, + "learning_rate": 4.0403846153846154e-05, + "loss": 0.6108, + "step": 6986 + }, + { + "epoch": 19.195054945054945, + "grad_norm": 14.648795127868652, + "learning_rate": 4.040247252747253e-05, + "loss": 0.5302, + "step": 6987 + }, + { + "epoch": 19.197802197802197, + "grad_norm": 7.926825523376465, + "learning_rate": 4.040109890109891e-05, + "loss": 0.2359, + "step": 6988 + }, + { + "epoch": 19.20054945054945, + "grad_norm": 14.754846572875977, + "learning_rate": 4.039972527472528e-05, + "loss": 0.4953, + "step": 6989 + }, + { + "epoch": 19.203296703296704, + "grad_norm": 12.109328269958496, + "learning_rate": 4.0398351648351654e-05, + "loss": 0.4058, + "step": 6990 + }, + { + "epoch": 19.206043956043956, + "grad_norm": 19.274354934692383, + "learning_rate": 4.0396978021978024e-05, + "loss": 0.5009, + "step": 6991 + }, + { + "epoch": 19.208791208791208, + "grad_norm": 13.619345664978027, + "learning_rate": 4.0395604395604394e-05, + "loss": 0.5514, + "step": 6992 + }, + { + "epoch": 19.21153846153846, + "grad_norm": 10.470011711120605, + "learning_rate": 4.039423076923077e-05, + "loss": 0.33, + "step": 6993 + }, + { + "epoch": 19.214285714285715, + "grad_norm": 12.866056442260742, + "learning_rate": 4.039285714285714e-05, + "loss": 0.4046, + "step": 6994 + }, + { + "epoch": 19.217032967032967, + "grad_norm": 10.328460693359375, + "learning_rate": 4.039148351648352e-05, + "loss": 0.3492, + "step": 6995 + }, + { + "epoch": 19.21978021978022, + "grad_norm": 11.124384880065918, + "learning_rate": 4.039010989010989e-05, + "loss": 0.4598, + "step": 6996 + }, + { + "epoch": 19.22252747252747, + "grad_norm": 12.630188941955566, + "learning_rate": 4.0388736263736265e-05, + "loss": 0.4539, + "step": 6997 + }, + { + "epoch": 19.225274725274726, + "grad_norm": 12.665327072143555, + "learning_rate": 4.038736263736264e-05, + "loss": 0.4143, + "step": 6998 + }, + { + "epoch": 19.228021978021978, + "grad_norm": 7.5446457862854, + "learning_rate": 4.038598901098901e-05, + "loss": 0.2134, + "step": 6999 + }, + { + "epoch": 19.23076923076923, + "grad_norm": 6.238699913024902, + "learning_rate": 4.038461538461539e-05, + "loss": 0.1424, + "step": 7000 + }, + { + "epoch": 19.233516483516482, + "grad_norm": 9.929783821105957, + "learning_rate": 4.038324175824176e-05, + "loss": 0.3645, + "step": 7001 + }, + { + "epoch": 19.236263736263737, + "grad_norm": 9.755167007446289, + "learning_rate": 4.0381868131868135e-05, + "loss": 0.1934, + "step": 7002 + }, + { + "epoch": 19.23901098901099, + "grad_norm": 18.12070655822754, + "learning_rate": 4.038049450549451e-05, + "loss": 0.6359, + "step": 7003 + }, + { + "epoch": 19.24175824175824, + "grad_norm": 17.546287536621094, + "learning_rate": 4.037912087912088e-05, + "loss": 0.6564, + "step": 7004 + }, + { + "epoch": 19.244505494505493, + "grad_norm": 14.745613098144531, + "learning_rate": 4.037774725274726e-05, + "loss": 0.4208, + "step": 7005 + }, + { + "epoch": 19.247252747252748, + "grad_norm": 11.608036041259766, + "learning_rate": 4.037637362637363e-05, + "loss": 0.3103, + "step": 7006 + }, + { + "epoch": 19.25, + "grad_norm": 18.02235984802246, + "learning_rate": 4.0375e-05, + "loss": 0.7516, + "step": 7007 + }, + { + "epoch": 19.252747252747252, + "grad_norm": 13.114799499511719, + "learning_rate": 4.0373626373626376e-05, + "loss": 0.4414, + "step": 7008 + }, + { + "epoch": 19.255494505494504, + "grad_norm": 6.115532875061035, + "learning_rate": 4.0372252747252746e-05, + "loss": 0.1243, + "step": 7009 + }, + { + "epoch": 19.25824175824176, + "grad_norm": 16.526147842407227, + "learning_rate": 4.037087912087912e-05, + "loss": 0.5517, + "step": 7010 + }, + { + "epoch": 19.26098901098901, + "grad_norm": 8.272756576538086, + "learning_rate": 4.036950549450549e-05, + "loss": 0.2505, + "step": 7011 + }, + { + "epoch": 19.263736263736263, + "grad_norm": 10.940335273742676, + "learning_rate": 4.036813186813187e-05, + "loss": 0.3234, + "step": 7012 + }, + { + "epoch": 19.266483516483518, + "grad_norm": 19.762981414794922, + "learning_rate": 4.0366758241758246e-05, + "loss": 0.7813, + "step": 7013 + }, + { + "epoch": 19.26923076923077, + "grad_norm": 18.97223472595215, + "learning_rate": 4.0365384615384616e-05, + "loss": 0.8532, + "step": 7014 + }, + { + "epoch": 19.271978021978022, + "grad_norm": 14.97293758392334, + "learning_rate": 4.036401098901099e-05, + "loss": 0.3822, + "step": 7015 + }, + { + "epoch": 19.274725274725274, + "grad_norm": 14.57535171508789, + "learning_rate": 4.036263736263736e-05, + "loss": 0.591, + "step": 7016 + }, + { + "epoch": 19.27747252747253, + "grad_norm": 10.156332969665527, + "learning_rate": 4.036126373626374e-05, + "loss": 0.2315, + "step": 7017 + }, + { + "epoch": 19.28021978021978, + "grad_norm": 28.146526336669922, + "learning_rate": 4.0359890109890116e-05, + "loss": 0.5691, + "step": 7018 + }, + { + "epoch": 19.282967032967033, + "grad_norm": 19.455272674560547, + "learning_rate": 4.0358516483516486e-05, + "loss": 0.6733, + "step": 7019 + }, + { + "epoch": 19.285714285714285, + "grad_norm": 13.314696311950684, + "learning_rate": 4.035714285714286e-05, + "loss": 0.3785, + "step": 7020 + }, + { + "epoch": 19.28846153846154, + "grad_norm": 16.426774978637695, + "learning_rate": 4.035576923076923e-05, + "loss": 0.5898, + "step": 7021 + }, + { + "epoch": 19.291208791208792, + "grad_norm": 14.43246078491211, + "learning_rate": 4.03543956043956e-05, + "loss": 0.3929, + "step": 7022 + }, + { + "epoch": 19.293956043956044, + "grad_norm": 6.577111721038818, + "learning_rate": 4.035302197802198e-05, + "loss": 0.1444, + "step": 7023 + }, + { + "epoch": 19.296703296703296, + "grad_norm": 9.665300369262695, + "learning_rate": 4.035164835164835e-05, + "loss": 0.3308, + "step": 7024 + }, + { + "epoch": 19.29945054945055, + "grad_norm": 13.844747543334961, + "learning_rate": 4.035027472527473e-05, + "loss": 0.3331, + "step": 7025 + }, + { + "epoch": 19.302197802197803, + "grad_norm": 14.385124206542969, + "learning_rate": 4.03489010989011e-05, + "loss": 0.4718, + "step": 7026 + }, + { + "epoch": 19.304945054945055, + "grad_norm": 4.124425888061523, + "learning_rate": 4.0347527472527474e-05, + "loss": 0.127, + "step": 7027 + }, + { + "epoch": 19.307692307692307, + "grad_norm": 13.135889053344727, + "learning_rate": 4.034615384615385e-05, + "loss": 0.4014, + "step": 7028 + }, + { + "epoch": 19.310439560439562, + "grad_norm": 10.086965560913086, + "learning_rate": 4.034478021978022e-05, + "loss": 0.2986, + "step": 7029 + }, + { + "epoch": 19.313186813186814, + "grad_norm": 13.473281860351562, + "learning_rate": 4.03434065934066e-05, + "loss": 0.6365, + "step": 7030 + }, + { + "epoch": 19.315934065934066, + "grad_norm": 7.998299598693848, + "learning_rate": 4.034203296703297e-05, + "loss": 0.2098, + "step": 7031 + }, + { + "epoch": 19.318681318681318, + "grad_norm": 18.468734741210938, + "learning_rate": 4.0340659340659344e-05, + "loss": 0.8839, + "step": 7032 + }, + { + "epoch": 19.321428571428573, + "grad_norm": 6.4204630851745605, + "learning_rate": 4.033928571428572e-05, + "loss": 0.1458, + "step": 7033 + }, + { + "epoch": 19.324175824175825, + "grad_norm": 16.67462921142578, + "learning_rate": 4.033791208791209e-05, + "loss": 0.57, + "step": 7034 + }, + { + "epoch": 19.326923076923077, + "grad_norm": 10.293401718139648, + "learning_rate": 4.033653846153847e-05, + "loss": 0.2672, + "step": 7035 + }, + { + "epoch": 19.32967032967033, + "grad_norm": 12.436668395996094, + "learning_rate": 4.033516483516484e-05, + "loss": 0.58, + "step": 7036 + }, + { + "epoch": 19.332417582417584, + "grad_norm": 5.036440849304199, + "learning_rate": 4.033379120879121e-05, + "loss": 0.0818, + "step": 7037 + }, + { + "epoch": 19.335164835164836, + "grad_norm": 14.4231538772583, + "learning_rate": 4.0332417582417585e-05, + "loss": 0.4437, + "step": 7038 + }, + { + "epoch": 19.337912087912088, + "grad_norm": 11.11279582977295, + "learning_rate": 4.0331043956043955e-05, + "loss": 0.2851, + "step": 7039 + }, + { + "epoch": 19.34065934065934, + "grad_norm": 16.753253936767578, + "learning_rate": 4.032967032967033e-05, + "loss": 0.3942, + "step": 7040 + }, + { + "epoch": 19.343406593406595, + "grad_norm": 11.341469764709473, + "learning_rate": 4.03282967032967e-05, + "loss": 0.4225, + "step": 7041 + }, + { + "epoch": 19.346153846153847, + "grad_norm": 13.847043991088867, + "learning_rate": 4.032692307692308e-05, + "loss": 0.6252, + "step": 7042 + }, + { + "epoch": 19.3489010989011, + "grad_norm": 12.572757720947266, + "learning_rate": 4.0325549450549455e-05, + "loss": 0.4799, + "step": 7043 + }, + { + "epoch": 19.35164835164835, + "grad_norm": 12.271183013916016, + "learning_rate": 4.0324175824175825e-05, + "loss": 0.2899, + "step": 7044 + }, + { + "epoch": 19.354395604395606, + "grad_norm": 23.999267578125, + "learning_rate": 4.03228021978022e-05, + "loss": 1.049, + "step": 7045 + }, + { + "epoch": 19.357142857142858, + "grad_norm": 15.94389820098877, + "learning_rate": 4.032142857142857e-05, + "loss": 0.6815, + "step": 7046 + }, + { + "epoch": 19.35989010989011, + "grad_norm": 16.91859245300293, + "learning_rate": 4.032005494505495e-05, + "loss": 0.7741, + "step": 7047 + }, + { + "epoch": 19.36263736263736, + "grad_norm": 8.269837379455566, + "learning_rate": 4.0318681318681325e-05, + "loss": 0.1836, + "step": 7048 + }, + { + "epoch": 19.365384615384617, + "grad_norm": 11.211411476135254, + "learning_rate": 4.0317307692307695e-05, + "loss": 0.3624, + "step": 7049 + }, + { + "epoch": 19.36813186813187, + "grad_norm": 16.620683670043945, + "learning_rate": 4.031593406593407e-05, + "loss": 0.4858, + "step": 7050 + }, + { + "epoch": 19.37087912087912, + "grad_norm": 8.426816940307617, + "learning_rate": 4.031456043956044e-05, + "loss": 0.2055, + "step": 7051 + }, + { + "epoch": 19.373626373626372, + "grad_norm": 28.53621482849121, + "learning_rate": 4.031318681318681e-05, + "loss": 1.0472, + "step": 7052 + }, + { + "epoch": 19.376373626373628, + "grad_norm": 11.635716438293457, + "learning_rate": 4.031181318681319e-05, + "loss": 0.2234, + "step": 7053 + }, + { + "epoch": 19.37912087912088, + "grad_norm": 4.634669303894043, + "learning_rate": 4.031043956043956e-05, + "loss": 0.1076, + "step": 7054 + }, + { + "epoch": 19.38186813186813, + "grad_norm": 6.55201530456543, + "learning_rate": 4.0309065934065936e-05, + "loss": 0.1395, + "step": 7055 + }, + { + "epoch": 19.384615384615383, + "grad_norm": 17.73562240600586, + "learning_rate": 4.0307692307692306e-05, + "loss": 0.7859, + "step": 7056 + }, + { + "epoch": 19.38736263736264, + "grad_norm": 16.48375701904297, + "learning_rate": 4.030631868131868e-05, + "loss": 0.6164, + "step": 7057 + }, + { + "epoch": 19.39010989010989, + "grad_norm": 8.368973731994629, + "learning_rate": 4.030494505494506e-05, + "loss": 0.2365, + "step": 7058 + }, + { + "epoch": 19.392857142857142, + "grad_norm": 16.271709442138672, + "learning_rate": 4.030357142857143e-05, + "loss": 0.613, + "step": 7059 + }, + { + "epoch": 19.395604395604394, + "grad_norm": 8.569600105285645, + "learning_rate": 4.0302197802197806e-05, + "loss": 0.2258, + "step": 7060 + }, + { + "epoch": 19.39835164835165, + "grad_norm": 4.611356258392334, + "learning_rate": 4.0300824175824176e-05, + "loss": 0.0886, + "step": 7061 + }, + { + "epoch": 19.4010989010989, + "grad_norm": 14.505084037780762, + "learning_rate": 4.029945054945055e-05, + "loss": 0.4726, + "step": 7062 + }, + { + "epoch": 19.403846153846153, + "grad_norm": 17.923709869384766, + "learning_rate": 4.029807692307693e-05, + "loss": 0.5436, + "step": 7063 + }, + { + "epoch": 19.406593406593405, + "grad_norm": 15.848883628845215, + "learning_rate": 4.02967032967033e-05, + "loss": 0.3767, + "step": 7064 + }, + { + "epoch": 19.40934065934066, + "grad_norm": 11.271806716918945, + "learning_rate": 4.029532967032968e-05, + "loss": 0.3639, + "step": 7065 + }, + { + "epoch": 19.412087912087912, + "grad_norm": 16.532114028930664, + "learning_rate": 4.029395604395605e-05, + "loss": 0.4998, + "step": 7066 + }, + { + "epoch": 19.414835164835164, + "grad_norm": 10.79595947265625, + "learning_rate": 4.029258241758242e-05, + "loss": 0.2245, + "step": 7067 + }, + { + "epoch": 19.417582417582416, + "grad_norm": 12.057621002197266, + "learning_rate": 4.0291208791208794e-05, + "loss": 0.2964, + "step": 7068 + }, + { + "epoch": 19.42032967032967, + "grad_norm": 18.199424743652344, + "learning_rate": 4.0289835164835164e-05, + "loss": 0.6307, + "step": 7069 + }, + { + "epoch": 19.423076923076923, + "grad_norm": 9.224838256835938, + "learning_rate": 4.028846153846154e-05, + "loss": 0.3358, + "step": 7070 + }, + { + "epoch": 19.425824175824175, + "grad_norm": 14.391082763671875, + "learning_rate": 4.028708791208791e-05, + "loss": 0.5571, + "step": 7071 + }, + { + "epoch": 19.428571428571427, + "grad_norm": 16.980512619018555, + "learning_rate": 4.028571428571429e-05, + "loss": 0.8274, + "step": 7072 + }, + { + "epoch": 19.431318681318682, + "grad_norm": 17.941579818725586, + "learning_rate": 4.0284340659340664e-05, + "loss": 0.6787, + "step": 7073 + }, + { + "epoch": 19.434065934065934, + "grad_norm": 7.903016567230225, + "learning_rate": 4.0282967032967034e-05, + "loss": 0.2137, + "step": 7074 + }, + { + "epoch": 19.436813186813186, + "grad_norm": 21.289175033569336, + "learning_rate": 4.028159340659341e-05, + "loss": 0.6853, + "step": 7075 + }, + { + "epoch": 19.439560439560438, + "grad_norm": 12.98686695098877, + "learning_rate": 4.028021978021978e-05, + "loss": 0.3653, + "step": 7076 + }, + { + "epoch": 19.442307692307693, + "grad_norm": 14.766858100891113, + "learning_rate": 4.027884615384616e-05, + "loss": 0.5598, + "step": 7077 + }, + { + "epoch": 19.445054945054945, + "grad_norm": 9.55290699005127, + "learning_rate": 4.0277472527472534e-05, + "loss": 0.2522, + "step": 7078 + }, + { + "epoch": 19.447802197802197, + "grad_norm": 23.084514617919922, + "learning_rate": 4.0276098901098904e-05, + "loss": 0.7101, + "step": 7079 + }, + { + "epoch": 19.45054945054945, + "grad_norm": 8.966601371765137, + "learning_rate": 4.027472527472528e-05, + "loss": 0.2175, + "step": 7080 + }, + { + "epoch": 19.453296703296704, + "grad_norm": 12.574530601501465, + "learning_rate": 4.027335164835165e-05, + "loss": 0.3092, + "step": 7081 + }, + { + "epoch": 19.456043956043956, + "grad_norm": 13.412056922912598, + "learning_rate": 4.027197802197802e-05, + "loss": 0.3483, + "step": 7082 + }, + { + "epoch": 19.458791208791208, + "grad_norm": 14.993966102600098, + "learning_rate": 4.02706043956044e-05, + "loss": 0.4492, + "step": 7083 + }, + { + "epoch": 19.46153846153846, + "grad_norm": 20.552852630615234, + "learning_rate": 4.026923076923077e-05, + "loss": 0.8866, + "step": 7084 + }, + { + "epoch": 19.464285714285715, + "grad_norm": 9.251616477966309, + "learning_rate": 4.0267857142857145e-05, + "loss": 0.2606, + "step": 7085 + }, + { + "epoch": 19.467032967032967, + "grad_norm": 16.91600799560547, + "learning_rate": 4.0266483516483515e-05, + "loss": 0.9681, + "step": 7086 + }, + { + "epoch": 19.46978021978022, + "grad_norm": 11.797872543334961, + "learning_rate": 4.026510989010989e-05, + "loss": 0.5547, + "step": 7087 + }, + { + "epoch": 19.47252747252747, + "grad_norm": 16.088438034057617, + "learning_rate": 4.026373626373627e-05, + "loss": 0.6587, + "step": 7088 + }, + { + "epoch": 19.475274725274726, + "grad_norm": 5.639907360076904, + "learning_rate": 4.026236263736264e-05, + "loss": 0.1878, + "step": 7089 + }, + { + "epoch": 19.478021978021978, + "grad_norm": 11.626577377319336, + "learning_rate": 4.0260989010989015e-05, + "loss": 0.3287, + "step": 7090 + }, + { + "epoch": 19.48076923076923, + "grad_norm": 9.208274841308594, + "learning_rate": 4.0259615384615385e-05, + "loss": 0.2325, + "step": 7091 + }, + { + "epoch": 19.483516483516482, + "grad_norm": 10.772926330566406, + "learning_rate": 4.025824175824176e-05, + "loss": 0.3156, + "step": 7092 + }, + { + "epoch": 19.486263736263737, + "grad_norm": 5.085850238800049, + "learning_rate": 4.025686813186814e-05, + "loss": 0.106, + "step": 7093 + }, + { + "epoch": 19.48901098901099, + "grad_norm": 14.15855598449707, + "learning_rate": 4.025549450549451e-05, + "loss": 0.4273, + "step": 7094 + }, + { + "epoch": 19.49175824175824, + "grad_norm": 20.30573081970215, + "learning_rate": 4.0254120879120886e-05, + "loss": 0.9308, + "step": 7095 + }, + { + "epoch": 19.494505494505496, + "grad_norm": 13.487578392028809, + "learning_rate": 4.0252747252747256e-05, + "loss": 0.4855, + "step": 7096 + }, + { + "epoch": 19.497252747252748, + "grad_norm": 5.358744144439697, + "learning_rate": 4.0251373626373626e-05, + "loss": 0.1649, + "step": 7097 + }, + { + "epoch": 19.5, + "grad_norm": 16.02411460876465, + "learning_rate": 4.025e-05, + "loss": 0.4737, + "step": 7098 + }, + { + "epoch": 19.502747252747252, + "grad_norm": 20.17888069152832, + "learning_rate": 4.024862637362637e-05, + "loss": 0.5882, + "step": 7099 + }, + { + "epoch": 19.505494505494504, + "grad_norm": 16.84200668334961, + "learning_rate": 4.024725274725275e-05, + "loss": 0.5189, + "step": 7100 + }, + { + "epoch": 19.50824175824176, + "grad_norm": 14.258095741271973, + "learning_rate": 4.024587912087912e-05, + "loss": 0.5566, + "step": 7101 + }, + { + "epoch": 19.51098901098901, + "grad_norm": 17.699344635009766, + "learning_rate": 4.0244505494505496e-05, + "loss": 0.7601, + "step": 7102 + }, + { + "epoch": 19.513736263736263, + "grad_norm": 7.1426167488098145, + "learning_rate": 4.024313186813187e-05, + "loss": 0.213, + "step": 7103 + }, + { + "epoch": 19.516483516483518, + "grad_norm": 16.31320571899414, + "learning_rate": 4.024175824175824e-05, + "loss": 0.5664, + "step": 7104 + }, + { + "epoch": 19.51923076923077, + "grad_norm": 9.973937034606934, + "learning_rate": 4.024038461538462e-05, + "loss": 0.2894, + "step": 7105 + }, + { + "epoch": 19.521978021978022, + "grad_norm": 10.370638847351074, + "learning_rate": 4.023901098901099e-05, + "loss": 0.3277, + "step": 7106 + }, + { + "epoch": 19.524725274725274, + "grad_norm": 11.64006233215332, + "learning_rate": 4.023763736263737e-05, + "loss": 0.3199, + "step": 7107 + }, + { + "epoch": 19.52747252747253, + "grad_norm": 14.045062065124512, + "learning_rate": 4.0236263736263743e-05, + "loss": 0.4803, + "step": 7108 + }, + { + "epoch": 19.53021978021978, + "grad_norm": 14.803528785705566, + "learning_rate": 4.0234890109890113e-05, + "loss": 0.2845, + "step": 7109 + }, + { + "epoch": 19.532967032967033, + "grad_norm": 16.438007354736328, + "learning_rate": 4.0233516483516483e-05, + "loss": 0.4053, + "step": 7110 + }, + { + "epoch": 19.535714285714285, + "grad_norm": 16.4603328704834, + "learning_rate": 4.023214285714286e-05, + "loss": 0.6987, + "step": 7111 + }, + { + "epoch": 19.53846153846154, + "grad_norm": 14.068038940429688, + "learning_rate": 4.023076923076923e-05, + "loss": 0.419, + "step": 7112 + }, + { + "epoch": 19.541208791208792, + "grad_norm": 10.392354011535645, + "learning_rate": 4.022939560439561e-05, + "loss": 0.2844, + "step": 7113 + }, + { + "epoch": 19.543956043956044, + "grad_norm": 13.774822235107422, + "learning_rate": 4.022802197802198e-05, + "loss": 0.5415, + "step": 7114 + }, + { + "epoch": 19.546703296703296, + "grad_norm": 14.365984916687012, + "learning_rate": 4.0226648351648354e-05, + "loss": 0.5005, + "step": 7115 + }, + { + "epoch": 19.54945054945055, + "grad_norm": 14.701580047607422, + "learning_rate": 4.0225274725274724e-05, + "loss": 0.4042, + "step": 7116 + }, + { + "epoch": 19.552197802197803, + "grad_norm": 7.86131477355957, + "learning_rate": 4.02239010989011e-05, + "loss": 0.306, + "step": 7117 + }, + { + "epoch": 19.554945054945055, + "grad_norm": 16.82156753540039, + "learning_rate": 4.022252747252748e-05, + "loss": 0.455, + "step": 7118 + }, + { + "epoch": 19.557692307692307, + "grad_norm": 10.991137504577637, + "learning_rate": 4.022115384615385e-05, + "loss": 0.2598, + "step": 7119 + }, + { + "epoch": 19.560439560439562, + "grad_norm": 12.754013061523438, + "learning_rate": 4.0219780219780224e-05, + "loss": 0.3789, + "step": 7120 + }, + { + "epoch": 19.563186813186814, + "grad_norm": 13.123597145080566, + "learning_rate": 4.0218406593406594e-05, + "loss": 0.654, + "step": 7121 + }, + { + "epoch": 19.565934065934066, + "grad_norm": 11.686490058898926, + "learning_rate": 4.021703296703297e-05, + "loss": 0.4318, + "step": 7122 + }, + { + "epoch": 19.568681318681318, + "grad_norm": 16.790254592895508, + "learning_rate": 4.021565934065935e-05, + "loss": 0.4828, + "step": 7123 + }, + { + "epoch": 19.571428571428573, + "grad_norm": 12.493009567260742, + "learning_rate": 4.021428571428572e-05, + "loss": 0.3825, + "step": 7124 + }, + { + "epoch": 19.574175824175825, + "grad_norm": 16.285404205322266, + "learning_rate": 4.021291208791209e-05, + "loss": 0.4015, + "step": 7125 + }, + { + "epoch": 19.576923076923077, + "grad_norm": 8.804610252380371, + "learning_rate": 4.0211538461538465e-05, + "loss": 0.2481, + "step": 7126 + }, + { + "epoch": 19.57967032967033, + "grad_norm": 14.437273979187012, + "learning_rate": 4.0210164835164835e-05, + "loss": 0.6001, + "step": 7127 + }, + { + "epoch": 19.582417582417584, + "grad_norm": 10.227684020996094, + "learning_rate": 4.020879120879121e-05, + "loss": 0.2811, + "step": 7128 + }, + { + "epoch": 19.585164835164836, + "grad_norm": 13.706265449523926, + "learning_rate": 4.020741758241758e-05, + "loss": 0.2186, + "step": 7129 + }, + { + "epoch": 19.587912087912088, + "grad_norm": 14.450949668884277, + "learning_rate": 4.020604395604396e-05, + "loss": 0.3682, + "step": 7130 + }, + { + "epoch": 19.59065934065934, + "grad_norm": 9.43893814086914, + "learning_rate": 4.020467032967033e-05, + "loss": 0.2752, + "step": 7131 + }, + { + "epoch": 19.593406593406595, + "grad_norm": 21.20318031311035, + "learning_rate": 4.0203296703296705e-05, + "loss": 0.6582, + "step": 7132 + }, + { + "epoch": 19.596153846153847, + "grad_norm": 9.536355972290039, + "learning_rate": 4.020192307692308e-05, + "loss": 0.2639, + "step": 7133 + }, + { + "epoch": 19.5989010989011, + "grad_norm": 13.847299575805664, + "learning_rate": 4.020054945054945e-05, + "loss": 0.5665, + "step": 7134 + }, + { + "epoch": 19.60164835164835, + "grad_norm": 16.542245864868164, + "learning_rate": 4.019917582417583e-05, + "loss": 0.7588, + "step": 7135 + }, + { + "epoch": 19.604395604395606, + "grad_norm": 14.669787406921387, + "learning_rate": 4.01978021978022e-05, + "loss": 0.4096, + "step": 7136 + }, + { + "epoch": 19.607142857142858, + "grad_norm": 8.794051170349121, + "learning_rate": 4.0196428571428576e-05, + "loss": 0.1792, + "step": 7137 + }, + { + "epoch": 19.60989010989011, + "grad_norm": 13.009622573852539, + "learning_rate": 4.019505494505495e-05, + "loss": 0.4554, + "step": 7138 + }, + { + "epoch": 19.61263736263736, + "grad_norm": 5.679090976715088, + "learning_rate": 4.019368131868132e-05, + "loss": 0.1685, + "step": 7139 + }, + { + "epoch": 19.615384615384617, + "grad_norm": 17.246116638183594, + "learning_rate": 4.019230769230769e-05, + "loss": 0.6237, + "step": 7140 + }, + { + "epoch": 19.61813186813187, + "grad_norm": 10.798749923706055, + "learning_rate": 4.019093406593407e-05, + "loss": 0.3137, + "step": 7141 + }, + { + "epoch": 19.62087912087912, + "grad_norm": 13.272788047790527, + "learning_rate": 4.018956043956044e-05, + "loss": 0.4187, + "step": 7142 + }, + { + "epoch": 19.623626373626372, + "grad_norm": 8.520744323730469, + "learning_rate": 4.0188186813186816e-05, + "loss": 0.3189, + "step": 7143 + }, + { + "epoch": 19.626373626373628, + "grad_norm": 20.9497127532959, + "learning_rate": 4.0186813186813186e-05, + "loss": 0.8918, + "step": 7144 + }, + { + "epoch": 19.62912087912088, + "grad_norm": 14.698195457458496, + "learning_rate": 4.018543956043956e-05, + "loss": 0.5316, + "step": 7145 + }, + { + "epoch": 19.63186813186813, + "grad_norm": 13.706982612609863, + "learning_rate": 4.018406593406593e-05, + "loss": 0.3356, + "step": 7146 + }, + { + "epoch": 19.634615384615383, + "grad_norm": 16.598106384277344, + "learning_rate": 4.018269230769231e-05, + "loss": 0.4654, + "step": 7147 + }, + { + "epoch": 19.63736263736264, + "grad_norm": 20.143373489379883, + "learning_rate": 4.0181318681318686e-05, + "loss": 0.5274, + "step": 7148 + }, + { + "epoch": 19.64010989010989, + "grad_norm": 12.268908500671387, + "learning_rate": 4.0179945054945056e-05, + "loss": 0.3067, + "step": 7149 + }, + { + "epoch": 19.642857142857142, + "grad_norm": 17.760677337646484, + "learning_rate": 4.017857142857143e-05, + "loss": 0.6895, + "step": 7150 + }, + { + "epoch": 19.645604395604394, + "grad_norm": 9.57988452911377, + "learning_rate": 4.01771978021978e-05, + "loss": 0.379, + "step": 7151 + }, + { + "epoch": 19.64835164835165, + "grad_norm": 11.874537467956543, + "learning_rate": 4.017582417582418e-05, + "loss": 0.3167, + "step": 7152 + }, + { + "epoch": 19.6510989010989, + "grad_norm": 13.924474716186523, + "learning_rate": 4.017445054945056e-05, + "loss": 0.3826, + "step": 7153 + }, + { + "epoch": 19.653846153846153, + "grad_norm": 14.908965110778809, + "learning_rate": 4.017307692307693e-05, + "loss": 0.5019, + "step": 7154 + }, + { + "epoch": 19.656593406593405, + "grad_norm": 9.445047378540039, + "learning_rate": 4.01717032967033e-05, + "loss": 0.2143, + "step": 7155 + }, + { + "epoch": 19.65934065934066, + "grad_norm": 11.652932167053223, + "learning_rate": 4.0170329670329674e-05, + "loss": 0.2359, + "step": 7156 + }, + { + "epoch": 19.662087912087912, + "grad_norm": 18.168581008911133, + "learning_rate": 4.0168956043956044e-05, + "loss": 0.6568, + "step": 7157 + }, + { + "epoch": 19.664835164835164, + "grad_norm": 9.129504203796387, + "learning_rate": 4.016758241758242e-05, + "loss": 0.3002, + "step": 7158 + }, + { + "epoch": 19.667582417582416, + "grad_norm": 11.798786163330078, + "learning_rate": 4.016620879120879e-05, + "loss": 0.4998, + "step": 7159 + }, + { + "epoch": 19.67032967032967, + "grad_norm": 10.466083526611328, + "learning_rate": 4.016483516483517e-05, + "loss": 0.4554, + "step": 7160 + }, + { + "epoch": 19.673076923076923, + "grad_norm": 7.860388278961182, + "learning_rate": 4.016346153846154e-05, + "loss": 0.284, + "step": 7161 + }, + { + "epoch": 19.675824175824175, + "grad_norm": 8.039405822753906, + "learning_rate": 4.0162087912087914e-05, + "loss": 0.1328, + "step": 7162 + }, + { + "epoch": 19.678571428571427, + "grad_norm": 11.7203950881958, + "learning_rate": 4.016071428571429e-05, + "loss": 0.2645, + "step": 7163 + }, + { + "epoch": 19.681318681318682, + "grad_norm": 5.99713659286499, + "learning_rate": 4.015934065934066e-05, + "loss": 0.1765, + "step": 7164 + }, + { + "epoch": 19.684065934065934, + "grad_norm": 12.837370872497559, + "learning_rate": 4.015796703296704e-05, + "loss": 0.413, + "step": 7165 + }, + { + "epoch": 19.686813186813186, + "grad_norm": 12.183205604553223, + "learning_rate": 4.015659340659341e-05, + "loss": 0.3628, + "step": 7166 + }, + { + "epoch": 19.689560439560438, + "grad_norm": 19.70168685913086, + "learning_rate": 4.0155219780219785e-05, + "loss": 0.7383, + "step": 7167 + }, + { + "epoch": 19.692307692307693, + "grad_norm": 8.89777946472168, + "learning_rate": 4.0153846153846155e-05, + "loss": 0.1861, + "step": 7168 + }, + { + "epoch": 19.695054945054945, + "grad_norm": 7.236611843109131, + "learning_rate": 4.015247252747253e-05, + "loss": 0.2553, + "step": 7169 + }, + { + "epoch": 19.697802197802197, + "grad_norm": 11.633084297180176, + "learning_rate": 4.01510989010989e-05, + "loss": 0.4122, + "step": 7170 + }, + { + "epoch": 19.70054945054945, + "grad_norm": 13.819381713867188, + "learning_rate": 4.014972527472528e-05, + "loss": 0.5903, + "step": 7171 + }, + { + "epoch": 19.703296703296704, + "grad_norm": 14.808600425720215, + "learning_rate": 4.014835164835165e-05, + "loss": 0.6047, + "step": 7172 + }, + { + "epoch": 19.706043956043956, + "grad_norm": 8.026472091674805, + "learning_rate": 4.014697802197802e-05, + "loss": 0.1927, + "step": 7173 + }, + { + "epoch": 19.708791208791208, + "grad_norm": 20.235641479492188, + "learning_rate": 4.0145604395604395e-05, + "loss": 0.6249, + "step": 7174 + }, + { + "epoch": 19.71153846153846, + "grad_norm": 10.235337257385254, + "learning_rate": 4.014423076923077e-05, + "loss": 0.2295, + "step": 7175 + }, + { + "epoch": 19.714285714285715, + "grad_norm": 8.088581085205078, + "learning_rate": 4.014285714285714e-05, + "loss": 0.2465, + "step": 7176 + }, + { + "epoch": 19.717032967032967, + "grad_norm": 11.726119041442871, + "learning_rate": 4.014148351648352e-05, + "loss": 0.3437, + "step": 7177 + }, + { + "epoch": 19.71978021978022, + "grad_norm": 13.755522727966309, + "learning_rate": 4.014010989010989e-05, + "loss": 0.6231, + "step": 7178 + }, + { + "epoch": 19.72252747252747, + "grad_norm": 12.026082992553711, + "learning_rate": 4.0138736263736265e-05, + "loss": 0.2983, + "step": 7179 + }, + { + "epoch": 19.725274725274726, + "grad_norm": 12.020628929138184, + "learning_rate": 4.013736263736264e-05, + "loss": 0.2471, + "step": 7180 + }, + { + "epoch": 19.728021978021978, + "grad_norm": 8.521327018737793, + "learning_rate": 4.013598901098901e-05, + "loss": 0.2748, + "step": 7181 + }, + { + "epoch": 19.73076923076923, + "grad_norm": 13.930619239807129, + "learning_rate": 4.013461538461539e-05, + "loss": 0.4206, + "step": 7182 + }, + { + "epoch": 19.733516483516482, + "grad_norm": 15.879878044128418, + "learning_rate": 4.013324175824176e-05, + "loss": 0.5159, + "step": 7183 + }, + { + "epoch": 19.736263736263737, + "grad_norm": 8.545011520385742, + "learning_rate": 4.0131868131868136e-05, + "loss": 0.1513, + "step": 7184 + }, + { + "epoch": 19.73901098901099, + "grad_norm": 12.040079116821289, + "learning_rate": 4.0130494505494506e-05, + "loss": 0.3299, + "step": 7185 + }, + { + "epoch": 19.74175824175824, + "grad_norm": 12.400199890136719, + "learning_rate": 4.012912087912088e-05, + "loss": 0.2417, + "step": 7186 + }, + { + "epoch": 19.744505494505496, + "grad_norm": 11.638163566589355, + "learning_rate": 4.012774725274725e-05, + "loss": 0.2571, + "step": 7187 + }, + { + "epoch": 19.747252747252748, + "grad_norm": 4.722909450531006, + "learning_rate": 4.012637362637362e-05, + "loss": 0.1357, + "step": 7188 + }, + { + "epoch": 19.75, + "grad_norm": 9.064067840576172, + "learning_rate": 4.0125e-05, + "loss": 0.241, + "step": 7189 + }, + { + "epoch": 19.752747252747252, + "grad_norm": 6.798277854919434, + "learning_rate": 4.0123626373626376e-05, + "loss": 0.1032, + "step": 7190 + }, + { + "epoch": 19.755494505494504, + "grad_norm": 12.162154197692871, + "learning_rate": 4.0122252747252746e-05, + "loss": 0.3074, + "step": 7191 + }, + { + "epoch": 19.75824175824176, + "grad_norm": 14.202739715576172, + "learning_rate": 4.012087912087912e-05, + "loss": 0.5314, + "step": 7192 + }, + { + "epoch": 19.76098901098901, + "grad_norm": 14.746758460998535, + "learning_rate": 4.011950549450549e-05, + "loss": 0.4369, + "step": 7193 + }, + { + "epoch": 19.763736263736263, + "grad_norm": 18.925607681274414, + "learning_rate": 4.011813186813187e-05, + "loss": 0.8373, + "step": 7194 + }, + { + "epoch": 19.766483516483518, + "grad_norm": 15.66102409362793, + "learning_rate": 4.011675824175825e-05, + "loss": 0.5068, + "step": 7195 + }, + { + "epoch": 19.76923076923077, + "grad_norm": 14.766998291015625, + "learning_rate": 4.011538461538462e-05, + "loss": 0.4139, + "step": 7196 + }, + { + "epoch": 19.771978021978022, + "grad_norm": 13.757720947265625, + "learning_rate": 4.0114010989010994e-05, + "loss": 0.4097, + "step": 7197 + }, + { + "epoch": 19.774725274725274, + "grad_norm": 10.556098937988281, + "learning_rate": 4.0112637362637364e-05, + "loss": 0.4209, + "step": 7198 + }, + { + "epoch": 19.77747252747253, + "grad_norm": 9.769147872924805, + "learning_rate": 4.011126373626374e-05, + "loss": 0.1992, + "step": 7199 + }, + { + "epoch": 19.78021978021978, + "grad_norm": 12.500823974609375, + "learning_rate": 4.010989010989011e-05, + "loss": 0.3988, + "step": 7200 + }, + { + "epoch": 19.782967032967033, + "grad_norm": 17.428382873535156, + "learning_rate": 4.010851648351649e-05, + "loss": 0.6293, + "step": 7201 + }, + { + "epoch": 19.785714285714285, + "grad_norm": 10.22877025604248, + "learning_rate": 4.010714285714286e-05, + "loss": 0.3218, + "step": 7202 + }, + { + "epoch": 19.78846153846154, + "grad_norm": 9.705565452575684, + "learning_rate": 4.010576923076923e-05, + "loss": 0.4586, + "step": 7203 + }, + { + "epoch": 19.791208791208792, + "grad_norm": 15.484349250793457, + "learning_rate": 4.0104395604395604e-05, + "loss": 0.618, + "step": 7204 + }, + { + "epoch": 19.793956043956044, + "grad_norm": 13.75462818145752, + "learning_rate": 4.010302197802198e-05, + "loss": 0.3793, + "step": 7205 + }, + { + "epoch": 19.796703296703296, + "grad_norm": 16.277881622314453, + "learning_rate": 4.010164835164835e-05, + "loss": 0.999, + "step": 7206 + }, + { + "epoch": 19.79945054945055, + "grad_norm": 9.125872611999512, + "learning_rate": 4.010027472527473e-05, + "loss": 0.2648, + "step": 7207 + }, + { + "epoch": 19.802197802197803, + "grad_norm": 14.195160865783691, + "learning_rate": 4.00989010989011e-05, + "loss": 0.5595, + "step": 7208 + }, + { + "epoch": 19.804945054945055, + "grad_norm": 8.150592803955078, + "learning_rate": 4.0097527472527474e-05, + "loss": 0.3079, + "step": 7209 + }, + { + "epoch": 19.807692307692307, + "grad_norm": 15.856552124023438, + "learning_rate": 4.009615384615385e-05, + "loss": 0.6598, + "step": 7210 + }, + { + "epoch": 19.810439560439562, + "grad_norm": 7.774684429168701, + "learning_rate": 4.009478021978022e-05, + "loss": 0.1837, + "step": 7211 + }, + { + "epoch": 19.813186813186814, + "grad_norm": 14.560467720031738, + "learning_rate": 4.00934065934066e-05, + "loss": 0.5268, + "step": 7212 + }, + { + "epoch": 19.815934065934066, + "grad_norm": 12.669717788696289, + "learning_rate": 4.009203296703297e-05, + "loss": 0.4647, + "step": 7213 + }, + { + "epoch": 19.818681318681318, + "grad_norm": 20.348966598510742, + "learning_rate": 4.0090659340659345e-05, + "loss": 1.0493, + "step": 7214 + }, + { + "epoch": 19.821428571428573, + "grad_norm": 21.488140106201172, + "learning_rate": 4.0089285714285715e-05, + "loss": 1.0434, + "step": 7215 + }, + { + "epoch": 19.824175824175825, + "grad_norm": 10.607516288757324, + "learning_rate": 4.008791208791209e-05, + "loss": 0.245, + "step": 7216 + }, + { + "epoch": 19.826923076923077, + "grad_norm": 14.015868186950684, + "learning_rate": 4.008653846153846e-05, + "loss": 0.5864, + "step": 7217 + }, + { + "epoch": 19.82967032967033, + "grad_norm": 10.595494270324707, + "learning_rate": 4.008516483516483e-05, + "loss": 0.2534, + "step": 7218 + }, + { + "epoch": 19.832417582417584, + "grad_norm": 9.512530326843262, + "learning_rate": 4.008379120879121e-05, + "loss": 0.2317, + "step": 7219 + }, + { + "epoch": 19.835164835164836, + "grad_norm": 10.502058029174805, + "learning_rate": 4.0082417582417585e-05, + "loss": 0.2372, + "step": 7220 + }, + { + "epoch": 19.837912087912088, + "grad_norm": 12.75951862335205, + "learning_rate": 4.0081043956043955e-05, + "loss": 0.5193, + "step": 7221 + }, + { + "epoch": 19.84065934065934, + "grad_norm": 6.3410162925720215, + "learning_rate": 4.007967032967033e-05, + "loss": 0.1097, + "step": 7222 + }, + { + "epoch": 19.843406593406595, + "grad_norm": 11.752280235290527, + "learning_rate": 4.00782967032967e-05, + "loss": 0.3265, + "step": 7223 + }, + { + "epoch": 19.846153846153847, + "grad_norm": 18.14155387878418, + "learning_rate": 4.007692307692308e-05, + "loss": 0.6191, + "step": 7224 + }, + { + "epoch": 19.8489010989011, + "grad_norm": 12.391186714172363, + "learning_rate": 4.0075549450549456e-05, + "loss": 0.3621, + "step": 7225 + }, + { + "epoch": 19.85164835164835, + "grad_norm": 9.103999137878418, + "learning_rate": 4.0074175824175826e-05, + "loss": 0.2095, + "step": 7226 + }, + { + "epoch": 19.854395604395606, + "grad_norm": 10.973751068115234, + "learning_rate": 4.00728021978022e-05, + "loss": 0.2614, + "step": 7227 + }, + { + "epoch": 19.857142857142858, + "grad_norm": 8.203840255737305, + "learning_rate": 4.007142857142857e-05, + "loss": 0.254, + "step": 7228 + }, + { + "epoch": 19.85989010989011, + "grad_norm": 8.675148963928223, + "learning_rate": 4.007005494505495e-05, + "loss": 0.2087, + "step": 7229 + }, + { + "epoch": 19.86263736263736, + "grad_norm": 15.260458946228027, + "learning_rate": 4.006868131868132e-05, + "loss": 0.6219, + "step": 7230 + }, + { + "epoch": 19.865384615384617, + "grad_norm": 9.145264625549316, + "learning_rate": 4.0067307692307696e-05, + "loss": 0.2881, + "step": 7231 + }, + { + "epoch": 19.86813186813187, + "grad_norm": 17.221534729003906, + "learning_rate": 4.0065934065934066e-05, + "loss": 0.6791, + "step": 7232 + }, + { + "epoch": 19.87087912087912, + "grad_norm": 14.477645874023438, + "learning_rate": 4.0064560439560436e-05, + "loss": 0.4158, + "step": 7233 + }, + { + "epoch": 19.873626373626372, + "grad_norm": 19.736209869384766, + "learning_rate": 4.006318681318681e-05, + "loss": 0.8056, + "step": 7234 + }, + { + "epoch": 19.876373626373628, + "grad_norm": 5.662983417510986, + "learning_rate": 4.006181318681319e-05, + "loss": 0.1198, + "step": 7235 + }, + { + "epoch": 19.87912087912088, + "grad_norm": 6.7403411865234375, + "learning_rate": 4.006043956043956e-05, + "loss": 0.2361, + "step": 7236 + }, + { + "epoch": 19.88186813186813, + "grad_norm": 13.366555213928223, + "learning_rate": 4.005906593406594e-05, + "loss": 0.253, + "step": 7237 + }, + { + "epoch": 19.884615384615383, + "grad_norm": 10.087956428527832, + "learning_rate": 4.005769230769231e-05, + "loss": 0.2638, + "step": 7238 + }, + { + "epoch": 19.88736263736264, + "grad_norm": 14.791084289550781, + "learning_rate": 4.0056318681318683e-05, + "loss": 0.3359, + "step": 7239 + }, + { + "epoch": 19.89010989010989, + "grad_norm": 7.989175319671631, + "learning_rate": 4.005494505494506e-05, + "loss": 0.2548, + "step": 7240 + }, + { + "epoch": 19.892857142857142, + "grad_norm": 10.623103141784668, + "learning_rate": 4.005357142857143e-05, + "loss": 0.2384, + "step": 7241 + }, + { + "epoch": 19.895604395604394, + "grad_norm": 9.235777854919434, + "learning_rate": 4.005219780219781e-05, + "loss": 0.2057, + "step": 7242 + }, + { + "epoch": 19.89835164835165, + "grad_norm": 23.341289520263672, + "learning_rate": 4.005082417582418e-05, + "loss": 0.8209, + "step": 7243 + }, + { + "epoch": 19.9010989010989, + "grad_norm": 15.681184768676758, + "learning_rate": 4.0049450549450554e-05, + "loss": 0.3554, + "step": 7244 + }, + { + "epoch": 19.903846153846153, + "grad_norm": 10.571019172668457, + "learning_rate": 4.0048076923076924e-05, + "loss": 0.1894, + "step": 7245 + }, + { + "epoch": 19.906593406593405, + "grad_norm": 14.195361137390137, + "learning_rate": 4.00467032967033e-05, + "loss": 0.436, + "step": 7246 + }, + { + "epoch": 19.90934065934066, + "grad_norm": 14.544700622558594, + "learning_rate": 4.004532967032967e-05, + "loss": 0.4467, + "step": 7247 + }, + { + "epoch": 19.912087912087912, + "grad_norm": 22.271747589111328, + "learning_rate": 4.004395604395604e-05, + "loss": 0.6176, + "step": 7248 + }, + { + "epoch": 19.914835164835164, + "grad_norm": 15.434371948242188, + "learning_rate": 4.004258241758242e-05, + "loss": 0.4388, + "step": 7249 + }, + { + "epoch": 19.917582417582416, + "grad_norm": 9.038774490356445, + "learning_rate": 4.0041208791208794e-05, + "loss": 0.2312, + "step": 7250 + }, + { + "epoch": 19.92032967032967, + "grad_norm": 6.631771087646484, + "learning_rate": 4.0039835164835164e-05, + "loss": 0.1617, + "step": 7251 + }, + { + "epoch": 19.923076923076923, + "grad_norm": 15.646732330322266, + "learning_rate": 4.003846153846154e-05, + "loss": 0.6204, + "step": 7252 + }, + { + "epoch": 19.925824175824175, + "grad_norm": 12.894627571105957, + "learning_rate": 4.003708791208791e-05, + "loss": 0.4077, + "step": 7253 + }, + { + "epoch": 19.928571428571427, + "grad_norm": 11.826465606689453, + "learning_rate": 4.003571428571429e-05, + "loss": 0.3065, + "step": 7254 + }, + { + "epoch": 19.931318681318682, + "grad_norm": 12.495794296264648, + "learning_rate": 4.0034340659340665e-05, + "loss": 0.4133, + "step": 7255 + }, + { + "epoch": 19.934065934065934, + "grad_norm": 17.098033905029297, + "learning_rate": 4.0032967032967035e-05, + "loss": 0.7756, + "step": 7256 + }, + { + "epoch": 19.936813186813186, + "grad_norm": 14.375153541564941, + "learning_rate": 4.003159340659341e-05, + "loss": 0.4577, + "step": 7257 + }, + { + "epoch": 19.939560439560438, + "grad_norm": 13.786449432373047, + "learning_rate": 4.003021978021978e-05, + "loss": 0.3012, + "step": 7258 + }, + { + "epoch": 19.942307692307693, + "grad_norm": 9.986862182617188, + "learning_rate": 4.002884615384616e-05, + "loss": 0.2608, + "step": 7259 + }, + { + "epoch": 19.945054945054945, + "grad_norm": 11.501779556274414, + "learning_rate": 4.002747252747253e-05, + "loss": 0.37, + "step": 7260 + }, + { + "epoch": 19.947802197802197, + "grad_norm": 17.07391929626465, + "learning_rate": 4.0026098901098905e-05, + "loss": 0.2259, + "step": 7261 + }, + { + "epoch": 19.95054945054945, + "grad_norm": 18.86551284790039, + "learning_rate": 4.0024725274725275e-05, + "loss": 0.7329, + "step": 7262 + }, + { + "epoch": 19.953296703296704, + "grad_norm": 3.5517210960388184, + "learning_rate": 4.0023351648351645e-05, + "loss": 0.0644, + "step": 7263 + }, + { + "epoch": 19.956043956043956, + "grad_norm": 10.324817657470703, + "learning_rate": 4.002197802197802e-05, + "loss": 0.2742, + "step": 7264 + }, + { + "epoch": 19.958791208791208, + "grad_norm": 10.120099067687988, + "learning_rate": 4.00206043956044e-05, + "loss": 0.3116, + "step": 7265 + }, + { + "epoch": 19.96153846153846, + "grad_norm": 13.62010383605957, + "learning_rate": 4.001923076923077e-05, + "loss": 0.5002, + "step": 7266 + }, + { + "epoch": 19.964285714285715, + "grad_norm": 16.097665786743164, + "learning_rate": 4.0017857142857146e-05, + "loss": 0.394, + "step": 7267 + }, + { + "epoch": 19.967032967032967, + "grad_norm": 11.090713500976562, + "learning_rate": 4.0016483516483516e-05, + "loss": 0.3154, + "step": 7268 + }, + { + "epoch": 19.96978021978022, + "grad_norm": 16.620752334594727, + "learning_rate": 4.001510989010989e-05, + "loss": 0.5047, + "step": 7269 + }, + { + "epoch": 19.97252747252747, + "grad_norm": 8.94991683959961, + "learning_rate": 4.001373626373627e-05, + "loss": 0.1584, + "step": 7270 + }, + { + "epoch": 19.975274725274726, + "grad_norm": 11.852099418640137, + "learning_rate": 4.001236263736264e-05, + "loss": 0.4478, + "step": 7271 + }, + { + "epoch": 19.978021978021978, + "grad_norm": 4.8305840492248535, + "learning_rate": 4.0010989010989016e-05, + "loss": 0.1246, + "step": 7272 + }, + { + "epoch": 19.98076923076923, + "grad_norm": 15.555137634277344, + "learning_rate": 4.0009615384615386e-05, + "loss": 0.5168, + "step": 7273 + }, + { + "epoch": 19.983516483516482, + "grad_norm": 7.6382317543029785, + "learning_rate": 4.000824175824176e-05, + "loss": 0.354, + "step": 7274 + }, + { + "epoch": 19.986263736263737, + "grad_norm": 14.961857795715332, + "learning_rate": 4.000686813186813e-05, + "loss": 0.3389, + "step": 7275 + }, + { + "epoch": 19.98901098901099, + "grad_norm": 14.40065860748291, + "learning_rate": 4.000549450549451e-05, + "loss": 0.6126, + "step": 7276 + }, + { + "epoch": 19.99175824175824, + "grad_norm": 16.701345443725586, + "learning_rate": 4.000412087912088e-05, + "loss": 0.8175, + "step": 7277 + }, + { + "epoch": 19.994505494505496, + "grad_norm": 16.758838653564453, + "learning_rate": 4.000274725274725e-05, + "loss": 0.9545, + "step": 7278 + }, + { + "epoch": 19.997252747252748, + "grad_norm": 13.537131309509277, + "learning_rate": 4.0001373626373626e-05, + "loss": 0.5268, + "step": 7279 + }, + { + "epoch": 20.0, + "grad_norm": 22.555465698242188, + "learning_rate": 4e-05, + "loss": 0.3625, + "step": 7280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.7410468319559229, + "eval_f1": 0.7328573286824142, + "eval_f1_DuraRiadoRio_64x64": 0.7464387464387464, + "eval_f1_Mole_64x64": 0.6320754716981132, + "eval_f1_Quebrado_64x64": 0.7626459143968871, + "eval_f1_RiadoRio_64x64": 0.7054794520547946, + "eval_f1_RioFechado_64x64": 0.8176470588235294, + "eval_loss": 0.9930658936500549, + "eval_precision": 0.7846270968191165, + "eval_precision_DuraRiadoRio_64x64": 0.6328502415458938, + "eval_precision_Mole_64x64": 0.9852941176470589, + "eval_precision_Quebrado_64x64": 0.8672566371681416, + "eval_precision_RiadoRio_64x64": 0.7357142857142858, + "eval_precision_RioFechado_64x64": 0.702020202020202, + "eval_recall": 0.7424120747879087, + "eval_recall_DuraRiadoRio_64x64": 0.9097222222222222, + "eval_recall_Mole_64x64": 0.4652777777777778, + "eval_recall_Quebrado_64x64": 0.6805555555555556, + "eval_recall_RiadoRio_64x64": 0.6776315789473685, + "eval_recall_RioFechado_64x64": 0.9788732394366197, + "eval_runtime": 1.747, + "eval_samples_per_second": 415.581, + "eval_steps_per_second": 26.332, + "step": 7280 + }, + { + "epoch": 20.002747252747252, + "grad_norm": 9.549551963806152, + "learning_rate": 3.999862637362637e-05, + "loss": 0.3642, + "step": 7281 + }, + { + "epoch": 20.005494505494507, + "grad_norm": 17.162431716918945, + "learning_rate": 3.999725274725275e-05, + "loss": 0.6857, + "step": 7282 + }, + { + "epoch": 20.00824175824176, + "grad_norm": 8.319671630859375, + "learning_rate": 3.999587912087912e-05, + "loss": 0.2561, + "step": 7283 + }, + { + "epoch": 20.01098901098901, + "grad_norm": 15.260879516601562, + "learning_rate": 3.99945054945055e-05, + "loss": 0.3391, + "step": 7284 + }, + { + "epoch": 20.013736263736263, + "grad_norm": 12.365449905395508, + "learning_rate": 3.9993131868131874e-05, + "loss": 0.503, + "step": 7285 + }, + { + "epoch": 20.016483516483518, + "grad_norm": 15.181599617004395, + "learning_rate": 3.9991758241758244e-05, + "loss": 0.4859, + "step": 7286 + }, + { + "epoch": 20.01923076923077, + "grad_norm": 10.030150413513184, + "learning_rate": 3.999038461538462e-05, + "loss": 0.2234, + "step": 7287 + }, + { + "epoch": 20.021978021978022, + "grad_norm": 20.351102828979492, + "learning_rate": 3.998901098901099e-05, + "loss": 0.7326, + "step": 7288 + }, + { + "epoch": 20.024725274725274, + "grad_norm": 14.153630256652832, + "learning_rate": 3.998763736263737e-05, + "loss": 0.4466, + "step": 7289 + }, + { + "epoch": 20.02747252747253, + "grad_norm": 11.023886680603027, + "learning_rate": 3.998626373626374e-05, + "loss": 0.2703, + "step": 7290 + }, + { + "epoch": 20.03021978021978, + "grad_norm": 10.864439010620117, + "learning_rate": 3.9984890109890114e-05, + "loss": 0.3023, + "step": 7291 + }, + { + "epoch": 20.032967032967033, + "grad_norm": 11.502232551574707, + "learning_rate": 3.9983516483516484e-05, + "loss": 0.3218, + "step": 7292 + }, + { + "epoch": 20.035714285714285, + "grad_norm": 16.936641693115234, + "learning_rate": 3.9982142857142854e-05, + "loss": 0.4069, + "step": 7293 + }, + { + "epoch": 20.03846153846154, + "grad_norm": 13.818474769592285, + "learning_rate": 3.998076923076923e-05, + "loss": 0.534, + "step": 7294 + }, + { + "epoch": 20.041208791208792, + "grad_norm": 17.598018646240234, + "learning_rate": 3.997939560439561e-05, + "loss": 0.708, + "step": 7295 + }, + { + "epoch": 20.043956043956044, + "grad_norm": 8.197731018066406, + "learning_rate": 3.997802197802198e-05, + "loss": 0.1934, + "step": 7296 + }, + { + "epoch": 20.046703296703296, + "grad_norm": 11.655959129333496, + "learning_rate": 3.9976648351648355e-05, + "loss": 0.5291, + "step": 7297 + }, + { + "epoch": 20.04945054945055, + "grad_norm": 18.026243209838867, + "learning_rate": 3.9975274725274725e-05, + "loss": 0.564, + "step": 7298 + }, + { + "epoch": 20.052197802197803, + "grad_norm": 7.210413932800293, + "learning_rate": 3.99739010989011e-05, + "loss": 0.1577, + "step": 7299 + }, + { + "epoch": 20.054945054945055, + "grad_norm": 3.0392537117004395, + "learning_rate": 3.997252747252748e-05, + "loss": 0.0711, + "step": 7300 + }, + { + "epoch": 20.057692307692307, + "grad_norm": 13.735152244567871, + "learning_rate": 3.997115384615385e-05, + "loss": 0.4041, + "step": 7301 + }, + { + "epoch": 20.060439560439562, + "grad_norm": 18.852006912231445, + "learning_rate": 3.9969780219780225e-05, + "loss": 0.659, + "step": 7302 + }, + { + "epoch": 20.063186813186814, + "grad_norm": 9.60627555847168, + "learning_rate": 3.9968406593406595e-05, + "loss": 0.2236, + "step": 7303 + }, + { + "epoch": 20.065934065934066, + "grad_norm": 10.39315128326416, + "learning_rate": 3.996703296703297e-05, + "loss": 0.2881, + "step": 7304 + }, + { + "epoch": 20.068681318681318, + "grad_norm": 12.208348274230957, + "learning_rate": 3.996565934065934e-05, + "loss": 0.3228, + "step": 7305 + }, + { + "epoch": 20.071428571428573, + "grad_norm": 13.058869361877441, + "learning_rate": 3.996428571428571e-05, + "loss": 0.314, + "step": 7306 + }, + { + "epoch": 20.074175824175825, + "grad_norm": 12.842049598693848, + "learning_rate": 3.996291208791209e-05, + "loss": 0.301, + "step": 7307 + }, + { + "epoch": 20.076923076923077, + "grad_norm": 15.088382720947266, + "learning_rate": 3.996153846153846e-05, + "loss": 0.5079, + "step": 7308 + }, + { + "epoch": 20.07967032967033, + "grad_norm": 7.134170055389404, + "learning_rate": 3.9960164835164835e-05, + "loss": 0.25, + "step": 7309 + }, + { + "epoch": 20.082417582417584, + "grad_norm": 13.99882698059082, + "learning_rate": 3.995879120879121e-05, + "loss": 0.6898, + "step": 7310 + }, + { + "epoch": 20.085164835164836, + "grad_norm": 10.590657234191895, + "learning_rate": 3.995741758241758e-05, + "loss": 0.2713, + "step": 7311 + }, + { + "epoch": 20.087912087912088, + "grad_norm": 18.103687286376953, + "learning_rate": 3.995604395604396e-05, + "loss": 0.6808, + "step": 7312 + }, + { + "epoch": 20.09065934065934, + "grad_norm": 9.971380233764648, + "learning_rate": 3.995467032967033e-05, + "loss": 0.2383, + "step": 7313 + }, + { + "epoch": 20.093406593406595, + "grad_norm": 12.74191951751709, + "learning_rate": 3.9953296703296706e-05, + "loss": 0.3574, + "step": 7314 + }, + { + "epoch": 20.096153846153847, + "grad_norm": 15.369473457336426, + "learning_rate": 3.995192307692308e-05, + "loss": 0.4173, + "step": 7315 + }, + { + "epoch": 20.0989010989011, + "grad_norm": 13.050456047058105, + "learning_rate": 3.995054945054945e-05, + "loss": 0.5672, + "step": 7316 + }, + { + "epoch": 20.10164835164835, + "grad_norm": 14.753016471862793, + "learning_rate": 3.994917582417583e-05, + "loss": 0.5308, + "step": 7317 + }, + { + "epoch": 20.104395604395606, + "grad_norm": 5.275570869445801, + "learning_rate": 3.99478021978022e-05, + "loss": 0.1452, + "step": 7318 + }, + { + "epoch": 20.107142857142858, + "grad_norm": 19.876249313354492, + "learning_rate": 3.9946428571428576e-05, + "loss": 0.6277, + "step": 7319 + }, + { + "epoch": 20.10989010989011, + "grad_norm": 16.577640533447266, + "learning_rate": 3.9945054945054946e-05, + "loss": 0.5027, + "step": 7320 + }, + { + "epoch": 20.11263736263736, + "grad_norm": 20.7364444732666, + "learning_rate": 3.9943681318681316e-05, + "loss": 0.7248, + "step": 7321 + }, + { + "epoch": 20.115384615384617, + "grad_norm": 6.459165096282959, + "learning_rate": 3.994230769230769e-05, + "loss": 0.1428, + "step": 7322 + }, + { + "epoch": 20.11813186813187, + "grad_norm": 6.4013590812683105, + "learning_rate": 3.994093406593406e-05, + "loss": 0.1521, + "step": 7323 + }, + { + "epoch": 20.12087912087912, + "grad_norm": 8.054272651672363, + "learning_rate": 3.993956043956044e-05, + "loss": 0.1935, + "step": 7324 + }, + { + "epoch": 20.123626373626372, + "grad_norm": 18.49172592163086, + "learning_rate": 3.993818681318682e-05, + "loss": 0.9072, + "step": 7325 + }, + { + "epoch": 20.126373626373628, + "grad_norm": 5.325240135192871, + "learning_rate": 3.993681318681319e-05, + "loss": 0.107, + "step": 7326 + }, + { + "epoch": 20.12912087912088, + "grad_norm": 14.614646911621094, + "learning_rate": 3.9935439560439564e-05, + "loss": 0.541, + "step": 7327 + }, + { + "epoch": 20.13186813186813, + "grad_norm": 14.136178970336914, + "learning_rate": 3.9934065934065934e-05, + "loss": 0.4658, + "step": 7328 + }, + { + "epoch": 20.134615384615383, + "grad_norm": 23.54781150817871, + "learning_rate": 3.993269230769231e-05, + "loss": 0.8353, + "step": 7329 + }, + { + "epoch": 20.13736263736264, + "grad_norm": 10.357584953308105, + "learning_rate": 3.993131868131869e-05, + "loss": 0.2758, + "step": 7330 + }, + { + "epoch": 20.14010989010989, + "grad_norm": 6.295288562774658, + "learning_rate": 3.992994505494506e-05, + "loss": 0.1383, + "step": 7331 + }, + { + "epoch": 20.142857142857142, + "grad_norm": 16.95966911315918, + "learning_rate": 3.9928571428571434e-05, + "loss": 0.3622, + "step": 7332 + }, + { + "epoch": 20.145604395604394, + "grad_norm": 12.754267692565918, + "learning_rate": 3.9927197802197804e-05, + "loss": 0.3551, + "step": 7333 + }, + { + "epoch": 20.14835164835165, + "grad_norm": 15.699557304382324, + "learning_rate": 3.992582417582418e-05, + "loss": 0.4781, + "step": 7334 + }, + { + "epoch": 20.1510989010989, + "grad_norm": 8.657279014587402, + "learning_rate": 3.992445054945055e-05, + "loss": 0.2557, + "step": 7335 + }, + { + "epoch": 20.153846153846153, + "grad_norm": 20.88522720336914, + "learning_rate": 3.992307692307692e-05, + "loss": 0.84, + "step": 7336 + }, + { + "epoch": 20.156593406593405, + "grad_norm": 10.406078338623047, + "learning_rate": 3.99217032967033e-05, + "loss": 0.3253, + "step": 7337 + }, + { + "epoch": 20.15934065934066, + "grad_norm": 7.570446491241455, + "learning_rate": 3.992032967032967e-05, + "loss": 0.2047, + "step": 7338 + }, + { + "epoch": 20.162087912087912, + "grad_norm": 19.871978759765625, + "learning_rate": 3.9918956043956044e-05, + "loss": 0.6477, + "step": 7339 + }, + { + "epoch": 20.164835164835164, + "grad_norm": 8.958305358886719, + "learning_rate": 3.991758241758242e-05, + "loss": 0.2732, + "step": 7340 + }, + { + "epoch": 20.167582417582416, + "grad_norm": 12.852191925048828, + "learning_rate": 3.991620879120879e-05, + "loss": 0.5275, + "step": 7341 + }, + { + "epoch": 20.17032967032967, + "grad_norm": 7.643299102783203, + "learning_rate": 3.991483516483517e-05, + "loss": 0.1677, + "step": 7342 + }, + { + "epoch": 20.173076923076923, + "grad_norm": 8.358694076538086, + "learning_rate": 3.991346153846154e-05, + "loss": 0.1979, + "step": 7343 + }, + { + "epoch": 20.175824175824175, + "grad_norm": 12.324334144592285, + "learning_rate": 3.9912087912087915e-05, + "loss": 0.3678, + "step": 7344 + }, + { + "epoch": 20.178571428571427, + "grad_norm": 10.729249954223633, + "learning_rate": 3.991071428571429e-05, + "loss": 0.3218, + "step": 7345 + }, + { + "epoch": 20.181318681318682, + "grad_norm": 17.771745681762695, + "learning_rate": 3.990934065934066e-05, + "loss": 0.6051, + "step": 7346 + }, + { + "epoch": 20.184065934065934, + "grad_norm": 16.215133666992188, + "learning_rate": 3.990796703296704e-05, + "loss": 0.9514, + "step": 7347 + }, + { + "epoch": 20.186813186813186, + "grad_norm": 18.837554931640625, + "learning_rate": 3.990659340659341e-05, + "loss": 0.4929, + "step": 7348 + }, + { + "epoch": 20.189560439560438, + "grad_norm": 15.638165473937988, + "learning_rate": 3.9905219780219785e-05, + "loss": 0.4364, + "step": 7349 + }, + { + "epoch": 20.192307692307693, + "grad_norm": 9.93359088897705, + "learning_rate": 3.9903846153846155e-05, + "loss": 0.2674, + "step": 7350 + }, + { + "epoch": 20.195054945054945, + "grad_norm": 16.254798889160156, + "learning_rate": 3.9902472527472525e-05, + "loss": 0.6155, + "step": 7351 + }, + { + "epoch": 20.197802197802197, + "grad_norm": 13.919620513916016, + "learning_rate": 3.99010989010989e-05, + "loss": 0.4559, + "step": 7352 + }, + { + "epoch": 20.20054945054945, + "grad_norm": 12.772133827209473, + "learning_rate": 3.989972527472527e-05, + "loss": 0.4019, + "step": 7353 + }, + { + "epoch": 20.203296703296704, + "grad_norm": 12.971504211425781, + "learning_rate": 3.989835164835165e-05, + "loss": 0.3924, + "step": 7354 + }, + { + "epoch": 20.206043956043956, + "grad_norm": 12.226426124572754, + "learning_rate": 3.9896978021978026e-05, + "loss": 0.2634, + "step": 7355 + }, + { + "epoch": 20.208791208791208, + "grad_norm": 12.076688766479492, + "learning_rate": 3.9895604395604396e-05, + "loss": 0.5648, + "step": 7356 + }, + { + "epoch": 20.21153846153846, + "grad_norm": 6.431704044342041, + "learning_rate": 3.989423076923077e-05, + "loss": 0.1384, + "step": 7357 + }, + { + "epoch": 20.214285714285715, + "grad_norm": 12.68264389038086, + "learning_rate": 3.989285714285714e-05, + "loss": 0.3324, + "step": 7358 + }, + { + "epoch": 20.217032967032967, + "grad_norm": 13.763578414916992, + "learning_rate": 3.989148351648352e-05, + "loss": 0.5629, + "step": 7359 + }, + { + "epoch": 20.21978021978022, + "grad_norm": 6.071643352508545, + "learning_rate": 3.9890109890109896e-05, + "loss": 0.1496, + "step": 7360 + }, + { + "epoch": 20.22252747252747, + "grad_norm": 11.226510047912598, + "learning_rate": 3.9888736263736266e-05, + "loss": 0.4357, + "step": 7361 + }, + { + "epoch": 20.225274725274726, + "grad_norm": 12.801694869995117, + "learning_rate": 3.988736263736264e-05, + "loss": 0.4311, + "step": 7362 + }, + { + "epoch": 20.228021978021978, + "grad_norm": 6.8585429191589355, + "learning_rate": 3.988598901098901e-05, + "loss": 0.2338, + "step": 7363 + }, + { + "epoch": 20.23076923076923, + "grad_norm": 12.697897911071777, + "learning_rate": 3.988461538461539e-05, + "loss": 0.3003, + "step": 7364 + }, + { + "epoch": 20.233516483516482, + "grad_norm": 12.19594955444336, + "learning_rate": 3.988324175824176e-05, + "loss": 0.3984, + "step": 7365 + }, + { + "epoch": 20.236263736263737, + "grad_norm": 7.403346061706543, + "learning_rate": 3.988186813186813e-05, + "loss": 0.121, + "step": 7366 + }, + { + "epoch": 20.23901098901099, + "grad_norm": 13.495145797729492, + "learning_rate": 3.9880494505494507e-05, + "loss": 0.3813, + "step": 7367 + }, + { + "epoch": 20.24175824175824, + "grad_norm": 16.755041122436523, + "learning_rate": 3.987912087912088e-05, + "loss": 0.4965, + "step": 7368 + }, + { + "epoch": 20.244505494505493, + "grad_norm": 19.424354553222656, + "learning_rate": 3.9877747252747253e-05, + "loss": 0.8374, + "step": 7369 + }, + { + "epoch": 20.247252747252748, + "grad_norm": 8.34373664855957, + "learning_rate": 3.987637362637363e-05, + "loss": 0.211, + "step": 7370 + }, + { + "epoch": 20.25, + "grad_norm": 15.611519813537598, + "learning_rate": 3.9875e-05, + "loss": 0.2875, + "step": 7371 + }, + { + "epoch": 20.252747252747252, + "grad_norm": 5.183530807495117, + "learning_rate": 3.987362637362638e-05, + "loss": 0.1076, + "step": 7372 + }, + { + "epoch": 20.255494505494504, + "grad_norm": 19.068622589111328, + "learning_rate": 3.987225274725275e-05, + "loss": 0.7365, + "step": 7373 + }, + { + "epoch": 20.25824175824176, + "grad_norm": 14.293002128601074, + "learning_rate": 3.9870879120879124e-05, + "loss": 0.7538, + "step": 7374 + }, + { + "epoch": 20.26098901098901, + "grad_norm": 10.700455665588379, + "learning_rate": 3.98695054945055e-05, + "loss": 0.4087, + "step": 7375 + }, + { + "epoch": 20.263736263736263, + "grad_norm": 14.802379608154297, + "learning_rate": 3.986813186813187e-05, + "loss": 0.3979, + "step": 7376 + }, + { + "epoch": 20.266483516483518, + "grad_norm": 10.844808578491211, + "learning_rate": 3.986675824175825e-05, + "loss": 0.3653, + "step": 7377 + }, + { + "epoch": 20.26923076923077, + "grad_norm": 15.546173095703125, + "learning_rate": 3.986538461538462e-05, + "loss": 0.5045, + "step": 7378 + }, + { + "epoch": 20.271978021978022, + "grad_norm": 10.586849212646484, + "learning_rate": 3.9864010989010994e-05, + "loss": 0.2892, + "step": 7379 + }, + { + "epoch": 20.274725274725274, + "grad_norm": 10.470393180847168, + "learning_rate": 3.9862637362637364e-05, + "loss": 0.29, + "step": 7380 + }, + { + "epoch": 20.27747252747253, + "grad_norm": 18.822704315185547, + "learning_rate": 3.9861263736263734e-05, + "loss": 0.6881, + "step": 7381 + }, + { + "epoch": 20.28021978021978, + "grad_norm": 15.694698333740234, + "learning_rate": 3.985989010989011e-05, + "loss": 0.5568, + "step": 7382 + }, + { + "epoch": 20.282967032967033, + "grad_norm": 14.475744247436523, + "learning_rate": 3.985851648351648e-05, + "loss": 0.4021, + "step": 7383 + }, + { + "epoch": 20.285714285714285, + "grad_norm": 18.653905868530273, + "learning_rate": 3.985714285714286e-05, + "loss": 0.7578, + "step": 7384 + }, + { + "epoch": 20.28846153846154, + "grad_norm": 14.060734748840332, + "learning_rate": 3.9855769230769235e-05, + "loss": 0.4494, + "step": 7385 + }, + { + "epoch": 20.291208791208792, + "grad_norm": 17.65713119506836, + "learning_rate": 3.9854395604395605e-05, + "loss": 0.5116, + "step": 7386 + }, + { + "epoch": 20.293956043956044, + "grad_norm": 17.84254264831543, + "learning_rate": 3.985302197802198e-05, + "loss": 0.6757, + "step": 7387 + }, + { + "epoch": 20.296703296703296, + "grad_norm": 14.310617446899414, + "learning_rate": 3.985164835164835e-05, + "loss": 0.6019, + "step": 7388 + }, + { + "epoch": 20.29945054945055, + "grad_norm": 16.35056495666504, + "learning_rate": 3.985027472527473e-05, + "loss": 0.6168, + "step": 7389 + }, + { + "epoch": 20.302197802197803, + "grad_norm": 8.528718948364258, + "learning_rate": 3.9848901098901105e-05, + "loss": 0.3022, + "step": 7390 + }, + { + "epoch": 20.304945054945055, + "grad_norm": 9.70041275024414, + "learning_rate": 3.9847527472527475e-05, + "loss": 0.278, + "step": 7391 + }, + { + "epoch": 20.307692307692307, + "grad_norm": 21.22376251220703, + "learning_rate": 3.984615384615385e-05, + "loss": 0.7818, + "step": 7392 + }, + { + "epoch": 20.310439560439562, + "grad_norm": 12.761017799377441, + "learning_rate": 3.984478021978022e-05, + "loss": 0.4054, + "step": 7393 + }, + { + "epoch": 20.313186813186814, + "grad_norm": 12.609147071838379, + "learning_rate": 3.98434065934066e-05, + "loss": 0.3713, + "step": 7394 + }, + { + "epoch": 20.315934065934066, + "grad_norm": 14.206159591674805, + "learning_rate": 3.984203296703297e-05, + "loss": 0.3392, + "step": 7395 + }, + { + "epoch": 20.318681318681318, + "grad_norm": 10.179539680480957, + "learning_rate": 3.984065934065934e-05, + "loss": 0.2482, + "step": 7396 + }, + { + "epoch": 20.321428571428573, + "grad_norm": 8.372234344482422, + "learning_rate": 3.9839285714285716e-05, + "loss": 0.2656, + "step": 7397 + }, + { + "epoch": 20.324175824175825, + "grad_norm": 12.819564819335938, + "learning_rate": 3.9837912087912086e-05, + "loss": 0.3623, + "step": 7398 + }, + { + "epoch": 20.326923076923077, + "grad_norm": 18.35387420654297, + "learning_rate": 3.983653846153846e-05, + "loss": 0.7818, + "step": 7399 + }, + { + "epoch": 20.32967032967033, + "grad_norm": 14.942824363708496, + "learning_rate": 3.983516483516483e-05, + "loss": 0.4666, + "step": 7400 + }, + { + "epoch": 20.332417582417584, + "grad_norm": 12.004154205322266, + "learning_rate": 3.983379120879121e-05, + "loss": 0.2482, + "step": 7401 + }, + { + "epoch": 20.335164835164836, + "grad_norm": 11.112947463989258, + "learning_rate": 3.9832417582417586e-05, + "loss": 0.2338, + "step": 7402 + }, + { + "epoch": 20.337912087912088, + "grad_norm": 14.735577583312988, + "learning_rate": 3.9831043956043956e-05, + "loss": 0.6248, + "step": 7403 + }, + { + "epoch": 20.34065934065934, + "grad_norm": 10.691901206970215, + "learning_rate": 3.982967032967033e-05, + "loss": 0.3041, + "step": 7404 + }, + { + "epoch": 20.343406593406595, + "grad_norm": 9.76063346862793, + "learning_rate": 3.98282967032967e-05, + "loss": 0.2574, + "step": 7405 + }, + { + "epoch": 20.346153846153847, + "grad_norm": 8.739889144897461, + "learning_rate": 3.982692307692308e-05, + "loss": 0.2146, + "step": 7406 + }, + { + "epoch": 20.3489010989011, + "grad_norm": 13.08285903930664, + "learning_rate": 3.9825549450549456e-05, + "loss": 0.2993, + "step": 7407 + }, + { + "epoch": 20.35164835164835, + "grad_norm": 6.627087116241455, + "learning_rate": 3.9824175824175826e-05, + "loss": 0.2553, + "step": 7408 + }, + { + "epoch": 20.354395604395606, + "grad_norm": 12.551352500915527, + "learning_rate": 3.98228021978022e-05, + "loss": 0.5328, + "step": 7409 + }, + { + "epoch": 20.357142857142858, + "grad_norm": 7.240415573120117, + "learning_rate": 3.982142857142857e-05, + "loss": 0.2821, + "step": 7410 + }, + { + "epoch": 20.35989010989011, + "grad_norm": 13.385899543762207, + "learning_rate": 3.982005494505494e-05, + "loss": 0.3241, + "step": 7411 + }, + { + "epoch": 20.36263736263736, + "grad_norm": 6.729339122772217, + "learning_rate": 3.981868131868132e-05, + "loss": 0.2365, + "step": 7412 + }, + { + "epoch": 20.365384615384617, + "grad_norm": 10.114368438720703, + "learning_rate": 3.981730769230769e-05, + "loss": 0.3585, + "step": 7413 + }, + { + "epoch": 20.36813186813187, + "grad_norm": 12.476093292236328, + "learning_rate": 3.981593406593407e-05, + "loss": 0.367, + "step": 7414 + }, + { + "epoch": 20.37087912087912, + "grad_norm": 9.655406951904297, + "learning_rate": 3.981456043956044e-05, + "loss": 0.4914, + "step": 7415 + }, + { + "epoch": 20.373626373626372, + "grad_norm": 8.03652286529541, + "learning_rate": 3.9813186813186814e-05, + "loss": 0.1609, + "step": 7416 + }, + { + "epoch": 20.376373626373628, + "grad_norm": 13.60301399230957, + "learning_rate": 3.981181318681319e-05, + "loss": 0.2825, + "step": 7417 + }, + { + "epoch": 20.37912087912088, + "grad_norm": 11.06744384765625, + "learning_rate": 3.981043956043956e-05, + "loss": 0.4724, + "step": 7418 + }, + { + "epoch": 20.38186813186813, + "grad_norm": 6.390135288238525, + "learning_rate": 3.980906593406594e-05, + "loss": 0.0997, + "step": 7419 + }, + { + "epoch": 20.384615384615383, + "grad_norm": 9.420255661010742, + "learning_rate": 3.980769230769231e-05, + "loss": 0.2141, + "step": 7420 + }, + { + "epoch": 20.38736263736264, + "grad_norm": 13.50339126586914, + "learning_rate": 3.9806318681318684e-05, + "loss": 0.3794, + "step": 7421 + }, + { + "epoch": 20.39010989010989, + "grad_norm": 15.261809349060059, + "learning_rate": 3.980494505494506e-05, + "loss": 0.4332, + "step": 7422 + }, + { + "epoch": 20.392857142857142, + "grad_norm": 18.152118682861328, + "learning_rate": 3.980357142857143e-05, + "loss": 0.854, + "step": 7423 + }, + { + "epoch": 20.395604395604394, + "grad_norm": 19.675268173217773, + "learning_rate": 3.980219780219781e-05, + "loss": 0.7328, + "step": 7424 + }, + { + "epoch": 20.39835164835165, + "grad_norm": 7.199579238891602, + "learning_rate": 3.980082417582418e-05, + "loss": 0.2259, + "step": 7425 + }, + { + "epoch": 20.4010989010989, + "grad_norm": 17.379133224487305, + "learning_rate": 3.979945054945055e-05, + "loss": 0.5424, + "step": 7426 + }, + { + "epoch": 20.403846153846153, + "grad_norm": 17.152759552001953, + "learning_rate": 3.9798076923076925e-05, + "loss": 0.497, + "step": 7427 + }, + { + "epoch": 20.406593406593405, + "grad_norm": 7.592296600341797, + "learning_rate": 3.9796703296703295e-05, + "loss": 0.2355, + "step": 7428 + }, + { + "epoch": 20.40934065934066, + "grad_norm": 12.769701957702637, + "learning_rate": 3.979532967032967e-05, + "loss": 0.4426, + "step": 7429 + }, + { + "epoch": 20.412087912087912, + "grad_norm": 17.632291793823242, + "learning_rate": 3.979395604395604e-05, + "loss": 0.5577, + "step": 7430 + }, + { + "epoch": 20.414835164835164, + "grad_norm": 15.460408210754395, + "learning_rate": 3.979258241758242e-05, + "loss": 0.4812, + "step": 7431 + }, + { + "epoch": 20.417582417582416, + "grad_norm": 4.443985939025879, + "learning_rate": 3.9791208791208795e-05, + "loss": 0.1337, + "step": 7432 + }, + { + "epoch": 20.42032967032967, + "grad_norm": 19.377010345458984, + "learning_rate": 3.9789835164835165e-05, + "loss": 0.6533, + "step": 7433 + }, + { + "epoch": 20.423076923076923, + "grad_norm": 8.902471542358398, + "learning_rate": 3.978846153846154e-05, + "loss": 0.2108, + "step": 7434 + }, + { + "epoch": 20.425824175824175, + "grad_norm": 13.262514114379883, + "learning_rate": 3.978708791208791e-05, + "loss": 0.2757, + "step": 7435 + }, + { + "epoch": 20.428571428571427, + "grad_norm": 14.696688652038574, + "learning_rate": 3.978571428571429e-05, + "loss": 0.5133, + "step": 7436 + }, + { + "epoch": 20.431318681318682, + "grad_norm": 13.743922233581543, + "learning_rate": 3.9784340659340665e-05, + "loss": 0.4097, + "step": 7437 + }, + { + "epoch": 20.434065934065934, + "grad_norm": 4.555562973022461, + "learning_rate": 3.9782967032967035e-05, + "loss": 0.1562, + "step": 7438 + }, + { + "epoch": 20.436813186813186, + "grad_norm": 5.396402835845947, + "learning_rate": 3.978159340659341e-05, + "loss": 0.1477, + "step": 7439 + }, + { + "epoch": 20.439560439560438, + "grad_norm": 12.643402099609375, + "learning_rate": 3.978021978021978e-05, + "loss": 0.3488, + "step": 7440 + }, + { + "epoch": 20.442307692307693, + "grad_norm": 8.166704177856445, + "learning_rate": 3.977884615384615e-05, + "loss": 0.272, + "step": 7441 + }, + { + "epoch": 20.445054945054945, + "grad_norm": 13.218303680419922, + "learning_rate": 3.977747252747253e-05, + "loss": 0.4134, + "step": 7442 + }, + { + "epoch": 20.447802197802197, + "grad_norm": 13.498741149902344, + "learning_rate": 3.97760989010989e-05, + "loss": 0.3829, + "step": 7443 + }, + { + "epoch": 20.45054945054945, + "grad_norm": 12.184596061706543, + "learning_rate": 3.9774725274725276e-05, + "loss": 0.4116, + "step": 7444 + }, + { + "epoch": 20.453296703296704, + "grad_norm": 14.351744651794434, + "learning_rate": 3.9773351648351646e-05, + "loss": 0.7562, + "step": 7445 + }, + { + "epoch": 20.456043956043956, + "grad_norm": 12.286423683166504, + "learning_rate": 3.977197802197802e-05, + "loss": 0.4285, + "step": 7446 + }, + { + "epoch": 20.458791208791208, + "grad_norm": 13.93399429321289, + "learning_rate": 3.97706043956044e-05, + "loss": 0.5732, + "step": 7447 + }, + { + "epoch": 20.46153846153846, + "grad_norm": 15.634751319885254, + "learning_rate": 3.976923076923077e-05, + "loss": 0.4019, + "step": 7448 + }, + { + "epoch": 20.464285714285715, + "grad_norm": 6.433925151824951, + "learning_rate": 3.9767857142857146e-05, + "loss": 0.2322, + "step": 7449 + }, + { + "epoch": 20.467032967032967, + "grad_norm": 12.370292663574219, + "learning_rate": 3.9766483516483516e-05, + "loss": 0.3219, + "step": 7450 + }, + { + "epoch": 20.46978021978022, + "grad_norm": 8.319644927978516, + "learning_rate": 3.976510989010989e-05, + "loss": 0.2762, + "step": 7451 + }, + { + "epoch": 20.47252747252747, + "grad_norm": 11.652069091796875, + "learning_rate": 3.976373626373627e-05, + "loss": 0.3923, + "step": 7452 + }, + { + "epoch": 20.475274725274726, + "grad_norm": 11.987881660461426, + "learning_rate": 3.976236263736264e-05, + "loss": 0.3681, + "step": 7453 + }, + { + "epoch": 20.478021978021978, + "grad_norm": 12.861050605773926, + "learning_rate": 3.976098901098902e-05, + "loss": 0.4679, + "step": 7454 + }, + { + "epoch": 20.48076923076923, + "grad_norm": 19.13287925720215, + "learning_rate": 3.975961538461539e-05, + "loss": 0.6238, + "step": 7455 + }, + { + "epoch": 20.483516483516482, + "grad_norm": 12.368768692016602, + "learning_rate": 3.975824175824176e-05, + "loss": 0.297, + "step": 7456 + }, + { + "epoch": 20.486263736263737, + "grad_norm": 8.559589385986328, + "learning_rate": 3.9756868131868134e-05, + "loss": 0.1865, + "step": 7457 + }, + { + "epoch": 20.48901098901099, + "grad_norm": 4.996022701263428, + "learning_rate": 3.9755494505494504e-05, + "loss": 0.129, + "step": 7458 + }, + { + "epoch": 20.49175824175824, + "grad_norm": 10.952906608581543, + "learning_rate": 3.975412087912088e-05, + "loss": 0.2953, + "step": 7459 + }, + { + "epoch": 20.494505494505496, + "grad_norm": 21.99152183532715, + "learning_rate": 3.975274725274725e-05, + "loss": 0.5493, + "step": 7460 + }, + { + "epoch": 20.497252747252748, + "grad_norm": 10.800567626953125, + "learning_rate": 3.975137362637363e-05, + "loss": 0.2961, + "step": 7461 + }, + { + "epoch": 20.5, + "grad_norm": 6.3396148681640625, + "learning_rate": 3.9750000000000004e-05, + "loss": 0.2153, + "step": 7462 + }, + { + "epoch": 20.502747252747252, + "grad_norm": 7.663769245147705, + "learning_rate": 3.9748626373626374e-05, + "loss": 0.193, + "step": 7463 + }, + { + "epoch": 20.505494505494504, + "grad_norm": 14.683032989501953, + "learning_rate": 3.974725274725275e-05, + "loss": 0.4937, + "step": 7464 + }, + { + "epoch": 20.50824175824176, + "grad_norm": 9.987406730651855, + "learning_rate": 3.974587912087912e-05, + "loss": 0.3258, + "step": 7465 + }, + { + "epoch": 20.51098901098901, + "grad_norm": 12.139036178588867, + "learning_rate": 3.97445054945055e-05, + "loss": 0.323, + "step": 7466 + }, + { + "epoch": 20.513736263736263, + "grad_norm": 14.329765319824219, + "learning_rate": 3.9743131868131874e-05, + "loss": 0.5048, + "step": 7467 + }, + { + "epoch": 20.516483516483518, + "grad_norm": 12.072620391845703, + "learning_rate": 3.9741758241758244e-05, + "loss": 0.2273, + "step": 7468 + }, + { + "epoch": 20.51923076923077, + "grad_norm": 22.27573013305664, + "learning_rate": 3.974038461538462e-05, + "loss": 0.6711, + "step": 7469 + }, + { + "epoch": 20.521978021978022, + "grad_norm": 19.98053741455078, + "learning_rate": 3.973901098901099e-05, + "loss": 0.5716, + "step": 7470 + }, + { + "epoch": 20.524725274725274, + "grad_norm": 13.753655433654785, + "learning_rate": 3.973763736263736e-05, + "loss": 0.5889, + "step": 7471 + }, + { + "epoch": 20.52747252747253, + "grad_norm": 17.343721389770508, + "learning_rate": 3.973626373626374e-05, + "loss": 0.7981, + "step": 7472 + }, + { + "epoch": 20.53021978021978, + "grad_norm": 7.741250038146973, + "learning_rate": 3.973489010989011e-05, + "loss": 0.1742, + "step": 7473 + }, + { + "epoch": 20.532967032967033, + "grad_norm": 25.252519607543945, + "learning_rate": 3.9733516483516485e-05, + "loss": 0.8662, + "step": 7474 + }, + { + "epoch": 20.535714285714285, + "grad_norm": 12.743553161621094, + "learning_rate": 3.9732142857142855e-05, + "loss": 0.4909, + "step": 7475 + }, + { + "epoch": 20.53846153846154, + "grad_norm": 25.642465591430664, + "learning_rate": 3.973076923076923e-05, + "loss": 0.7469, + "step": 7476 + }, + { + "epoch": 20.541208791208792, + "grad_norm": 7.729605197906494, + "learning_rate": 3.972939560439561e-05, + "loss": 0.2046, + "step": 7477 + }, + { + "epoch": 20.543956043956044, + "grad_norm": 19.285139083862305, + "learning_rate": 3.972802197802198e-05, + "loss": 0.5202, + "step": 7478 + }, + { + "epoch": 20.546703296703296, + "grad_norm": 15.07305908203125, + "learning_rate": 3.9726648351648355e-05, + "loss": 0.4253, + "step": 7479 + }, + { + "epoch": 20.54945054945055, + "grad_norm": 9.941361427307129, + "learning_rate": 3.9725274725274725e-05, + "loss": 0.3586, + "step": 7480 + }, + { + "epoch": 20.552197802197803, + "grad_norm": 8.440556526184082, + "learning_rate": 3.97239010989011e-05, + "loss": 0.3259, + "step": 7481 + }, + { + "epoch": 20.554945054945055, + "grad_norm": 15.833099365234375, + "learning_rate": 3.972252747252748e-05, + "loss": 0.5068, + "step": 7482 + }, + { + "epoch": 20.557692307692307, + "grad_norm": 11.048510551452637, + "learning_rate": 3.972115384615385e-05, + "loss": 0.4406, + "step": 7483 + }, + { + "epoch": 20.560439560439562, + "grad_norm": 13.466227531433105, + "learning_rate": 3.9719780219780226e-05, + "loss": 0.4622, + "step": 7484 + }, + { + "epoch": 20.563186813186814, + "grad_norm": 8.747045516967773, + "learning_rate": 3.9718406593406596e-05, + "loss": 0.3432, + "step": 7485 + }, + { + "epoch": 20.565934065934066, + "grad_norm": 8.686332702636719, + "learning_rate": 3.9717032967032966e-05, + "loss": 0.2762, + "step": 7486 + }, + { + "epoch": 20.568681318681318, + "grad_norm": 9.565933227539062, + "learning_rate": 3.971565934065934e-05, + "loss": 0.2958, + "step": 7487 + }, + { + "epoch": 20.571428571428573, + "grad_norm": 16.31696891784668, + "learning_rate": 3.971428571428571e-05, + "loss": 0.3655, + "step": 7488 + }, + { + "epoch": 20.574175824175825, + "grad_norm": 14.173613548278809, + "learning_rate": 3.971291208791209e-05, + "loss": 0.3814, + "step": 7489 + }, + { + "epoch": 20.576923076923077, + "grad_norm": 16.68792724609375, + "learning_rate": 3.971153846153846e-05, + "loss": 0.5042, + "step": 7490 + }, + { + "epoch": 20.57967032967033, + "grad_norm": 11.880914688110352, + "learning_rate": 3.9710164835164836e-05, + "loss": 0.3094, + "step": 7491 + }, + { + "epoch": 20.582417582417584, + "grad_norm": 8.434412002563477, + "learning_rate": 3.970879120879121e-05, + "loss": 0.1948, + "step": 7492 + }, + { + "epoch": 20.585164835164836, + "grad_norm": 18.675273895263672, + "learning_rate": 3.970741758241758e-05, + "loss": 0.5113, + "step": 7493 + }, + { + "epoch": 20.587912087912088, + "grad_norm": 5.992949962615967, + "learning_rate": 3.970604395604396e-05, + "loss": 0.1296, + "step": 7494 + }, + { + "epoch": 20.59065934065934, + "grad_norm": 12.5152587890625, + "learning_rate": 3.970467032967033e-05, + "loss": 0.4909, + "step": 7495 + }, + { + "epoch": 20.593406593406595, + "grad_norm": 10.949995994567871, + "learning_rate": 3.9703296703296707e-05, + "loss": 0.302, + "step": 7496 + }, + { + "epoch": 20.596153846153847, + "grad_norm": 10.76211929321289, + "learning_rate": 3.970192307692308e-05, + "loss": 0.3687, + "step": 7497 + }, + { + "epoch": 20.5989010989011, + "grad_norm": 14.291439056396484, + "learning_rate": 3.9700549450549453e-05, + "loss": 0.3555, + "step": 7498 + }, + { + "epoch": 20.60164835164835, + "grad_norm": 14.13058090209961, + "learning_rate": 3.969917582417583e-05, + "loss": 0.6295, + "step": 7499 + }, + { + "epoch": 20.604395604395606, + "grad_norm": 14.950287818908691, + "learning_rate": 3.96978021978022e-05, + "loss": 0.555, + "step": 7500 + }, + { + "epoch": 20.607142857142858, + "grad_norm": 13.305827140808105, + "learning_rate": 3.969642857142857e-05, + "loss": 0.3803, + "step": 7501 + }, + { + "epoch": 20.60989010989011, + "grad_norm": 12.860246658325195, + "learning_rate": 3.969505494505495e-05, + "loss": 0.4637, + "step": 7502 + }, + { + "epoch": 20.61263736263736, + "grad_norm": 16.78354263305664, + "learning_rate": 3.969368131868132e-05, + "loss": 0.7781, + "step": 7503 + }, + { + "epoch": 20.615384615384617, + "grad_norm": 10.95863151550293, + "learning_rate": 3.9692307692307694e-05, + "loss": 0.4302, + "step": 7504 + }, + { + "epoch": 20.61813186813187, + "grad_norm": 6.210714817047119, + "learning_rate": 3.9690934065934064e-05, + "loss": 0.1422, + "step": 7505 + }, + { + "epoch": 20.62087912087912, + "grad_norm": 9.134727478027344, + "learning_rate": 3.968956043956044e-05, + "loss": 0.3461, + "step": 7506 + }, + { + "epoch": 20.623626373626372, + "grad_norm": 11.173856735229492, + "learning_rate": 3.968818681318682e-05, + "loss": 0.4789, + "step": 7507 + }, + { + "epoch": 20.626373626373628, + "grad_norm": 11.123008728027344, + "learning_rate": 3.968681318681319e-05, + "loss": 0.3425, + "step": 7508 + }, + { + "epoch": 20.62912087912088, + "grad_norm": 15.06463623046875, + "learning_rate": 3.9685439560439564e-05, + "loss": 0.463, + "step": 7509 + }, + { + "epoch": 20.63186813186813, + "grad_norm": 13.193632125854492, + "learning_rate": 3.9684065934065934e-05, + "loss": 0.357, + "step": 7510 + }, + { + "epoch": 20.634615384615383, + "grad_norm": 8.095964431762695, + "learning_rate": 3.968269230769231e-05, + "loss": 0.1809, + "step": 7511 + }, + { + "epoch": 20.63736263736264, + "grad_norm": 18.188383102416992, + "learning_rate": 3.968131868131869e-05, + "loss": 0.6356, + "step": 7512 + }, + { + "epoch": 20.64010989010989, + "grad_norm": 10.94364070892334, + "learning_rate": 3.967994505494506e-05, + "loss": 0.2528, + "step": 7513 + }, + { + "epoch": 20.642857142857142, + "grad_norm": 17.222063064575195, + "learning_rate": 3.9678571428571435e-05, + "loss": 0.5489, + "step": 7514 + }, + { + "epoch": 20.645604395604394, + "grad_norm": 11.271734237670898, + "learning_rate": 3.9677197802197805e-05, + "loss": 0.4074, + "step": 7515 + }, + { + "epoch": 20.64835164835165, + "grad_norm": 13.568665504455566, + "learning_rate": 3.9675824175824175e-05, + "loss": 0.3958, + "step": 7516 + }, + { + "epoch": 20.6510989010989, + "grad_norm": 18.780136108398438, + "learning_rate": 3.967445054945055e-05, + "loss": 0.7086, + "step": 7517 + }, + { + "epoch": 20.653846153846153, + "grad_norm": 14.732504844665527, + "learning_rate": 3.967307692307692e-05, + "loss": 0.4996, + "step": 7518 + }, + { + "epoch": 20.656593406593405, + "grad_norm": 21.131790161132812, + "learning_rate": 3.96717032967033e-05, + "loss": 0.6429, + "step": 7519 + }, + { + "epoch": 20.65934065934066, + "grad_norm": 8.333465576171875, + "learning_rate": 3.967032967032967e-05, + "loss": 0.2108, + "step": 7520 + }, + { + "epoch": 20.662087912087912, + "grad_norm": 9.628145217895508, + "learning_rate": 3.9668956043956045e-05, + "loss": 0.3336, + "step": 7521 + }, + { + "epoch": 20.664835164835164, + "grad_norm": 9.679829597473145, + "learning_rate": 3.966758241758242e-05, + "loss": 0.2194, + "step": 7522 + }, + { + "epoch": 20.667582417582416, + "grad_norm": 11.868854522705078, + "learning_rate": 3.966620879120879e-05, + "loss": 0.2171, + "step": 7523 + }, + { + "epoch": 20.67032967032967, + "grad_norm": 16.794225692749023, + "learning_rate": 3.966483516483517e-05, + "loss": 0.3124, + "step": 7524 + }, + { + "epoch": 20.673076923076923, + "grad_norm": 19.413211822509766, + "learning_rate": 3.966346153846154e-05, + "loss": 0.732, + "step": 7525 + }, + { + "epoch": 20.675824175824175, + "grad_norm": 20.60863494873047, + "learning_rate": 3.9662087912087916e-05, + "loss": 0.6557, + "step": 7526 + }, + { + "epoch": 20.678571428571427, + "grad_norm": 9.29537582397461, + "learning_rate": 3.966071428571429e-05, + "loss": 0.1916, + "step": 7527 + }, + { + "epoch": 20.681318681318682, + "grad_norm": 9.852725982666016, + "learning_rate": 3.965934065934066e-05, + "loss": 0.2095, + "step": 7528 + }, + { + "epoch": 20.684065934065934, + "grad_norm": 9.044027328491211, + "learning_rate": 3.965796703296704e-05, + "loss": 0.2205, + "step": 7529 + }, + { + "epoch": 20.686813186813186, + "grad_norm": 13.18966007232666, + "learning_rate": 3.965659340659341e-05, + "loss": 0.3961, + "step": 7530 + }, + { + "epoch": 20.689560439560438, + "grad_norm": 13.359728813171387, + "learning_rate": 3.965521978021978e-05, + "loss": 0.5943, + "step": 7531 + }, + { + "epoch": 20.692307692307693, + "grad_norm": 11.111169815063477, + "learning_rate": 3.9653846153846156e-05, + "loss": 0.2216, + "step": 7532 + }, + { + "epoch": 20.695054945054945, + "grad_norm": 17.988807678222656, + "learning_rate": 3.9652472527472526e-05, + "loss": 0.5568, + "step": 7533 + }, + { + "epoch": 20.697802197802197, + "grad_norm": 15.295273780822754, + "learning_rate": 3.96510989010989e-05, + "loss": 0.4006, + "step": 7534 + }, + { + "epoch": 20.70054945054945, + "grad_norm": 15.124104499816895, + "learning_rate": 3.964972527472527e-05, + "loss": 0.4291, + "step": 7535 + }, + { + "epoch": 20.703296703296704, + "grad_norm": 14.527616500854492, + "learning_rate": 3.964835164835165e-05, + "loss": 0.3954, + "step": 7536 + }, + { + "epoch": 20.706043956043956, + "grad_norm": 20.378149032592773, + "learning_rate": 3.9646978021978026e-05, + "loss": 0.7367, + "step": 7537 + }, + { + "epoch": 20.708791208791208, + "grad_norm": 13.015893936157227, + "learning_rate": 3.9645604395604396e-05, + "loss": 0.5468, + "step": 7538 + }, + { + "epoch": 20.71153846153846, + "grad_norm": 14.044219017028809, + "learning_rate": 3.964423076923077e-05, + "loss": 0.4913, + "step": 7539 + }, + { + "epoch": 20.714285714285715, + "grad_norm": 11.22045612335205, + "learning_rate": 3.964285714285714e-05, + "loss": 0.2622, + "step": 7540 + }, + { + "epoch": 20.717032967032967, + "grad_norm": 6.525221347808838, + "learning_rate": 3.964148351648352e-05, + "loss": 0.1599, + "step": 7541 + }, + { + "epoch": 20.71978021978022, + "grad_norm": 7.077492713928223, + "learning_rate": 3.96401098901099e-05, + "loss": 0.1687, + "step": 7542 + }, + { + "epoch": 20.72252747252747, + "grad_norm": 16.249557495117188, + "learning_rate": 3.963873626373627e-05, + "loss": 0.7317, + "step": 7543 + }, + { + "epoch": 20.725274725274726, + "grad_norm": 13.934830665588379, + "learning_rate": 3.9637362637362644e-05, + "loss": 0.5704, + "step": 7544 + }, + { + "epoch": 20.728021978021978, + "grad_norm": 10.98708438873291, + "learning_rate": 3.9635989010989014e-05, + "loss": 0.3062, + "step": 7545 + }, + { + "epoch": 20.73076923076923, + "grad_norm": 15.230470657348633, + "learning_rate": 3.9634615384615384e-05, + "loss": 0.4622, + "step": 7546 + }, + { + "epoch": 20.733516483516482, + "grad_norm": 13.446895599365234, + "learning_rate": 3.963324175824176e-05, + "loss": 0.3238, + "step": 7547 + }, + { + "epoch": 20.736263736263737, + "grad_norm": 17.495895385742188, + "learning_rate": 3.963186813186813e-05, + "loss": 0.8125, + "step": 7548 + }, + { + "epoch": 20.73901098901099, + "grad_norm": 14.360431671142578, + "learning_rate": 3.963049450549451e-05, + "loss": 0.4626, + "step": 7549 + }, + { + "epoch": 20.74175824175824, + "grad_norm": 12.553208351135254, + "learning_rate": 3.962912087912088e-05, + "loss": 0.3396, + "step": 7550 + }, + { + "epoch": 20.744505494505496, + "grad_norm": 18.568206787109375, + "learning_rate": 3.9627747252747254e-05, + "loss": 0.8965, + "step": 7551 + }, + { + "epoch": 20.747252747252748, + "grad_norm": 17.69218635559082, + "learning_rate": 3.962637362637363e-05, + "loss": 0.803, + "step": 7552 + }, + { + "epoch": 20.75, + "grad_norm": 17.389686584472656, + "learning_rate": 3.9625e-05, + "loss": 0.5374, + "step": 7553 + }, + { + "epoch": 20.752747252747252, + "grad_norm": 7.675103187561035, + "learning_rate": 3.962362637362638e-05, + "loss": 0.1385, + "step": 7554 + }, + { + "epoch": 20.755494505494504, + "grad_norm": 12.491323471069336, + "learning_rate": 3.962225274725275e-05, + "loss": 0.4033, + "step": 7555 + }, + { + "epoch": 20.75824175824176, + "grad_norm": 19.29902458190918, + "learning_rate": 3.9620879120879125e-05, + "loss": 0.7265, + "step": 7556 + }, + { + "epoch": 20.76098901098901, + "grad_norm": 20.953710556030273, + "learning_rate": 3.96195054945055e-05, + "loss": 0.9242, + "step": 7557 + }, + { + "epoch": 20.763736263736263, + "grad_norm": 9.292744636535645, + "learning_rate": 3.961813186813187e-05, + "loss": 0.1942, + "step": 7558 + }, + { + "epoch": 20.766483516483518, + "grad_norm": 6.285183429718018, + "learning_rate": 3.961675824175825e-05, + "loss": 0.1189, + "step": 7559 + }, + { + "epoch": 20.76923076923077, + "grad_norm": 19.203086853027344, + "learning_rate": 3.961538461538462e-05, + "loss": 0.5329, + "step": 7560 + }, + { + "epoch": 20.771978021978022, + "grad_norm": 7.964118003845215, + "learning_rate": 3.961401098901099e-05, + "loss": 0.2059, + "step": 7561 + }, + { + "epoch": 20.774725274725274, + "grad_norm": 8.784049987792969, + "learning_rate": 3.9612637362637365e-05, + "loss": 0.3107, + "step": 7562 + }, + { + "epoch": 20.77747252747253, + "grad_norm": 12.545493125915527, + "learning_rate": 3.9611263736263735e-05, + "loss": 0.3054, + "step": 7563 + }, + { + "epoch": 20.78021978021978, + "grad_norm": 13.36910343170166, + "learning_rate": 3.960989010989011e-05, + "loss": 0.6629, + "step": 7564 + }, + { + "epoch": 20.782967032967033, + "grad_norm": 10.165008544921875, + "learning_rate": 3.960851648351648e-05, + "loss": 0.2716, + "step": 7565 + }, + { + "epoch": 20.785714285714285, + "grad_norm": 9.013547897338867, + "learning_rate": 3.960714285714286e-05, + "loss": 0.2726, + "step": 7566 + }, + { + "epoch": 20.78846153846154, + "grad_norm": 13.100957870483398, + "learning_rate": 3.9605769230769235e-05, + "loss": 0.4173, + "step": 7567 + }, + { + "epoch": 20.791208791208792, + "grad_norm": 11.117305755615234, + "learning_rate": 3.9604395604395605e-05, + "loss": 0.3386, + "step": 7568 + }, + { + "epoch": 20.793956043956044, + "grad_norm": 10.922688484191895, + "learning_rate": 3.960302197802198e-05, + "loss": 0.269, + "step": 7569 + }, + { + "epoch": 20.796703296703296, + "grad_norm": 15.36463451385498, + "learning_rate": 3.960164835164835e-05, + "loss": 0.3627, + "step": 7570 + }, + { + "epoch": 20.79945054945055, + "grad_norm": 12.966739654541016, + "learning_rate": 3.960027472527473e-05, + "loss": 0.4214, + "step": 7571 + }, + { + "epoch": 20.802197802197803, + "grad_norm": 14.598536491394043, + "learning_rate": 3.9598901098901106e-05, + "loss": 0.5337, + "step": 7572 + }, + { + "epoch": 20.804945054945055, + "grad_norm": 13.317842483520508, + "learning_rate": 3.9597527472527476e-05, + "loss": 0.5595, + "step": 7573 + }, + { + "epoch": 20.807692307692307, + "grad_norm": 11.729814529418945, + "learning_rate": 3.959615384615385e-05, + "loss": 0.325, + "step": 7574 + }, + { + "epoch": 20.810439560439562, + "grad_norm": 14.804491996765137, + "learning_rate": 3.959478021978022e-05, + "loss": 0.4167, + "step": 7575 + }, + { + "epoch": 20.813186813186814, + "grad_norm": 9.79523754119873, + "learning_rate": 3.959340659340659e-05, + "loss": 0.2802, + "step": 7576 + }, + { + "epoch": 20.815934065934066, + "grad_norm": 16.37957000732422, + "learning_rate": 3.959203296703297e-05, + "loss": 0.3904, + "step": 7577 + }, + { + "epoch": 20.818681318681318, + "grad_norm": 14.204608917236328, + "learning_rate": 3.959065934065934e-05, + "loss": 0.3988, + "step": 7578 + }, + { + "epoch": 20.821428571428573, + "grad_norm": 13.67800235748291, + "learning_rate": 3.9589285714285716e-05, + "loss": 0.4793, + "step": 7579 + }, + { + "epoch": 20.824175824175825, + "grad_norm": 10.115933418273926, + "learning_rate": 3.9587912087912086e-05, + "loss": 0.1748, + "step": 7580 + }, + { + "epoch": 20.826923076923077, + "grad_norm": 21.551673889160156, + "learning_rate": 3.958653846153846e-05, + "loss": 0.6413, + "step": 7581 + }, + { + "epoch": 20.82967032967033, + "grad_norm": 6.378410339355469, + "learning_rate": 3.958516483516484e-05, + "loss": 0.174, + "step": 7582 + }, + { + "epoch": 20.832417582417584, + "grad_norm": 9.879277229309082, + "learning_rate": 3.958379120879121e-05, + "loss": 0.2059, + "step": 7583 + }, + { + "epoch": 20.835164835164836, + "grad_norm": 15.928099632263184, + "learning_rate": 3.958241758241759e-05, + "loss": 0.6715, + "step": 7584 + }, + { + "epoch": 20.837912087912088, + "grad_norm": 10.629929542541504, + "learning_rate": 3.958104395604396e-05, + "loss": 0.3117, + "step": 7585 + }, + { + "epoch": 20.84065934065934, + "grad_norm": 2.9767355918884277, + "learning_rate": 3.9579670329670334e-05, + "loss": 0.0612, + "step": 7586 + }, + { + "epoch": 20.843406593406595, + "grad_norm": 17.070314407348633, + "learning_rate": 3.957829670329671e-05, + "loss": 0.5181, + "step": 7587 + }, + { + "epoch": 20.846153846153847, + "grad_norm": 15.351325988769531, + "learning_rate": 3.957692307692308e-05, + "loss": 0.5465, + "step": 7588 + }, + { + "epoch": 20.8489010989011, + "grad_norm": 12.95417308807373, + "learning_rate": 3.957554945054946e-05, + "loss": 0.3584, + "step": 7589 + }, + { + "epoch": 20.85164835164835, + "grad_norm": 10.623530387878418, + "learning_rate": 3.957417582417583e-05, + "loss": 0.3652, + "step": 7590 + }, + { + "epoch": 20.854395604395606, + "grad_norm": 13.045985221862793, + "learning_rate": 3.95728021978022e-05, + "loss": 0.3116, + "step": 7591 + }, + { + "epoch": 20.857142857142858, + "grad_norm": 15.944500923156738, + "learning_rate": 3.9571428571428574e-05, + "loss": 0.3455, + "step": 7592 + }, + { + "epoch": 20.85989010989011, + "grad_norm": 2.4073009490966797, + "learning_rate": 3.9570054945054944e-05, + "loss": 0.0495, + "step": 7593 + }, + { + "epoch": 20.86263736263736, + "grad_norm": 13.688018798828125, + "learning_rate": 3.956868131868132e-05, + "loss": 0.285, + "step": 7594 + }, + { + "epoch": 20.865384615384617, + "grad_norm": 18.74803352355957, + "learning_rate": 3.956730769230769e-05, + "loss": 0.638, + "step": 7595 + }, + { + "epoch": 20.86813186813187, + "grad_norm": 20.973892211914062, + "learning_rate": 3.956593406593407e-05, + "loss": 0.5793, + "step": 7596 + }, + { + "epoch": 20.87087912087912, + "grad_norm": 6.43150520324707, + "learning_rate": 3.9564560439560444e-05, + "loss": 0.1289, + "step": 7597 + }, + { + "epoch": 20.873626373626372, + "grad_norm": 8.317821502685547, + "learning_rate": 3.9563186813186814e-05, + "loss": 0.1292, + "step": 7598 + }, + { + "epoch": 20.876373626373628, + "grad_norm": 7.035290718078613, + "learning_rate": 3.956181318681319e-05, + "loss": 0.1414, + "step": 7599 + }, + { + "epoch": 20.87912087912088, + "grad_norm": 18.33978271484375, + "learning_rate": 3.956043956043956e-05, + "loss": 0.4981, + "step": 7600 + }, + { + "epoch": 20.88186813186813, + "grad_norm": 18.46303939819336, + "learning_rate": 3.955906593406594e-05, + "loss": 0.4625, + "step": 7601 + }, + { + "epoch": 20.884615384615383, + "grad_norm": 15.595277786254883, + "learning_rate": 3.9557692307692315e-05, + "loss": 0.5166, + "step": 7602 + }, + { + "epoch": 20.88736263736264, + "grad_norm": 10.624954223632812, + "learning_rate": 3.9556318681318685e-05, + "loss": 0.3872, + "step": 7603 + }, + { + "epoch": 20.89010989010989, + "grad_norm": 16.46563148498535, + "learning_rate": 3.955494505494506e-05, + "loss": 0.3597, + "step": 7604 + }, + { + "epoch": 20.892857142857142, + "grad_norm": 8.47962474822998, + "learning_rate": 3.955357142857143e-05, + "loss": 0.1627, + "step": 7605 + }, + { + "epoch": 20.895604395604394, + "grad_norm": 8.00242805480957, + "learning_rate": 3.95521978021978e-05, + "loss": 0.1343, + "step": 7606 + }, + { + "epoch": 20.89835164835165, + "grad_norm": 7.202004432678223, + "learning_rate": 3.955082417582418e-05, + "loss": 0.2816, + "step": 7607 + }, + { + "epoch": 20.9010989010989, + "grad_norm": 11.809389114379883, + "learning_rate": 3.954945054945055e-05, + "loss": 0.3659, + "step": 7608 + }, + { + "epoch": 20.903846153846153, + "grad_norm": 13.679898262023926, + "learning_rate": 3.9548076923076925e-05, + "loss": 0.5321, + "step": 7609 + }, + { + "epoch": 20.906593406593405, + "grad_norm": 14.153528213500977, + "learning_rate": 3.9546703296703295e-05, + "loss": 0.4804, + "step": 7610 + }, + { + "epoch": 20.90934065934066, + "grad_norm": 8.001453399658203, + "learning_rate": 3.954532967032967e-05, + "loss": 0.2514, + "step": 7611 + }, + { + "epoch": 20.912087912087912, + "grad_norm": 16.864999771118164, + "learning_rate": 3.954395604395605e-05, + "loss": 0.8331, + "step": 7612 + }, + { + "epoch": 20.914835164835164, + "grad_norm": 7.473356246948242, + "learning_rate": 3.954258241758242e-05, + "loss": 0.2283, + "step": 7613 + }, + { + "epoch": 20.917582417582416, + "grad_norm": 11.420013427734375, + "learning_rate": 3.9541208791208796e-05, + "loss": 0.3628, + "step": 7614 + }, + { + "epoch": 20.92032967032967, + "grad_norm": 11.264405250549316, + "learning_rate": 3.9539835164835166e-05, + "loss": 0.4106, + "step": 7615 + }, + { + "epoch": 20.923076923076923, + "grad_norm": 10.601788520812988, + "learning_rate": 3.953846153846154e-05, + "loss": 0.2737, + "step": 7616 + }, + { + "epoch": 20.925824175824175, + "grad_norm": 19.03792381286621, + "learning_rate": 3.953708791208792e-05, + "loss": 0.8043, + "step": 7617 + }, + { + "epoch": 20.928571428571427, + "grad_norm": 17.511398315429688, + "learning_rate": 3.953571428571429e-05, + "loss": 0.504, + "step": 7618 + }, + { + "epoch": 20.931318681318682, + "grad_norm": 9.543642044067383, + "learning_rate": 3.9534340659340666e-05, + "loss": 0.1576, + "step": 7619 + }, + { + "epoch": 20.934065934065934, + "grad_norm": 6.7870049476623535, + "learning_rate": 3.9532967032967036e-05, + "loss": 0.1992, + "step": 7620 + }, + { + "epoch": 20.936813186813186, + "grad_norm": 7.903902530670166, + "learning_rate": 3.9531593406593406e-05, + "loss": 0.2734, + "step": 7621 + }, + { + "epoch": 20.939560439560438, + "grad_norm": 8.701632499694824, + "learning_rate": 3.9530219780219776e-05, + "loss": 0.1424, + "step": 7622 + }, + { + "epoch": 20.942307692307693, + "grad_norm": 16.608333587646484, + "learning_rate": 3.952884615384615e-05, + "loss": 0.529, + "step": 7623 + }, + { + "epoch": 20.945054945054945, + "grad_norm": 11.9808931350708, + "learning_rate": 3.952747252747253e-05, + "loss": 0.4166, + "step": 7624 + }, + { + "epoch": 20.947802197802197, + "grad_norm": 20.804981231689453, + "learning_rate": 3.95260989010989e-05, + "loss": 1.0335, + "step": 7625 + }, + { + "epoch": 20.95054945054945, + "grad_norm": 4.787008762359619, + "learning_rate": 3.9524725274725277e-05, + "loss": 0.1096, + "step": 7626 + }, + { + "epoch": 20.953296703296704, + "grad_norm": 7.544199466705322, + "learning_rate": 3.9523351648351647e-05, + "loss": 0.1464, + "step": 7627 + }, + { + "epoch": 20.956043956043956, + "grad_norm": 12.920008659362793, + "learning_rate": 3.952197802197802e-05, + "loss": 0.7742, + "step": 7628 + }, + { + "epoch": 20.958791208791208, + "grad_norm": 17.02153205871582, + "learning_rate": 3.95206043956044e-05, + "loss": 0.3757, + "step": 7629 + }, + { + "epoch": 20.96153846153846, + "grad_norm": 19.030502319335938, + "learning_rate": 3.951923076923077e-05, + "loss": 0.4934, + "step": 7630 + }, + { + "epoch": 20.964285714285715, + "grad_norm": 10.180602073669434, + "learning_rate": 3.951785714285715e-05, + "loss": 0.357, + "step": 7631 + }, + { + "epoch": 20.967032967032967, + "grad_norm": 12.712135314941406, + "learning_rate": 3.951648351648352e-05, + "loss": 0.3282, + "step": 7632 + }, + { + "epoch": 20.96978021978022, + "grad_norm": 12.27795124053955, + "learning_rate": 3.9515109890109894e-05, + "loss": 0.4101, + "step": 7633 + }, + { + "epoch": 20.97252747252747, + "grad_norm": 7.475817680358887, + "learning_rate": 3.951373626373627e-05, + "loss": 0.221, + "step": 7634 + }, + { + "epoch": 20.975274725274726, + "grad_norm": 9.96626091003418, + "learning_rate": 3.951236263736264e-05, + "loss": 0.2171, + "step": 7635 + }, + { + "epoch": 20.978021978021978, + "grad_norm": 12.70731258392334, + "learning_rate": 3.951098901098901e-05, + "loss": 0.3986, + "step": 7636 + }, + { + "epoch": 20.98076923076923, + "grad_norm": 13.136408805847168, + "learning_rate": 3.950961538461538e-05, + "loss": 0.4319, + "step": 7637 + }, + { + "epoch": 20.983516483516482, + "grad_norm": 7.116994857788086, + "learning_rate": 3.950824175824176e-05, + "loss": 0.114, + "step": 7638 + }, + { + "epoch": 20.986263736263737, + "grad_norm": 4.568376541137695, + "learning_rate": 3.9506868131868134e-05, + "loss": 0.0883, + "step": 7639 + }, + { + "epoch": 20.98901098901099, + "grad_norm": 10.293323516845703, + "learning_rate": 3.9505494505494504e-05, + "loss": 0.2086, + "step": 7640 + }, + { + "epoch": 20.99175824175824, + "grad_norm": 4.044374942779541, + "learning_rate": 3.950412087912088e-05, + "loss": 0.0925, + "step": 7641 + }, + { + "epoch": 20.994505494505496, + "grad_norm": 15.695619583129883, + "learning_rate": 3.950274725274725e-05, + "loss": 0.7782, + "step": 7642 + }, + { + "epoch": 20.997252747252748, + "grad_norm": 10.84719467163086, + "learning_rate": 3.950137362637363e-05, + "loss": 0.3892, + "step": 7643 + }, + { + "epoch": 21.0, + "grad_norm": 43.32843780517578, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.9243, + "step": 7644 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.6308539944903582, + "eval_f1": 0.5855600364771805, + "eval_f1_DuraRiadoRio_64x64": 0.32, + "eval_f1_Mole_64x64": 0.36363636363636365, + "eval_f1_Quebrado_64x64": 0.8089887640449438, + "eval_f1_RiadoRio_64x64": 0.5601750547045952, + "eval_f1_RioFechado_64x64": 0.875, + "eval_loss": 1.7097351551055908, + "eval_precision": 0.7730313838496318, + "eval_precision_DuraRiadoRio_64x64": 0.9032258064516129, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.6792452830188679, + "eval_precision_RiadoRio_64x64": 0.419672131147541, + "eval_precision_RioFechado_64x64": 0.863013698630137, + "eval_recall": 0.6292191746973066, + "eval_recall_DuraRiadoRio_64x64": 0.19444444444444445, + "eval_recall_Mole_64x64": 0.2222222222222222, + "eval_recall_Quebrado_64x64": 1.0, + "eval_recall_RiadoRio_64x64": 0.8421052631578947, + "eval_recall_RioFechado_64x64": 0.8873239436619719, + "eval_runtime": 1.7458, + "eval_samples_per_second": 415.865, + "eval_steps_per_second": 26.35, + "step": 7644 + }, + { + "epoch": 21.002747252747252, + "grad_norm": 9.233884811401367, + "learning_rate": 3.9498626373626375e-05, + "loss": 0.1608, + "step": 7645 + }, + { + "epoch": 21.005494505494507, + "grad_norm": 3.3834290504455566, + "learning_rate": 3.949725274725275e-05, + "loss": 0.0605, + "step": 7646 + }, + { + "epoch": 21.00824175824176, + "grad_norm": 5.101104736328125, + "learning_rate": 3.949587912087912e-05, + "loss": 0.0978, + "step": 7647 + }, + { + "epoch": 21.01098901098901, + "grad_norm": 6.015361785888672, + "learning_rate": 3.94945054945055e-05, + "loss": 0.0899, + "step": 7648 + }, + { + "epoch": 21.013736263736263, + "grad_norm": 11.081913948059082, + "learning_rate": 3.9493131868131875e-05, + "loss": 0.3593, + "step": 7649 + }, + { + "epoch": 21.016483516483518, + "grad_norm": 7.940384864807129, + "learning_rate": 3.9491758241758245e-05, + "loss": 0.1561, + "step": 7650 + }, + { + "epoch": 21.01923076923077, + "grad_norm": 8.6759672164917, + "learning_rate": 3.9490384615384615e-05, + "loss": 0.3133, + "step": 7651 + }, + { + "epoch": 21.021978021978022, + "grad_norm": 15.64369010925293, + "learning_rate": 3.9489010989010985e-05, + "loss": 0.3291, + "step": 7652 + }, + { + "epoch": 21.024725274725274, + "grad_norm": 8.283432960510254, + "learning_rate": 3.948763736263736e-05, + "loss": 0.2758, + "step": 7653 + }, + { + "epoch": 21.02747252747253, + "grad_norm": 8.643245697021484, + "learning_rate": 3.948626373626374e-05, + "loss": 0.2138, + "step": 7654 + }, + { + "epoch": 21.03021978021978, + "grad_norm": 10.914183616638184, + "learning_rate": 3.948489010989011e-05, + "loss": 0.2672, + "step": 7655 + }, + { + "epoch": 21.032967032967033, + "grad_norm": 19.766279220581055, + "learning_rate": 3.9483516483516486e-05, + "loss": 0.4887, + "step": 7656 + }, + { + "epoch": 21.035714285714285, + "grad_norm": 14.029861450195312, + "learning_rate": 3.9482142857142856e-05, + "loss": 0.6103, + "step": 7657 + }, + { + "epoch": 21.03846153846154, + "grad_norm": 8.530280113220215, + "learning_rate": 3.948076923076923e-05, + "loss": 0.2827, + "step": 7658 + }, + { + "epoch": 21.041208791208792, + "grad_norm": 11.067779541015625, + "learning_rate": 3.947939560439561e-05, + "loss": 0.3527, + "step": 7659 + }, + { + "epoch": 21.043956043956044, + "grad_norm": 15.046416282653809, + "learning_rate": 3.947802197802198e-05, + "loss": 0.5415, + "step": 7660 + }, + { + "epoch": 21.046703296703296, + "grad_norm": 11.301108360290527, + "learning_rate": 3.9476648351648356e-05, + "loss": 0.224, + "step": 7661 + }, + { + "epoch": 21.04945054945055, + "grad_norm": 14.759903907775879, + "learning_rate": 3.9475274725274726e-05, + "loss": 0.2669, + "step": 7662 + }, + { + "epoch": 21.052197802197803, + "grad_norm": 15.49983024597168, + "learning_rate": 3.94739010989011e-05, + "loss": 0.6278, + "step": 7663 + }, + { + "epoch": 21.054945054945055, + "grad_norm": 9.991177558898926, + "learning_rate": 3.947252747252748e-05, + "loss": 0.2092, + "step": 7664 + }, + { + "epoch": 21.057692307692307, + "grad_norm": 11.74490737915039, + "learning_rate": 3.947115384615385e-05, + "loss": 0.2373, + "step": 7665 + }, + { + "epoch": 21.060439560439562, + "grad_norm": 17.172121047973633, + "learning_rate": 3.946978021978022e-05, + "loss": 0.3181, + "step": 7666 + }, + { + "epoch": 21.063186813186814, + "grad_norm": 14.902891159057617, + "learning_rate": 3.946840659340659e-05, + "loss": 0.3825, + "step": 7667 + }, + { + "epoch": 21.065934065934066, + "grad_norm": 8.348053932189941, + "learning_rate": 3.9467032967032966e-05, + "loss": 0.238, + "step": 7668 + }, + { + "epoch": 21.068681318681318, + "grad_norm": 10.222410202026367, + "learning_rate": 3.946565934065934e-05, + "loss": 0.2713, + "step": 7669 + }, + { + "epoch": 21.071428571428573, + "grad_norm": 7.14251184463501, + "learning_rate": 3.946428571428571e-05, + "loss": 0.1069, + "step": 7670 + }, + { + "epoch": 21.074175824175825, + "grad_norm": 11.97569751739502, + "learning_rate": 3.946291208791209e-05, + "loss": 0.4329, + "step": 7671 + }, + { + "epoch": 21.076923076923077, + "grad_norm": 8.28690242767334, + "learning_rate": 3.946153846153846e-05, + "loss": 0.18, + "step": 7672 + }, + { + "epoch": 21.07967032967033, + "grad_norm": 16.49146842956543, + "learning_rate": 3.946016483516484e-05, + "loss": 0.4231, + "step": 7673 + }, + { + "epoch": 21.082417582417584, + "grad_norm": 20.503238677978516, + "learning_rate": 3.9458791208791214e-05, + "loss": 0.8793, + "step": 7674 + }, + { + "epoch": 21.085164835164836, + "grad_norm": 11.195940017700195, + "learning_rate": 3.9457417582417584e-05, + "loss": 0.3043, + "step": 7675 + }, + { + "epoch": 21.087912087912088, + "grad_norm": 10.75062084197998, + "learning_rate": 3.945604395604396e-05, + "loss": 0.2714, + "step": 7676 + }, + { + "epoch": 21.09065934065934, + "grad_norm": 20.97270965576172, + "learning_rate": 3.945467032967033e-05, + "loss": 0.8745, + "step": 7677 + }, + { + "epoch": 21.093406593406595, + "grad_norm": 23.71870994567871, + "learning_rate": 3.945329670329671e-05, + "loss": 1.0144, + "step": 7678 + }, + { + "epoch": 21.096153846153847, + "grad_norm": 8.60374927520752, + "learning_rate": 3.9451923076923084e-05, + "loss": 0.2131, + "step": 7679 + }, + { + "epoch": 21.0989010989011, + "grad_norm": 11.328445434570312, + "learning_rate": 3.9450549450549454e-05, + "loss": 0.3431, + "step": 7680 + }, + { + "epoch": 21.10164835164835, + "grad_norm": 10.274632453918457, + "learning_rate": 3.9449175824175824e-05, + "loss": 0.4817, + "step": 7681 + }, + { + "epoch": 21.104395604395606, + "grad_norm": 14.608348846435547, + "learning_rate": 3.9447802197802194e-05, + "loss": 0.4039, + "step": 7682 + }, + { + "epoch": 21.107142857142858, + "grad_norm": 18.061918258666992, + "learning_rate": 3.944642857142857e-05, + "loss": 0.5942, + "step": 7683 + }, + { + "epoch": 21.10989010989011, + "grad_norm": 7.253273963928223, + "learning_rate": 3.944505494505495e-05, + "loss": 0.2215, + "step": 7684 + }, + { + "epoch": 21.11263736263736, + "grad_norm": 10.562308311462402, + "learning_rate": 3.944368131868132e-05, + "loss": 0.2254, + "step": 7685 + }, + { + "epoch": 21.115384615384617, + "grad_norm": 10.305421829223633, + "learning_rate": 3.9442307692307695e-05, + "loss": 0.3626, + "step": 7686 + }, + { + "epoch": 21.11813186813187, + "grad_norm": 15.14384651184082, + "learning_rate": 3.9440934065934065e-05, + "loss": 0.3973, + "step": 7687 + }, + { + "epoch": 21.12087912087912, + "grad_norm": 14.819235801696777, + "learning_rate": 3.943956043956044e-05, + "loss": 0.525, + "step": 7688 + }, + { + "epoch": 21.123626373626372, + "grad_norm": 15.7346830368042, + "learning_rate": 3.943818681318682e-05, + "loss": 0.7336, + "step": 7689 + }, + { + "epoch": 21.126373626373628, + "grad_norm": 16.9801025390625, + "learning_rate": 3.943681318681319e-05, + "loss": 0.7775, + "step": 7690 + }, + { + "epoch": 21.12912087912088, + "grad_norm": 9.441671371459961, + "learning_rate": 3.9435439560439565e-05, + "loss": 0.2463, + "step": 7691 + }, + { + "epoch": 21.13186813186813, + "grad_norm": 8.34054183959961, + "learning_rate": 3.9434065934065935e-05, + "loss": 0.2089, + "step": 7692 + }, + { + "epoch": 21.134615384615383, + "grad_norm": 9.668362617492676, + "learning_rate": 3.943269230769231e-05, + "loss": 0.2126, + "step": 7693 + }, + { + "epoch": 21.13736263736264, + "grad_norm": 18.702392578125, + "learning_rate": 3.943131868131869e-05, + "loss": 0.7158, + "step": 7694 + }, + { + "epoch": 21.14010989010989, + "grad_norm": 13.286046028137207, + "learning_rate": 3.942994505494506e-05, + "loss": 0.4667, + "step": 7695 + }, + { + "epoch": 21.142857142857142, + "grad_norm": 10.973478317260742, + "learning_rate": 3.942857142857143e-05, + "loss": 0.2334, + "step": 7696 + }, + { + "epoch": 21.145604395604394, + "grad_norm": 22.618921279907227, + "learning_rate": 3.94271978021978e-05, + "loss": 0.8537, + "step": 7697 + }, + { + "epoch": 21.14835164835165, + "grad_norm": 10.503045082092285, + "learning_rate": 3.9425824175824175e-05, + "loss": 0.4009, + "step": 7698 + }, + { + "epoch": 21.1510989010989, + "grad_norm": 6.12162446975708, + "learning_rate": 3.942445054945055e-05, + "loss": 0.1333, + "step": 7699 + }, + { + "epoch": 21.153846153846153, + "grad_norm": 9.446958541870117, + "learning_rate": 3.942307692307692e-05, + "loss": 0.2435, + "step": 7700 + }, + { + "epoch": 21.156593406593405, + "grad_norm": 9.895821571350098, + "learning_rate": 3.94217032967033e-05, + "loss": 0.2805, + "step": 7701 + }, + { + "epoch": 21.15934065934066, + "grad_norm": 10.303709030151367, + "learning_rate": 3.942032967032967e-05, + "loss": 0.2776, + "step": 7702 + }, + { + "epoch": 21.162087912087912, + "grad_norm": 12.843785285949707, + "learning_rate": 3.9418956043956046e-05, + "loss": 0.3154, + "step": 7703 + }, + { + "epoch": 21.164835164835164, + "grad_norm": 6.3905487060546875, + "learning_rate": 3.941758241758242e-05, + "loss": 0.142, + "step": 7704 + }, + { + "epoch": 21.167582417582416, + "grad_norm": 21.727970123291016, + "learning_rate": 3.941620879120879e-05, + "loss": 0.5957, + "step": 7705 + }, + { + "epoch": 21.17032967032967, + "grad_norm": 12.824898719787598, + "learning_rate": 3.941483516483517e-05, + "loss": 0.4212, + "step": 7706 + }, + { + "epoch": 21.173076923076923, + "grad_norm": 18.61566925048828, + "learning_rate": 3.941346153846154e-05, + "loss": 0.3792, + "step": 7707 + }, + { + "epoch": 21.175824175824175, + "grad_norm": 10.661499977111816, + "learning_rate": 3.9412087912087916e-05, + "loss": 0.3051, + "step": 7708 + }, + { + "epoch": 21.178571428571427, + "grad_norm": 12.368117332458496, + "learning_rate": 3.941071428571429e-05, + "loss": 0.372, + "step": 7709 + }, + { + "epoch": 21.181318681318682, + "grad_norm": 4.279354572296143, + "learning_rate": 3.940934065934066e-05, + "loss": 0.1078, + "step": 7710 + }, + { + "epoch": 21.184065934065934, + "grad_norm": 20.438434600830078, + "learning_rate": 3.940796703296703e-05, + "loss": 0.7373, + "step": 7711 + }, + { + "epoch": 21.186813186813186, + "grad_norm": 6.131185054779053, + "learning_rate": 3.94065934065934e-05, + "loss": 0.12, + "step": 7712 + }, + { + "epoch": 21.189560439560438, + "grad_norm": 11.63831901550293, + "learning_rate": 3.940521978021978e-05, + "loss": 0.3444, + "step": 7713 + }, + { + "epoch": 21.192307692307693, + "grad_norm": 14.441641807556152, + "learning_rate": 3.940384615384616e-05, + "loss": 0.4159, + "step": 7714 + }, + { + "epoch": 21.195054945054945, + "grad_norm": 14.355294227600098, + "learning_rate": 3.940247252747253e-05, + "loss": 0.53, + "step": 7715 + }, + { + "epoch": 21.197802197802197, + "grad_norm": 11.902469635009766, + "learning_rate": 3.9401098901098904e-05, + "loss": 0.2562, + "step": 7716 + }, + { + "epoch": 21.20054945054945, + "grad_norm": 18.99232292175293, + "learning_rate": 3.9399725274725274e-05, + "loss": 0.5153, + "step": 7717 + }, + { + "epoch": 21.203296703296704, + "grad_norm": 17.457483291625977, + "learning_rate": 3.939835164835165e-05, + "loss": 0.5148, + "step": 7718 + }, + { + "epoch": 21.206043956043956, + "grad_norm": 6.83927059173584, + "learning_rate": 3.939697802197803e-05, + "loss": 0.1568, + "step": 7719 + }, + { + "epoch": 21.208791208791208, + "grad_norm": 15.798667907714844, + "learning_rate": 3.93956043956044e-05, + "loss": 0.4312, + "step": 7720 + }, + { + "epoch": 21.21153846153846, + "grad_norm": 8.472737312316895, + "learning_rate": 3.9394230769230774e-05, + "loss": 0.2209, + "step": 7721 + }, + { + "epoch": 21.214285714285715, + "grad_norm": 11.766992568969727, + "learning_rate": 3.9392857142857144e-05, + "loss": 0.225, + "step": 7722 + }, + { + "epoch": 21.217032967032967, + "grad_norm": 9.88040828704834, + "learning_rate": 3.939148351648352e-05, + "loss": 0.3084, + "step": 7723 + }, + { + "epoch": 21.21978021978022, + "grad_norm": 12.142316818237305, + "learning_rate": 3.93901098901099e-05, + "loss": 0.3726, + "step": 7724 + }, + { + "epoch": 21.22252747252747, + "grad_norm": 11.431536674499512, + "learning_rate": 3.938873626373627e-05, + "loss": 0.4946, + "step": 7725 + }, + { + "epoch": 21.225274725274726, + "grad_norm": 10.243082046508789, + "learning_rate": 3.938736263736264e-05, + "loss": 0.1578, + "step": 7726 + }, + { + "epoch": 21.228021978021978, + "grad_norm": 14.960838317871094, + "learning_rate": 3.938598901098901e-05, + "loss": 0.4569, + "step": 7727 + }, + { + "epoch": 21.23076923076923, + "grad_norm": 16.21436882019043, + "learning_rate": 3.9384615384615384e-05, + "loss": 0.4208, + "step": 7728 + }, + { + "epoch": 21.233516483516482, + "grad_norm": 12.267608642578125, + "learning_rate": 3.938324175824176e-05, + "loss": 0.3845, + "step": 7729 + }, + { + "epoch": 21.236263736263737, + "grad_norm": 18.191295623779297, + "learning_rate": 3.938186813186813e-05, + "loss": 0.7039, + "step": 7730 + }, + { + "epoch": 21.23901098901099, + "grad_norm": 10.167557716369629, + "learning_rate": 3.938049450549451e-05, + "loss": 0.3335, + "step": 7731 + }, + { + "epoch": 21.24175824175824, + "grad_norm": 20.508119583129883, + "learning_rate": 3.937912087912088e-05, + "loss": 0.7519, + "step": 7732 + }, + { + "epoch": 21.244505494505493, + "grad_norm": 12.51896858215332, + "learning_rate": 3.9377747252747255e-05, + "loss": 0.2905, + "step": 7733 + }, + { + "epoch": 21.247252747252748, + "grad_norm": 9.98288631439209, + "learning_rate": 3.937637362637363e-05, + "loss": 0.2035, + "step": 7734 + }, + { + "epoch": 21.25, + "grad_norm": 12.741758346557617, + "learning_rate": 3.9375e-05, + "loss": 0.4285, + "step": 7735 + }, + { + "epoch": 21.252747252747252, + "grad_norm": 11.511019706726074, + "learning_rate": 3.937362637362638e-05, + "loss": 0.3098, + "step": 7736 + }, + { + "epoch": 21.255494505494504, + "grad_norm": 15.431248664855957, + "learning_rate": 3.937225274725275e-05, + "loss": 0.4843, + "step": 7737 + }, + { + "epoch": 21.25824175824176, + "grad_norm": 10.321900367736816, + "learning_rate": 3.9370879120879125e-05, + "loss": 0.4346, + "step": 7738 + }, + { + "epoch": 21.26098901098901, + "grad_norm": 11.413106918334961, + "learning_rate": 3.9369505494505495e-05, + "loss": 0.399, + "step": 7739 + }, + { + "epoch": 21.263736263736263, + "grad_norm": 16.83232307434082, + "learning_rate": 3.936813186813187e-05, + "loss": 0.4419, + "step": 7740 + }, + { + "epoch": 21.266483516483518, + "grad_norm": 15.567425727844238, + "learning_rate": 3.936675824175824e-05, + "loss": 0.363, + "step": 7741 + }, + { + "epoch": 21.26923076923077, + "grad_norm": 7.031966686248779, + "learning_rate": 3.936538461538461e-05, + "loss": 0.2138, + "step": 7742 + }, + { + "epoch": 21.271978021978022, + "grad_norm": 8.972965240478516, + "learning_rate": 3.936401098901099e-05, + "loss": 0.3561, + "step": 7743 + }, + { + "epoch": 21.274725274725274, + "grad_norm": 25.51959800720215, + "learning_rate": 3.9362637362637366e-05, + "loss": 0.5616, + "step": 7744 + }, + { + "epoch": 21.27747252747253, + "grad_norm": 12.91791820526123, + "learning_rate": 3.9361263736263736e-05, + "loss": 0.4244, + "step": 7745 + }, + { + "epoch": 21.28021978021978, + "grad_norm": 6.196335315704346, + "learning_rate": 3.935989010989011e-05, + "loss": 0.1542, + "step": 7746 + }, + { + "epoch": 21.282967032967033, + "grad_norm": 16.381977081298828, + "learning_rate": 3.935851648351648e-05, + "loss": 0.5007, + "step": 7747 + }, + { + "epoch": 21.285714285714285, + "grad_norm": 12.141282081604004, + "learning_rate": 3.935714285714286e-05, + "loss": 0.3196, + "step": 7748 + }, + { + "epoch": 21.28846153846154, + "grad_norm": 11.702672958374023, + "learning_rate": 3.9355769230769236e-05, + "loss": 0.5427, + "step": 7749 + }, + { + "epoch": 21.291208791208792, + "grad_norm": 12.330574989318848, + "learning_rate": 3.9354395604395606e-05, + "loss": 0.3214, + "step": 7750 + }, + { + "epoch": 21.293956043956044, + "grad_norm": 8.08349609375, + "learning_rate": 3.935302197802198e-05, + "loss": 0.2078, + "step": 7751 + }, + { + "epoch": 21.296703296703296, + "grad_norm": 11.584790229797363, + "learning_rate": 3.935164835164835e-05, + "loss": 0.2789, + "step": 7752 + }, + { + "epoch": 21.29945054945055, + "grad_norm": 5.852667331695557, + "learning_rate": 3.935027472527473e-05, + "loss": 0.1133, + "step": 7753 + }, + { + "epoch": 21.302197802197803, + "grad_norm": 14.715546607971191, + "learning_rate": 3.93489010989011e-05, + "loss": 0.4722, + "step": 7754 + }, + { + "epoch": 21.304945054945055, + "grad_norm": 16.395275115966797, + "learning_rate": 3.9347527472527477e-05, + "loss": 0.5526, + "step": 7755 + }, + { + "epoch": 21.307692307692307, + "grad_norm": 15.24032974243164, + "learning_rate": 3.9346153846153847e-05, + "loss": 0.5747, + "step": 7756 + }, + { + "epoch": 21.310439560439562, + "grad_norm": 20.1490535736084, + "learning_rate": 3.9344780219780217e-05, + "loss": 0.5183, + "step": 7757 + }, + { + "epoch": 21.313186813186814, + "grad_norm": 12.947294235229492, + "learning_rate": 3.934340659340659e-05, + "loss": 0.4499, + "step": 7758 + }, + { + "epoch": 21.315934065934066, + "grad_norm": 13.110129356384277, + "learning_rate": 3.934203296703297e-05, + "loss": 0.3156, + "step": 7759 + }, + { + "epoch": 21.318681318681318, + "grad_norm": 11.223404884338379, + "learning_rate": 3.934065934065934e-05, + "loss": 0.3459, + "step": 7760 + }, + { + "epoch": 21.321428571428573, + "grad_norm": 5.46293830871582, + "learning_rate": 3.933928571428572e-05, + "loss": 0.0877, + "step": 7761 + }, + { + "epoch": 21.324175824175825, + "grad_norm": 15.31721305847168, + "learning_rate": 3.933791208791209e-05, + "loss": 0.5221, + "step": 7762 + }, + { + "epoch": 21.326923076923077, + "grad_norm": 6.795819282531738, + "learning_rate": 3.9336538461538464e-05, + "loss": 0.2072, + "step": 7763 + }, + { + "epoch": 21.32967032967033, + "grad_norm": 14.252470016479492, + "learning_rate": 3.933516483516484e-05, + "loss": 0.4847, + "step": 7764 + }, + { + "epoch": 21.332417582417584, + "grad_norm": 16.661396026611328, + "learning_rate": 3.933379120879121e-05, + "loss": 0.7469, + "step": 7765 + }, + { + "epoch": 21.335164835164836, + "grad_norm": 23.25759506225586, + "learning_rate": 3.933241758241759e-05, + "loss": 1.0207, + "step": 7766 + }, + { + "epoch": 21.337912087912088, + "grad_norm": 9.408174514770508, + "learning_rate": 3.933104395604396e-05, + "loss": 0.1491, + "step": 7767 + }, + { + "epoch": 21.34065934065934, + "grad_norm": 13.36827564239502, + "learning_rate": 3.9329670329670334e-05, + "loss": 0.3764, + "step": 7768 + }, + { + "epoch": 21.343406593406595, + "grad_norm": 14.019667625427246, + "learning_rate": 3.9328296703296704e-05, + "loss": 0.4176, + "step": 7769 + }, + { + "epoch": 21.346153846153847, + "grad_norm": 9.577125549316406, + "learning_rate": 3.932692307692308e-05, + "loss": 0.2834, + "step": 7770 + }, + { + "epoch": 21.3489010989011, + "grad_norm": 7.567320346832275, + "learning_rate": 3.932554945054945e-05, + "loss": 0.2061, + "step": 7771 + }, + { + "epoch": 21.35164835164835, + "grad_norm": 18.45162010192871, + "learning_rate": 3.932417582417582e-05, + "loss": 0.5761, + "step": 7772 + }, + { + "epoch": 21.354395604395606, + "grad_norm": 15.581897735595703, + "learning_rate": 3.93228021978022e-05, + "loss": 0.3225, + "step": 7773 + }, + { + "epoch": 21.357142857142858, + "grad_norm": 14.462833404541016, + "learning_rate": 3.9321428571428575e-05, + "loss": 0.4254, + "step": 7774 + }, + { + "epoch": 21.35989010989011, + "grad_norm": 19.39542579650879, + "learning_rate": 3.9320054945054945e-05, + "loss": 1.0953, + "step": 7775 + }, + { + "epoch": 21.36263736263736, + "grad_norm": 9.763177871704102, + "learning_rate": 3.931868131868132e-05, + "loss": 0.3258, + "step": 7776 + }, + { + "epoch": 21.365384615384617, + "grad_norm": 10.886159896850586, + "learning_rate": 3.931730769230769e-05, + "loss": 0.2153, + "step": 7777 + }, + { + "epoch": 21.36813186813187, + "grad_norm": 13.889119148254395, + "learning_rate": 3.931593406593407e-05, + "loss": 0.5052, + "step": 7778 + }, + { + "epoch": 21.37087912087912, + "grad_norm": 17.683364868164062, + "learning_rate": 3.9314560439560445e-05, + "loss": 0.6642, + "step": 7779 + }, + { + "epoch": 21.373626373626372, + "grad_norm": 8.911332130432129, + "learning_rate": 3.9313186813186815e-05, + "loss": 0.2448, + "step": 7780 + }, + { + "epoch": 21.376373626373628, + "grad_norm": 15.612540245056152, + "learning_rate": 3.931181318681319e-05, + "loss": 0.457, + "step": 7781 + }, + { + "epoch": 21.37912087912088, + "grad_norm": 15.683900833129883, + "learning_rate": 3.931043956043956e-05, + "loss": 0.4278, + "step": 7782 + }, + { + "epoch": 21.38186813186813, + "grad_norm": 10.1547269821167, + "learning_rate": 3.930906593406594e-05, + "loss": 0.2483, + "step": 7783 + }, + { + "epoch": 21.384615384615383, + "grad_norm": 14.034624099731445, + "learning_rate": 3.930769230769231e-05, + "loss": 0.3, + "step": 7784 + }, + { + "epoch": 21.38736263736264, + "grad_norm": 15.861090660095215, + "learning_rate": 3.9306318681318686e-05, + "loss": 0.5506, + "step": 7785 + }, + { + "epoch": 21.39010989010989, + "grad_norm": 8.129640579223633, + "learning_rate": 3.9304945054945056e-05, + "loss": 0.1796, + "step": 7786 + }, + { + "epoch": 21.392857142857142, + "grad_norm": 20.119094848632812, + "learning_rate": 3.9303571428571426e-05, + "loss": 0.6325, + "step": 7787 + }, + { + "epoch": 21.395604395604394, + "grad_norm": 12.261515617370605, + "learning_rate": 3.93021978021978e-05, + "loss": 0.4154, + "step": 7788 + }, + { + "epoch": 21.39835164835165, + "grad_norm": 13.75890064239502, + "learning_rate": 3.930082417582418e-05, + "loss": 0.5976, + "step": 7789 + }, + { + "epoch": 21.4010989010989, + "grad_norm": 19.991628646850586, + "learning_rate": 3.929945054945055e-05, + "loss": 0.708, + "step": 7790 + }, + { + "epoch": 21.403846153846153, + "grad_norm": 17.773466110229492, + "learning_rate": 3.9298076923076926e-05, + "loss": 0.5304, + "step": 7791 + }, + { + "epoch": 21.406593406593405, + "grad_norm": 13.518266677856445, + "learning_rate": 3.9296703296703296e-05, + "loss": 0.4283, + "step": 7792 + }, + { + "epoch": 21.40934065934066, + "grad_norm": 10.736621856689453, + "learning_rate": 3.929532967032967e-05, + "loss": 0.4361, + "step": 7793 + }, + { + "epoch": 21.412087912087912, + "grad_norm": 13.262165069580078, + "learning_rate": 3.929395604395605e-05, + "loss": 0.3589, + "step": 7794 + }, + { + "epoch": 21.414835164835164, + "grad_norm": 6.588325023651123, + "learning_rate": 3.929258241758242e-05, + "loss": 0.1577, + "step": 7795 + }, + { + "epoch": 21.417582417582416, + "grad_norm": 15.606049537658691, + "learning_rate": 3.9291208791208796e-05, + "loss": 0.4971, + "step": 7796 + }, + { + "epoch": 21.42032967032967, + "grad_norm": 17.824737548828125, + "learning_rate": 3.9289835164835166e-05, + "loss": 0.8022, + "step": 7797 + }, + { + "epoch": 21.423076923076923, + "grad_norm": 17.513282775878906, + "learning_rate": 3.928846153846154e-05, + "loss": 0.783, + "step": 7798 + }, + { + "epoch": 21.425824175824175, + "grad_norm": 16.584735870361328, + "learning_rate": 3.928708791208791e-05, + "loss": 0.5676, + "step": 7799 + }, + { + "epoch": 21.428571428571427, + "grad_norm": 19.090560913085938, + "learning_rate": 3.928571428571429e-05, + "loss": 0.6026, + "step": 7800 + }, + { + "epoch": 21.431318681318682, + "grad_norm": 20.85559844970703, + "learning_rate": 3.928434065934066e-05, + "loss": 0.8092, + "step": 7801 + }, + { + "epoch": 21.434065934065934, + "grad_norm": 18.160634994506836, + "learning_rate": 3.928296703296703e-05, + "loss": 0.5764, + "step": 7802 + }, + { + "epoch": 21.436813186813186, + "grad_norm": 10.340712547302246, + "learning_rate": 3.928159340659341e-05, + "loss": 0.343, + "step": 7803 + }, + { + "epoch": 21.439560439560438, + "grad_norm": 7.894662380218506, + "learning_rate": 3.9280219780219784e-05, + "loss": 0.174, + "step": 7804 + }, + { + "epoch": 21.442307692307693, + "grad_norm": 18.39068031311035, + "learning_rate": 3.9278846153846154e-05, + "loss": 0.634, + "step": 7805 + }, + { + "epoch": 21.445054945054945, + "grad_norm": 11.781664848327637, + "learning_rate": 3.927747252747253e-05, + "loss": 0.3149, + "step": 7806 + }, + { + "epoch": 21.447802197802197, + "grad_norm": 13.390726089477539, + "learning_rate": 3.92760989010989e-05, + "loss": 0.3373, + "step": 7807 + }, + { + "epoch": 21.45054945054945, + "grad_norm": 18.23586654663086, + "learning_rate": 3.927472527472528e-05, + "loss": 0.524, + "step": 7808 + }, + { + "epoch": 21.453296703296704, + "grad_norm": 14.667248725891113, + "learning_rate": 3.9273351648351654e-05, + "loss": 0.2956, + "step": 7809 + }, + { + "epoch": 21.456043956043956, + "grad_norm": 21.9932861328125, + "learning_rate": 3.9271978021978024e-05, + "loss": 0.7753, + "step": 7810 + }, + { + "epoch": 21.458791208791208, + "grad_norm": 15.38038444519043, + "learning_rate": 3.92706043956044e-05, + "loss": 0.4294, + "step": 7811 + }, + { + "epoch": 21.46153846153846, + "grad_norm": 11.798625946044922, + "learning_rate": 3.926923076923077e-05, + "loss": 0.2248, + "step": 7812 + }, + { + "epoch": 21.464285714285715, + "grad_norm": 15.578651428222656, + "learning_rate": 3.926785714285715e-05, + "loss": 0.4868, + "step": 7813 + }, + { + "epoch": 21.467032967032967, + "grad_norm": 12.98108959197998, + "learning_rate": 3.926648351648352e-05, + "loss": 0.36, + "step": 7814 + }, + { + "epoch": 21.46978021978022, + "grad_norm": 16.515535354614258, + "learning_rate": 3.9265109890109895e-05, + "loss": 0.6735, + "step": 7815 + }, + { + "epoch": 21.47252747252747, + "grad_norm": 15.854629516601562, + "learning_rate": 3.9263736263736265e-05, + "loss": 0.5062, + "step": 7816 + }, + { + "epoch": 21.475274725274726, + "grad_norm": 17.465497970581055, + "learning_rate": 3.9262362637362635e-05, + "loss": 0.3855, + "step": 7817 + }, + { + "epoch": 21.478021978021978, + "grad_norm": 14.393016815185547, + "learning_rate": 3.926098901098901e-05, + "loss": 0.4517, + "step": 7818 + }, + { + "epoch": 21.48076923076923, + "grad_norm": 14.216695785522461, + "learning_rate": 3.925961538461539e-05, + "loss": 0.6024, + "step": 7819 + }, + { + "epoch": 21.483516483516482, + "grad_norm": 10.508305549621582, + "learning_rate": 3.925824175824176e-05, + "loss": 0.4113, + "step": 7820 + }, + { + "epoch": 21.486263736263737, + "grad_norm": 26.60739517211914, + "learning_rate": 3.9256868131868135e-05, + "loss": 0.8951, + "step": 7821 + }, + { + "epoch": 21.48901098901099, + "grad_norm": 6.327914714813232, + "learning_rate": 3.9255494505494505e-05, + "loss": 0.2253, + "step": 7822 + }, + { + "epoch": 21.49175824175824, + "grad_norm": 11.406349182128906, + "learning_rate": 3.925412087912088e-05, + "loss": 0.4037, + "step": 7823 + }, + { + "epoch": 21.494505494505496, + "grad_norm": 10.233658790588379, + "learning_rate": 3.925274725274726e-05, + "loss": 0.3784, + "step": 7824 + }, + { + "epoch": 21.497252747252748, + "grad_norm": 10.42732048034668, + "learning_rate": 3.925137362637363e-05, + "loss": 0.2558, + "step": 7825 + }, + { + "epoch": 21.5, + "grad_norm": 18.04046058654785, + "learning_rate": 3.9250000000000005e-05, + "loss": 0.5032, + "step": 7826 + }, + { + "epoch": 21.502747252747252, + "grad_norm": 12.234116554260254, + "learning_rate": 3.9248626373626375e-05, + "loss": 0.393, + "step": 7827 + }, + { + "epoch": 21.505494505494504, + "grad_norm": 19.190135955810547, + "learning_rate": 3.924725274725275e-05, + "loss": 0.5835, + "step": 7828 + }, + { + "epoch": 21.50824175824176, + "grad_norm": 11.231039047241211, + "learning_rate": 3.924587912087912e-05, + "loss": 0.2394, + "step": 7829 + }, + { + "epoch": 21.51098901098901, + "grad_norm": 19.363773345947266, + "learning_rate": 3.92445054945055e-05, + "loss": 0.6793, + "step": 7830 + }, + { + "epoch": 21.513736263736263, + "grad_norm": 8.446717262268066, + "learning_rate": 3.924313186813187e-05, + "loss": 0.2068, + "step": 7831 + }, + { + "epoch": 21.516483516483518, + "grad_norm": 18.699979782104492, + "learning_rate": 3.924175824175824e-05, + "loss": 0.7747, + "step": 7832 + }, + { + "epoch": 21.51923076923077, + "grad_norm": 11.123977661132812, + "learning_rate": 3.9240384615384616e-05, + "loss": 0.295, + "step": 7833 + }, + { + "epoch": 21.521978021978022, + "grad_norm": 13.572368621826172, + "learning_rate": 3.923901098901099e-05, + "loss": 0.4718, + "step": 7834 + }, + { + "epoch": 21.524725274725274, + "grad_norm": 16.623472213745117, + "learning_rate": 3.923763736263736e-05, + "loss": 0.4317, + "step": 7835 + }, + { + "epoch": 21.52747252747253, + "grad_norm": 10.631824493408203, + "learning_rate": 3.923626373626374e-05, + "loss": 0.4191, + "step": 7836 + }, + { + "epoch": 21.53021978021978, + "grad_norm": 4.769809722900391, + "learning_rate": 3.923489010989011e-05, + "loss": 0.1202, + "step": 7837 + }, + { + "epoch": 21.532967032967033, + "grad_norm": 12.634805679321289, + "learning_rate": 3.9233516483516486e-05, + "loss": 0.5546, + "step": 7838 + }, + { + "epoch": 21.535714285714285, + "grad_norm": 9.798531532287598, + "learning_rate": 3.923214285714286e-05, + "loss": 0.1918, + "step": 7839 + }, + { + "epoch": 21.53846153846154, + "grad_norm": 21.02185821533203, + "learning_rate": 3.923076923076923e-05, + "loss": 1.0216, + "step": 7840 + }, + { + "epoch": 21.541208791208792, + "grad_norm": 5.927494525909424, + "learning_rate": 3.922939560439561e-05, + "loss": 0.1013, + "step": 7841 + }, + { + "epoch": 21.543956043956044, + "grad_norm": 11.625757217407227, + "learning_rate": 3.922802197802198e-05, + "loss": 0.2337, + "step": 7842 + }, + { + "epoch": 21.546703296703296, + "grad_norm": 12.440075874328613, + "learning_rate": 3.922664835164836e-05, + "loss": 0.4196, + "step": 7843 + }, + { + "epoch": 21.54945054945055, + "grad_norm": 8.451716423034668, + "learning_rate": 3.922527472527473e-05, + "loss": 0.1232, + "step": 7844 + }, + { + "epoch": 21.552197802197803, + "grad_norm": 14.861629486083984, + "learning_rate": 3.9223901098901103e-05, + "loss": 0.3774, + "step": 7845 + }, + { + "epoch": 21.554945054945055, + "grad_norm": 16.235597610473633, + "learning_rate": 3.9222527472527474e-05, + "loss": 0.6313, + "step": 7846 + }, + { + "epoch": 21.557692307692307, + "grad_norm": 9.316988945007324, + "learning_rate": 3.9221153846153844e-05, + "loss": 0.2442, + "step": 7847 + }, + { + "epoch": 21.560439560439562, + "grad_norm": 12.885857582092285, + "learning_rate": 3.921978021978022e-05, + "loss": 0.4357, + "step": 7848 + }, + { + "epoch": 21.563186813186814, + "grad_norm": 11.596722602844238, + "learning_rate": 3.921840659340659e-05, + "loss": 0.268, + "step": 7849 + }, + { + "epoch": 21.565934065934066, + "grad_norm": 11.534013748168945, + "learning_rate": 3.921703296703297e-05, + "loss": 0.2448, + "step": 7850 + }, + { + "epoch": 21.568681318681318, + "grad_norm": 13.956825256347656, + "learning_rate": 3.9215659340659344e-05, + "loss": 0.3078, + "step": 7851 + }, + { + "epoch": 21.571428571428573, + "grad_norm": 11.11393928527832, + "learning_rate": 3.9214285714285714e-05, + "loss": 0.2952, + "step": 7852 + }, + { + "epoch": 21.574175824175825, + "grad_norm": 11.257379531860352, + "learning_rate": 3.921291208791209e-05, + "loss": 0.2413, + "step": 7853 + }, + { + "epoch": 21.576923076923077, + "grad_norm": 11.8925142288208, + "learning_rate": 3.921153846153846e-05, + "loss": 0.3436, + "step": 7854 + }, + { + "epoch": 21.57967032967033, + "grad_norm": 15.225678443908691, + "learning_rate": 3.921016483516484e-05, + "loss": 0.5032, + "step": 7855 + }, + { + "epoch": 21.582417582417584, + "grad_norm": 8.573869705200195, + "learning_rate": 3.9208791208791214e-05, + "loss": 0.2898, + "step": 7856 + }, + { + "epoch": 21.585164835164836, + "grad_norm": 27.015493392944336, + "learning_rate": 3.9207417582417584e-05, + "loss": 0.9053, + "step": 7857 + }, + { + "epoch": 21.587912087912088, + "grad_norm": 11.97598934173584, + "learning_rate": 3.920604395604396e-05, + "loss": 0.3599, + "step": 7858 + }, + { + "epoch": 21.59065934065934, + "grad_norm": 10.782596588134766, + "learning_rate": 3.920467032967033e-05, + "loss": 0.2669, + "step": 7859 + }, + { + "epoch": 21.593406593406595, + "grad_norm": 13.728286743164062, + "learning_rate": 3.920329670329671e-05, + "loss": 0.4102, + "step": 7860 + }, + { + "epoch": 21.596153846153847, + "grad_norm": 17.965919494628906, + "learning_rate": 3.920192307692308e-05, + "loss": 0.4664, + "step": 7861 + }, + { + "epoch": 21.5989010989011, + "grad_norm": 12.27483081817627, + "learning_rate": 3.920054945054945e-05, + "loss": 0.3282, + "step": 7862 + }, + { + "epoch": 21.60164835164835, + "grad_norm": 18.07815933227539, + "learning_rate": 3.9199175824175825e-05, + "loss": 0.5172, + "step": 7863 + }, + { + "epoch": 21.604395604395606, + "grad_norm": 13.110086441040039, + "learning_rate": 3.9197802197802195e-05, + "loss": 0.4682, + "step": 7864 + }, + { + "epoch": 21.607142857142858, + "grad_norm": 17.951913833618164, + "learning_rate": 3.919642857142857e-05, + "loss": 0.7439, + "step": 7865 + }, + { + "epoch": 21.60989010989011, + "grad_norm": 20.00688934326172, + "learning_rate": 3.919505494505495e-05, + "loss": 0.9065, + "step": 7866 + }, + { + "epoch": 21.61263736263736, + "grad_norm": 12.682478904724121, + "learning_rate": 3.919368131868132e-05, + "loss": 0.4177, + "step": 7867 + }, + { + "epoch": 21.615384615384617, + "grad_norm": 17.196517944335938, + "learning_rate": 3.9192307692307695e-05, + "loss": 0.4652, + "step": 7868 + }, + { + "epoch": 21.61813186813187, + "grad_norm": 13.285714149475098, + "learning_rate": 3.9190934065934065e-05, + "loss": 0.2879, + "step": 7869 + }, + { + "epoch": 21.62087912087912, + "grad_norm": 18.577009201049805, + "learning_rate": 3.918956043956044e-05, + "loss": 0.7843, + "step": 7870 + }, + { + "epoch": 21.623626373626372, + "grad_norm": 11.07856559753418, + "learning_rate": 3.918818681318682e-05, + "loss": 0.28, + "step": 7871 + }, + { + "epoch": 21.626373626373628, + "grad_norm": 11.297945976257324, + "learning_rate": 3.918681318681319e-05, + "loss": 0.285, + "step": 7872 + }, + { + "epoch": 21.62912087912088, + "grad_norm": 21.25779914855957, + "learning_rate": 3.9185439560439566e-05, + "loss": 0.5482, + "step": 7873 + }, + { + "epoch": 21.63186813186813, + "grad_norm": 15.500907897949219, + "learning_rate": 3.9184065934065936e-05, + "loss": 0.4433, + "step": 7874 + }, + { + "epoch": 21.634615384615383, + "grad_norm": 7.517284870147705, + "learning_rate": 3.918269230769231e-05, + "loss": 0.182, + "step": 7875 + }, + { + "epoch": 21.63736263736264, + "grad_norm": 11.237554550170898, + "learning_rate": 3.918131868131868e-05, + "loss": 0.2883, + "step": 7876 + }, + { + "epoch": 21.64010989010989, + "grad_norm": 26.748065948486328, + "learning_rate": 3.917994505494505e-05, + "loss": 1.2274, + "step": 7877 + }, + { + "epoch": 21.642857142857142, + "grad_norm": 16.190967559814453, + "learning_rate": 3.917857142857143e-05, + "loss": 0.6019, + "step": 7878 + }, + { + "epoch": 21.645604395604394, + "grad_norm": 7.281632423400879, + "learning_rate": 3.91771978021978e-05, + "loss": 0.2966, + "step": 7879 + }, + { + "epoch": 21.64835164835165, + "grad_norm": 16.55240821838379, + "learning_rate": 3.9175824175824176e-05, + "loss": 0.465, + "step": 7880 + }, + { + "epoch": 21.6510989010989, + "grad_norm": 10.03466510772705, + "learning_rate": 3.917445054945055e-05, + "loss": 0.2335, + "step": 7881 + }, + { + "epoch": 21.653846153846153, + "grad_norm": 14.99110221862793, + "learning_rate": 3.917307692307692e-05, + "loss": 0.5763, + "step": 7882 + }, + { + "epoch": 21.656593406593405, + "grad_norm": 10.48010540008545, + "learning_rate": 3.91717032967033e-05, + "loss": 0.313, + "step": 7883 + }, + { + "epoch": 21.65934065934066, + "grad_norm": 16.685028076171875, + "learning_rate": 3.917032967032967e-05, + "loss": 0.633, + "step": 7884 + }, + { + "epoch": 21.662087912087912, + "grad_norm": 15.09060287475586, + "learning_rate": 3.9168956043956047e-05, + "loss": 0.5606, + "step": 7885 + }, + { + "epoch": 21.664835164835164, + "grad_norm": 9.29466724395752, + "learning_rate": 3.916758241758242e-05, + "loss": 0.1934, + "step": 7886 + }, + { + "epoch": 21.667582417582416, + "grad_norm": 13.368585586547852, + "learning_rate": 3.916620879120879e-05, + "loss": 0.5396, + "step": 7887 + }, + { + "epoch": 21.67032967032967, + "grad_norm": 7.004639625549316, + "learning_rate": 3.916483516483517e-05, + "loss": 0.2364, + "step": 7888 + }, + { + "epoch": 21.673076923076923, + "grad_norm": 8.489847183227539, + "learning_rate": 3.916346153846154e-05, + "loss": 0.4603, + "step": 7889 + }, + { + "epoch": 21.675824175824175, + "grad_norm": 12.716230392456055, + "learning_rate": 3.916208791208792e-05, + "loss": 0.4796, + "step": 7890 + }, + { + "epoch": 21.678571428571427, + "grad_norm": 14.138655662536621, + "learning_rate": 3.916071428571429e-05, + "loss": 0.3825, + "step": 7891 + }, + { + "epoch": 21.681318681318682, + "grad_norm": 10.875754356384277, + "learning_rate": 3.915934065934066e-05, + "loss": 0.2569, + "step": 7892 + }, + { + "epoch": 21.684065934065934, + "grad_norm": 13.442761421203613, + "learning_rate": 3.9157967032967034e-05, + "loss": 0.315, + "step": 7893 + }, + { + "epoch": 21.686813186813186, + "grad_norm": 10.97672176361084, + "learning_rate": 3.9156593406593404e-05, + "loss": 0.1933, + "step": 7894 + }, + { + "epoch": 21.689560439560438, + "grad_norm": 18.285764694213867, + "learning_rate": 3.915521978021978e-05, + "loss": 0.7119, + "step": 7895 + }, + { + "epoch": 21.692307692307693, + "grad_norm": 11.505475997924805, + "learning_rate": 3.915384615384616e-05, + "loss": 0.3085, + "step": 7896 + }, + { + "epoch": 21.695054945054945, + "grad_norm": 9.123727798461914, + "learning_rate": 3.915247252747253e-05, + "loss": 0.2269, + "step": 7897 + }, + { + "epoch": 21.697802197802197, + "grad_norm": 16.5219783782959, + "learning_rate": 3.9151098901098904e-05, + "loss": 0.3932, + "step": 7898 + }, + { + "epoch": 21.70054945054945, + "grad_norm": 12.000669479370117, + "learning_rate": 3.9149725274725274e-05, + "loss": 0.4173, + "step": 7899 + }, + { + "epoch": 21.703296703296704, + "grad_norm": 15.846819877624512, + "learning_rate": 3.914835164835165e-05, + "loss": 0.7659, + "step": 7900 + }, + { + "epoch": 21.706043956043956, + "grad_norm": 6.086513042449951, + "learning_rate": 3.914697802197803e-05, + "loss": 0.1534, + "step": 7901 + }, + { + "epoch": 21.708791208791208, + "grad_norm": 13.94480037689209, + "learning_rate": 3.91456043956044e-05, + "loss": 0.4326, + "step": 7902 + }, + { + "epoch": 21.71153846153846, + "grad_norm": 14.455192565917969, + "learning_rate": 3.9144230769230775e-05, + "loss": 0.4104, + "step": 7903 + }, + { + "epoch": 21.714285714285715, + "grad_norm": 18.395219802856445, + "learning_rate": 3.9142857142857145e-05, + "loss": 0.5699, + "step": 7904 + }, + { + "epoch": 21.717032967032967, + "grad_norm": 11.744175910949707, + "learning_rate": 3.914148351648352e-05, + "loss": 0.4004, + "step": 7905 + }, + { + "epoch": 21.71978021978022, + "grad_norm": 4.246826171875, + "learning_rate": 3.914010989010989e-05, + "loss": 0.132, + "step": 7906 + }, + { + "epoch": 21.72252747252747, + "grad_norm": 14.756223678588867, + "learning_rate": 3.913873626373626e-05, + "loss": 0.4801, + "step": 7907 + }, + { + "epoch": 21.725274725274726, + "grad_norm": 6.980513572692871, + "learning_rate": 3.913736263736264e-05, + "loss": 0.1803, + "step": 7908 + }, + { + "epoch": 21.728021978021978, + "grad_norm": 8.601231575012207, + "learning_rate": 3.913598901098901e-05, + "loss": 0.259, + "step": 7909 + }, + { + "epoch": 21.73076923076923, + "grad_norm": 11.074633598327637, + "learning_rate": 3.9134615384615385e-05, + "loss": 0.2811, + "step": 7910 + }, + { + "epoch": 21.733516483516482, + "grad_norm": 14.69363784790039, + "learning_rate": 3.913324175824176e-05, + "loss": 0.7313, + "step": 7911 + }, + { + "epoch": 21.736263736263737, + "grad_norm": 16.746997833251953, + "learning_rate": 3.913186813186813e-05, + "loss": 0.3934, + "step": 7912 + }, + { + "epoch": 21.73901098901099, + "grad_norm": 9.699884414672852, + "learning_rate": 3.913049450549451e-05, + "loss": 0.2843, + "step": 7913 + }, + { + "epoch": 21.74175824175824, + "grad_norm": 8.0118408203125, + "learning_rate": 3.912912087912088e-05, + "loss": 0.2226, + "step": 7914 + }, + { + "epoch": 21.744505494505496, + "grad_norm": 11.368861198425293, + "learning_rate": 3.9127747252747256e-05, + "loss": 0.2879, + "step": 7915 + }, + { + "epoch": 21.747252747252748, + "grad_norm": 20.066253662109375, + "learning_rate": 3.912637362637363e-05, + "loss": 0.9066, + "step": 7916 + }, + { + "epoch": 21.75, + "grad_norm": 14.793702125549316, + "learning_rate": 3.9125e-05, + "loss": 0.4744, + "step": 7917 + }, + { + "epoch": 21.752747252747252, + "grad_norm": 8.844145774841309, + "learning_rate": 3.912362637362638e-05, + "loss": 0.2211, + "step": 7918 + }, + { + "epoch": 21.755494505494504, + "grad_norm": 8.287795066833496, + "learning_rate": 3.912225274725275e-05, + "loss": 0.2034, + "step": 7919 + }, + { + "epoch": 21.75824175824176, + "grad_norm": 22.416152954101562, + "learning_rate": 3.912087912087912e-05, + "loss": 0.855, + "step": 7920 + }, + { + "epoch": 21.76098901098901, + "grad_norm": 9.22478199005127, + "learning_rate": 3.9119505494505496e-05, + "loss": 0.2896, + "step": 7921 + }, + { + "epoch": 21.763736263736263, + "grad_norm": 3.251513957977295, + "learning_rate": 3.9118131868131866e-05, + "loss": 0.0669, + "step": 7922 + }, + { + "epoch": 21.766483516483518, + "grad_norm": 7.275275230407715, + "learning_rate": 3.911675824175824e-05, + "loss": 0.1365, + "step": 7923 + }, + { + "epoch": 21.76923076923077, + "grad_norm": 8.696962356567383, + "learning_rate": 3.911538461538461e-05, + "loss": 0.2752, + "step": 7924 + }, + { + "epoch": 21.771978021978022, + "grad_norm": 11.51479721069336, + "learning_rate": 3.911401098901099e-05, + "loss": 0.2419, + "step": 7925 + }, + { + "epoch": 21.774725274725274, + "grad_norm": 9.51103401184082, + "learning_rate": 3.9112637362637366e-05, + "loss": 0.2088, + "step": 7926 + }, + { + "epoch": 21.77747252747253, + "grad_norm": 6.622183799743652, + "learning_rate": 3.9111263736263736e-05, + "loss": 0.1727, + "step": 7927 + }, + { + "epoch": 21.78021978021978, + "grad_norm": 19.639921188354492, + "learning_rate": 3.910989010989011e-05, + "loss": 0.6311, + "step": 7928 + }, + { + "epoch": 21.782967032967033, + "grad_norm": 9.390270233154297, + "learning_rate": 3.910851648351648e-05, + "loss": 0.2055, + "step": 7929 + }, + { + "epoch": 21.785714285714285, + "grad_norm": 14.029397964477539, + "learning_rate": 3.910714285714286e-05, + "loss": 0.3836, + "step": 7930 + }, + { + "epoch": 21.78846153846154, + "grad_norm": 9.241633415222168, + "learning_rate": 3.910576923076924e-05, + "loss": 0.247, + "step": 7931 + }, + { + "epoch": 21.791208791208792, + "grad_norm": 20.863035202026367, + "learning_rate": 3.910439560439561e-05, + "loss": 0.6097, + "step": 7932 + }, + { + "epoch": 21.793956043956044, + "grad_norm": 8.230416297912598, + "learning_rate": 3.9103021978021984e-05, + "loss": 0.2916, + "step": 7933 + }, + { + "epoch": 21.796703296703296, + "grad_norm": 11.961427688598633, + "learning_rate": 3.9101648351648354e-05, + "loss": 0.2644, + "step": 7934 + }, + { + "epoch": 21.79945054945055, + "grad_norm": 8.650546073913574, + "learning_rate": 3.9100274725274724e-05, + "loss": 0.2302, + "step": 7935 + }, + { + "epoch": 21.802197802197803, + "grad_norm": 19.94657325744629, + "learning_rate": 3.90989010989011e-05, + "loss": 0.441, + "step": 7936 + }, + { + "epoch": 21.804945054945055, + "grad_norm": 13.983583450317383, + "learning_rate": 3.909752747252747e-05, + "loss": 0.5334, + "step": 7937 + }, + { + "epoch": 21.807692307692307, + "grad_norm": 15.000822067260742, + "learning_rate": 3.909615384615385e-05, + "loss": 0.5243, + "step": 7938 + }, + { + "epoch": 21.810439560439562, + "grad_norm": 13.89273738861084, + "learning_rate": 3.909478021978022e-05, + "loss": 0.4878, + "step": 7939 + }, + { + "epoch": 21.813186813186814, + "grad_norm": 18.05341339111328, + "learning_rate": 3.9093406593406594e-05, + "loss": 0.4868, + "step": 7940 + }, + { + "epoch": 21.815934065934066, + "grad_norm": 12.907156944274902, + "learning_rate": 3.909203296703297e-05, + "loss": 0.3355, + "step": 7941 + }, + { + "epoch": 21.818681318681318, + "grad_norm": 18.651046752929688, + "learning_rate": 3.909065934065934e-05, + "loss": 0.5723, + "step": 7942 + }, + { + "epoch": 21.821428571428573, + "grad_norm": 16.558122634887695, + "learning_rate": 3.908928571428572e-05, + "loss": 0.6677, + "step": 7943 + }, + { + "epoch": 21.824175824175825, + "grad_norm": 13.901226043701172, + "learning_rate": 3.908791208791209e-05, + "loss": 0.4597, + "step": 7944 + }, + { + "epoch": 21.826923076923077, + "grad_norm": 19.031848907470703, + "learning_rate": 3.9086538461538464e-05, + "loss": 0.6301, + "step": 7945 + }, + { + "epoch": 21.82967032967033, + "grad_norm": 9.592060089111328, + "learning_rate": 3.908516483516484e-05, + "loss": 0.255, + "step": 7946 + }, + { + "epoch": 21.832417582417584, + "grad_norm": 19.076900482177734, + "learning_rate": 3.908379120879121e-05, + "loss": 0.6921, + "step": 7947 + }, + { + "epoch": 21.835164835164836, + "grad_norm": 7.974930286407471, + "learning_rate": 3.908241758241759e-05, + "loss": 0.2409, + "step": 7948 + }, + { + "epoch": 21.837912087912088, + "grad_norm": 13.592794418334961, + "learning_rate": 3.908104395604396e-05, + "loss": 0.2426, + "step": 7949 + }, + { + "epoch": 21.84065934065934, + "grad_norm": 9.580999374389648, + "learning_rate": 3.907967032967033e-05, + "loss": 0.138, + "step": 7950 + }, + { + "epoch": 21.843406593406595, + "grad_norm": 10.133282661437988, + "learning_rate": 3.9078296703296705e-05, + "loss": 0.2046, + "step": 7951 + }, + { + "epoch": 21.846153846153847, + "grad_norm": 4.8426642417907715, + "learning_rate": 3.9076923076923075e-05, + "loss": 0.0841, + "step": 7952 + }, + { + "epoch": 21.8489010989011, + "grad_norm": 14.557025909423828, + "learning_rate": 3.907554945054945e-05, + "loss": 0.3571, + "step": 7953 + }, + { + "epoch": 21.85164835164835, + "grad_norm": 13.667299270629883, + "learning_rate": 3.907417582417582e-05, + "loss": 0.4562, + "step": 7954 + }, + { + "epoch": 21.854395604395606, + "grad_norm": 18.578197479248047, + "learning_rate": 3.90728021978022e-05, + "loss": 0.6183, + "step": 7955 + }, + { + "epoch": 21.857142857142858, + "grad_norm": 10.358968734741211, + "learning_rate": 3.9071428571428575e-05, + "loss": 0.3247, + "step": 7956 + }, + { + "epoch": 21.85989010989011, + "grad_norm": 16.52140998840332, + "learning_rate": 3.9070054945054945e-05, + "loss": 0.381, + "step": 7957 + }, + { + "epoch": 21.86263736263736, + "grad_norm": 2.424720048904419, + "learning_rate": 3.906868131868132e-05, + "loss": 0.0673, + "step": 7958 + }, + { + "epoch": 21.865384615384617, + "grad_norm": 13.013669967651367, + "learning_rate": 3.906730769230769e-05, + "loss": 0.5169, + "step": 7959 + }, + { + "epoch": 21.86813186813187, + "grad_norm": 13.855642318725586, + "learning_rate": 3.906593406593407e-05, + "loss": 0.555, + "step": 7960 + }, + { + "epoch": 21.87087912087912, + "grad_norm": 9.944887161254883, + "learning_rate": 3.9064560439560446e-05, + "loss": 0.2129, + "step": 7961 + }, + { + "epoch": 21.873626373626372, + "grad_norm": 11.078766822814941, + "learning_rate": 3.9063186813186816e-05, + "loss": 0.2753, + "step": 7962 + }, + { + "epoch": 21.876373626373628, + "grad_norm": 9.803315162658691, + "learning_rate": 3.906181318681319e-05, + "loss": 0.382, + "step": 7963 + }, + { + "epoch": 21.87912087912088, + "grad_norm": 8.944167137145996, + "learning_rate": 3.906043956043956e-05, + "loss": 0.3154, + "step": 7964 + }, + { + "epoch": 21.88186813186813, + "grad_norm": 12.18025016784668, + "learning_rate": 3.905906593406593e-05, + "loss": 0.2533, + "step": 7965 + }, + { + "epoch": 21.884615384615383, + "grad_norm": 15.63033390045166, + "learning_rate": 3.905769230769231e-05, + "loss": 0.3287, + "step": 7966 + }, + { + "epoch": 21.88736263736264, + "grad_norm": 23.733896255493164, + "learning_rate": 3.905631868131868e-05, + "loss": 0.7323, + "step": 7967 + }, + { + "epoch": 21.89010989010989, + "grad_norm": 7.3346381187438965, + "learning_rate": 3.9054945054945056e-05, + "loss": 0.152, + "step": 7968 + }, + { + "epoch": 21.892857142857142, + "grad_norm": 12.308781623840332, + "learning_rate": 3.9053571428571426e-05, + "loss": 0.3051, + "step": 7969 + }, + { + "epoch": 21.895604395604394, + "grad_norm": 15.331327438354492, + "learning_rate": 3.90521978021978e-05, + "loss": 0.4037, + "step": 7970 + }, + { + "epoch": 21.89835164835165, + "grad_norm": 12.594071388244629, + "learning_rate": 3.905082417582418e-05, + "loss": 0.5001, + "step": 7971 + }, + { + "epoch": 21.9010989010989, + "grad_norm": 11.281556129455566, + "learning_rate": 3.904945054945055e-05, + "loss": 0.3409, + "step": 7972 + }, + { + "epoch": 21.903846153846153, + "grad_norm": 7.8545918464660645, + "learning_rate": 3.904807692307693e-05, + "loss": 0.2048, + "step": 7973 + }, + { + "epoch": 21.906593406593405, + "grad_norm": 11.936466217041016, + "learning_rate": 3.90467032967033e-05, + "loss": 0.3571, + "step": 7974 + }, + { + "epoch": 21.90934065934066, + "grad_norm": 13.603577613830566, + "learning_rate": 3.9045329670329673e-05, + "loss": 0.463, + "step": 7975 + }, + { + "epoch": 21.912087912087912, + "grad_norm": 13.973475456237793, + "learning_rate": 3.904395604395605e-05, + "loss": 0.3504, + "step": 7976 + }, + { + "epoch": 21.914835164835164, + "grad_norm": 12.179306030273438, + "learning_rate": 3.904258241758242e-05, + "loss": 0.3927, + "step": 7977 + }, + { + "epoch": 21.917582417582416, + "grad_norm": 10.598763465881348, + "learning_rate": 3.90412087912088e-05, + "loss": 0.3633, + "step": 7978 + }, + { + "epoch": 21.92032967032967, + "grad_norm": 15.739097595214844, + "learning_rate": 3.903983516483517e-05, + "loss": 0.4864, + "step": 7979 + }, + { + "epoch": 21.923076923076923, + "grad_norm": 20.160932540893555, + "learning_rate": 3.903846153846154e-05, + "loss": 0.6007, + "step": 7980 + }, + { + "epoch": 21.925824175824175, + "grad_norm": 15.080623626708984, + "learning_rate": 3.9037087912087914e-05, + "loss": 0.3232, + "step": 7981 + }, + { + "epoch": 21.928571428571427, + "grad_norm": 15.109248161315918, + "learning_rate": 3.9035714285714284e-05, + "loss": 0.6021, + "step": 7982 + }, + { + "epoch": 21.931318681318682, + "grad_norm": 10.34941577911377, + "learning_rate": 3.903434065934066e-05, + "loss": 0.191, + "step": 7983 + }, + { + "epoch": 21.934065934065934, + "grad_norm": 17.11443328857422, + "learning_rate": 3.903296703296703e-05, + "loss": 0.6321, + "step": 7984 + }, + { + "epoch": 21.936813186813186, + "grad_norm": 9.858141899108887, + "learning_rate": 3.903159340659341e-05, + "loss": 0.333, + "step": 7985 + }, + { + "epoch": 21.939560439560438, + "grad_norm": 13.94323444366455, + "learning_rate": 3.9030219780219784e-05, + "loss": 0.5228, + "step": 7986 + }, + { + "epoch": 21.942307692307693, + "grad_norm": 17.05861473083496, + "learning_rate": 3.9028846153846154e-05, + "loss": 0.4409, + "step": 7987 + }, + { + "epoch": 21.945054945054945, + "grad_norm": 12.872590065002441, + "learning_rate": 3.902747252747253e-05, + "loss": 0.364, + "step": 7988 + }, + { + "epoch": 21.947802197802197, + "grad_norm": 8.88665771484375, + "learning_rate": 3.90260989010989e-05, + "loss": 0.2749, + "step": 7989 + }, + { + "epoch": 21.95054945054945, + "grad_norm": 6.122255325317383, + "learning_rate": 3.902472527472528e-05, + "loss": 0.1389, + "step": 7990 + }, + { + "epoch": 21.953296703296704, + "grad_norm": 10.844390869140625, + "learning_rate": 3.9023351648351655e-05, + "loss": 0.4261, + "step": 7991 + }, + { + "epoch": 21.956043956043956, + "grad_norm": 7.979780673980713, + "learning_rate": 3.9021978021978025e-05, + "loss": 0.1699, + "step": 7992 + }, + { + "epoch": 21.958791208791208, + "grad_norm": 10.368865966796875, + "learning_rate": 3.90206043956044e-05, + "loss": 0.1769, + "step": 7993 + }, + { + "epoch": 21.96153846153846, + "grad_norm": 17.220670700073242, + "learning_rate": 3.901923076923077e-05, + "loss": 0.5936, + "step": 7994 + }, + { + "epoch": 21.964285714285715, + "grad_norm": 12.220222473144531, + "learning_rate": 3.901785714285714e-05, + "loss": 0.3821, + "step": 7995 + }, + { + "epoch": 21.967032967032967, + "grad_norm": 13.466747283935547, + "learning_rate": 3.901648351648352e-05, + "loss": 0.6126, + "step": 7996 + }, + { + "epoch": 21.96978021978022, + "grad_norm": 19.420635223388672, + "learning_rate": 3.901510989010989e-05, + "loss": 0.5087, + "step": 7997 + }, + { + "epoch": 21.97252747252747, + "grad_norm": 14.047806739807129, + "learning_rate": 3.9013736263736265e-05, + "loss": 0.3262, + "step": 7998 + }, + { + "epoch": 21.975274725274726, + "grad_norm": 7.034632205963135, + "learning_rate": 3.9012362637362635e-05, + "loss": 0.1435, + "step": 7999 + }, + { + "epoch": 21.978021978021978, + "grad_norm": 23.031034469604492, + "learning_rate": 3.901098901098901e-05, + "loss": 0.6988, + "step": 8000 + }, + { + "epoch": 21.98076923076923, + "grad_norm": 12.359333038330078, + "learning_rate": 3.900961538461539e-05, + "loss": 0.2308, + "step": 8001 + }, + { + "epoch": 21.983516483516482, + "grad_norm": 11.140485763549805, + "learning_rate": 3.900824175824176e-05, + "loss": 0.357, + "step": 8002 + }, + { + "epoch": 21.986263736263737, + "grad_norm": 9.677477836608887, + "learning_rate": 3.9006868131868136e-05, + "loss": 0.2381, + "step": 8003 + }, + { + "epoch": 21.98901098901099, + "grad_norm": 18.10784339904785, + "learning_rate": 3.9005494505494506e-05, + "loss": 0.769, + "step": 8004 + }, + { + "epoch": 21.99175824175824, + "grad_norm": 10.433279037475586, + "learning_rate": 3.900412087912088e-05, + "loss": 0.2293, + "step": 8005 + }, + { + "epoch": 21.994505494505496, + "grad_norm": 11.120743751525879, + "learning_rate": 3.900274725274726e-05, + "loss": 0.3401, + "step": 8006 + }, + { + "epoch": 21.997252747252748, + "grad_norm": 14.050702095031738, + "learning_rate": 3.900137362637363e-05, + "loss": 0.4881, + "step": 8007 + }, + { + "epoch": 22.0, + "grad_norm": 51.54266357421875, + "learning_rate": 3.9000000000000006e-05, + "loss": 1.4692, + "step": 8008 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.45179063360881544, + "eval_f1": 0.4157924307288671, + "eval_f1_DuraRiadoRio_64x64": 0.23952095808383234, + "eval_f1_Mole_64x64": 0.5358851674641149, + "eval_f1_Quebrado_64x64": 0.22485207100591717, + "eval_f1_RiadoRio_64x64": 0.6158940397350994, + "eval_f1_RioFechado_64x64": 0.4628099173553719, + "eval_loss": 3.568193197250366, + "eval_precision": 0.6826958977729942, + "eval_precision_DuraRiadoRio_64x64": 0.8695652173913043, + "eval_precision_Mole_64x64": 0.8615384615384616, + "eval_precision_Quebrado_64x64": 0.76, + "eval_precision_RiadoRio_64x64": 0.62, + "eval_precision_RioFechado_64x64": 0.3023758099352052, + "eval_recall": 0.45149596408862536, + "eval_recall_DuraRiadoRio_64x64": 0.1388888888888889, + "eval_recall_Mole_64x64": 0.3888888888888889, + "eval_recall_Quebrado_64x64": 0.13194444444444445, + "eval_recall_RiadoRio_64x64": 0.6118421052631579, + "eval_recall_RioFechado_64x64": 0.9859154929577465, + "eval_runtime": 1.7116, + "eval_samples_per_second": 424.165, + "eval_steps_per_second": 26.875, + "step": 8008 + }, + { + "epoch": 22.002747252747252, + "grad_norm": 14.798648834228516, + "learning_rate": 3.8998626373626376e-05, + "loss": 0.3028, + "step": 8009 + }, + { + "epoch": 22.005494505494507, + "grad_norm": 10.445240020751953, + "learning_rate": 3.8997252747252746e-05, + "loss": 0.2234, + "step": 8010 + }, + { + "epoch": 22.00824175824176, + "grad_norm": 12.283945083618164, + "learning_rate": 3.899587912087912e-05, + "loss": 0.4593, + "step": 8011 + }, + { + "epoch": 22.01098901098901, + "grad_norm": 12.585960388183594, + "learning_rate": 3.899450549450549e-05, + "loss": 0.5803, + "step": 8012 + }, + { + "epoch": 22.013736263736263, + "grad_norm": 15.278307914733887, + "learning_rate": 3.899313186813187e-05, + "loss": 0.4606, + "step": 8013 + }, + { + "epoch": 22.016483516483518, + "grad_norm": 17.013887405395508, + "learning_rate": 3.899175824175824e-05, + "loss": 0.4816, + "step": 8014 + }, + { + "epoch": 22.01923076923077, + "grad_norm": 8.114152908325195, + "learning_rate": 3.8990384615384617e-05, + "loss": 0.1621, + "step": 8015 + }, + { + "epoch": 22.021978021978022, + "grad_norm": 14.468428611755371, + "learning_rate": 3.898901098901099e-05, + "loss": 0.7243, + "step": 8016 + }, + { + "epoch": 22.024725274725274, + "grad_norm": 12.694710731506348, + "learning_rate": 3.898763736263736e-05, + "loss": 0.5128, + "step": 8017 + }, + { + "epoch": 22.02747252747253, + "grad_norm": 21.56125259399414, + "learning_rate": 3.898626373626374e-05, + "loss": 0.829, + "step": 8018 + }, + { + "epoch": 22.03021978021978, + "grad_norm": 7.913631916046143, + "learning_rate": 3.898489010989011e-05, + "loss": 0.1965, + "step": 8019 + }, + { + "epoch": 22.032967032967033, + "grad_norm": 8.128607749938965, + "learning_rate": 3.898351648351649e-05, + "loss": 0.2634, + "step": 8020 + }, + { + "epoch": 22.035714285714285, + "grad_norm": 9.646188735961914, + "learning_rate": 3.8982142857142864e-05, + "loss": 0.4432, + "step": 8021 + }, + { + "epoch": 22.03846153846154, + "grad_norm": 15.404045104980469, + "learning_rate": 3.8980769230769234e-05, + "loss": 0.5064, + "step": 8022 + }, + { + "epoch": 22.041208791208792, + "grad_norm": 2.1926863193511963, + "learning_rate": 3.897939560439561e-05, + "loss": 0.0418, + "step": 8023 + }, + { + "epoch": 22.043956043956044, + "grad_norm": 14.621249198913574, + "learning_rate": 3.897802197802198e-05, + "loss": 0.2871, + "step": 8024 + }, + { + "epoch": 22.046703296703296, + "grad_norm": 14.41920280456543, + "learning_rate": 3.897664835164835e-05, + "loss": 0.3514, + "step": 8025 + }, + { + "epoch": 22.04945054945055, + "grad_norm": 8.608880996704102, + "learning_rate": 3.897527472527473e-05, + "loss": 0.2484, + "step": 8026 + }, + { + "epoch": 22.052197802197803, + "grad_norm": 17.119935989379883, + "learning_rate": 3.89739010989011e-05, + "loss": 0.5745, + "step": 8027 + }, + { + "epoch": 22.054945054945055, + "grad_norm": 14.576447486877441, + "learning_rate": 3.8972527472527474e-05, + "loss": 0.3518, + "step": 8028 + }, + { + "epoch": 22.057692307692307, + "grad_norm": 28.10638427734375, + "learning_rate": 3.8971153846153844e-05, + "loss": 1.1302, + "step": 8029 + }, + { + "epoch": 22.060439560439562, + "grad_norm": 13.196001052856445, + "learning_rate": 3.896978021978022e-05, + "loss": 0.3545, + "step": 8030 + }, + { + "epoch": 22.063186813186814, + "grad_norm": 13.376428604125977, + "learning_rate": 3.89684065934066e-05, + "loss": 0.5344, + "step": 8031 + }, + { + "epoch": 22.065934065934066, + "grad_norm": 14.745237350463867, + "learning_rate": 3.896703296703297e-05, + "loss": 0.3381, + "step": 8032 + }, + { + "epoch": 22.068681318681318, + "grad_norm": 12.000152587890625, + "learning_rate": 3.8965659340659345e-05, + "loss": 0.3307, + "step": 8033 + }, + { + "epoch": 22.071428571428573, + "grad_norm": 23.111316680908203, + "learning_rate": 3.8964285714285715e-05, + "loss": 0.8291, + "step": 8034 + }, + { + "epoch": 22.074175824175825, + "grad_norm": 11.220093727111816, + "learning_rate": 3.896291208791209e-05, + "loss": 0.3826, + "step": 8035 + }, + { + "epoch": 22.076923076923077, + "grad_norm": 15.262918472290039, + "learning_rate": 3.896153846153847e-05, + "loss": 0.7061, + "step": 8036 + }, + { + "epoch": 22.07967032967033, + "grad_norm": 12.081550598144531, + "learning_rate": 3.896016483516484e-05, + "loss": 0.3972, + "step": 8037 + }, + { + "epoch": 22.082417582417584, + "grad_norm": 16.336217880249023, + "learning_rate": 3.8958791208791215e-05, + "loss": 0.5645, + "step": 8038 + }, + { + "epoch": 22.085164835164836, + "grad_norm": 12.95285701751709, + "learning_rate": 3.8957417582417585e-05, + "loss": 0.6103, + "step": 8039 + }, + { + "epoch": 22.087912087912088, + "grad_norm": 8.665323257446289, + "learning_rate": 3.8956043956043955e-05, + "loss": 0.2485, + "step": 8040 + }, + { + "epoch": 22.09065934065934, + "grad_norm": 9.889448165893555, + "learning_rate": 3.895467032967033e-05, + "loss": 0.2518, + "step": 8041 + }, + { + "epoch": 22.093406593406595, + "grad_norm": 10.91963005065918, + "learning_rate": 3.89532967032967e-05, + "loss": 0.1296, + "step": 8042 + }, + { + "epoch": 22.096153846153847, + "grad_norm": 15.478370666503906, + "learning_rate": 3.895192307692308e-05, + "loss": 0.3934, + "step": 8043 + }, + { + "epoch": 22.0989010989011, + "grad_norm": 6.137900352478027, + "learning_rate": 3.895054945054945e-05, + "loss": 0.1275, + "step": 8044 + }, + { + "epoch": 22.10164835164835, + "grad_norm": 23.522735595703125, + "learning_rate": 3.8949175824175826e-05, + "loss": 1.5103, + "step": 8045 + }, + { + "epoch": 22.104395604395606, + "grad_norm": 8.587608337402344, + "learning_rate": 3.89478021978022e-05, + "loss": 0.1717, + "step": 8046 + }, + { + "epoch": 22.107142857142858, + "grad_norm": 14.986591339111328, + "learning_rate": 3.894642857142857e-05, + "loss": 0.6931, + "step": 8047 + }, + { + "epoch": 22.10989010989011, + "grad_norm": 9.88875675201416, + "learning_rate": 3.894505494505495e-05, + "loss": 0.3566, + "step": 8048 + }, + { + "epoch": 22.11263736263736, + "grad_norm": 16.971778869628906, + "learning_rate": 3.894368131868132e-05, + "loss": 0.5862, + "step": 8049 + }, + { + "epoch": 22.115384615384617, + "grad_norm": 12.85965347290039, + "learning_rate": 3.8942307692307696e-05, + "loss": 0.3808, + "step": 8050 + }, + { + "epoch": 22.11813186813187, + "grad_norm": 14.754254341125488, + "learning_rate": 3.894093406593407e-05, + "loss": 0.4831, + "step": 8051 + }, + { + "epoch": 22.12087912087912, + "grad_norm": 11.343539237976074, + "learning_rate": 3.893956043956044e-05, + "loss": 0.2925, + "step": 8052 + }, + { + "epoch": 22.123626373626372, + "grad_norm": 5.560579776763916, + "learning_rate": 3.893818681318682e-05, + "loss": 0.0958, + "step": 8053 + }, + { + "epoch": 22.126373626373628, + "grad_norm": 11.227544784545898, + "learning_rate": 3.893681318681319e-05, + "loss": 0.1692, + "step": 8054 + }, + { + "epoch": 22.12912087912088, + "grad_norm": 9.376007080078125, + "learning_rate": 3.893543956043956e-05, + "loss": 0.3375, + "step": 8055 + }, + { + "epoch": 22.13186813186813, + "grad_norm": 10.127681732177734, + "learning_rate": 3.8934065934065936e-05, + "loss": 0.2212, + "step": 8056 + }, + { + "epoch": 22.134615384615383, + "grad_norm": 11.4071626663208, + "learning_rate": 3.8932692307692306e-05, + "loss": 0.2714, + "step": 8057 + }, + { + "epoch": 22.13736263736264, + "grad_norm": 19.559728622436523, + "learning_rate": 3.893131868131868e-05, + "loss": 0.8191, + "step": 8058 + }, + { + "epoch": 22.14010989010989, + "grad_norm": 5.406369209289551, + "learning_rate": 3.892994505494505e-05, + "loss": 0.0985, + "step": 8059 + }, + { + "epoch": 22.142857142857142, + "grad_norm": 16.403823852539062, + "learning_rate": 3.892857142857143e-05, + "loss": 0.2683, + "step": 8060 + }, + { + "epoch": 22.145604395604394, + "grad_norm": 12.844943046569824, + "learning_rate": 3.892719780219781e-05, + "loss": 0.42, + "step": 8061 + }, + { + "epoch": 22.14835164835165, + "grad_norm": 15.448777198791504, + "learning_rate": 3.892582417582418e-05, + "loss": 0.4353, + "step": 8062 + }, + { + "epoch": 22.1510989010989, + "grad_norm": 17.530784606933594, + "learning_rate": 3.8924450549450554e-05, + "loss": 0.7854, + "step": 8063 + }, + { + "epoch": 22.153846153846153, + "grad_norm": 12.942585945129395, + "learning_rate": 3.8923076923076924e-05, + "loss": 0.353, + "step": 8064 + }, + { + "epoch": 22.156593406593405, + "grad_norm": 20.20222282409668, + "learning_rate": 3.89217032967033e-05, + "loss": 0.7618, + "step": 8065 + }, + { + "epoch": 22.15934065934066, + "grad_norm": 8.620258331298828, + "learning_rate": 3.892032967032968e-05, + "loss": 0.2233, + "step": 8066 + }, + { + "epoch": 22.162087912087912, + "grad_norm": 8.43407917022705, + "learning_rate": 3.891895604395605e-05, + "loss": 0.1583, + "step": 8067 + }, + { + "epoch": 22.164835164835164, + "grad_norm": 15.11391544342041, + "learning_rate": 3.8917582417582424e-05, + "loss": 0.4585, + "step": 8068 + }, + { + "epoch": 22.167582417582416, + "grad_norm": 16.85024642944336, + "learning_rate": 3.8916208791208794e-05, + "loss": 0.584, + "step": 8069 + }, + { + "epoch": 22.17032967032967, + "grad_norm": 16.99519920349121, + "learning_rate": 3.8914835164835164e-05, + "loss": 0.6123, + "step": 8070 + }, + { + "epoch": 22.173076923076923, + "grad_norm": 12.334197998046875, + "learning_rate": 3.891346153846154e-05, + "loss": 0.2991, + "step": 8071 + }, + { + "epoch": 22.175824175824175, + "grad_norm": 18.091270446777344, + "learning_rate": 3.891208791208791e-05, + "loss": 0.5456, + "step": 8072 + }, + { + "epoch": 22.178571428571427, + "grad_norm": 5.320455074310303, + "learning_rate": 3.891071428571429e-05, + "loss": 0.1328, + "step": 8073 + }, + { + "epoch": 22.181318681318682, + "grad_norm": 12.070999145507812, + "learning_rate": 3.890934065934066e-05, + "loss": 0.3607, + "step": 8074 + }, + { + "epoch": 22.184065934065934, + "grad_norm": 16.481258392333984, + "learning_rate": 3.8907967032967034e-05, + "loss": 0.5401, + "step": 8075 + }, + { + "epoch": 22.186813186813186, + "grad_norm": 8.563852310180664, + "learning_rate": 3.890659340659341e-05, + "loss": 0.2471, + "step": 8076 + }, + { + "epoch": 22.189560439560438, + "grad_norm": 14.462085723876953, + "learning_rate": 3.890521978021978e-05, + "loss": 0.5217, + "step": 8077 + }, + { + "epoch": 22.192307692307693, + "grad_norm": 6.856210231781006, + "learning_rate": 3.890384615384616e-05, + "loss": 0.1593, + "step": 8078 + }, + { + "epoch": 22.195054945054945, + "grad_norm": 18.099443435668945, + "learning_rate": 3.890247252747253e-05, + "loss": 0.9368, + "step": 8079 + }, + { + "epoch": 22.197802197802197, + "grad_norm": 8.682809829711914, + "learning_rate": 3.8901098901098905e-05, + "loss": 0.2962, + "step": 8080 + }, + { + "epoch": 22.20054945054945, + "grad_norm": 6.949143886566162, + "learning_rate": 3.8899725274725275e-05, + "loss": 0.1962, + "step": 8081 + }, + { + "epoch": 22.203296703296704, + "grad_norm": 15.814098358154297, + "learning_rate": 3.889835164835165e-05, + "loss": 0.5957, + "step": 8082 + }, + { + "epoch": 22.206043956043956, + "grad_norm": 8.504631042480469, + "learning_rate": 3.889697802197803e-05, + "loss": 0.2662, + "step": 8083 + }, + { + "epoch": 22.208791208791208, + "grad_norm": 8.331999778747559, + "learning_rate": 3.88956043956044e-05, + "loss": 0.2432, + "step": 8084 + }, + { + "epoch": 22.21153846153846, + "grad_norm": 17.713884353637695, + "learning_rate": 3.889423076923077e-05, + "loss": 1.1515, + "step": 8085 + }, + { + "epoch": 22.214285714285715, + "grad_norm": 6.073230743408203, + "learning_rate": 3.8892857142857145e-05, + "loss": 0.1567, + "step": 8086 + }, + { + "epoch": 22.217032967032967, + "grad_norm": 11.731393814086914, + "learning_rate": 3.8891483516483515e-05, + "loss": 0.2579, + "step": 8087 + }, + { + "epoch": 22.21978021978022, + "grad_norm": 11.174355506896973, + "learning_rate": 3.889010989010989e-05, + "loss": 0.2352, + "step": 8088 + }, + { + "epoch": 22.22252747252747, + "grad_norm": 3.6154062747955322, + "learning_rate": 3.888873626373626e-05, + "loss": 0.1264, + "step": 8089 + }, + { + "epoch": 22.225274725274726, + "grad_norm": 22.12204360961914, + "learning_rate": 3.888736263736264e-05, + "loss": 0.5717, + "step": 8090 + }, + { + "epoch": 22.228021978021978, + "grad_norm": 8.350510597229004, + "learning_rate": 3.888598901098901e-05, + "loss": 0.1242, + "step": 8091 + }, + { + "epoch": 22.23076923076923, + "grad_norm": 10.179407119750977, + "learning_rate": 3.8884615384615386e-05, + "loss": 0.5004, + "step": 8092 + }, + { + "epoch": 22.233516483516482, + "grad_norm": 13.418266296386719, + "learning_rate": 3.888324175824176e-05, + "loss": 0.1846, + "step": 8093 + }, + { + "epoch": 22.236263736263737, + "grad_norm": 6.5988054275512695, + "learning_rate": 3.888186813186813e-05, + "loss": 0.2116, + "step": 8094 + }, + { + "epoch": 22.23901098901099, + "grad_norm": 20.17786407470703, + "learning_rate": 3.888049450549451e-05, + "loss": 0.8219, + "step": 8095 + }, + { + "epoch": 22.24175824175824, + "grad_norm": 3.0171711444854736, + "learning_rate": 3.887912087912088e-05, + "loss": 0.0605, + "step": 8096 + }, + { + "epoch": 22.244505494505493, + "grad_norm": 4.231556415557861, + "learning_rate": 3.8877747252747256e-05, + "loss": 0.0619, + "step": 8097 + }, + { + "epoch": 22.247252747252748, + "grad_norm": 6.700078964233398, + "learning_rate": 3.887637362637363e-05, + "loss": 0.2125, + "step": 8098 + }, + { + "epoch": 22.25, + "grad_norm": 12.172730445861816, + "learning_rate": 3.8875e-05, + "loss": 0.3797, + "step": 8099 + }, + { + "epoch": 22.252747252747252, + "grad_norm": 12.76350212097168, + "learning_rate": 3.887362637362637e-05, + "loss": 0.4016, + "step": 8100 + }, + { + "epoch": 22.255494505494504, + "grad_norm": 9.249321937561035, + "learning_rate": 3.887225274725275e-05, + "loss": 0.3436, + "step": 8101 + }, + { + "epoch": 22.25824175824176, + "grad_norm": 16.01664924621582, + "learning_rate": 3.887087912087912e-05, + "loss": 0.4676, + "step": 8102 + }, + { + "epoch": 22.26098901098901, + "grad_norm": 7.318417072296143, + "learning_rate": 3.88695054945055e-05, + "loss": 0.2022, + "step": 8103 + }, + { + "epoch": 22.263736263736263, + "grad_norm": 21.41403579711914, + "learning_rate": 3.886813186813187e-05, + "loss": 0.6982, + "step": 8104 + }, + { + "epoch": 22.266483516483518, + "grad_norm": 13.281561851501465, + "learning_rate": 3.8866758241758243e-05, + "loss": 0.34, + "step": 8105 + }, + { + "epoch": 22.26923076923077, + "grad_norm": 19.658369064331055, + "learning_rate": 3.8865384615384613e-05, + "loss": 0.8321, + "step": 8106 + }, + { + "epoch": 22.271978021978022, + "grad_norm": 11.744203567504883, + "learning_rate": 3.886401098901099e-05, + "loss": 0.2002, + "step": 8107 + }, + { + "epoch": 22.274725274725274, + "grad_norm": 4.5170817375183105, + "learning_rate": 3.886263736263737e-05, + "loss": 0.0967, + "step": 8108 + }, + { + "epoch": 22.27747252747253, + "grad_norm": 8.165882110595703, + "learning_rate": 3.886126373626374e-05, + "loss": 0.3449, + "step": 8109 + }, + { + "epoch": 22.28021978021978, + "grad_norm": 6.296064853668213, + "learning_rate": 3.8859890109890114e-05, + "loss": 0.1735, + "step": 8110 + }, + { + "epoch": 22.282967032967033, + "grad_norm": 11.452001571655273, + "learning_rate": 3.8858516483516484e-05, + "loss": 0.3321, + "step": 8111 + }, + { + "epoch": 22.285714285714285, + "grad_norm": 7.33566427230835, + "learning_rate": 3.885714285714286e-05, + "loss": 0.244, + "step": 8112 + }, + { + "epoch": 22.28846153846154, + "grad_norm": 14.454051971435547, + "learning_rate": 3.885576923076924e-05, + "loss": 0.2784, + "step": 8113 + }, + { + "epoch": 22.291208791208792, + "grad_norm": 17.354585647583008, + "learning_rate": 3.885439560439561e-05, + "loss": 0.7622, + "step": 8114 + }, + { + "epoch": 22.293956043956044, + "grad_norm": 11.359302520751953, + "learning_rate": 3.885302197802198e-05, + "loss": 0.3741, + "step": 8115 + }, + { + "epoch": 22.296703296703296, + "grad_norm": 6.049682140350342, + "learning_rate": 3.885164835164835e-05, + "loss": 0.1345, + "step": 8116 + }, + { + "epoch": 22.29945054945055, + "grad_norm": 13.433794975280762, + "learning_rate": 3.8850274725274724e-05, + "loss": 0.4158, + "step": 8117 + }, + { + "epoch": 22.302197802197803, + "grad_norm": 8.240715980529785, + "learning_rate": 3.88489010989011e-05, + "loss": 0.1608, + "step": 8118 + }, + { + "epoch": 22.304945054945055, + "grad_norm": 10.229879379272461, + "learning_rate": 3.884752747252747e-05, + "loss": 0.2489, + "step": 8119 + }, + { + "epoch": 22.307692307692307, + "grad_norm": 21.924999237060547, + "learning_rate": 3.884615384615385e-05, + "loss": 0.8876, + "step": 8120 + }, + { + "epoch": 22.310439560439562, + "grad_norm": 11.210090637207031, + "learning_rate": 3.884478021978022e-05, + "loss": 0.3305, + "step": 8121 + }, + { + "epoch": 22.313186813186814, + "grad_norm": 8.124990463256836, + "learning_rate": 3.8843406593406595e-05, + "loss": 0.1967, + "step": 8122 + }, + { + "epoch": 22.315934065934066, + "grad_norm": 10.312012672424316, + "learning_rate": 3.884203296703297e-05, + "loss": 0.5296, + "step": 8123 + }, + { + "epoch": 22.318681318681318, + "grad_norm": 12.4812650680542, + "learning_rate": 3.884065934065934e-05, + "loss": 0.3129, + "step": 8124 + }, + { + "epoch": 22.321428571428573, + "grad_norm": 19.540943145751953, + "learning_rate": 3.883928571428572e-05, + "loss": 0.7613, + "step": 8125 + }, + { + "epoch": 22.324175824175825, + "grad_norm": 5.919415473937988, + "learning_rate": 3.883791208791209e-05, + "loss": 0.1332, + "step": 8126 + }, + { + "epoch": 22.326923076923077, + "grad_norm": 12.38427734375, + "learning_rate": 3.8836538461538465e-05, + "loss": 0.4326, + "step": 8127 + }, + { + "epoch": 22.32967032967033, + "grad_norm": 7.655410289764404, + "learning_rate": 3.883516483516484e-05, + "loss": 0.1907, + "step": 8128 + }, + { + "epoch": 22.332417582417584, + "grad_norm": 15.890899658203125, + "learning_rate": 3.883379120879121e-05, + "loss": 0.3433, + "step": 8129 + }, + { + "epoch": 22.335164835164836, + "grad_norm": 25.84014892578125, + "learning_rate": 3.883241758241758e-05, + "loss": 1.1421, + "step": 8130 + }, + { + "epoch": 22.337912087912088, + "grad_norm": 9.169594764709473, + "learning_rate": 3.883104395604395e-05, + "loss": 0.2177, + "step": 8131 + }, + { + "epoch": 22.34065934065934, + "grad_norm": 21.8806209564209, + "learning_rate": 3.882967032967033e-05, + "loss": 0.7359, + "step": 8132 + }, + { + "epoch": 22.343406593406595, + "grad_norm": 10.164507865905762, + "learning_rate": 3.8828296703296706e-05, + "loss": 0.2912, + "step": 8133 + }, + { + "epoch": 22.346153846153847, + "grad_norm": 26.339372634887695, + "learning_rate": 3.8826923076923076e-05, + "loss": 1.588, + "step": 8134 + }, + { + "epoch": 22.3489010989011, + "grad_norm": 15.5683012008667, + "learning_rate": 3.882554945054945e-05, + "loss": 0.4987, + "step": 8135 + }, + { + "epoch": 22.35164835164835, + "grad_norm": 18.94334602355957, + "learning_rate": 3.882417582417582e-05, + "loss": 0.7317, + "step": 8136 + }, + { + "epoch": 22.354395604395606, + "grad_norm": 7.932946681976318, + "learning_rate": 3.88228021978022e-05, + "loss": 0.2024, + "step": 8137 + }, + { + "epoch": 22.357142857142858, + "grad_norm": 6.356466770172119, + "learning_rate": 3.8821428571428576e-05, + "loss": 0.1619, + "step": 8138 + }, + { + "epoch": 22.35989010989011, + "grad_norm": 10.826521873474121, + "learning_rate": 3.8820054945054946e-05, + "loss": 0.2481, + "step": 8139 + }, + { + "epoch": 22.36263736263736, + "grad_norm": 8.664471626281738, + "learning_rate": 3.881868131868132e-05, + "loss": 0.1337, + "step": 8140 + }, + { + "epoch": 22.365384615384617, + "grad_norm": 13.175453186035156, + "learning_rate": 3.881730769230769e-05, + "loss": 0.3865, + "step": 8141 + }, + { + "epoch": 22.36813186813187, + "grad_norm": 8.48509693145752, + "learning_rate": 3.881593406593407e-05, + "loss": 0.1778, + "step": 8142 + }, + { + "epoch": 22.37087912087912, + "grad_norm": 15.831615447998047, + "learning_rate": 3.8814560439560446e-05, + "loss": 0.441, + "step": 8143 + }, + { + "epoch": 22.373626373626372, + "grad_norm": 6.35835599899292, + "learning_rate": 3.8813186813186817e-05, + "loss": 0.2027, + "step": 8144 + }, + { + "epoch": 22.376373626373628, + "grad_norm": 14.480551719665527, + "learning_rate": 3.8811813186813187e-05, + "loss": 0.4569, + "step": 8145 + }, + { + "epoch": 22.37912087912088, + "grad_norm": 22.829051971435547, + "learning_rate": 3.8810439560439557e-05, + "loss": 0.6844, + "step": 8146 + }, + { + "epoch": 22.38186813186813, + "grad_norm": 8.829328536987305, + "learning_rate": 3.880906593406593e-05, + "loss": 0.2635, + "step": 8147 + }, + { + "epoch": 22.384615384615383, + "grad_norm": 20.306283950805664, + "learning_rate": 3.880769230769231e-05, + "loss": 0.6206, + "step": 8148 + }, + { + "epoch": 22.38736263736264, + "grad_norm": 17.925334930419922, + "learning_rate": 3.880631868131868e-05, + "loss": 0.4527, + "step": 8149 + }, + { + "epoch": 22.39010989010989, + "grad_norm": 14.8494291305542, + "learning_rate": 3.880494505494506e-05, + "loss": 0.4163, + "step": 8150 + }, + { + "epoch": 22.392857142857142, + "grad_norm": 10.251575469970703, + "learning_rate": 3.880357142857143e-05, + "loss": 0.2711, + "step": 8151 + }, + { + "epoch": 22.395604395604394, + "grad_norm": 9.144064903259277, + "learning_rate": 3.8802197802197804e-05, + "loss": 0.2449, + "step": 8152 + }, + { + "epoch": 22.39835164835165, + "grad_norm": 14.82552719116211, + "learning_rate": 3.880082417582418e-05, + "loss": 0.5862, + "step": 8153 + }, + { + "epoch": 22.4010989010989, + "grad_norm": 21.64590072631836, + "learning_rate": 3.879945054945055e-05, + "loss": 0.5, + "step": 8154 + }, + { + "epoch": 22.403846153846153, + "grad_norm": 13.8925142288208, + "learning_rate": 3.879807692307693e-05, + "loss": 0.5478, + "step": 8155 + }, + { + "epoch": 22.406593406593405, + "grad_norm": 11.40701675415039, + "learning_rate": 3.87967032967033e-05, + "loss": 0.2866, + "step": 8156 + }, + { + "epoch": 22.40934065934066, + "grad_norm": 18.900646209716797, + "learning_rate": 3.8795329670329674e-05, + "loss": 0.7545, + "step": 8157 + }, + { + "epoch": 22.412087912087912, + "grad_norm": 10.469565391540527, + "learning_rate": 3.879395604395605e-05, + "loss": 0.3082, + "step": 8158 + }, + { + "epoch": 22.414835164835164, + "grad_norm": 12.384699821472168, + "learning_rate": 3.879258241758242e-05, + "loss": 0.3341, + "step": 8159 + }, + { + "epoch": 22.417582417582416, + "grad_norm": 14.246861457824707, + "learning_rate": 3.879120879120879e-05, + "loss": 0.4497, + "step": 8160 + }, + { + "epoch": 22.42032967032967, + "grad_norm": 13.813091278076172, + "learning_rate": 3.878983516483516e-05, + "loss": 0.3265, + "step": 8161 + }, + { + "epoch": 22.423076923076923, + "grad_norm": 16.478893280029297, + "learning_rate": 3.878846153846154e-05, + "loss": 0.7848, + "step": 8162 + }, + { + "epoch": 22.425824175824175, + "grad_norm": 6.172826766967773, + "learning_rate": 3.8787087912087915e-05, + "loss": 0.1073, + "step": 8163 + }, + { + "epoch": 22.428571428571427, + "grad_norm": 13.763318061828613, + "learning_rate": 3.8785714285714285e-05, + "loss": 0.423, + "step": 8164 + }, + { + "epoch": 22.431318681318682, + "grad_norm": 12.477618217468262, + "learning_rate": 3.878434065934066e-05, + "loss": 0.3393, + "step": 8165 + }, + { + "epoch": 22.434065934065934, + "grad_norm": 9.476212501525879, + "learning_rate": 3.878296703296703e-05, + "loss": 0.3581, + "step": 8166 + }, + { + "epoch": 22.436813186813186, + "grad_norm": 17.086971282958984, + "learning_rate": 3.878159340659341e-05, + "loss": 0.5909, + "step": 8167 + }, + { + "epoch": 22.439560439560438, + "grad_norm": 21.26884651184082, + "learning_rate": 3.8780219780219785e-05, + "loss": 0.6089, + "step": 8168 + }, + { + "epoch": 22.442307692307693, + "grad_norm": 7.32974100112915, + "learning_rate": 3.8778846153846155e-05, + "loss": 0.1702, + "step": 8169 + }, + { + "epoch": 22.445054945054945, + "grad_norm": 16.989952087402344, + "learning_rate": 3.877747252747253e-05, + "loss": 0.5636, + "step": 8170 + }, + { + "epoch": 22.447802197802197, + "grad_norm": 9.447100639343262, + "learning_rate": 3.87760989010989e-05, + "loss": 0.2971, + "step": 8171 + }, + { + "epoch": 22.45054945054945, + "grad_norm": 14.076298713684082, + "learning_rate": 3.877472527472528e-05, + "loss": 0.3895, + "step": 8172 + }, + { + "epoch": 22.453296703296704, + "grad_norm": 24.4177303314209, + "learning_rate": 3.8773351648351655e-05, + "loss": 0.9119, + "step": 8173 + }, + { + "epoch": 22.456043956043956, + "grad_norm": 11.909493446350098, + "learning_rate": 3.8771978021978025e-05, + "loss": 0.3427, + "step": 8174 + }, + { + "epoch": 22.458791208791208, + "grad_norm": 10.763245582580566, + "learning_rate": 3.8770604395604396e-05, + "loss": 0.2385, + "step": 8175 + }, + { + "epoch": 22.46153846153846, + "grad_norm": 9.49814510345459, + "learning_rate": 3.8769230769230766e-05, + "loss": 0.4248, + "step": 8176 + }, + { + "epoch": 22.464285714285715, + "grad_norm": 9.794837951660156, + "learning_rate": 3.876785714285714e-05, + "loss": 0.1777, + "step": 8177 + }, + { + "epoch": 22.467032967032967, + "grad_norm": 15.36778450012207, + "learning_rate": 3.876648351648352e-05, + "loss": 0.3395, + "step": 8178 + }, + { + "epoch": 22.46978021978022, + "grad_norm": 19.209678649902344, + "learning_rate": 3.876510989010989e-05, + "loss": 0.5358, + "step": 8179 + }, + { + "epoch": 22.47252747252747, + "grad_norm": 12.253039360046387, + "learning_rate": 3.8763736263736266e-05, + "loss": 0.3627, + "step": 8180 + }, + { + "epoch": 22.475274725274726, + "grad_norm": 16.2943058013916, + "learning_rate": 3.8762362637362636e-05, + "loss": 0.5942, + "step": 8181 + }, + { + "epoch": 22.478021978021978, + "grad_norm": 14.26254940032959, + "learning_rate": 3.876098901098901e-05, + "loss": 0.421, + "step": 8182 + }, + { + "epoch": 22.48076923076923, + "grad_norm": 15.071134567260742, + "learning_rate": 3.875961538461539e-05, + "loss": 0.5489, + "step": 8183 + }, + { + "epoch": 22.483516483516482, + "grad_norm": 11.188108444213867, + "learning_rate": 3.875824175824176e-05, + "loss": 0.2442, + "step": 8184 + }, + { + "epoch": 22.486263736263737, + "grad_norm": 7.45683479309082, + "learning_rate": 3.8756868131868136e-05, + "loss": 0.1696, + "step": 8185 + }, + { + "epoch": 22.48901098901099, + "grad_norm": 14.152233123779297, + "learning_rate": 3.8755494505494506e-05, + "loss": 0.5373, + "step": 8186 + }, + { + "epoch": 22.49175824175824, + "grad_norm": 19.8896484375, + "learning_rate": 3.875412087912088e-05, + "loss": 0.8793, + "step": 8187 + }, + { + "epoch": 22.494505494505496, + "grad_norm": 18.892101287841797, + "learning_rate": 3.875274725274726e-05, + "loss": 0.7248, + "step": 8188 + }, + { + "epoch": 22.497252747252748, + "grad_norm": 11.785515785217285, + "learning_rate": 3.875137362637363e-05, + "loss": 0.3319, + "step": 8189 + }, + { + "epoch": 22.5, + "grad_norm": 20.935462951660156, + "learning_rate": 3.875e-05, + "loss": 0.7779, + "step": 8190 + }, + { + "epoch": 22.502747252747252, + "grad_norm": 13.449578285217285, + "learning_rate": 3.874862637362637e-05, + "loss": 0.3081, + "step": 8191 + }, + { + "epoch": 22.505494505494504, + "grad_norm": 8.693219184875488, + "learning_rate": 3.874725274725275e-05, + "loss": 0.2354, + "step": 8192 + }, + { + "epoch": 22.50824175824176, + "grad_norm": 5.0232977867126465, + "learning_rate": 3.8745879120879124e-05, + "loss": 0.1554, + "step": 8193 + }, + { + "epoch": 22.51098901098901, + "grad_norm": 9.118239402770996, + "learning_rate": 3.8744505494505494e-05, + "loss": 0.3727, + "step": 8194 + }, + { + "epoch": 22.513736263736263, + "grad_norm": 15.020462989807129, + "learning_rate": 3.874313186813187e-05, + "loss": 0.5454, + "step": 8195 + }, + { + "epoch": 22.516483516483518, + "grad_norm": 15.08069133758545, + "learning_rate": 3.874175824175824e-05, + "loss": 0.4859, + "step": 8196 + }, + { + "epoch": 22.51923076923077, + "grad_norm": 7.0804901123046875, + "learning_rate": 3.874038461538462e-05, + "loss": 0.1644, + "step": 8197 + }, + { + "epoch": 22.521978021978022, + "grad_norm": 11.01253890991211, + "learning_rate": 3.8739010989010994e-05, + "loss": 0.3394, + "step": 8198 + }, + { + "epoch": 22.524725274725274, + "grad_norm": 13.964298248291016, + "learning_rate": 3.8737637362637364e-05, + "loss": 0.3894, + "step": 8199 + }, + { + "epoch": 22.52747252747253, + "grad_norm": 13.080523490905762, + "learning_rate": 3.873626373626374e-05, + "loss": 0.4829, + "step": 8200 + }, + { + "epoch": 22.53021978021978, + "grad_norm": 15.336445808410645, + "learning_rate": 3.873489010989011e-05, + "loss": 0.3725, + "step": 8201 + }, + { + "epoch": 22.532967032967033, + "grad_norm": 15.05689811706543, + "learning_rate": 3.873351648351649e-05, + "loss": 0.3395, + "step": 8202 + }, + { + "epoch": 22.535714285714285, + "grad_norm": 8.671136856079102, + "learning_rate": 3.8732142857142864e-05, + "loss": 0.1824, + "step": 8203 + }, + { + "epoch": 22.53846153846154, + "grad_norm": 6.425974369049072, + "learning_rate": 3.8730769230769234e-05, + "loss": 0.1478, + "step": 8204 + }, + { + "epoch": 22.541208791208792, + "grad_norm": 7.433409214019775, + "learning_rate": 3.8729395604395604e-05, + "loss": 0.1143, + "step": 8205 + }, + { + "epoch": 22.543956043956044, + "grad_norm": 12.906431198120117, + "learning_rate": 3.8728021978021975e-05, + "loss": 0.1959, + "step": 8206 + }, + { + "epoch": 22.546703296703296, + "grad_norm": 14.748008728027344, + "learning_rate": 3.872664835164835e-05, + "loss": 0.319, + "step": 8207 + }, + { + "epoch": 22.54945054945055, + "grad_norm": 13.819900512695312, + "learning_rate": 3.872527472527473e-05, + "loss": 0.616, + "step": 8208 + }, + { + "epoch": 22.552197802197803, + "grad_norm": 22.43164825439453, + "learning_rate": 3.87239010989011e-05, + "loss": 1.1768, + "step": 8209 + }, + { + "epoch": 22.554945054945055, + "grad_norm": 16.692903518676758, + "learning_rate": 3.8722527472527475e-05, + "loss": 0.7504, + "step": 8210 + }, + { + "epoch": 22.557692307692307, + "grad_norm": 10.817397117614746, + "learning_rate": 3.8721153846153845e-05, + "loss": 0.2695, + "step": 8211 + }, + { + "epoch": 22.560439560439562, + "grad_norm": 6.519958019256592, + "learning_rate": 3.871978021978022e-05, + "loss": 0.0912, + "step": 8212 + }, + { + "epoch": 22.563186813186814, + "grad_norm": 6.938719272613525, + "learning_rate": 3.87184065934066e-05, + "loss": 0.1369, + "step": 8213 + }, + { + "epoch": 22.565934065934066, + "grad_norm": 16.249202728271484, + "learning_rate": 3.871703296703297e-05, + "loss": 0.3309, + "step": 8214 + }, + { + "epoch": 22.568681318681318, + "grad_norm": 9.403167724609375, + "learning_rate": 3.8715659340659345e-05, + "loss": 0.2966, + "step": 8215 + }, + { + "epoch": 22.571428571428573, + "grad_norm": 18.0167236328125, + "learning_rate": 3.8714285714285715e-05, + "loss": 0.527, + "step": 8216 + }, + { + "epoch": 22.574175824175825, + "grad_norm": 14.169246673583984, + "learning_rate": 3.871291208791209e-05, + "loss": 0.4703, + "step": 8217 + }, + { + "epoch": 22.576923076923077, + "grad_norm": 10.79463005065918, + "learning_rate": 3.871153846153847e-05, + "loss": 0.3747, + "step": 8218 + }, + { + "epoch": 22.57967032967033, + "grad_norm": 15.476387977600098, + "learning_rate": 3.871016483516484e-05, + "loss": 0.5538, + "step": 8219 + }, + { + "epoch": 22.582417582417584, + "grad_norm": 5.688168048858643, + "learning_rate": 3.870879120879121e-05, + "loss": 0.1451, + "step": 8220 + }, + { + "epoch": 22.585164835164836, + "grad_norm": 8.75598430633545, + "learning_rate": 3.870741758241758e-05, + "loss": 0.3247, + "step": 8221 + }, + { + "epoch": 22.587912087912088, + "grad_norm": 11.976503372192383, + "learning_rate": 3.8706043956043956e-05, + "loss": 0.3347, + "step": 8222 + }, + { + "epoch": 22.59065934065934, + "grad_norm": 12.361312866210938, + "learning_rate": 3.870467032967033e-05, + "loss": 0.405, + "step": 8223 + }, + { + "epoch": 22.593406593406595, + "grad_norm": 9.884882926940918, + "learning_rate": 3.87032967032967e-05, + "loss": 0.2616, + "step": 8224 + }, + { + "epoch": 22.596153846153847, + "grad_norm": 11.005295753479004, + "learning_rate": 3.870192307692308e-05, + "loss": 0.3115, + "step": 8225 + }, + { + "epoch": 22.5989010989011, + "grad_norm": 2.7493419647216797, + "learning_rate": 3.870054945054945e-05, + "loss": 0.0844, + "step": 8226 + }, + { + "epoch": 22.60164835164835, + "grad_norm": 11.916686058044434, + "learning_rate": 3.8699175824175826e-05, + "loss": 0.292, + "step": 8227 + }, + { + "epoch": 22.604395604395606, + "grad_norm": 7.443228244781494, + "learning_rate": 3.86978021978022e-05, + "loss": 0.0805, + "step": 8228 + }, + { + "epoch": 22.607142857142858, + "grad_norm": 10.488512992858887, + "learning_rate": 3.869642857142857e-05, + "loss": 0.2836, + "step": 8229 + }, + { + "epoch": 22.60989010989011, + "grad_norm": 6.873208045959473, + "learning_rate": 3.869505494505495e-05, + "loss": 0.1601, + "step": 8230 + }, + { + "epoch": 22.61263736263736, + "grad_norm": 10.609762191772461, + "learning_rate": 3.869368131868132e-05, + "loss": 0.3928, + "step": 8231 + }, + { + "epoch": 22.615384615384617, + "grad_norm": 24.82689666748047, + "learning_rate": 3.86923076923077e-05, + "loss": 0.9297, + "step": 8232 + }, + { + "epoch": 22.61813186813187, + "grad_norm": 4.186781406402588, + "learning_rate": 3.8690934065934073e-05, + "loss": 0.0882, + "step": 8233 + }, + { + "epoch": 22.62087912087912, + "grad_norm": 8.915940284729004, + "learning_rate": 3.8689560439560443e-05, + "loss": 0.2593, + "step": 8234 + }, + { + "epoch": 22.623626373626372, + "grad_norm": 8.939690589904785, + "learning_rate": 3.8688186813186813e-05, + "loss": 0.2073, + "step": 8235 + }, + { + "epoch": 22.626373626373628, + "grad_norm": 6.696369647979736, + "learning_rate": 3.8686813186813183e-05, + "loss": 0.234, + "step": 8236 + }, + { + "epoch": 22.62912087912088, + "grad_norm": 10.647603988647461, + "learning_rate": 3.868543956043956e-05, + "loss": 0.3448, + "step": 8237 + }, + { + "epoch": 22.63186813186813, + "grad_norm": 12.765392303466797, + "learning_rate": 3.868406593406594e-05, + "loss": 0.3629, + "step": 8238 + }, + { + "epoch": 22.634615384615383, + "grad_norm": 15.199531555175781, + "learning_rate": 3.868269230769231e-05, + "loss": 0.6252, + "step": 8239 + }, + { + "epoch": 22.63736263736264, + "grad_norm": 10.3743896484375, + "learning_rate": 3.8681318681318684e-05, + "loss": 0.5633, + "step": 8240 + }, + { + "epoch": 22.64010989010989, + "grad_norm": 10.475954055786133, + "learning_rate": 3.8679945054945054e-05, + "loss": 0.2607, + "step": 8241 + }, + { + "epoch": 22.642857142857142, + "grad_norm": 12.520203590393066, + "learning_rate": 3.867857142857143e-05, + "loss": 0.6625, + "step": 8242 + }, + { + "epoch": 22.645604395604394, + "grad_norm": 18.08314323425293, + "learning_rate": 3.867719780219781e-05, + "loss": 0.4596, + "step": 8243 + }, + { + "epoch": 22.64835164835165, + "grad_norm": 12.07844066619873, + "learning_rate": 3.867582417582418e-05, + "loss": 0.2478, + "step": 8244 + }, + { + "epoch": 22.6510989010989, + "grad_norm": 21.495824813842773, + "learning_rate": 3.8674450549450554e-05, + "loss": 0.5958, + "step": 8245 + }, + { + "epoch": 22.653846153846153, + "grad_norm": 13.242910385131836, + "learning_rate": 3.8673076923076924e-05, + "loss": 0.3549, + "step": 8246 + }, + { + "epoch": 22.656593406593405, + "grad_norm": 10.940282821655273, + "learning_rate": 3.86717032967033e-05, + "loss": 0.3275, + "step": 8247 + }, + { + "epoch": 22.65934065934066, + "grad_norm": 6.832888603210449, + "learning_rate": 3.867032967032968e-05, + "loss": 0.1642, + "step": 8248 + }, + { + "epoch": 22.662087912087912, + "grad_norm": 12.246657371520996, + "learning_rate": 3.866895604395605e-05, + "loss": 0.4485, + "step": 8249 + }, + { + "epoch": 22.664835164835164, + "grad_norm": 7.50468111038208, + "learning_rate": 3.866758241758242e-05, + "loss": 0.2337, + "step": 8250 + }, + { + "epoch": 22.667582417582416, + "grad_norm": 12.2212495803833, + "learning_rate": 3.866620879120879e-05, + "loss": 0.5073, + "step": 8251 + }, + { + "epoch": 22.67032967032967, + "grad_norm": 19.89171600341797, + "learning_rate": 3.8664835164835165e-05, + "loss": 0.812, + "step": 8252 + }, + { + "epoch": 22.673076923076923, + "grad_norm": 13.419235229492188, + "learning_rate": 3.866346153846154e-05, + "loss": 0.4039, + "step": 8253 + }, + { + "epoch": 22.675824175824175, + "grad_norm": 11.18930721282959, + "learning_rate": 3.866208791208791e-05, + "loss": 0.3177, + "step": 8254 + }, + { + "epoch": 22.678571428571427, + "grad_norm": 5.00797700881958, + "learning_rate": 3.866071428571429e-05, + "loss": 0.1176, + "step": 8255 + }, + { + "epoch": 22.681318681318682, + "grad_norm": 8.912195205688477, + "learning_rate": 3.865934065934066e-05, + "loss": 0.2183, + "step": 8256 + }, + { + "epoch": 22.684065934065934, + "grad_norm": 10.353285789489746, + "learning_rate": 3.8657967032967035e-05, + "loss": 0.2516, + "step": 8257 + }, + { + "epoch": 22.686813186813186, + "grad_norm": 7.271039962768555, + "learning_rate": 3.865659340659341e-05, + "loss": 0.1566, + "step": 8258 + }, + { + "epoch": 22.689560439560438, + "grad_norm": 8.701156616210938, + "learning_rate": 3.865521978021978e-05, + "loss": 0.3167, + "step": 8259 + }, + { + "epoch": 22.692307692307693, + "grad_norm": 14.191603660583496, + "learning_rate": 3.865384615384616e-05, + "loss": 0.3711, + "step": 8260 + }, + { + "epoch": 22.695054945054945, + "grad_norm": 11.841193199157715, + "learning_rate": 3.865247252747253e-05, + "loss": 0.4873, + "step": 8261 + }, + { + "epoch": 22.697802197802197, + "grad_norm": 15.822513580322266, + "learning_rate": 3.8651098901098906e-05, + "loss": 0.5621, + "step": 8262 + }, + { + "epoch": 22.70054945054945, + "grad_norm": 22.125484466552734, + "learning_rate": 3.864972527472528e-05, + "loss": 0.7031, + "step": 8263 + }, + { + "epoch": 22.703296703296704, + "grad_norm": 7.7313666343688965, + "learning_rate": 3.864835164835165e-05, + "loss": 0.2472, + "step": 8264 + }, + { + "epoch": 22.706043956043956, + "grad_norm": 15.976754188537598, + "learning_rate": 3.864697802197802e-05, + "loss": 0.547, + "step": 8265 + }, + { + "epoch": 22.708791208791208, + "grad_norm": 13.110068321228027, + "learning_rate": 3.864560439560439e-05, + "loss": 0.3826, + "step": 8266 + }, + { + "epoch": 22.71153846153846, + "grad_norm": 15.693153381347656, + "learning_rate": 3.864423076923077e-05, + "loss": 0.6121, + "step": 8267 + }, + { + "epoch": 22.714285714285715, + "grad_norm": 17.291643142700195, + "learning_rate": 3.8642857142857146e-05, + "loss": 0.4693, + "step": 8268 + }, + { + "epoch": 22.717032967032967, + "grad_norm": 18.229650497436523, + "learning_rate": 3.8641483516483516e-05, + "loss": 0.7913, + "step": 8269 + }, + { + "epoch": 22.71978021978022, + "grad_norm": 14.48773193359375, + "learning_rate": 3.864010989010989e-05, + "loss": 0.4207, + "step": 8270 + }, + { + "epoch": 22.72252747252747, + "grad_norm": 12.50248908996582, + "learning_rate": 3.863873626373626e-05, + "loss": 0.3286, + "step": 8271 + }, + { + "epoch": 22.725274725274726, + "grad_norm": 17.192821502685547, + "learning_rate": 3.863736263736264e-05, + "loss": 0.6187, + "step": 8272 + }, + { + "epoch": 22.728021978021978, + "grad_norm": 27.791248321533203, + "learning_rate": 3.8635989010989016e-05, + "loss": 1.0903, + "step": 8273 + }, + { + "epoch": 22.73076923076923, + "grad_norm": 5.515796661376953, + "learning_rate": 3.8634615384615386e-05, + "loss": 0.1875, + "step": 8274 + }, + { + "epoch": 22.733516483516482, + "grad_norm": 12.994961738586426, + "learning_rate": 3.863324175824176e-05, + "loss": 0.4556, + "step": 8275 + }, + { + "epoch": 22.736263736263737, + "grad_norm": 14.298200607299805, + "learning_rate": 3.863186813186813e-05, + "loss": 0.49, + "step": 8276 + }, + { + "epoch": 22.73901098901099, + "grad_norm": 14.42021369934082, + "learning_rate": 3.863049450549451e-05, + "loss": 0.3072, + "step": 8277 + }, + { + "epoch": 22.74175824175824, + "grad_norm": 10.881277084350586, + "learning_rate": 3.862912087912089e-05, + "loss": 0.2479, + "step": 8278 + }, + { + "epoch": 22.744505494505496, + "grad_norm": 7.017291069030762, + "learning_rate": 3.862774725274726e-05, + "loss": 0.2215, + "step": 8279 + }, + { + "epoch": 22.747252747252748, + "grad_norm": 8.449922561645508, + "learning_rate": 3.862637362637363e-05, + "loss": 0.2299, + "step": 8280 + }, + { + "epoch": 22.75, + "grad_norm": 11.181709289550781, + "learning_rate": 3.8625e-05, + "loss": 0.5502, + "step": 8281 + }, + { + "epoch": 22.752747252747252, + "grad_norm": 11.302124977111816, + "learning_rate": 3.8623626373626374e-05, + "loss": 0.3723, + "step": 8282 + }, + { + "epoch": 22.755494505494504, + "grad_norm": 14.558685302734375, + "learning_rate": 3.862225274725275e-05, + "loss": 0.49, + "step": 8283 + }, + { + "epoch": 22.75824175824176, + "grad_norm": 7.820427417755127, + "learning_rate": 3.862087912087912e-05, + "loss": 0.2695, + "step": 8284 + }, + { + "epoch": 22.76098901098901, + "grad_norm": 16.97159194946289, + "learning_rate": 3.86195054945055e-05, + "loss": 0.4295, + "step": 8285 + }, + { + "epoch": 22.763736263736263, + "grad_norm": 7.859422206878662, + "learning_rate": 3.861813186813187e-05, + "loss": 0.3092, + "step": 8286 + }, + { + "epoch": 22.766483516483518, + "grad_norm": 14.608089447021484, + "learning_rate": 3.8616758241758244e-05, + "loss": 0.3408, + "step": 8287 + }, + { + "epoch": 22.76923076923077, + "grad_norm": 19.420841217041016, + "learning_rate": 3.861538461538462e-05, + "loss": 0.769, + "step": 8288 + }, + { + "epoch": 22.771978021978022, + "grad_norm": 8.451741218566895, + "learning_rate": 3.861401098901099e-05, + "loss": 0.1828, + "step": 8289 + }, + { + "epoch": 22.774725274725274, + "grad_norm": 11.569426536560059, + "learning_rate": 3.861263736263737e-05, + "loss": 0.3503, + "step": 8290 + }, + { + "epoch": 22.77747252747253, + "grad_norm": 7.988996982574463, + "learning_rate": 3.861126373626374e-05, + "loss": 0.2877, + "step": 8291 + }, + { + "epoch": 22.78021978021978, + "grad_norm": 11.272968292236328, + "learning_rate": 3.8609890109890115e-05, + "loss": 0.2377, + "step": 8292 + }, + { + "epoch": 22.782967032967033, + "grad_norm": 7.128978729248047, + "learning_rate": 3.860851648351649e-05, + "loss": 0.1608, + "step": 8293 + }, + { + "epoch": 22.785714285714285, + "grad_norm": 8.840950965881348, + "learning_rate": 3.860714285714286e-05, + "loss": 0.209, + "step": 8294 + }, + { + "epoch": 22.78846153846154, + "grad_norm": 25.068246841430664, + "learning_rate": 3.860576923076923e-05, + "loss": 0.8838, + "step": 8295 + }, + { + "epoch": 22.791208791208792, + "grad_norm": 14.02299690246582, + "learning_rate": 3.86043956043956e-05, + "loss": 0.4702, + "step": 8296 + }, + { + "epoch": 22.793956043956044, + "grad_norm": 20.145973205566406, + "learning_rate": 3.860302197802198e-05, + "loss": 0.9965, + "step": 8297 + }, + { + "epoch": 22.796703296703296, + "grad_norm": 14.105260848999023, + "learning_rate": 3.8601648351648355e-05, + "loss": 0.4687, + "step": 8298 + }, + { + "epoch": 22.79945054945055, + "grad_norm": 13.906774520874023, + "learning_rate": 3.8600274725274725e-05, + "loss": 0.3978, + "step": 8299 + }, + { + "epoch": 22.802197802197803, + "grad_norm": 17.396223068237305, + "learning_rate": 3.85989010989011e-05, + "loss": 0.6159, + "step": 8300 + }, + { + "epoch": 22.804945054945055, + "grad_norm": 14.912979125976562, + "learning_rate": 3.859752747252747e-05, + "loss": 0.5912, + "step": 8301 + }, + { + "epoch": 22.807692307692307, + "grad_norm": 18.834688186645508, + "learning_rate": 3.859615384615385e-05, + "loss": 0.5427, + "step": 8302 + }, + { + "epoch": 22.810439560439562, + "grad_norm": 16.005231857299805, + "learning_rate": 3.8594780219780225e-05, + "loss": 0.6674, + "step": 8303 + }, + { + "epoch": 22.813186813186814, + "grad_norm": 7.761524677276611, + "learning_rate": 3.8593406593406595e-05, + "loss": 0.1642, + "step": 8304 + }, + { + "epoch": 22.815934065934066, + "grad_norm": 13.853425979614258, + "learning_rate": 3.859203296703297e-05, + "loss": 0.3219, + "step": 8305 + }, + { + "epoch": 22.818681318681318, + "grad_norm": 13.128746032714844, + "learning_rate": 3.859065934065934e-05, + "loss": 0.5035, + "step": 8306 + }, + { + "epoch": 22.821428571428573, + "grad_norm": 12.056258201599121, + "learning_rate": 3.858928571428572e-05, + "loss": 0.2325, + "step": 8307 + }, + { + "epoch": 22.824175824175825, + "grad_norm": 6.920071125030518, + "learning_rate": 3.858791208791209e-05, + "loss": 0.1685, + "step": 8308 + }, + { + "epoch": 22.826923076923077, + "grad_norm": 16.933189392089844, + "learning_rate": 3.8586538461538466e-05, + "loss": 0.7774, + "step": 8309 + }, + { + "epoch": 22.82967032967033, + "grad_norm": 7.73417854309082, + "learning_rate": 3.8585164835164836e-05, + "loss": 0.1798, + "step": 8310 + }, + { + "epoch": 22.832417582417584, + "grad_norm": 12.471541404724121, + "learning_rate": 3.8583791208791206e-05, + "loss": 0.3348, + "step": 8311 + }, + { + "epoch": 22.835164835164836, + "grad_norm": 13.96704387664795, + "learning_rate": 3.858241758241758e-05, + "loss": 0.3701, + "step": 8312 + }, + { + "epoch": 22.837912087912088, + "grad_norm": 9.122345924377441, + "learning_rate": 3.858104395604395e-05, + "loss": 0.3155, + "step": 8313 + }, + { + "epoch": 22.84065934065934, + "grad_norm": 15.782933235168457, + "learning_rate": 3.857967032967033e-05, + "loss": 0.2754, + "step": 8314 + }, + { + "epoch": 22.843406593406595, + "grad_norm": 12.163412094116211, + "learning_rate": 3.8578296703296706e-05, + "loss": 0.5084, + "step": 8315 + }, + { + "epoch": 22.846153846153847, + "grad_norm": 11.290297508239746, + "learning_rate": 3.8576923076923076e-05, + "loss": 0.3795, + "step": 8316 + }, + { + "epoch": 22.8489010989011, + "grad_norm": 14.534040451049805, + "learning_rate": 3.857554945054945e-05, + "loss": 0.3335, + "step": 8317 + }, + { + "epoch": 22.85164835164835, + "grad_norm": 11.834660530090332, + "learning_rate": 3.857417582417582e-05, + "loss": 0.3815, + "step": 8318 + }, + { + "epoch": 22.854395604395606, + "grad_norm": 13.298493385314941, + "learning_rate": 3.85728021978022e-05, + "loss": 0.3787, + "step": 8319 + }, + { + "epoch": 22.857142857142858, + "grad_norm": 10.143796920776367, + "learning_rate": 3.857142857142858e-05, + "loss": 0.3271, + "step": 8320 + }, + { + "epoch": 22.85989010989011, + "grad_norm": 20.192726135253906, + "learning_rate": 3.857005494505495e-05, + "loss": 0.7898, + "step": 8321 + }, + { + "epoch": 22.86263736263736, + "grad_norm": 18.941896438598633, + "learning_rate": 3.8568681318681324e-05, + "loss": 0.7313, + "step": 8322 + }, + { + "epoch": 22.865384615384617, + "grad_norm": 8.023146629333496, + "learning_rate": 3.8567307692307694e-05, + "loss": 0.1964, + "step": 8323 + }, + { + "epoch": 22.86813186813187, + "grad_norm": 16.856689453125, + "learning_rate": 3.856593406593407e-05, + "loss": 0.7385, + "step": 8324 + }, + { + "epoch": 22.87087912087912, + "grad_norm": 11.0931396484375, + "learning_rate": 3.856456043956044e-05, + "loss": 0.5412, + "step": 8325 + }, + { + "epoch": 22.873626373626372, + "grad_norm": 9.295215606689453, + "learning_rate": 3.856318681318681e-05, + "loss": 0.327, + "step": 8326 + }, + { + "epoch": 22.876373626373628, + "grad_norm": 9.96129322052002, + "learning_rate": 3.856181318681319e-05, + "loss": 0.2731, + "step": 8327 + }, + { + "epoch": 22.87912087912088, + "grad_norm": 7.262141704559326, + "learning_rate": 3.856043956043956e-05, + "loss": 0.1792, + "step": 8328 + }, + { + "epoch": 22.88186813186813, + "grad_norm": 6.887603759765625, + "learning_rate": 3.8559065934065934e-05, + "loss": 0.1746, + "step": 8329 + }, + { + "epoch": 22.884615384615383, + "grad_norm": 8.841240882873535, + "learning_rate": 3.855769230769231e-05, + "loss": 0.3831, + "step": 8330 + }, + { + "epoch": 22.88736263736264, + "grad_norm": 12.435884475708008, + "learning_rate": 3.855631868131868e-05, + "loss": 0.3706, + "step": 8331 + }, + { + "epoch": 22.89010989010989, + "grad_norm": 5.210202217102051, + "learning_rate": 3.855494505494506e-05, + "loss": 0.1589, + "step": 8332 + }, + { + "epoch": 22.892857142857142, + "grad_norm": 12.131162643432617, + "learning_rate": 3.855357142857143e-05, + "loss": 0.6043, + "step": 8333 + }, + { + "epoch": 22.895604395604394, + "grad_norm": 12.062472343444824, + "learning_rate": 3.8552197802197804e-05, + "loss": 0.2868, + "step": 8334 + }, + { + "epoch": 22.89835164835165, + "grad_norm": 7.556312561035156, + "learning_rate": 3.855082417582418e-05, + "loss": 0.3141, + "step": 8335 + }, + { + "epoch": 22.9010989010989, + "grad_norm": 11.106111526489258, + "learning_rate": 3.854945054945055e-05, + "loss": 0.4305, + "step": 8336 + }, + { + "epoch": 22.903846153846153, + "grad_norm": 11.978224754333496, + "learning_rate": 3.854807692307693e-05, + "loss": 0.3035, + "step": 8337 + }, + { + "epoch": 22.906593406593405, + "grad_norm": 14.576498985290527, + "learning_rate": 3.85467032967033e-05, + "loss": 0.5731, + "step": 8338 + }, + { + "epoch": 22.90934065934066, + "grad_norm": 15.760845184326172, + "learning_rate": 3.8545329670329675e-05, + "loss": 0.5648, + "step": 8339 + }, + { + "epoch": 22.912087912087912, + "grad_norm": 9.163590431213379, + "learning_rate": 3.8543956043956045e-05, + "loss": 0.401, + "step": 8340 + }, + { + "epoch": 22.914835164835164, + "grad_norm": 6.7281718254089355, + "learning_rate": 3.8542582417582415e-05, + "loss": 0.2001, + "step": 8341 + }, + { + "epoch": 22.917582417582416, + "grad_norm": 17.716154098510742, + "learning_rate": 3.854120879120879e-05, + "loss": 0.5792, + "step": 8342 + }, + { + "epoch": 22.92032967032967, + "grad_norm": 12.602338790893555, + "learning_rate": 3.853983516483516e-05, + "loss": 0.3648, + "step": 8343 + }, + { + "epoch": 22.923076923076923, + "grad_norm": 5.264873027801514, + "learning_rate": 3.853846153846154e-05, + "loss": 0.171, + "step": 8344 + }, + { + "epoch": 22.925824175824175, + "grad_norm": 14.531292915344238, + "learning_rate": 3.8537087912087915e-05, + "loss": 0.3634, + "step": 8345 + }, + { + "epoch": 22.928571428571427, + "grad_norm": 10.1838960647583, + "learning_rate": 3.8535714285714285e-05, + "loss": 0.3406, + "step": 8346 + }, + { + "epoch": 22.931318681318682, + "grad_norm": 17.591182708740234, + "learning_rate": 3.853434065934066e-05, + "loss": 0.6238, + "step": 8347 + }, + { + "epoch": 22.934065934065934, + "grad_norm": 15.363529205322266, + "learning_rate": 3.853296703296703e-05, + "loss": 0.5516, + "step": 8348 + }, + { + "epoch": 22.936813186813186, + "grad_norm": 17.08393096923828, + "learning_rate": 3.853159340659341e-05, + "loss": 0.5245, + "step": 8349 + }, + { + "epoch": 22.939560439560438, + "grad_norm": 14.651032447814941, + "learning_rate": 3.8530219780219786e-05, + "loss": 0.4973, + "step": 8350 + }, + { + "epoch": 22.942307692307693, + "grad_norm": 14.812576293945312, + "learning_rate": 3.8528846153846156e-05, + "loss": 0.6261, + "step": 8351 + }, + { + "epoch": 22.945054945054945, + "grad_norm": 10.5148344039917, + "learning_rate": 3.852747252747253e-05, + "loss": 0.2328, + "step": 8352 + }, + { + "epoch": 22.947802197802197, + "grad_norm": 18.40767478942871, + "learning_rate": 3.85260989010989e-05, + "loss": 0.8941, + "step": 8353 + }, + { + "epoch": 22.95054945054945, + "grad_norm": 14.705889701843262, + "learning_rate": 3.852472527472528e-05, + "loss": 0.4515, + "step": 8354 + }, + { + "epoch": 22.953296703296704, + "grad_norm": 14.9777250289917, + "learning_rate": 3.852335164835165e-05, + "loss": 0.6016, + "step": 8355 + }, + { + "epoch": 22.956043956043956, + "grad_norm": 19.28726577758789, + "learning_rate": 3.852197802197802e-05, + "loss": 0.6321, + "step": 8356 + }, + { + "epoch": 22.958791208791208, + "grad_norm": 10.0040922164917, + "learning_rate": 3.8520604395604396e-05, + "loss": 0.1813, + "step": 8357 + }, + { + "epoch": 22.96153846153846, + "grad_norm": 11.528509140014648, + "learning_rate": 3.8519230769230766e-05, + "loss": 0.2166, + "step": 8358 + }, + { + "epoch": 22.964285714285715, + "grad_norm": 12.06241512298584, + "learning_rate": 3.851785714285714e-05, + "loss": 0.5174, + "step": 8359 + }, + { + "epoch": 22.967032967032967, + "grad_norm": 15.849029541015625, + "learning_rate": 3.851648351648352e-05, + "loss": 0.2825, + "step": 8360 + }, + { + "epoch": 22.96978021978022, + "grad_norm": 13.96918773651123, + "learning_rate": 3.851510989010989e-05, + "loss": 0.2687, + "step": 8361 + }, + { + "epoch": 22.97252747252747, + "grad_norm": 16.07673454284668, + "learning_rate": 3.851373626373627e-05, + "loss": 0.6219, + "step": 8362 + }, + { + "epoch": 22.975274725274726, + "grad_norm": 15.633084297180176, + "learning_rate": 3.851236263736264e-05, + "loss": 0.6081, + "step": 8363 + }, + { + "epoch": 22.978021978021978, + "grad_norm": 13.890054702758789, + "learning_rate": 3.8510989010989013e-05, + "loss": 0.3889, + "step": 8364 + }, + { + "epoch": 22.98076923076923, + "grad_norm": 11.674532890319824, + "learning_rate": 3.850961538461539e-05, + "loss": 0.3542, + "step": 8365 + }, + { + "epoch": 22.983516483516482, + "grad_norm": 3.575056552886963, + "learning_rate": 3.850824175824176e-05, + "loss": 0.1276, + "step": 8366 + }, + { + "epoch": 22.986263736263737, + "grad_norm": 7.034247875213623, + "learning_rate": 3.850686813186814e-05, + "loss": 0.2369, + "step": 8367 + }, + { + "epoch": 22.98901098901099, + "grad_norm": 15.767152786254883, + "learning_rate": 3.850549450549451e-05, + "loss": 0.5636, + "step": 8368 + }, + { + "epoch": 22.99175824175824, + "grad_norm": 8.166775703430176, + "learning_rate": 3.8504120879120884e-05, + "loss": 0.17, + "step": 8369 + }, + { + "epoch": 22.994505494505496, + "grad_norm": 7.0159687995910645, + "learning_rate": 3.8502747252747254e-05, + "loss": 0.2104, + "step": 8370 + }, + { + "epoch": 22.997252747252748, + "grad_norm": 8.39345645904541, + "learning_rate": 3.8501373626373624e-05, + "loss": 0.1383, + "step": 8371 + }, + { + "epoch": 23.0, + "grad_norm": 5.843344211578369, + "learning_rate": 3.85e-05, + "loss": 0.11, + "step": 8372 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.6983471074380165, + "eval_f1": 0.6925724835124655, + "eval_f1_DuraRiadoRio_64x64": 0.42328042328042326, + "eval_f1_Mole_64x64": 0.7205882352941176, + "eval_f1_Quebrado_64x64": 0.891156462585034, + "eval_f1_RiadoRio_64x64": 0.5813953488372093, + "eval_f1_RioFechado_64x64": 0.846441947565543, + "eval_loss": 1.500252604484558, + "eval_precision": 0.7762975019984013, + "eval_precision_DuraRiadoRio_64x64": 0.8888888888888888, + "eval_precision_Mole_64x64": 0.765625, + "eval_precision_Quebrado_64x64": 0.8733333333333333, + "eval_precision_RiadoRio_64x64": 0.44964028776978415, + "eval_precision_RioFechado_64x64": 0.904, + "eval_recall": 0.6972397248991022, + "eval_recall_DuraRiadoRio_64x64": 0.2777777777777778, + "eval_recall_Mole_64x64": 0.6805555555555556, + "eval_recall_Quebrado_64x64": 0.9097222222222222, + "eval_recall_RiadoRio_64x64": 0.8223684210526315, + "eval_recall_RioFechado_64x64": 0.795774647887324, + "eval_runtime": 1.7621, + "eval_samples_per_second": 412.003, + "eval_steps_per_second": 26.105, + "step": 8372 + }, + { + "epoch": 23.002747252747252, + "grad_norm": 10.441608428955078, + "learning_rate": 3.849862637362637e-05, + "loss": 0.3119, + "step": 8373 + }, + { + "epoch": 23.005494505494507, + "grad_norm": 10.082488059997559, + "learning_rate": 3.849725274725275e-05, + "loss": 0.2444, + "step": 8374 + }, + { + "epoch": 23.00824175824176, + "grad_norm": 7.86766242980957, + "learning_rate": 3.8495879120879124e-05, + "loss": 0.2151, + "step": 8375 + }, + { + "epoch": 23.01098901098901, + "grad_norm": 15.044881820678711, + "learning_rate": 3.8494505494505494e-05, + "loss": 0.4021, + "step": 8376 + }, + { + "epoch": 23.013736263736263, + "grad_norm": 9.232488632202148, + "learning_rate": 3.849313186813187e-05, + "loss": 0.2235, + "step": 8377 + }, + { + "epoch": 23.016483516483518, + "grad_norm": 14.109265327453613, + "learning_rate": 3.849175824175824e-05, + "loss": 0.316, + "step": 8378 + }, + { + "epoch": 23.01923076923077, + "grad_norm": 14.275883674621582, + "learning_rate": 3.849038461538462e-05, + "loss": 0.363, + "step": 8379 + }, + { + "epoch": 23.021978021978022, + "grad_norm": 14.379473686218262, + "learning_rate": 3.8489010989010995e-05, + "loss": 0.2805, + "step": 8380 + }, + { + "epoch": 23.024725274725274, + "grad_norm": 6.273826599121094, + "learning_rate": 3.8487637362637365e-05, + "loss": 0.1381, + "step": 8381 + }, + { + "epoch": 23.02747252747253, + "grad_norm": 15.728799819946289, + "learning_rate": 3.848626373626374e-05, + "loss": 0.4727, + "step": 8382 + }, + { + "epoch": 23.03021978021978, + "grad_norm": 16.70828628540039, + "learning_rate": 3.848489010989011e-05, + "loss": 0.5896, + "step": 8383 + }, + { + "epoch": 23.032967032967033, + "grad_norm": 3.0938527584075928, + "learning_rate": 3.848351648351649e-05, + "loss": 0.1439, + "step": 8384 + }, + { + "epoch": 23.035714285714285, + "grad_norm": 9.681946754455566, + "learning_rate": 3.848214285714286e-05, + "loss": 0.2192, + "step": 8385 + }, + { + "epoch": 23.03846153846154, + "grad_norm": 5.752763748168945, + "learning_rate": 3.848076923076923e-05, + "loss": 0.1754, + "step": 8386 + }, + { + "epoch": 23.041208791208792, + "grad_norm": 16.091796875, + "learning_rate": 3.8479395604395605e-05, + "loss": 0.5421, + "step": 8387 + }, + { + "epoch": 23.043956043956044, + "grad_norm": 18.379697799682617, + "learning_rate": 3.8478021978021975e-05, + "loss": 0.353, + "step": 8388 + }, + { + "epoch": 23.046703296703296, + "grad_norm": 14.285886764526367, + "learning_rate": 3.847664835164835e-05, + "loss": 0.4515, + "step": 8389 + }, + { + "epoch": 23.04945054945055, + "grad_norm": 10.375844955444336, + "learning_rate": 3.847527472527473e-05, + "loss": 0.3487, + "step": 8390 + }, + { + "epoch": 23.052197802197803, + "grad_norm": 14.647140502929688, + "learning_rate": 3.84739010989011e-05, + "loss": 0.4694, + "step": 8391 + }, + { + "epoch": 23.054945054945055, + "grad_norm": 7.327785968780518, + "learning_rate": 3.8472527472527476e-05, + "loss": 0.2022, + "step": 8392 + }, + { + "epoch": 23.057692307692307, + "grad_norm": 24.032567977905273, + "learning_rate": 3.8471153846153846e-05, + "loss": 0.7906, + "step": 8393 + }, + { + "epoch": 23.060439560439562, + "grad_norm": 20.228609085083008, + "learning_rate": 3.846978021978022e-05, + "loss": 0.3872, + "step": 8394 + }, + { + "epoch": 23.063186813186814, + "grad_norm": 16.8270320892334, + "learning_rate": 3.84684065934066e-05, + "loss": 0.4736, + "step": 8395 + }, + { + "epoch": 23.065934065934066, + "grad_norm": 4.19485330581665, + "learning_rate": 3.846703296703297e-05, + "loss": 0.0748, + "step": 8396 + }, + { + "epoch": 23.068681318681318, + "grad_norm": 11.704416275024414, + "learning_rate": 3.8465659340659346e-05, + "loss": 0.3031, + "step": 8397 + }, + { + "epoch": 23.071428571428573, + "grad_norm": 14.174151420593262, + "learning_rate": 3.8464285714285716e-05, + "loss": 0.4859, + "step": 8398 + }, + { + "epoch": 23.074175824175825, + "grad_norm": 13.821654319763184, + "learning_rate": 3.846291208791209e-05, + "loss": 0.5388, + "step": 8399 + }, + { + "epoch": 23.076923076923077, + "grad_norm": 7.966702938079834, + "learning_rate": 3.846153846153846e-05, + "loss": 0.2772, + "step": 8400 + }, + { + "epoch": 23.07967032967033, + "grad_norm": 16.13277244567871, + "learning_rate": 3.846016483516483e-05, + "loss": 0.8149, + "step": 8401 + }, + { + "epoch": 23.082417582417584, + "grad_norm": 6.352307319641113, + "learning_rate": 3.845879120879121e-05, + "loss": 0.1513, + "step": 8402 + }, + { + "epoch": 23.085164835164836, + "grad_norm": 13.962220191955566, + "learning_rate": 3.845741758241758e-05, + "loss": 0.4205, + "step": 8403 + }, + { + "epoch": 23.087912087912088, + "grad_norm": 18.007404327392578, + "learning_rate": 3.8456043956043956e-05, + "loss": 0.7571, + "step": 8404 + }, + { + "epoch": 23.09065934065934, + "grad_norm": 10.243729591369629, + "learning_rate": 3.845467032967033e-05, + "loss": 0.239, + "step": 8405 + }, + { + "epoch": 23.093406593406595, + "grad_norm": 12.457732200622559, + "learning_rate": 3.84532967032967e-05, + "loss": 0.3071, + "step": 8406 + }, + { + "epoch": 23.096153846153847, + "grad_norm": 13.401067733764648, + "learning_rate": 3.845192307692308e-05, + "loss": 0.5654, + "step": 8407 + }, + { + "epoch": 23.0989010989011, + "grad_norm": 8.516764640808105, + "learning_rate": 3.845054945054945e-05, + "loss": 0.3097, + "step": 8408 + }, + { + "epoch": 23.10164835164835, + "grad_norm": 5.273711681365967, + "learning_rate": 3.844917582417583e-05, + "loss": 0.1083, + "step": 8409 + }, + { + "epoch": 23.104395604395606, + "grad_norm": 14.580266952514648, + "learning_rate": 3.8447802197802204e-05, + "loss": 0.3682, + "step": 8410 + }, + { + "epoch": 23.107142857142858, + "grad_norm": 21.3730525970459, + "learning_rate": 3.8446428571428574e-05, + "loss": 0.6551, + "step": 8411 + }, + { + "epoch": 23.10989010989011, + "grad_norm": 19.691438674926758, + "learning_rate": 3.844505494505495e-05, + "loss": 0.9989, + "step": 8412 + }, + { + "epoch": 23.11263736263736, + "grad_norm": 8.708967208862305, + "learning_rate": 3.844368131868132e-05, + "loss": 0.3219, + "step": 8413 + }, + { + "epoch": 23.115384615384617, + "grad_norm": 18.095409393310547, + "learning_rate": 3.84423076923077e-05, + "loss": 0.4232, + "step": 8414 + }, + { + "epoch": 23.11813186813187, + "grad_norm": 11.325329780578613, + "learning_rate": 3.844093406593407e-05, + "loss": 0.2264, + "step": 8415 + }, + { + "epoch": 23.12087912087912, + "grad_norm": 15.775873184204102, + "learning_rate": 3.843956043956044e-05, + "loss": 0.3697, + "step": 8416 + }, + { + "epoch": 23.123626373626372, + "grad_norm": 13.273406028747559, + "learning_rate": 3.8438186813186814e-05, + "loss": 0.2602, + "step": 8417 + }, + { + "epoch": 23.126373626373628, + "grad_norm": 10.36452865600586, + "learning_rate": 3.8436813186813184e-05, + "loss": 0.3355, + "step": 8418 + }, + { + "epoch": 23.12912087912088, + "grad_norm": 11.626760482788086, + "learning_rate": 3.843543956043956e-05, + "loss": 0.4137, + "step": 8419 + }, + { + "epoch": 23.13186813186813, + "grad_norm": 10.161116600036621, + "learning_rate": 3.843406593406594e-05, + "loss": 0.3564, + "step": 8420 + }, + { + "epoch": 23.134615384615383, + "grad_norm": 11.678690910339355, + "learning_rate": 3.843269230769231e-05, + "loss": 0.4661, + "step": 8421 + }, + { + "epoch": 23.13736263736264, + "grad_norm": 10.094198226928711, + "learning_rate": 3.8431318681318685e-05, + "loss": 0.2847, + "step": 8422 + }, + { + "epoch": 23.14010989010989, + "grad_norm": 13.292349815368652, + "learning_rate": 3.8429945054945055e-05, + "loss": 0.2762, + "step": 8423 + }, + { + "epoch": 23.142857142857142, + "grad_norm": 12.720466613769531, + "learning_rate": 3.842857142857143e-05, + "loss": 0.5743, + "step": 8424 + }, + { + "epoch": 23.145604395604394, + "grad_norm": 10.615084648132324, + "learning_rate": 3.842719780219781e-05, + "loss": 0.2459, + "step": 8425 + }, + { + "epoch": 23.14835164835165, + "grad_norm": 11.78280258178711, + "learning_rate": 3.842582417582418e-05, + "loss": 0.2687, + "step": 8426 + }, + { + "epoch": 23.1510989010989, + "grad_norm": 15.819415092468262, + "learning_rate": 3.8424450549450555e-05, + "loss": 0.3554, + "step": 8427 + }, + { + "epoch": 23.153846153846153, + "grad_norm": 15.656485557556152, + "learning_rate": 3.8423076923076925e-05, + "loss": 0.5005, + "step": 8428 + }, + { + "epoch": 23.156593406593405, + "grad_norm": 21.583890914916992, + "learning_rate": 3.84217032967033e-05, + "loss": 0.902, + "step": 8429 + }, + { + "epoch": 23.15934065934066, + "grad_norm": 7.272533893585205, + "learning_rate": 3.842032967032967e-05, + "loss": 0.3062, + "step": 8430 + }, + { + "epoch": 23.162087912087912, + "grad_norm": 7.0757222175598145, + "learning_rate": 3.841895604395604e-05, + "loss": 0.1609, + "step": 8431 + }, + { + "epoch": 23.164835164835164, + "grad_norm": 6.0813446044921875, + "learning_rate": 3.841758241758242e-05, + "loss": 0.1421, + "step": 8432 + }, + { + "epoch": 23.167582417582416, + "grad_norm": 18.833110809326172, + "learning_rate": 3.841620879120879e-05, + "loss": 0.6055, + "step": 8433 + }, + { + "epoch": 23.17032967032967, + "grad_norm": 14.548980712890625, + "learning_rate": 3.8414835164835165e-05, + "loss": 0.4703, + "step": 8434 + }, + { + "epoch": 23.173076923076923, + "grad_norm": 10.94252872467041, + "learning_rate": 3.841346153846154e-05, + "loss": 0.3121, + "step": 8435 + }, + { + "epoch": 23.175824175824175, + "grad_norm": 15.021198272705078, + "learning_rate": 3.841208791208791e-05, + "loss": 0.4436, + "step": 8436 + }, + { + "epoch": 23.178571428571427, + "grad_norm": 14.721851348876953, + "learning_rate": 3.841071428571429e-05, + "loss": 0.3829, + "step": 8437 + }, + { + "epoch": 23.181318681318682, + "grad_norm": 16.611433029174805, + "learning_rate": 3.840934065934066e-05, + "loss": 0.5534, + "step": 8438 + }, + { + "epoch": 23.184065934065934, + "grad_norm": 21.877880096435547, + "learning_rate": 3.8407967032967036e-05, + "loss": 0.6263, + "step": 8439 + }, + { + "epoch": 23.186813186813186, + "grad_norm": 10.984785079956055, + "learning_rate": 3.840659340659341e-05, + "loss": 0.2982, + "step": 8440 + }, + { + "epoch": 23.189560439560438, + "grad_norm": 11.255743980407715, + "learning_rate": 3.840521978021978e-05, + "loss": 0.5041, + "step": 8441 + }, + { + "epoch": 23.192307692307693, + "grad_norm": 9.355112075805664, + "learning_rate": 3.840384615384616e-05, + "loss": 0.2106, + "step": 8442 + }, + { + "epoch": 23.195054945054945, + "grad_norm": 10.961029052734375, + "learning_rate": 3.840247252747253e-05, + "loss": 0.3858, + "step": 8443 + }, + { + "epoch": 23.197802197802197, + "grad_norm": 14.324606895446777, + "learning_rate": 3.8401098901098906e-05, + "loss": 0.3457, + "step": 8444 + }, + { + "epoch": 23.20054945054945, + "grad_norm": 6.075583457946777, + "learning_rate": 3.8399725274725276e-05, + "loss": 0.1213, + "step": 8445 + }, + { + "epoch": 23.203296703296704, + "grad_norm": 9.715846061706543, + "learning_rate": 3.8398351648351646e-05, + "loss": 0.277, + "step": 8446 + }, + { + "epoch": 23.206043956043956, + "grad_norm": 14.996987342834473, + "learning_rate": 3.839697802197802e-05, + "loss": 0.5157, + "step": 8447 + }, + { + "epoch": 23.208791208791208, + "grad_norm": 11.222780227661133, + "learning_rate": 3.839560439560439e-05, + "loss": 0.2955, + "step": 8448 + }, + { + "epoch": 23.21153846153846, + "grad_norm": 2.81882381439209, + "learning_rate": 3.839423076923077e-05, + "loss": 0.0725, + "step": 8449 + }, + { + "epoch": 23.214285714285715, + "grad_norm": 10.928925514221191, + "learning_rate": 3.839285714285715e-05, + "loss": 0.3944, + "step": 8450 + }, + { + "epoch": 23.217032967032967, + "grad_norm": 9.771979331970215, + "learning_rate": 3.839148351648352e-05, + "loss": 0.3983, + "step": 8451 + }, + { + "epoch": 23.21978021978022, + "grad_norm": 6.4664387702941895, + "learning_rate": 3.8390109890109894e-05, + "loss": 0.1317, + "step": 8452 + }, + { + "epoch": 23.22252747252747, + "grad_norm": 15.189358711242676, + "learning_rate": 3.8388736263736264e-05, + "loss": 0.3588, + "step": 8453 + }, + { + "epoch": 23.225274725274726, + "grad_norm": 18.905248641967773, + "learning_rate": 3.838736263736264e-05, + "loss": 0.5839, + "step": 8454 + }, + { + "epoch": 23.228021978021978, + "grad_norm": 9.673492431640625, + "learning_rate": 3.838598901098902e-05, + "loss": 0.2118, + "step": 8455 + }, + { + "epoch": 23.23076923076923, + "grad_norm": 14.477683067321777, + "learning_rate": 3.838461538461539e-05, + "loss": 0.4621, + "step": 8456 + }, + { + "epoch": 23.233516483516482, + "grad_norm": 8.584310531616211, + "learning_rate": 3.8383241758241764e-05, + "loss": 0.1705, + "step": 8457 + }, + { + "epoch": 23.236263736263737, + "grad_norm": 11.576020240783691, + "learning_rate": 3.8381868131868134e-05, + "loss": 0.3015, + "step": 8458 + }, + { + "epoch": 23.23901098901099, + "grad_norm": 9.996613502502441, + "learning_rate": 3.838049450549451e-05, + "loss": 0.3193, + "step": 8459 + }, + { + "epoch": 23.24175824175824, + "grad_norm": 9.310392379760742, + "learning_rate": 3.837912087912088e-05, + "loss": 0.3103, + "step": 8460 + }, + { + "epoch": 23.244505494505493, + "grad_norm": 9.752120018005371, + "learning_rate": 3.837774725274725e-05, + "loss": 0.2809, + "step": 8461 + }, + { + "epoch": 23.247252747252748, + "grad_norm": 10.724514961242676, + "learning_rate": 3.837637362637363e-05, + "loss": 0.3228, + "step": 8462 + }, + { + "epoch": 23.25, + "grad_norm": 16.558992385864258, + "learning_rate": 3.8375e-05, + "loss": 0.5676, + "step": 8463 + }, + { + "epoch": 23.252747252747252, + "grad_norm": 14.598710060119629, + "learning_rate": 3.8373626373626374e-05, + "loss": 0.5396, + "step": 8464 + }, + { + "epoch": 23.255494505494504, + "grad_norm": 10.251551628112793, + "learning_rate": 3.837225274725275e-05, + "loss": 0.2344, + "step": 8465 + }, + { + "epoch": 23.25824175824176, + "grad_norm": 11.664783477783203, + "learning_rate": 3.837087912087912e-05, + "loss": 0.3407, + "step": 8466 + }, + { + "epoch": 23.26098901098901, + "grad_norm": 7.978817939758301, + "learning_rate": 3.83695054945055e-05, + "loss": 0.191, + "step": 8467 + }, + { + "epoch": 23.263736263736263, + "grad_norm": 14.662191390991211, + "learning_rate": 3.836813186813187e-05, + "loss": 0.3402, + "step": 8468 + }, + { + "epoch": 23.266483516483518, + "grad_norm": 12.082735061645508, + "learning_rate": 3.8366758241758245e-05, + "loss": 0.4073, + "step": 8469 + }, + { + "epoch": 23.26923076923077, + "grad_norm": 7.607017517089844, + "learning_rate": 3.836538461538462e-05, + "loss": 0.2057, + "step": 8470 + }, + { + "epoch": 23.271978021978022, + "grad_norm": 15.632258415222168, + "learning_rate": 3.836401098901099e-05, + "loss": 0.602, + "step": 8471 + }, + { + "epoch": 23.274725274725274, + "grad_norm": 12.310266494750977, + "learning_rate": 3.836263736263737e-05, + "loss": 0.306, + "step": 8472 + }, + { + "epoch": 23.27747252747253, + "grad_norm": 13.355362892150879, + "learning_rate": 3.836126373626374e-05, + "loss": 0.3749, + "step": 8473 + }, + { + "epoch": 23.28021978021978, + "grad_norm": 11.174221992492676, + "learning_rate": 3.8359890109890115e-05, + "loss": 0.2513, + "step": 8474 + }, + { + "epoch": 23.282967032967033, + "grad_norm": 5.591993808746338, + "learning_rate": 3.8358516483516485e-05, + "loss": 0.0974, + "step": 8475 + }, + { + "epoch": 23.285714285714285, + "grad_norm": 11.916912078857422, + "learning_rate": 3.8357142857142855e-05, + "loss": 0.3347, + "step": 8476 + }, + { + "epoch": 23.28846153846154, + "grad_norm": 16.46291732788086, + "learning_rate": 3.835576923076923e-05, + "loss": 0.7355, + "step": 8477 + }, + { + "epoch": 23.291208791208792, + "grad_norm": 12.722371101379395, + "learning_rate": 3.83543956043956e-05, + "loss": 0.6914, + "step": 8478 + }, + { + "epoch": 23.293956043956044, + "grad_norm": 20.97608184814453, + "learning_rate": 3.835302197802198e-05, + "loss": 0.735, + "step": 8479 + }, + { + "epoch": 23.296703296703296, + "grad_norm": 12.035699844360352, + "learning_rate": 3.8351648351648356e-05, + "loss": 0.4463, + "step": 8480 + }, + { + "epoch": 23.29945054945055, + "grad_norm": 18.134239196777344, + "learning_rate": 3.8350274725274726e-05, + "loss": 0.6236, + "step": 8481 + }, + { + "epoch": 23.302197802197803, + "grad_norm": 12.078978538513184, + "learning_rate": 3.83489010989011e-05, + "loss": 0.4637, + "step": 8482 + }, + { + "epoch": 23.304945054945055, + "grad_norm": 14.200189590454102, + "learning_rate": 3.834752747252747e-05, + "loss": 0.373, + "step": 8483 + }, + { + "epoch": 23.307692307692307, + "grad_norm": 9.208455085754395, + "learning_rate": 3.834615384615385e-05, + "loss": 0.2999, + "step": 8484 + }, + { + "epoch": 23.310439560439562, + "grad_norm": 22.8696231842041, + "learning_rate": 3.8344780219780226e-05, + "loss": 0.5943, + "step": 8485 + }, + { + "epoch": 23.313186813186814, + "grad_norm": 21.72202491760254, + "learning_rate": 3.8343406593406596e-05, + "loss": 1.0854, + "step": 8486 + }, + { + "epoch": 23.315934065934066, + "grad_norm": 15.509556770324707, + "learning_rate": 3.834203296703297e-05, + "loss": 0.3982, + "step": 8487 + }, + { + "epoch": 23.318681318681318, + "grad_norm": 6.864482879638672, + "learning_rate": 3.834065934065934e-05, + "loss": 0.1699, + "step": 8488 + }, + { + "epoch": 23.321428571428573, + "grad_norm": 11.933920860290527, + "learning_rate": 3.833928571428572e-05, + "loss": 0.3241, + "step": 8489 + }, + { + "epoch": 23.324175824175825, + "grad_norm": 9.683939933776855, + "learning_rate": 3.833791208791209e-05, + "loss": 0.215, + "step": 8490 + }, + { + "epoch": 23.326923076923077, + "grad_norm": 6.037441253662109, + "learning_rate": 3.833653846153846e-05, + "loss": 0.0843, + "step": 8491 + }, + { + "epoch": 23.32967032967033, + "grad_norm": 11.13791561126709, + "learning_rate": 3.833516483516484e-05, + "loss": 0.3602, + "step": 8492 + }, + { + "epoch": 23.332417582417584, + "grad_norm": 19.03236961364746, + "learning_rate": 3.833379120879121e-05, + "loss": 0.4413, + "step": 8493 + }, + { + "epoch": 23.335164835164836, + "grad_norm": 9.969503402709961, + "learning_rate": 3.8332417582417583e-05, + "loss": 0.4844, + "step": 8494 + }, + { + "epoch": 23.337912087912088, + "grad_norm": 8.241366386413574, + "learning_rate": 3.833104395604396e-05, + "loss": 0.1746, + "step": 8495 + }, + { + "epoch": 23.34065934065934, + "grad_norm": 12.272543907165527, + "learning_rate": 3.832967032967033e-05, + "loss": 0.398, + "step": 8496 + }, + { + "epoch": 23.343406593406595, + "grad_norm": 11.056549072265625, + "learning_rate": 3.832829670329671e-05, + "loss": 0.4303, + "step": 8497 + }, + { + "epoch": 23.346153846153847, + "grad_norm": 7.938112735748291, + "learning_rate": 3.832692307692308e-05, + "loss": 0.1424, + "step": 8498 + }, + { + "epoch": 23.3489010989011, + "grad_norm": 13.943488121032715, + "learning_rate": 3.8325549450549454e-05, + "loss": 0.4325, + "step": 8499 + }, + { + "epoch": 23.35164835164835, + "grad_norm": 12.361306190490723, + "learning_rate": 3.832417582417583e-05, + "loss": 0.4873, + "step": 8500 + }, + { + "epoch": 23.354395604395606, + "grad_norm": 9.550292015075684, + "learning_rate": 3.83228021978022e-05, + "loss": 0.1694, + "step": 8501 + }, + { + "epoch": 23.357142857142858, + "grad_norm": 11.642643928527832, + "learning_rate": 3.832142857142858e-05, + "loss": 0.2653, + "step": 8502 + }, + { + "epoch": 23.35989010989011, + "grad_norm": 16.832181930541992, + "learning_rate": 3.832005494505495e-05, + "loss": 0.6404, + "step": 8503 + }, + { + "epoch": 23.36263736263736, + "grad_norm": 10.672231674194336, + "learning_rate": 3.8318681318681324e-05, + "loss": 0.4189, + "step": 8504 + }, + { + "epoch": 23.365384615384617, + "grad_norm": 14.385214805603027, + "learning_rate": 3.8317307692307694e-05, + "loss": 0.4313, + "step": 8505 + }, + { + "epoch": 23.36813186813187, + "grad_norm": 17.252553939819336, + "learning_rate": 3.8315934065934064e-05, + "loss": 0.3591, + "step": 8506 + }, + { + "epoch": 23.37087912087912, + "grad_norm": 9.73813533782959, + "learning_rate": 3.831456043956044e-05, + "loss": 0.2746, + "step": 8507 + }, + { + "epoch": 23.373626373626372, + "grad_norm": 19.78868865966797, + "learning_rate": 3.831318681318681e-05, + "loss": 0.4806, + "step": 8508 + }, + { + "epoch": 23.376373626373628, + "grad_norm": 12.535015106201172, + "learning_rate": 3.831181318681319e-05, + "loss": 0.2077, + "step": 8509 + }, + { + "epoch": 23.37912087912088, + "grad_norm": 7.923844337463379, + "learning_rate": 3.8310439560439565e-05, + "loss": 0.2834, + "step": 8510 + }, + { + "epoch": 23.38186813186813, + "grad_norm": 6.6008830070495605, + "learning_rate": 3.8309065934065935e-05, + "loss": 0.144, + "step": 8511 + }, + { + "epoch": 23.384615384615383, + "grad_norm": 17.728748321533203, + "learning_rate": 3.830769230769231e-05, + "loss": 0.3953, + "step": 8512 + }, + { + "epoch": 23.38736263736264, + "grad_norm": 20.33962059020996, + "learning_rate": 3.830631868131868e-05, + "loss": 0.5648, + "step": 8513 + }, + { + "epoch": 23.39010989010989, + "grad_norm": 7.311727046966553, + "learning_rate": 3.830494505494506e-05, + "loss": 0.1344, + "step": 8514 + }, + { + "epoch": 23.392857142857142, + "grad_norm": 12.196659088134766, + "learning_rate": 3.8303571428571435e-05, + "loss": 0.4325, + "step": 8515 + }, + { + "epoch": 23.395604395604394, + "grad_norm": 19.766408920288086, + "learning_rate": 3.8302197802197805e-05, + "loss": 0.3055, + "step": 8516 + }, + { + "epoch": 23.39835164835165, + "grad_norm": 12.15768814086914, + "learning_rate": 3.830082417582418e-05, + "loss": 0.4275, + "step": 8517 + }, + { + "epoch": 23.4010989010989, + "grad_norm": 13.021215438842773, + "learning_rate": 3.829945054945055e-05, + "loss": 0.328, + "step": 8518 + }, + { + "epoch": 23.403846153846153, + "grad_norm": 9.69762134552002, + "learning_rate": 3.829807692307693e-05, + "loss": 0.3068, + "step": 8519 + }, + { + "epoch": 23.406593406593405, + "grad_norm": 17.982236862182617, + "learning_rate": 3.82967032967033e-05, + "loss": 0.354, + "step": 8520 + }, + { + "epoch": 23.40934065934066, + "grad_norm": 16.1358699798584, + "learning_rate": 3.829532967032967e-05, + "loss": 0.4992, + "step": 8521 + }, + { + "epoch": 23.412087912087912, + "grad_norm": 16.23706817626953, + "learning_rate": 3.8293956043956046e-05, + "loss": 0.543, + "step": 8522 + }, + { + "epoch": 23.414835164835164, + "grad_norm": 16.112747192382812, + "learning_rate": 3.8292582417582416e-05, + "loss": 0.5419, + "step": 8523 + }, + { + "epoch": 23.417582417582416, + "grad_norm": 12.393624305725098, + "learning_rate": 3.829120879120879e-05, + "loss": 0.3439, + "step": 8524 + }, + { + "epoch": 23.42032967032967, + "grad_norm": 12.765237808227539, + "learning_rate": 3.828983516483517e-05, + "loss": 0.3321, + "step": 8525 + }, + { + "epoch": 23.423076923076923, + "grad_norm": 13.409838676452637, + "learning_rate": 3.828846153846154e-05, + "loss": 0.4288, + "step": 8526 + }, + { + "epoch": 23.425824175824175, + "grad_norm": 13.121481895446777, + "learning_rate": 3.8287087912087916e-05, + "loss": 0.3346, + "step": 8527 + }, + { + "epoch": 23.428571428571427, + "grad_norm": 7.009713649749756, + "learning_rate": 3.8285714285714286e-05, + "loss": 0.1856, + "step": 8528 + }, + { + "epoch": 23.431318681318682, + "grad_norm": 9.195566177368164, + "learning_rate": 3.828434065934066e-05, + "loss": 0.2458, + "step": 8529 + }, + { + "epoch": 23.434065934065934, + "grad_norm": 16.18885040283203, + "learning_rate": 3.828296703296704e-05, + "loss": 0.4517, + "step": 8530 + }, + { + "epoch": 23.436813186813186, + "grad_norm": 11.446142196655273, + "learning_rate": 3.828159340659341e-05, + "loss": 0.4589, + "step": 8531 + }, + { + "epoch": 23.439560439560438, + "grad_norm": 12.909029006958008, + "learning_rate": 3.8280219780219786e-05, + "loss": 0.3624, + "step": 8532 + }, + { + "epoch": 23.442307692307693, + "grad_norm": 14.821699142456055, + "learning_rate": 3.8278846153846156e-05, + "loss": 0.5295, + "step": 8533 + }, + { + "epoch": 23.445054945054945, + "grad_norm": 14.405866622924805, + "learning_rate": 3.827747252747253e-05, + "loss": 0.6166, + "step": 8534 + }, + { + "epoch": 23.447802197802197, + "grad_norm": 8.9923677444458, + "learning_rate": 3.82760989010989e-05, + "loss": 0.2435, + "step": 8535 + }, + { + "epoch": 23.45054945054945, + "grad_norm": 8.880017280578613, + "learning_rate": 3.827472527472527e-05, + "loss": 0.2311, + "step": 8536 + }, + { + "epoch": 23.453296703296704, + "grad_norm": 11.123983383178711, + "learning_rate": 3.827335164835165e-05, + "loss": 0.2955, + "step": 8537 + }, + { + "epoch": 23.456043956043956, + "grad_norm": 5.724560260772705, + "learning_rate": 3.827197802197802e-05, + "loss": 0.1282, + "step": 8538 + }, + { + "epoch": 23.458791208791208, + "grad_norm": 11.84685230255127, + "learning_rate": 3.82706043956044e-05, + "loss": 0.3739, + "step": 8539 + }, + { + "epoch": 23.46153846153846, + "grad_norm": 11.9026460647583, + "learning_rate": 3.826923076923077e-05, + "loss": 0.2865, + "step": 8540 + }, + { + "epoch": 23.464285714285715, + "grad_norm": 10.894906997680664, + "learning_rate": 3.8267857142857144e-05, + "loss": 0.2812, + "step": 8541 + }, + { + "epoch": 23.467032967032967, + "grad_norm": 6.758092403411865, + "learning_rate": 3.826648351648352e-05, + "loss": 0.1334, + "step": 8542 + }, + { + "epoch": 23.46978021978022, + "grad_norm": 16.208187103271484, + "learning_rate": 3.826510989010989e-05, + "loss": 0.4122, + "step": 8543 + }, + { + "epoch": 23.47252747252747, + "grad_norm": 11.125457763671875, + "learning_rate": 3.826373626373627e-05, + "loss": 0.2784, + "step": 8544 + }, + { + "epoch": 23.475274725274726, + "grad_norm": 7.639363765716553, + "learning_rate": 3.826236263736264e-05, + "loss": 0.1897, + "step": 8545 + }, + { + "epoch": 23.478021978021978, + "grad_norm": 9.062628746032715, + "learning_rate": 3.8260989010989014e-05, + "loss": 0.1785, + "step": 8546 + }, + { + "epoch": 23.48076923076923, + "grad_norm": 22.774322509765625, + "learning_rate": 3.825961538461539e-05, + "loss": 0.4462, + "step": 8547 + }, + { + "epoch": 23.483516483516482, + "grad_norm": 13.356851577758789, + "learning_rate": 3.825824175824176e-05, + "loss": 0.5174, + "step": 8548 + }, + { + "epoch": 23.486263736263737, + "grad_norm": 6.062983512878418, + "learning_rate": 3.825686813186813e-05, + "loss": 0.1986, + "step": 8549 + }, + { + "epoch": 23.48901098901099, + "grad_norm": 15.673995018005371, + "learning_rate": 3.825549450549451e-05, + "loss": 0.4503, + "step": 8550 + }, + { + "epoch": 23.49175824175824, + "grad_norm": 7.09145975112915, + "learning_rate": 3.825412087912088e-05, + "loss": 0.0927, + "step": 8551 + }, + { + "epoch": 23.494505494505496, + "grad_norm": 12.645844459533691, + "learning_rate": 3.8252747252747255e-05, + "loss": 0.4195, + "step": 8552 + }, + { + "epoch": 23.497252747252748, + "grad_norm": 29.544166564941406, + "learning_rate": 3.8251373626373625e-05, + "loss": 0.3926, + "step": 8553 + }, + { + "epoch": 23.5, + "grad_norm": 17.116830825805664, + "learning_rate": 3.825e-05, + "loss": 0.5827, + "step": 8554 + }, + { + "epoch": 23.502747252747252, + "grad_norm": 13.432312965393066, + "learning_rate": 3.824862637362637e-05, + "loss": 0.3618, + "step": 8555 + }, + { + "epoch": 23.505494505494504, + "grad_norm": 15.09457778930664, + "learning_rate": 3.824725274725275e-05, + "loss": 0.4864, + "step": 8556 + }, + { + "epoch": 23.50824175824176, + "grad_norm": 12.269668579101562, + "learning_rate": 3.8245879120879125e-05, + "loss": 0.3509, + "step": 8557 + }, + { + "epoch": 23.51098901098901, + "grad_norm": 10.009349822998047, + "learning_rate": 3.8244505494505495e-05, + "loss": 0.1912, + "step": 8558 + }, + { + "epoch": 23.513736263736263, + "grad_norm": 14.00369644165039, + "learning_rate": 3.824313186813187e-05, + "loss": 0.3933, + "step": 8559 + }, + { + "epoch": 23.516483516483518, + "grad_norm": 15.56649398803711, + "learning_rate": 3.824175824175824e-05, + "loss": 0.3507, + "step": 8560 + }, + { + "epoch": 23.51923076923077, + "grad_norm": 18.81861114501953, + "learning_rate": 3.824038461538462e-05, + "loss": 0.5494, + "step": 8561 + }, + { + "epoch": 23.521978021978022, + "grad_norm": 7.924067497253418, + "learning_rate": 3.8239010989010995e-05, + "loss": 0.3317, + "step": 8562 + }, + { + "epoch": 23.524725274725274, + "grad_norm": 5.520303726196289, + "learning_rate": 3.8237637362637365e-05, + "loss": 0.1533, + "step": 8563 + }, + { + "epoch": 23.52747252747253, + "grad_norm": 12.458758354187012, + "learning_rate": 3.8236263736263735e-05, + "loss": 0.5278, + "step": 8564 + }, + { + "epoch": 23.53021978021978, + "grad_norm": 10.721884727478027, + "learning_rate": 3.823489010989011e-05, + "loss": 0.3676, + "step": 8565 + }, + { + "epoch": 23.532967032967033, + "grad_norm": 14.648921012878418, + "learning_rate": 3.823351648351648e-05, + "loss": 0.5268, + "step": 8566 + }, + { + "epoch": 23.535714285714285, + "grad_norm": 10.292078018188477, + "learning_rate": 3.823214285714286e-05, + "loss": 0.28, + "step": 8567 + }, + { + "epoch": 23.53846153846154, + "grad_norm": 13.409637451171875, + "learning_rate": 3.823076923076923e-05, + "loss": 0.5358, + "step": 8568 + }, + { + "epoch": 23.541208791208792, + "grad_norm": 8.023114204406738, + "learning_rate": 3.8229395604395606e-05, + "loss": 0.2072, + "step": 8569 + }, + { + "epoch": 23.543956043956044, + "grad_norm": 8.612117767333984, + "learning_rate": 3.8228021978021976e-05, + "loss": 0.2866, + "step": 8570 + }, + { + "epoch": 23.546703296703296, + "grad_norm": 15.719025611877441, + "learning_rate": 3.822664835164835e-05, + "loss": 0.3612, + "step": 8571 + }, + { + "epoch": 23.54945054945055, + "grad_norm": 10.395125389099121, + "learning_rate": 3.822527472527473e-05, + "loss": 0.2626, + "step": 8572 + }, + { + "epoch": 23.552197802197803, + "grad_norm": 11.995725631713867, + "learning_rate": 3.82239010989011e-05, + "loss": 0.3518, + "step": 8573 + }, + { + "epoch": 23.554945054945055, + "grad_norm": 13.470026969909668, + "learning_rate": 3.8222527472527476e-05, + "loss": 0.417, + "step": 8574 + }, + { + "epoch": 23.557692307692307, + "grad_norm": 17.216318130493164, + "learning_rate": 3.8221153846153846e-05, + "loss": 0.5412, + "step": 8575 + }, + { + "epoch": 23.560439560439562, + "grad_norm": 4.089487552642822, + "learning_rate": 3.821978021978022e-05, + "loss": 0.0837, + "step": 8576 + }, + { + "epoch": 23.563186813186814, + "grad_norm": 16.534713745117188, + "learning_rate": 3.82184065934066e-05, + "loss": 0.4791, + "step": 8577 + }, + { + "epoch": 23.565934065934066, + "grad_norm": 10.708403587341309, + "learning_rate": 3.821703296703297e-05, + "loss": 0.2475, + "step": 8578 + }, + { + "epoch": 23.568681318681318, + "grad_norm": 10.052332878112793, + "learning_rate": 3.821565934065934e-05, + "loss": 0.2155, + "step": 8579 + }, + { + "epoch": 23.571428571428573, + "grad_norm": 7.298727035522461, + "learning_rate": 3.821428571428572e-05, + "loss": 0.2161, + "step": 8580 + }, + { + "epoch": 23.574175824175825, + "grad_norm": 22.69337272644043, + "learning_rate": 3.821291208791209e-05, + "loss": 0.787, + "step": 8581 + }, + { + "epoch": 23.576923076923077, + "grad_norm": 5.960799217224121, + "learning_rate": 3.8211538461538464e-05, + "loss": 0.1614, + "step": 8582 + }, + { + "epoch": 23.57967032967033, + "grad_norm": 18.368688583374023, + "learning_rate": 3.8210164835164834e-05, + "loss": 0.8652, + "step": 8583 + }, + { + "epoch": 23.582417582417584, + "grad_norm": 9.240177154541016, + "learning_rate": 3.820879120879121e-05, + "loss": 0.2346, + "step": 8584 + }, + { + "epoch": 23.585164835164836, + "grad_norm": 6.749775409698486, + "learning_rate": 3.820741758241758e-05, + "loss": 0.1469, + "step": 8585 + }, + { + "epoch": 23.587912087912088, + "grad_norm": 9.388236999511719, + "learning_rate": 3.820604395604396e-05, + "loss": 0.3483, + "step": 8586 + }, + { + "epoch": 23.59065934065934, + "grad_norm": 25.490432739257812, + "learning_rate": 3.8204670329670334e-05, + "loss": 0.7856, + "step": 8587 + }, + { + "epoch": 23.593406593406595, + "grad_norm": 6.164212226867676, + "learning_rate": 3.8203296703296704e-05, + "loss": 0.1704, + "step": 8588 + }, + { + "epoch": 23.596153846153847, + "grad_norm": 11.635273933410645, + "learning_rate": 3.820192307692308e-05, + "loss": 0.2664, + "step": 8589 + }, + { + "epoch": 23.5989010989011, + "grad_norm": 11.508505821228027, + "learning_rate": 3.820054945054945e-05, + "loss": 0.2643, + "step": 8590 + }, + { + "epoch": 23.60164835164835, + "grad_norm": 16.74460792541504, + "learning_rate": 3.819917582417583e-05, + "loss": 0.4079, + "step": 8591 + }, + { + "epoch": 23.604395604395606, + "grad_norm": 11.603276252746582, + "learning_rate": 3.8197802197802204e-05, + "loss": 0.3206, + "step": 8592 + }, + { + "epoch": 23.607142857142858, + "grad_norm": 12.247282028198242, + "learning_rate": 3.8196428571428574e-05, + "loss": 0.3942, + "step": 8593 + }, + { + "epoch": 23.60989010989011, + "grad_norm": 10.324556350708008, + "learning_rate": 3.8195054945054944e-05, + "loss": 0.2108, + "step": 8594 + }, + { + "epoch": 23.61263736263736, + "grad_norm": 12.25561237335205, + "learning_rate": 3.819368131868132e-05, + "loss": 0.2916, + "step": 8595 + }, + { + "epoch": 23.615384615384617, + "grad_norm": 19.735136032104492, + "learning_rate": 3.819230769230769e-05, + "loss": 1.0103, + "step": 8596 + }, + { + "epoch": 23.61813186813187, + "grad_norm": 16.042598724365234, + "learning_rate": 3.819093406593407e-05, + "loss": 0.4357, + "step": 8597 + }, + { + "epoch": 23.62087912087912, + "grad_norm": 7.5570502281188965, + "learning_rate": 3.818956043956044e-05, + "loss": 0.2303, + "step": 8598 + }, + { + "epoch": 23.623626373626372, + "grad_norm": 12.38541316986084, + "learning_rate": 3.8188186813186815e-05, + "loss": 0.5098, + "step": 8599 + }, + { + "epoch": 23.626373626373628, + "grad_norm": 7.695514678955078, + "learning_rate": 3.8186813186813185e-05, + "loss": 0.1667, + "step": 8600 + }, + { + "epoch": 23.62912087912088, + "grad_norm": 10.925411224365234, + "learning_rate": 3.818543956043956e-05, + "loss": 0.3511, + "step": 8601 + }, + { + "epoch": 23.63186813186813, + "grad_norm": 8.056395530700684, + "learning_rate": 3.818406593406594e-05, + "loss": 0.236, + "step": 8602 + }, + { + "epoch": 23.634615384615383, + "grad_norm": 8.51733112335205, + "learning_rate": 3.818269230769231e-05, + "loss": 0.2207, + "step": 8603 + }, + { + "epoch": 23.63736263736264, + "grad_norm": 12.056827545166016, + "learning_rate": 3.8181318681318685e-05, + "loss": 0.4965, + "step": 8604 + }, + { + "epoch": 23.64010989010989, + "grad_norm": 18.156755447387695, + "learning_rate": 3.8179945054945055e-05, + "loss": 0.607, + "step": 8605 + }, + { + "epoch": 23.642857142857142, + "grad_norm": 24.826871871948242, + "learning_rate": 3.817857142857143e-05, + "loss": 0.7199, + "step": 8606 + }, + { + "epoch": 23.645604395604394, + "grad_norm": 11.488668441772461, + "learning_rate": 3.817719780219781e-05, + "loss": 0.3834, + "step": 8607 + }, + { + "epoch": 23.64835164835165, + "grad_norm": 13.473639488220215, + "learning_rate": 3.817582417582418e-05, + "loss": 0.4066, + "step": 8608 + }, + { + "epoch": 23.6510989010989, + "grad_norm": 23.599864959716797, + "learning_rate": 3.817445054945055e-05, + "loss": 0.5765, + "step": 8609 + }, + { + "epoch": 23.653846153846153, + "grad_norm": 15.59689712524414, + "learning_rate": 3.8173076923076926e-05, + "loss": 0.3579, + "step": 8610 + }, + { + "epoch": 23.656593406593405, + "grad_norm": 3.8463363647460938, + "learning_rate": 3.8171703296703296e-05, + "loss": 0.0919, + "step": 8611 + }, + { + "epoch": 23.65934065934066, + "grad_norm": 15.074577331542969, + "learning_rate": 3.817032967032967e-05, + "loss": 0.4334, + "step": 8612 + }, + { + "epoch": 23.662087912087912, + "grad_norm": 13.911356925964355, + "learning_rate": 3.816895604395604e-05, + "loss": 0.3092, + "step": 8613 + }, + { + "epoch": 23.664835164835164, + "grad_norm": 16.60491371154785, + "learning_rate": 3.816758241758242e-05, + "loss": 0.2825, + "step": 8614 + }, + { + "epoch": 23.667582417582416, + "grad_norm": 10.969196319580078, + "learning_rate": 3.816620879120879e-05, + "loss": 0.5773, + "step": 8615 + }, + { + "epoch": 23.67032967032967, + "grad_norm": 8.064640998840332, + "learning_rate": 3.8164835164835166e-05, + "loss": 0.1894, + "step": 8616 + }, + { + "epoch": 23.673076923076923, + "grad_norm": 15.9471435546875, + "learning_rate": 3.816346153846154e-05, + "loss": 0.7017, + "step": 8617 + }, + { + "epoch": 23.675824175824175, + "grad_norm": 13.721918106079102, + "learning_rate": 3.816208791208791e-05, + "loss": 0.4152, + "step": 8618 + }, + { + "epoch": 23.678571428571427, + "grad_norm": 14.166242599487305, + "learning_rate": 3.816071428571429e-05, + "loss": 0.2087, + "step": 8619 + }, + { + "epoch": 23.681318681318682, + "grad_norm": 11.714885711669922, + "learning_rate": 3.815934065934066e-05, + "loss": 0.5379, + "step": 8620 + }, + { + "epoch": 23.684065934065934, + "grad_norm": 5.513306617736816, + "learning_rate": 3.8157967032967037e-05, + "loss": 0.1315, + "step": 8621 + }, + { + "epoch": 23.686813186813186, + "grad_norm": 3.707552194595337, + "learning_rate": 3.815659340659341e-05, + "loss": 0.0948, + "step": 8622 + }, + { + "epoch": 23.689560439560438, + "grad_norm": 20.09920883178711, + "learning_rate": 3.8155219780219783e-05, + "loss": 0.4645, + "step": 8623 + }, + { + "epoch": 23.692307692307693, + "grad_norm": 8.108674049377441, + "learning_rate": 3.8153846153846153e-05, + "loss": 0.1536, + "step": 8624 + }, + { + "epoch": 23.695054945054945, + "grad_norm": 16.04736328125, + "learning_rate": 3.815247252747253e-05, + "loss": 0.5586, + "step": 8625 + }, + { + "epoch": 23.697802197802197, + "grad_norm": 13.485620498657227, + "learning_rate": 3.81510989010989e-05, + "loss": 0.2677, + "step": 8626 + }, + { + "epoch": 23.70054945054945, + "grad_norm": 13.41402816772461, + "learning_rate": 3.814972527472528e-05, + "loss": 0.312, + "step": 8627 + }, + { + "epoch": 23.703296703296704, + "grad_norm": 13.298446655273438, + "learning_rate": 3.814835164835165e-05, + "loss": 0.5427, + "step": 8628 + }, + { + "epoch": 23.706043956043956, + "grad_norm": 12.779866218566895, + "learning_rate": 3.8146978021978024e-05, + "loss": 0.3623, + "step": 8629 + }, + { + "epoch": 23.708791208791208, + "grad_norm": 21.026182174682617, + "learning_rate": 3.8145604395604394e-05, + "loss": 0.4146, + "step": 8630 + }, + { + "epoch": 23.71153846153846, + "grad_norm": 20.90997886657715, + "learning_rate": 3.814423076923077e-05, + "loss": 0.363, + "step": 8631 + }, + { + "epoch": 23.714285714285715, + "grad_norm": 13.349862098693848, + "learning_rate": 3.814285714285715e-05, + "loss": 0.4601, + "step": 8632 + }, + { + "epoch": 23.717032967032967, + "grad_norm": 11.167964935302734, + "learning_rate": 3.814148351648352e-05, + "loss": 0.379, + "step": 8633 + }, + { + "epoch": 23.71978021978022, + "grad_norm": 11.378456115722656, + "learning_rate": 3.8140109890109894e-05, + "loss": 0.3228, + "step": 8634 + }, + { + "epoch": 23.72252747252747, + "grad_norm": 17.154102325439453, + "learning_rate": 3.8138736263736264e-05, + "loss": 0.5218, + "step": 8635 + }, + { + "epoch": 23.725274725274726, + "grad_norm": 10.167052268981934, + "learning_rate": 3.813736263736264e-05, + "loss": 0.206, + "step": 8636 + }, + { + "epoch": 23.728021978021978, + "grad_norm": 11.672348022460938, + "learning_rate": 3.813598901098902e-05, + "loss": 0.365, + "step": 8637 + }, + { + "epoch": 23.73076923076923, + "grad_norm": 13.208640098571777, + "learning_rate": 3.813461538461539e-05, + "loss": 0.576, + "step": 8638 + }, + { + "epoch": 23.733516483516482, + "grad_norm": 10.318205833435059, + "learning_rate": 3.813324175824176e-05, + "loss": 0.3537, + "step": 8639 + }, + { + "epoch": 23.736263736263737, + "grad_norm": 5.561139106750488, + "learning_rate": 3.8131868131868135e-05, + "loss": 0.1751, + "step": 8640 + }, + { + "epoch": 23.73901098901099, + "grad_norm": 18.62347412109375, + "learning_rate": 3.8130494505494505e-05, + "loss": 0.5876, + "step": 8641 + }, + { + "epoch": 23.74175824175824, + "grad_norm": 16.65677261352539, + "learning_rate": 3.812912087912088e-05, + "loss": 0.5606, + "step": 8642 + }, + { + "epoch": 23.744505494505496, + "grad_norm": 13.913740158081055, + "learning_rate": 3.812774725274725e-05, + "loss": 0.4689, + "step": 8643 + }, + { + "epoch": 23.747252747252748, + "grad_norm": 11.653626441955566, + "learning_rate": 3.812637362637363e-05, + "loss": 0.2822, + "step": 8644 + }, + { + "epoch": 23.75, + "grad_norm": 10.729460716247559, + "learning_rate": 3.8125e-05, + "loss": 0.3463, + "step": 8645 + }, + { + "epoch": 23.752747252747252, + "grad_norm": 17.329774856567383, + "learning_rate": 3.8123626373626375e-05, + "loss": 0.6098, + "step": 8646 + }, + { + "epoch": 23.755494505494504, + "grad_norm": 11.239041328430176, + "learning_rate": 3.812225274725275e-05, + "loss": 0.3333, + "step": 8647 + }, + { + "epoch": 23.75824175824176, + "grad_norm": 19.783388137817383, + "learning_rate": 3.812087912087912e-05, + "loss": 0.5746, + "step": 8648 + }, + { + "epoch": 23.76098901098901, + "grad_norm": 9.935765266418457, + "learning_rate": 3.81195054945055e-05, + "loss": 0.2464, + "step": 8649 + }, + { + "epoch": 23.763736263736263, + "grad_norm": 14.446221351623535, + "learning_rate": 3.811813186813187e-05, + "loss": 0.4226, + "step": 8650 + }, + { + "epoch": 23.766483516483518, + "grad_norm": 3.463505983352661, + "learning_rate": 3.8116758241758246e-05, + "loss": 0.0855, + "step": 8651 + }, + { + "epoch": 23.76923076923077, + "grad_norm": 15.444406509399414, + "learning_rate": 3.811538461538462e-05, + "loss": 0.4389, + "step": 8652 + }, + { + "epoch": 23.771978021978022, + "grad_norm": 6.918550968170166, + "learning_rate": 3.811401098901099e-05, + "loss": 0.1644, + "step": 8653 + }, + { + "epoch": 23.774725274725274, + "grad_norm": 6.173000335693359, + "learning_rate": 3.811263736263736e-05, + "loss": 0.1259, + "step": 8654 + }, + { + "epoch": 23.77747252747253, + "grad_norm": 12.080877304077148, + "learning_rate": 3.811126373626374e-05, + "loss": 0.2416, + "step": 8655 + }, + { + "epoch": 23.78021978021978, + "grad_norm": 13.379804611206055, + "learning_rate": 3.810989010989011e-05, + "loss": 0.3369, + "step": 8656 + }, + { + "epoch": 23.782967032967033, + "grad_norm": 18.433862686157227, + "learning_rate": 3.8108516483516486e-05, + "loss": 0.4837, + "step": 8657 + }, + { + "epoch": 23.785714285714285, + "grad_norm": 12.330820083618164, + "learning_rate": 3.8107142857142856e-05, + "loss": 0.5673, + "step": 8658 + }, + { + "epoch": 23.78846153846154, + "grad_norm": 8.535225868225098, + "learning_rate": 3.810576923076923e-05, + "loss": 0.2568, + "step": 8659 + }, + { + "epoch": 23.791208791208792, + "grad_norm": 17.448652267456055, + "learning_rate": 3.81043956043956e-05, + "loss": 0.3784, + "step": 8660 + }, + { + "epoch": 23.793956043956044, + "grad_norm": 23.332839965820312, + "learning_rate": 3.810302197802198e-05, + "loss": 0.4975, + "step": 8661 + }, + { + "epoch": 23.796703296703296, + "grad_norm": 17.057706832885742, + "learning_rate": 3.8101648351648356e-05, + "loss": 0.4489, + "step": 8662 + }, + { + "epoch": 23.79945054945055, + "grad_norm": 24.40637969970703, + "learning_rate": 3.8100274725274726e-05, + "loss": 0.8043, + "step": 8663 + }, + { + "epoch": 23.802197802197803, + "grad_norm": 19.757598876953125, + "learning_rate": 3.80989010989011e-05, + "loss": 0.741, + "step": 8664 + }, + { + "epoch": 23.804945054945055, + "grad_norm": 15.192201614379883, + "learning_rate": 3.809752747252747e-05, + "loss": 0.4325, + "step": 8665 + }, + { + "epoch": 23.807692307692307, + "grad_norm": 12.992513656616211, + "learning_rate": 3.809615384615385e-05, + "loss": 0.423, + "step": 8666 + }, + { + "epoch": 23.810439560439562, + "grad_norm": 13.949346542358398, + "learning_rate": 3.809478021978023e-05, + "loss": 0.4415, + "step": 8667 + }, + { + "epoch": 23.813186813186814, + "grad_norm": 5.6045145988464355, + "learning_rate": 3.80934065934066e-05, + "loss": 0.1568, + "step": 8668 + }, + { + "epoch": 23.815934065934066, + "grad_norm": 18.8535099029541, + "learning_rate": 3.809203296703297e-05, + "loss": 0.4879, + "step": 8669 + }, + { + "epoch": 23.818681318681318, + "grad_norm": 6.418431282043457, + "learning_rate": 3.8090659340659344e-05, + "loss": 0.147, + "step": 8670 + }, + { + "epoch": 23.821428571428573, + "grad_norm": 8.735944747924805, + "learning_rate": 3.8089285714285714e-05, + "loss": 0.2413, + "step": 8671 + }, + { + "epoch": 23.824175824175825, + "grad_norm": 14.425867080688477, + "learning_rate": 3.808791208791209e-05, + "loss": 0.3059, + "step": 8672 + }, + { + "epoch": 23.826923076923077, + "grad_norm": 13.597867965698242, + "learning_rate": 3.808653846153846e-05, + "loss": 0.5142, + "step": 8673 + }, + { + "epoch": 23.82967032967033, + "grad_norm": 14.458735466003418, + "learning_rate": 3.808516483516484e-05, + "loss": 0.6203, + "step": 8674 + }, + { + "epoch": 23.832417582417584, + "grad_norm": 13.314586639404297, + "learning_rate": 3.808379120879121e-05, + "loss": 0.2555, + "step": 8675 + }, + { + "epoch": 23.835164835164836, + "grad_norm": 6.864645004272461, + "learning_rate": 3.8082417582417584e-05, + "loss": 0.1593, + "step": 8676 + }, + { + "epoch": 23.837912087912088, + "grad_norm": 12.85519027709961, + "learning_rate": 3.808104395604396e-05, + "loss": 0.223, + "step": 8677 + }, + { + "epoch": 23.84065934065934, + "grad_norm": 9.491165161132812, + "learning_rate": 3.807967032967033e-05, + "loss": 0.2689, + "step": 8678 + }, + { + "epoch": 23.843406593406595, + "grad_norm": 9.229926109313965, + "learning_rate": 3.807829670329671e-05, + "loss": 0.2752, + "step": 8679 + }, + { + "epoch": 23.846153846153847, + "grad_norm": 10.118782043457031, + "learning_rate": 3.807692307692308e-05, + "loss": 0.3192, + "step": 8680 + }, + { + "epoch": 23.8489010989011, + "grad_norm": 11.91709041595459, + "learning_rate": 3.8075549450549455e-05, + "loss": 0.2577, + "step": 8681 + }, + { + "epoch": 23.85164835164835, + "grad_norm": 11.311116218566895, + "learning_rate": 3.807417582417583e-05, + "loss": 0.2361, + "step": 8682 + }, + { + "epoch": 23.854395604395606, + "grad_norm": 19.09880828857422, + "learning_rate": 3.80728021978022e-05, + "loss": 0.6501, + "step": 8683 + }, + { + "epoch": 23.857142857142858, + "grad_norm": 10.518150329589844, + "learning_rate": 3.807142857142857e-05, + "loss": 0.3204, + "step": 8684 + }, + { + "epoch": 23.85989010989011, + "grad_norm": 10.891621589660645, + "learning_rate": 3.807005494505495e-05, + "loss": 0.3109, + "step": 8685 + }, + { + "epoch": 23.86263736263736, + "grad_norm": 4.784392356872559, + "learning_rate": 3.806868131868132e-05, + "loss": 0.2098, + "step": 8686 + }, + { + "epoch": 23.865384615384617, + "grad_norm": 15.634939193725586, + "learning_rate": 3.8067307692307695e-05, + "loss": 0.3785, + "step": 8687 + }, + { + "epoch": 23.86813186813187, + "grad_norm": 16.974973678588867, + "learning_rate": 3.8065934065934065e-05, + "loss": 0.4961, + "step": 8688 + }, + { + "epoch": 23.87087912087912, + "grad_norm": 17.971994400024414, + "learning_rate": 3.806456043956044e-05, + "loss": 0.4139, + "step": 8689 + }, + { + "epoch": 23.873626373626372, + "grad_norm": 5.820348739624023, + "learning_rate": 3.806318681318681e-05, + "loss": 0.1164, + "step": 8690 + }, + { + "epoch": 23.876373626373628, + "grad_norm": 13.650958061218262, + "learning_rate": 3.806181318681319e-05, + "loss": 0.4037, + "step": 8691 + }, + { + "epoch": 23.87912087912088, + "grad_norm": 12.824470520019531, + "learning_rate": 3.8060439560439565e-05, + "loss": 0.356, + "step": 8692 + }, + { + "epoch": 23.88186813186813, + "grad_norm": 16.881113052368164, + "learning_rate": 3.8059065934065935e-05, + "loss": 0.4031, + "step": 8693 + }, + { + "epoch": 23.884615384615383, + "grad_norm": 24.58110809326172, + "learning_rate": 3.805769230769231e-05, + "loss": 0.9806, + "step": 8694 + }, + { + "epoch": 23.88736263736264, + "grad_norm": 15.413817405700684, + "learning_rate": 3.805631868131868e-05, + "loss": 0.6287, + "step": 8695 + }, + { + "epoch": 23.89010989010989, + "grad_norm": 12.872157096862793, + "learning_rate": 3.805494505494506e-05, + "loss": 0.1798, + "step": 8696 + }, + { + "epoch": 23.892857142857142, + "grad_norm": 12.924595832824707, + "learning_rate": 3.8053571428571436e-05, + "loss": 0.4005, + "step": 8697 + }, + { + "epoch": 23.895604395604394, + "grad_norm": 20.303600311279297, + "learning_rate": 3.8052197802197806e-05, + "loss": 0.5883, + "step": 8698 + }, + { + "epoch": 23.89835164835165, + "grad_norm": 8.535550117492676, + "learning_rate": 3.8050824175824176e-05, + "loss": 0.2368, + "step": 8699 + }, + { + "epoch": 23.9010989010989, + "grad_norm": 11.812253952026367, + "learning_rate": 3.804945054945055e-05, + "loss": 0.4146, + "step": 8700 + }, + { + "epoch": 23.903846153846153, + "grad_norm": 18.764949798583984, + "learning_rate": 3.804807692307692e-05, + "loss": 0.8894, + "step": 8701 + }, + { + "epoch": 23.906593406593405, + "grad_norm": 17.608322143554688, + "learning_rate": 3.80467032967033e-05, + "loss": 0.5767, + "step": 8702 + }, + { + "epoch": 23.90934065934066, + "grad_norm": 22.53227996826172, + "learning_rate": 3.804532967032967e-05, + "loss": 0.9292, + "step": 8703 + }, + { + "epoch": 23.912087912087912, + "grad_norm": 14.382438659667969, + "learning_rate": 3.8043956043956046e-05, + "loss": 0.198, + "step": 8704 + }, + { + "epoch": 23.914835164835164, + "grad_norm": 15.36958122253418, + "learning_rate": 3.8042582417582416e-05, + "loss": 0.5241, + "step": 8705 + }, + { + "epoch": 23.917582417582416, + "grad_norm": 8.28747272491455, + "learning_rate": 3.804120879120879e-05, + "loss": 0.203, + "step": 8706 + }, + { + "epoch": 23.92032967032967, + "grad_norm": 15.355551719665527, + "learning_rate": 3.803983516483517e-05, + "loss": 0.5254, + "step": 8707 + }, + { + "epoch": 23.923076923076923, + "grad_norm": 12.291885375976562, + "learning_rate": 3.803846153846154e-05, + "loss": 0.4167, + "step": 8708 + }, + { + "epoch": 23.925824175824175, + "grad_norm": 12.213713645935059, + "learning_rate": 3.803708791208792e-05, + "loss": 0.2943, + "step": 8709 + }, + { + "epoch": 23.928571428571427, + "grad_norm": 6.745100498199463, + "learning_rate": 3.803571428571429e-05, + "loss": 0.1702, + "step": 8710 + }, + { + "epoch": 23.931318681318682, + "grad_norm": 16.818906784057617, + "learning_rate": 3.8034340659340664e-05, + "loss": 0.5421, + "step": 8711 + }, + { + "epoch": 23.934065934065934, + "grad_norm": 9.678229331970215, + "learning_rate": 3.803296703296704e-05, + "loss": 0.3067, + "step": 8712 + }, + { + "epoch": 23.936813186813186, + "grad_norm": 17.332645416259766, + "learning_rate": 3.803159340659341e-05, + "loss": 0.5415, + "step": 8713 + }, + { + "epoch": 23.939560439560438, + "grad_norm": 8.515050888061523, + "learning_rate": 3.803021978021978e-05, + "loss": 0.1724, + "step": 8714 + }, + { + "epoch": 23.942307692307693, + "grad_norm": 13.828280448913574, + "learning_rate": 3.802884615384616e-05, + "loss": 0.2744, + "step": 8715 + }, + { + "epoch": 23.945054945054945, + "grad_norm": 8.389018058776855, + "learning_rate": 3.802747252747253e-05, + "loss": 0.2461, + "step": 8716 + }, + { + "epoch": 23.947802197802197, + "grad_norm": 13.802201271057129, + "learning_rate": 3.8026098901098904e-05, + "loss": 0.6184, + "step": 8717 + }, + { + "epoch": 23.95054945054945, + "grad_norm": 16.660322189331055, + "learning_rate": 3.8024725274725274e-05, + "loss": 0.4906, + "step": 8718 + }, + { + "epoch": 23.953296703296704, + "grad_norm": 11.626099586486816, + "learning_rate": 3.802335164835165e-05, + "loss": 0.37, + "step": 8719 + }, + { + "epoch": 23.956043956043956, + "grad_norm": 9.170760154724121, + "learning_rate": 3.802197802197802e-05, + "loss": 0.2466, + "step": 8720 + }, + { + "epoch": 23.958791208791208, + "grad_norm": 14.084209442138672, + "learning_rate": 3.80206043956044e-05, + "loss": 0.3897, + "step": 8721 + }, + { + "epoch": 23.96153846153846, + "grad_norm": 14.182836532592773, + "learning_rate": 3.8019230769230774e-05, + "loss": 0.2782, + "step": 8722 + }, + { + "epoch": 23.964285714285715, + "grad_norm": 13.065553665161133, + "learning_rate": 3.8017857142857144e-05, + "loss": 0.3243, + "step": 8723 + }, + { + "epoch": 23.967032967032967, + "grad_norm": 15.920832633972168, + "learning_rate": 3.801648351648352e-05, + "loss": 0.45, + "step": 8724 + }, + { + "epoch": 23.96978021978022, + "grad_norm": 12.858912467956543, + "learning_rate": 3.801510989010989e-05, + "loss": 0.4263, + "step": 8725 + }, + { + "epoch": 23.97252747252747, + "grad_norm": 14.765624046325684, + "learning_rate": 3.801373626373627e-05, + "loss": 0.4635, + "step": 8726 + }, + { + "epoch": 23.975274725274726, + "grad_norm": 13.696904182434082, + "learning_rate": 3.8012362637362645e-05, + "loss": 0.5927, + "step": 8727 + }, + { + "epoch": 23.978021978021978, + "grad_norm": 18.60618019104004, + "learning_rate": 3.8010989010989015e-05, + "loss": 0.5336, + "step": 8728 + }, + { + "epoch": 23.98076923076923, + "grad_norm": 12.470565795898438, + "learning_rate": 3.8009615384615385e-05, + "loss": 0.3598, + "step": 8729 + }, + { + "epoch": 23.983516483516482, + "grad_norm": 12.075540542602539, + "learning_rate": 3.8008241758241755e-05, + "loss": 0.4892, + "step": 8730 + }, + { + "epoch": 23.986263736263737, + "grad_norm": 9.236719131469727, + "learning_rate": 3.800686813186813e-05, + "loss": 0.2003, + "step": 8731 + }, + { + "epoch": 23.98901098901099, + "grad_norm": 9.062560081481934, + "learning_rate": 3.800549450549451e-05, + "loss": 0.1438, + "step": 8732 + }, + { + "epoch": 23.99175824175824, + "grad_norm": 9.028717041015625, + "learning_rate": 3.800412087912088e-05, + "loss": 0.1561, + "step": 8733 + }, + { + "epoch": 23.994505494505496, + "grad_norm": 19.852458953857422, + "learning_rate": 3.8002747252747255e-05, + "loss": 1.0317, + "step": 8734 + }, + { + "epoch": 23.997252747252748, + "grad_norm": 17.363582611083984, + "learning_rate": 3.8001373626373625e-05, + "loss": 0.588, + "step": 8735 + }, + { + "epoch": 24.0, + "grad_norm": 15.699222564697266, + "learning_rate": 3.8e-05, + "loss": 0.1686, + "step": 8736 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.7837465564738292, + "eval_f1": 0.7885283439761384, + "eval_f1_DuraRiadoRio_64x64": 0.8037383177570093, + "eval_f1_Mole_64x64": 0.74235807860262, + "eval_f1_Quebrado_64x64": 0.8478260869565217, + "eval_f1_RiadoRio_64x64": 0.6850828729281768, + "eval_f1_RioFechado_64x64": 0.8636363636363636, + "eval_loss": 0.7475448250770569, + "eval_precision": 0.8280159231340114, + "eval_precision_DuraRiadoRio_64x64": 0.7288135593220338, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.8863636363636364, + "eval_precision_RiadoRio_64x64": 0.5904761904761905, + "eval_precision_RioFechado_64x64": 0.9344262295081968, + "eval_recall": 0.7834434972407545, + "eval_recall_DuraRiadoRio_64x64": 0.8958333333333334, + "eval_recall_Mole_64x64": 0.5902777777777778, + "eval_recall_Quebrado_64x64": 0.8125, + "eval_recall_RiadoRio_64x64": 0.8157894736842105, + "eval_recall_RioFechado_64x64": 0.8028169014084507, + "eval_runtime": 1.7545, + "eval_samples_per_second": 413.804, + "eval_steps_per_second": 26.219, + "step": 8736 + }, + { + "epoch": 24.002747252747252, + "grad_norm": 9.725203514099121, + "learning_rate": 3.799862637362638e-05, + "loss": 0.4108, + "step": 8737 + }, + { + "epoch": 24.005494505494507, + "grad_norm": 8.702515602111816, + "learning_rate": 3.799725274725275e-05, + "loss": 0.2777, + "step": 8738 + }, + { + "epoch": 24.00824175824176, + "grad_norm": 17.26457977294922, + "learning_rate": 3.7995879120879126e-05, + "loss": 0.4588, + "step": 8739 + }, + { + "epoch": 24.01098901098901, + "grad_norm": 7.764284133911133, + "learning_rate": 3.7994505494505496e-05, + "loss": 0.2206, + "step": 8740 + }, + { + "epoch": 24.013736263736263, + "grad_norm": 10.7272367477417, + "learning_rate": 3.799313186813187e-05, + "loss": 0.3101, + "step": 8741 + }, + { + "epoch": 24.016483516483518, + "grad_norm": 10.437445640563965, + "learning_rate": 3.799175824175825e-05, + "loss": 0.3286, + "step": 8742 + }, + { + "epoch": 24.01923076923077, + "grad_norm": 11.48196029663086, + "learning_rate": 3.799038461538462e-05, + "loss": 0.4318, + "step": 8743 + }, + { + "epoch": 24.021978021978022, + "grad_norm": 14.437026977539062, + "learning_rate": 3.798901098901099e-05, + "loss": 0.5722, + "step": 8744 + }, + { + "epoch": 24.024725274725274, + "grad_norm": 14.066007614135742, + "learning_rate": 3.798763736263736e-05, + "loss": 0.5105, + "step": 8745 + }, + { + "epoch": 24.02747252747253, + "grad_norm": 13.398990631103516, + "learning_rate": 3.7986263736263736e-05, + "loss": 0.4767, + "step": 8746 + }, + { + "epoch": 24.03021978021978, + "grad_norm": 11.796093940734863, + "learning_rate": 3.798489010989011e-05, + "loss": 0.4634, + "step": 8747 + }, + { + "epoch": 24.032967032967033, + "grad_norm": 15.34101676940918, + "learning_rate": 3.798351648351648e-05, + "loss": 0.3882, + "step": 8748 + }, + { + "epoch": 24.035714285714285, + "grad_norm": 15.968863487243652, + "learning_rate": 3.798214285714286e-05, + "loss": 0.4792, + "step": 8749 + }, + { + "epoch": 24.03846153846154, + "grad_norm": 8.897640228271484, + "learning_rate": 3.798076923076923e-05, + "loss": 0.3061, + "step": 8750 + }, + { + "epoch": 24.041208791208792, + "grad_norm": 9.533554077148438, + "learning_rate": 3.7979395604395607e-05, + "loss": 0.3054, + "step": 8751 + }, + { + "epoch": 24.043956043956044, + "grad_norm": 6.564188480377197, + "learning_rate": 3.797802197802198e-05, + "loss": 0.1902, + "step": 8752 + }, + { + "epoch": 24.046703296703296, + "grad_norm": 17.10109519958496, + "learning_rate": 3.7976648351648353e-05, + "loss": 0.6794, + "step": 8753 + }, + { + "epoch": 24.04945054945055, + "grad_norm": 9.921666145324707, + "learning_rate": 3.797527472527473e-05, + "loss": 0.2309, + "step": 8754 + }, + { + "epoch": 24.052197802197803, + "grad_norm": 12.231467247009277, + "learning_rate": 3.79739010989011e-05, + "loss": 0.3136, + "step": 8755 + }, + { + "epoch": 24.054945054945055, + "grad_norm": 8.638442039489746, + "learning_rate": 3.797252747252748e-05, + "loss": 0.1568, + "step": 8756 + }, + { + "epoch": 24.057692307692307, + "grad_norm": 10.34954833984375, + "learning_rate": 3.7971153846153854e-05, + "loss": 0.2317, + "step": 8757 + }, + { + "epoch": 24.060439560439562, + "grad_norm": 9.999507904052734, + "learning_rate": 3.7969780219780224e-05, + "loss": 0.5049, + "step": 8758 + }, + { + "epoch": 24.063186813186814, + "grad_norm": 12.672060012817383, + "learning_rate": 3.7968406593406594e-05, + "loss": 0.3456, + "step": 8759 + }, + { + "epoch": 24.065934065934066, + "grad_norm": 11.115484237670898, + "learning_rate": 3.7967032967032964e-05, + "loss": 0.372, + "step": 8760 + }, + { + "epoch": 24.068681318681318, + "grad_norm": 22.32879066467285, + "learning_rate": 3.796565934065934e-05, + "loss": 0.7438, + "step": 8761 + }, + { + "epoch": 24.071428571428573, + "grad_norm": 9.574679374694824, + "learning_rate": 3.796428571428571e-05, + "loss": 0.2653, + "step": 8762 + }, + { + "epoch": 24.074175824175825, + "grad_norm": 18.681238174438477, + "learning_rate": 3.796291208791209e-05, + "loss": 0.4767, + "step": 8763 + }, + { + "epoch": 24.076923076923077, + "grad_norm": 12.420714378356934, + "learning_rate": 3.7961538461538464e-05, + "loss": 0.2175, + "step": 8764 + }, + { + "epoch": 24.07967032967033, + "grad_norm": 9.854451179504395, + "learning_rate": 3.7960164835164834e-05, + "loss": 0.1865, + "step": 8765 + }, + { + "epoch": 24.082417582417584, + "grad_norm": 7.394129753112793, + "learning_rate": 3.795879120879121e-05, + "loss": 0.1501, + "step": 8766 + }, + { + "epoch": 24.085164835164836, + "grad_norm": 10.833881378173828, + "learning_rate": 3.795741758241758e-05, + "loss": 0.2861, + "step": 8767 + }, + { + "epoch": 24.087912087912088, + "grad_norm": 5.9422807693481445, + "learning_rate": 3.795604395604396e-05, + "loss": 0.1474, + "step": 8768 + }, + { + "epoch": 24.09065934065934, + "grad_norm": 19.47040557861328, + "learning_rate": 3.7954670329670335e-05, + "loss": 0.572, + "step": 8769 + }, + { + "epoch": 24.093406593406595, + "grad_norm": 7.786744117736816, + "learning_rate": 3.7953296703296705e-05, + "loss": 0.2653, + "step": 8770 + }, + { + "epoch": 24.096153846153847, + "grad_norm": 9.688826560974121, + "learning_rate": 3.795192307692308e-05, + "loss": 0.4149, + "step": 8771 + }, + { + "epoch": 24.0989010989011, + "grad_norm": 7.226606369018555, + "learning_rate": 3.795054945054945e-05, + "loss": 0.1907, + "step": 8772 + }, + { + "epoch": 24.10164835164835, + "grad_norm": 8.475167274475098, + "learning_rate": 3.794917582417583e-05, + "loss": 0.2394, + "step": 8773 + }, + { + "epoch": 24.104395604395606, + "grad_norm": 12.012716293334961, + "learning_rate": 3.79478021978022e-05, + "loss": 0.3841, + "step": 8774 + }, + { + "epoch": 24.107142857142858, + "grad_norm": 18.990678787231445, + "learning_rate": 3.794642857142857e-05, + "loss": 0.5516, + "step": 8775 + }, + { + "epoch": 24.10989010989011, + "grad_norm": 5.825415134429932, + "learning_rate": 3.7945054945054945e-05, + "loss": 0.1448, + "step": 8776 + }, + { + "epoch": 24.11263736263736, + "grad_norm": 14.154302597045898, + "learning_rate": 3.7943681318681315e-05, + "loss": 0.3266, + "step": 8777 + }, + { + "epoch": 24.115384615384617, + "grad_norm": 16.245548248291016, + "learning_rate": 3.794230769230769e-05, + "loss": 0.4963, + "step": 8778 + }, + { + "epoch": 24.11813186813187, + "grad_norm": 15.868147850036621, + "learning_rate": 3.794093406593407e-05, + "loss": 0.4144, + "step": 8779 + }, + { + "epoch": 24.12087912087912, + "grad_norm": 21.602357864379883, + "learning_rate": 3.793956043956044e-05, + "loss": 0.8476, + "step": 8780 + }, + { + "epoch": 24.123626373626372, + "grad_norm": 10.788211822509766, + "learning_rate": 3.7938186813186816e-05, + "loss": 0.2417, + "step": 8781 + }, + { + "epoch": 24.126373626373628, + "grad_norm": 19.751190185546875, + "learning_rate": 3.7936813186813186e-05, + "loss": 0.6212, + "step": 8782 + }, + { + "epoch": 24.12912087912088, + "grad_norm": 12.542656898498535, + "learning_rate": 3.793543956043956e-05, + "loss": 0.2459, + "step": 8783 + }, + { + "epoch": 24.13186813186813, + "grad_norm": 8.493664741516113, + "learning_rate": 3.793406593406594e-05, + "loss": 0.2852, + "step": 8784 + }, + { + "epoch": 24.134615384615383, + "grad_norm": 13.530123710632324, + "learning_rate": 3.793269230769231e-05, + "loss": 0.3625, + "step": 8785 + }, + { + "epoch": 24.13736263736264, + "grad_norm": 4.121054172515869, + "learning_rate": 3.7931318681318686e-05, + "loss": 0.0943, + "step": 8786 + }, + { + "epoch": 24.14010989010989, + "grad_norm": 8.46533203125, + "learning_rate": 3.7929945054945056e-05, + "loss": 0.3625, + "step": 8787 + }, + { + "epoch": 24.142857142857142, + "grad_norm": 8.203034400939941, + "learning_rate": 3.792857142857143e-05, + "loss": 0.2844, + "step": 8788 + }, + { + "epoch": 24.145604395604394, + "grad_norm": 5.924255847930908, + "learning_rate": 3.79271978021978e-05, + "loss": 0.162, + "step": 8789 + }, + { + "epoch": 24.14835164835165, + "grad_norm": 11.803083419799805, + "learning_rate": 3.792582417582417e-05, + "loss": 0.484, + "step": 8790 + }, + { + "epoch": 24.1510989010989, + "grad_norm": 15.140972137451172, + "learning_rate": 3.792445054945055e-05, + "loss": 0.3794, + "step": 8791 + }, + { + "epoch": 24.153846153846153, + "grad_norm": 19.413127899169922, + "learning_rate": 3.792307692307692e-05, + "loss": 0.6774, + "step": 8792 + }, + { + "epoch": 24.156593406593405, + "grad_norm": 8.522829055786133, + "learning_rate": 3.7921703296703296e-05, + "loss": 0.2715, + "step": 8793 + }, + { + "epoch": 24.15934065934066, + "grad_norm": 13.195072174072266, + "learning_rate": 3.792032967032967e-05, + "loss": 0.5658, + "step": 8794 + }, + { + "epoch": 24.162087912087912, + "grad_norm": 12.441943168640137, + "learning_rate": 3.791895604395604e-05, + "loss": 0.4366, + "step": 8795 + }, + { + "epoch": 24.164835164835164, + "grad_norm": 14.128765106201172, + "learning_rate": 3.791758241758242e-05, + "loss": 0.6508, + "step": 8796 + }, + { + "epoch": 24.167582417582416, + "grad_norm": 12.280111312866211, + "learning_rate": 3.791620879120879e-05, + "loss": 0.3373, + "step": 8797 + }, + { + "epoch": 24.17032967032967, + "grad_norm": 14.042806625366211, + "learning_rate": 3.791483516483517e-05, + "loss": 0.2617, + "step": 8798 + }, + { + "epoch": 24.173076923076923, + "grad_norm": 11.347024917602539, + "learning_rate": 3.7913461538461544e-05, + "loss": 0.2104, + "step": 8799 + }, + { + "epoch": 24.175824175824175, + "grad_norm": 8.140678405761719, + "learning_rate": 3.7912087912087914e-05, + "loss": 0.1667, + "step": 8800 + }, + { + "epoch": 24.178571428571427, + "grad_norm": 18.310115814208984, + "learning_rate": 3.791071428571429e-05, + "loss": 0.6858, + "step": 8801 + }, + { + "epoch": 24.181318681318682, + "grad_norm": 8.57905387878418, + "learning_rate": 3.790934065934066e-05, + "loss": 0.1638, + "step": 8802 + }, + { + "epoch": 24.184065934065934, + "grad_norm": 22.144380569458008, + "learning_rate": 3.790796703296704e-05, + "loss": 0.5319, + "step": 8803 + }, + { + "epoch": 24.186813186813186, + "grad_norm": 14.550398826599121, + "learning_rate": 3.790659340659341e-05, + "loss": 0.5197, + "step": 8804 + }, + { + "epoch": 24.189560439560438, + "grad_norm": 14.25172233581543, + "learning_rate": 3.790521978021978e-05, + "loss": 0.4946, + "step": 8805 + }, + { + "epoch": 24.192307692307693, + "grad_norm": 21.819564819335938, + "learning_rate": 3.7903846153846154e-05, + "loss": 0.5914, + "step": 8806 + }, + { + "epoch": 24.195054945054945, + "grad_norm": 14.931233406066895, + "learning_rate": 3.7902472527472524e-05, + "loss": 0.471, + "step": 8807 + }, + { + "epoch": 24.197802197802197, + "grad_norm": 9.863611221313477, + "learning_rate": 3.79010989010989e-05, + "loss": 0.3267, + "step": 8808 + }, + { + "epoch": 24.20054945054945, + "grad_norm": 10.534736633300781, + "learning_rate": 3.789972527472528e-05, + "loss": 0.376, + "step": 8809 + }, + { + "epoch": 24.203296703296704, + "grad_norm": 16.357158660888672, + "learning_rate": 3.789835164835165e-05, + "loss": 0.7196, + "step": 8810 + }, + { + "epoch": 24.206043956043956, + "grad_norm": 6.864002704620361, + "learning_rate": 3.7896978021978025e-05, + "loss": 0.1807, + "step": 8811 + }, + { + "epoch": 24.208791208791208, + "grad_norm": 9.875140190124512, + "learning_rate": 3.7895604395604395e-05, + "loss": 0.4134, + "step": 8812 + }, + { + "epoch": 24.21153846153846, + "grad_norm": 16.990846633911133, + "learning_rate": 3.789423076923077e-05, + "loss": 0.5165, + "step": 8813 + }, + { + "epoch": 24.214285714285715, + "grad_norm": 10.026374816894531, + "learning_rate": 3.789285714285715e-05, + "loss": 0.3134, + "step": 8814 + }, + { + "epoch": 24.217032967032967, + "grad_norm": 16.777650833129883, + "learning_rate": 3.789148351648352e-05, + "loss": 0.6629, + "step": 8815 + }, + { + "epoch": 24.21978021978022, + "grad_norm": 14.240157127380371, + "learning_rate": 3.7890109890109895e-05, + "loss": 0.2866, + "step": 8816 + }, + { + "epoch": 24.22252747252747, + "grad_norm": 16.235506057739258, + "learning_rate": 3.7888736263736265e-05, + "loss": 0.5467, + "step": 8817 + }, + { + "epoch": 24.225274725274726, + "grad_norm": 6.166211128234863, + "learning_rate": 3.788736263736264e-05, + "loss": 0.2262, + "step": 8818 + }, + { + "epoch": 24.228021978021978, + "grad_norm": 14.251526832580566, + "learning_rate": 3.788598901098901e-05, + "loss": 0.3682, + "step": 8819 + }, + { + "epoch": 24.23076923076923, + "grad_norm": 14.423735618591309, + "learning_rate": 3.788461538461538e-05, + "loss": 0.2994, + "step": 8820 + }, + { + "epoch": 24.233516483516482, + "grad_norm": 9.678414344787598, + "learning_rate": 3.788324175824176e-05, + "loss": 0.2203, + "step": 8821 + }, + { + "epoch": 24.236263736263737, + "grad_norm": 12.24538803100586, + "learning_rate": 3.788186813186813e-05, + "loss": 0.4749, + "step": 8822 + }, + { + "epoch": 24.23901098901099, + "grad_norm": 22.066089630126953, + "learning_rate": 3.7880494505494505e-05, + "loss": 0.6822, + "step": 8823 + }, + { + "epoch": 24.24175824175824, + "grad_norm": 13.076249122619629, + "learning_rate": 3.787912087912088e-05, + "loss": 0.2159, + "step": 8824 + }, + { + "epoch": 24.244505494505493, + "grad_norm": 24.377506256103516, + "learning_rate": 3.787774725274725e-05, + "loss": 0.6066, + "step": 8825 + }, + { + "epoch": 24.247252747252748, + "grad_norm": 8.837224960327148, + "learning_rate": 3.787637362637363e-05, + "loss": 0.2252, + "step": 8826 + }, + { + "epoch": 24.25, + "grad_norm": 15.573099136352539, + "learning_rate": 3.7875e-05, + "loss": 0.3377, + "step": 8827 + }, + { + "epoch": 24.252747252747252, + "grad_norm": 17.880502700805664, + "learning_rate": 3.7873626373626376e-05, + "loss": 0.7265, + "step": 8828 + }, + { + "epoch": 24.255494505494504, + "grad_norm": 12.763751029968262, + "learning_rate": 3.787225274725275e-05, + "loss": 0.3967, + "step": 8829 + }, + { + "epoch": 24.25824175824176, + "grad_norm": 12.507830619812012, + "learning_rate": 3.787087912087912e-05, + "loss": 0.3383, + "step": 8830 + }, + { + "epoch": 24.26098901098901, + "grad_norm": 14.699376106262207, + "learning_rate": 3.78695054945055e-05, + "loss": 0.4765, + "step": 8831 + }, + { + "epoch": 24.263736263736263, + "grad_norm": 13.850567817687988, + "learning_rate": 3.786813186813187e-05, + "loss": 0.4588, + "step": 8832 + }, + { + "epoch": 24.266483516483518, + "grad_norm": 11.860542297363281, + "learning_rate": 3.7866758241758246e-05, + "loss": 0.2725, + "step": 8833 + }, + { + "epoch": 24.26923076923077, + "grad_norm": 16.224489212036133, + "learning_rate": 3.7865384615384616e-05, + "loss": 0.4351, + "step": 8834 + }, + { + "epoch": 24.271978021978022, + "grad_norm": 13.399018287658691, + "learning_rate": 3.7864010989010986e-05, + "loss": 0.4645, + "step": 8835 + }, + { + "epoch": 24.274725274725274, + "grad_norm": 12.96436882019043, + "learning_rate": 3.786263736263736e-05, + "loss": 0.5809, + "step": 8836 + }, + { + "epoch": 24.27747252747253, + "grad_norm": 11.865667343139648, + "learning_rate": 3.786126373626373e-05, + "loss": 0.4983, + "step": 8837 + }, + { + "epoch": 24.28021978021978, + "grad_norm": 7.924510955810547, + "learning_rate": 3.785989010989011e-05, + "loss": 0.2316, + "step": 8838 + }, + { + "epoch": 24.282967032967033, + "grad_norm": 18.333173751831055, + "learning_rate": 3.785851648351649e-05, + "loss": 0.7136, + "step": 8839 + }, + { + "epoch": 24.285714285714285, + "grad_norm": 10.066705703735352, + "learning_rate": 3.785714285714286e-05, + "loss": 0.2186, + "step": 8840 + }, + { + "epoch": 24.28846153846154, + "grad_norm": 15.257952690124512, + "learning_rate": 3.7855769230769234e-05, + "loss": 0.3419, + "step": 8841 + }, + { + "epoch": 24.291208791208792, + "grad_norm": 9.03156566619873, + "learning_rate": 3.7854395604395604e-05, + "loss": 0.4685, + "step": 8842 + }, + { + "epoch": 24.293956043956044, + "grad_norm": 5.9974822998046875, + "learning_rate": 3.785302197802198e-05, + "loss": 0.1542, + "step": 8843 + }, + { + "epoch": 24.296703296703296, + "grad_norm": 11.373427391052246, + "learning_rate": 3.785164835164836e-05, + "loss": 0.4237, + "step": 8844 + }, + { + "epoch": 24.29945054945055, + "grad_norm": 14.080435752868652, + "learning_rate": 3.785027472527473e-05, + "loss": 0.4221, + "step": 8845 + }, + { + "epoch": 24.302197802197803, + "grad_norm": 3.337087869644165, + "learning_rate": 3.7848901098901104e-05, + "loss": 0.0846, + "step": 8846 + }, + { + "epoch": 24.304945054945055, + "grad_norm": 8.366045951843262, + "learning_rate": 3.7847527472527474e-05, + "loss": 0.2876, + "step": 8847 + }, + { + "epoch": 24.307692307692307, + "grad_norm": 17.963850021362305, + "learning_rate": 3.784615384615385e-05, + "loss": 0.4785, + "step": 8848 + }, + { + "epoch": 24.310439560439562, + "grad_norm": 7.549898147583008, + "learning_rate": 3.784478021978022e-05, + "loss": 0.2658, + "step": 8849 + }, + { + "epoch": 24.313186813186814, + "grad_norm": 14.269329071044922, + "learning_rate": 3.784340659340659e-05, + "loss": 0.4331, + "step": 8850 + }, + { + "epoch": 24.315934065934066, + "grad_norm": 13.577853202819824, + "learning_rate": 3.784203296703297e-05, + "loss": 0.4988, + "step": 8851 + }, + { + "epoch": 24.318681318681318, + "grad_norm": 15.16123104095459, + "learning_rate": 3.784065934065934e-05, + "loss": 0.3515, + "step": 8852 + }, + { + "epoch": 24.321428571428573, + "grad_norm": 8.007973670959473, + "learning_rate": 3.7839285714285714e-05, + "loss": 0.2351, + "step": 8853 + }, + { + "epoch": 24.324175824175825, + "grad_norm": 7.042808532714844, + "learning_rate": 3.783791208791209e-05, + "loss": 0.1745, + "step": 8854 + }, + { + "epoch": 24.326923076923077, + "grad_norm": 8.956265449523926, + "learning_rate": 3.783653846153846e-05, + "loss": 0.2004, + "step": 8855 + }, + { + "epoch": 24.32967032967033, + "grad_norm": 7.104252815246582, + "learning_rate": 3.783516483516484e-05, + "loss": 0.1308, + "step": 8856 + }, + { + "epoch": 24.332417582417584, + "grad_norm": 12.75611686706543, + "learning_rate": 3.783379120879121e-05, + "loss": 0.426, + "step": 8857 + }, + { + "epoch": 24.335164835164836, + "grad_norm": 11.791158676147461, + "learning_rate": 3.7832417582417585e-05, + "loss": 0.3625, + "step": 8858 + }, + { + "epoch": 24.337912087912088, + "grad_norm": 8.183632850646973, + "learning_rate": 3.783104395604396e-05, + "loss": 0.3064, + "step": 8859 + }, + { + "epoch": 24.34065934065934, + "grad_norm": 13.092015266418457, + "learning_rate": 3.782967032967033e-05, + "loss": 0.3636, + "step": 8860 + }, + { + "epoch": 24.343406593406595, + "grad_norm": 13.921565055847168, + "learning_rate": 3.782829670329671e-05, + "loss": 0.5589, + "step": 8861 + }, + { + "epoch": 24.346153846153847, + "grad_norm": 15.362099647521973, + "learning_rate": 3.782692307692308e-05, + "loss": 0.2968, + "step": 8862 + }, + { + "epoch": 24.3489010989011, + "grad_norm": 9.844620704650879, + "learning_rate": 3.7825549450549455e-05, + "loss": 0.3077, + "step": 8863 + }, + { + "epoch": 24.35164835164835, + "grad_norm": 8.888580322265625, + "learning_rate": 3.7824175824175825e-05, + "loss": 0.2235, + "step": 8864 + }, + { + "epoch": 24.354395604395606, + "grad_norm": 15.646408081054688, + "learning_rate": 3.7822802197802195e-05, + "loss": 0.4817, + "step": 8865 + }, + { + "epoch": 24.357142857142858, + "grad_norm": 5.91176700592041, + "learning_rate": 3.782142857142857e-05, + "loss": 0.1428, + "step": 8866 + }, + { + "epoch": 24.35989010989011, + "grad_norm": 22.603187561035156, + "learning_rate": 3.782005494505494e-05, + "loss": 0.9186, + "step": 8867 + }, + { + "epoch": 24.36263736263736, + "grad_norm": 15.879271507263184, + "learning_rate": 3.781868131868132e-05, + "loss": 0.6378, + "step": 8868 + }, + { + "epoch": 24.365384615384617, + "grad_norm": 11.685379981994629, + "learning_rate": 3.7817307692307696e-05, + "loss": 0.3056, + "step": 8869 + }, + { + "epoch": 24.36813186813187, + "grad_norm": 8.016148567199707, + "learning_rate": 3.7815934065934066e-05, + "loss": 0.1928, + "step": 8870 + }, + { + "epoch": 24.37087912087912, + "grad_norm": 10.616703987121582, + "learning_rate": 3.781456043956044e-05, + "loss": 0.323, + "step": 8871 + }, + { + "epoch": 24.373626373626372, + "grad_norm": 14.353520393371582, + "learning_rate": 3.781318681318681e-05, + "loss": 0.4444, + "step": 8872 + }, + { + "epoch": 24.376373626373628, + "grad_norm": 13.382771492004395, + "learning_rate": 3.781181318681319e-05, + "loss": 0.4118, + "step": 8873 + }, + { + "epoch": 24.37912087912088, + "grad_norm": 12.056011199951172, + "learning_rate": 3.7810439560439566e-05, + "loss": 0.4255, + "step": 8874 + }, + { + "epoch": 24.38186813186813, + "grad_norm": 3.464430332183838, + "learning_rate": 3.7809065934065936e-05, + "loss": 0.1116, + "step": 8875 + }, + { + "epoch": 24.384615384615383, + "grad_norm": 9.785865783691406, + "learning_rate": 3.780769230769231e-05, + "loss": 0.2277, + "step": 8876 + }, + { + "epoch": 24.38736263736264, + "grad_norm": 17.974136352539062, + "learning_rate": 3.780631868131868e-05, + "loss": 0.5876, + "step": 8877 + }, + { + "epoch": 24.39010989010989, + "grad_norm": 12.188488006591797, + "learning_rate": 3.780494505494506e-05, + "loss": 0.3213, + "step": 8878 + }, + { + "epoch": 24.392857142857142, + "grad_norm": 18.46025276184082, + "learning_rate": 3.780357142857143e-05, + "loss": 0.4982, + "step": 8879 + }, + { + "epoch": 24.395604395604394, + "grad_norm": 14.915495872497559, + "learning_rate": 3.78021978021978e-05, + "loss": 0.6895, + "step": 8880 + }, + { + "epoch": 24.39835164835165, + "grad_norm": 6.993436336517334, + "learning_rate": 3.7800824175824177e-05, + "loss": 0.1413, + "step": 8881 + }, + { + "epoch": 24.4010989010989, + "grad_norm": 9.868965148925781, + "learning_rate": 3.7799450549450547e-05, + "loss": 0.3333, + "step": 8882 + }, + { + "epoch": 24.403846153846153, + "grad_norm": 12.968503952026367, + "learning_rate": 3.7798076923076923e-05, + "loss": 0.4541, + "step": 8883 + }, + { + "epoch": 24.406593406593405, + "grad_norm": 17.223472595214844, + "learning_rate": 3.77967032967033e-05, + "loss": 0.3102, + "step": 8884 + }, + { + "epoch": 24.40934065934066, + "grad_norm": 10.755099296569824, + "learning_rate": 3.779532967032967e-05, + "loss": 0.4089, + "step": 8885 + }, + { + "epoch": 24.412087912087912, + "grad_norm": 17.668930053710938, + "learning_rate": 3.779395604395605e-05, + "loss": 0.6538, + "step": 8886 + }, + { + "epoch": 24.414835164835164, + "grad_norm": 16.603776931762695, + "learning_rate": 3.779258241758242e-05, + "loss": 0.3905, + "step": 8887 + }, + { + "epoch": 24.417582417582416, + "grad_norm": 13.457953453063965, + "learning_rate": 3.7791208791208794e-05, + "loss": 0.39, + "step": 8888 + }, + { + "epoch": 24.42032967032967, + "grad_norm": 11.93896198272705, + "learning_rate": 3.778983516483517e-05, + "loss": 0.4189, + "step": 8889 + }, + { + "epoch": 24.423076923076923, + "grad_norm": 8.598388671875, + "learning_rate": 3.778846153846154e-05, + "loss": 0.2098, + "step": 8890 + }, + { + "epoch": 24.425824175824175, + "grad_norm": 13.025726318359375, + "learning_rate": 3.778708791208792e-05, + "loss": 0.3524, + "step": 8891 + }, + { + "epoch": 24.428571428571427, + "grad_norm": 13.124335289001465, + "learning_rate": 3.778571428571429e-05, + "loss": 0.4723, + "step": 8892 + }, + { + "epoch": 24.431318681318682, + "grad_norm": 11.859095573425293, + "learning_rate": 3.7784340659340664e-05, + "loss": 0.2377, + "step": 8893 + }, + { + "epoch": 24.434065934065934, + "grad_norm": 5.875703811645508, + "learning_rate": 3.7782967032967034e-05, + "loss": 0.1404, + "step": 8894 + }, + { + "epoch": 24.436813186813186, + "grad_norm": 11.664632797241211, + "learning_rate": 3.7781593406593404e-05, + "loss": 0.3543, + "step": 8895 + }, + { + "epoch": 24.439560439560438, + "grad_norm": 19.806671142578125, + "learning_rate": 3.778021978021978e-05, + "loss": 0.5355, + "step": 8896 + }, + { + "epoch": 24.442307692307693, + "grad_norm": 9.814300537109375, + "learning_rate": 3.777884615384615e-05, + "loss": 0.231, + "step": 8897 + }, + { + "epoch": 24.445054945054945, + "grad_norm": 20.181589126586914, + "learning_rate": 3.777747252747253e-05, + "loss": 0.4716, + "step": 8898 + }, + { + "epoch": 24.447802197802197, + "grad_norm": 11.304797172546387, + "learning_rate": 3.7776098901098905e-05, + "loss": 0.1946, + "step": 8899 + }, + { + "epoch": 24.45054945054945, + "grad_norm": 8.947501182556152, + "learning_rate": 3.7774725274725275e-05, + "loss": 0.3628, + "step": 8900 + }, + { + "epoch": 24.453296703296704, + "grad_norm": 18.326269149780273, + "learning_rate": 3.777335164835165e-05, + "loss": 0.7061, + "step": 8901 + }, + { + "epoch": 24.456043956043956, + "grad_norm": 13.788545608520508, + "learning_rate": 3.777197802197802e-05, + "loss": 0.305, + "step": 8902 + }, + { + "epoch": 24.458791208791208, + "grad_norm": 9.46928882598877, + "learning_rate": 3.77706043956044e-05, + "loss": 0.2226, + "step": 8903 + }, + { + "epoch": 24.46153846153846, + "grad_norm": 15.394376754760742, + "learning_rate": 3.7769230769230775e-05, + "loss": 0.6989, + "step": 8904 + }, + { + "epoch": 24.464285714285715, + "grad_norm": 7.296290874481201, + "learning_rate": 3.7767857142857145e-05, + "loss": 0.1584, + "step": 8905 + }, + { + "epoch": 24.467032967032967, + "grad_norm": 10.406493186950684, + "learning_rate": 3.776648351648352e-05, + "loss": 0.2127, + "step": 8906 + }, + { + "epoch": 24.46978021978022, + "grad_norm": 11.672332763671875, + "learning_rate": 3.776510989010989e-05, + "loss": 0.2877, + "step": 8907 + }, + { + "epoch": 24.47252747252747, + "grad_norm": 9.939157485961914, + "learning_rate": 3.776373626373627e-05, + "loss": 0.1839, + "step": 8908 + }, + { + "epoch": 24.475274725274726, + "grad_norm": 17.19172477722168, + "learning_rate": 3.776236263736264e-05, + "loss": 0.7685, + "step": 8909 + }, + { + "epoch": 24.478021978021978, + "grad_norm": 16.128629684448242, + "learning_rate": 3.776098901098901e-05, + "loss": 0.5015, + "step": 8910 + }, + { + "epoch": 24.48076923076923, + "grad_norm": 15.836339950561523, + "learning_rate": 3.7759615384615386e-05, + "loss": 0.8577, + "step": 8911 + }, + { + "epoch": 24.483516483516482, + "grad_norm": 10.948392868041992, + "learning_rate": 3.7758241758241756e-05, + "loss": 0.3902, + "step": 8912 + }, + { + "epoch": 24.486263736263737, + "grad_norm": 11.640900611877441, + "learning_rate": 3.775686813186813e-05, + "loss": 0.2362, + "step": 8913 + }, + { + "epoch": 24.48901098901099, + "grad_norm": 4.78270959854126, + "learning_rate": 3.775549450549451e-05, + "loss": 0.0989, + "step": 8914 + }, + { + "epoch": 24.49175824175824, + "grad_norm": 11.016200065612793, + "learning_rate": 3.775412087912088e-05, + "loss": 0.2611, + "step": 8915 + }, + { + "epoch": 24.494505494505496, + "grad_norm": 11.673843383789062, + "learning_rate": 3.7752747252747256e-05, + "loss": 0.4028, + "step": 8916 + }, + { + "epoch": 24.497252747252748, + "grad_norm": 18.627317428588867, + "learning_rate": 3.7751373626373626e-05, + "loss": 0.5599, + "step": 8917 + }, + { + "epoch": 24.5, + "grad_norm": 5.399916172027588, + "learning_rate": 3.775e-05, + "loss": 0.1673, + "step": 8918 + }, + { + "epoch": 24.502747252747252, + "grad_norm": 7.074141502380371, + "learning_rate": 3.774862637362638e-05, + "loss": 0.2205, + "step": 8919 + }, + { + "epoch": 24.505494505494504, + "grad_norm": 9.776447296142578, + "learning_rate": 3.774725274725275e-05, + "loss": 0.3364, + "step": 8920 + }, + { + "epoch": 24.50824175824176, + "grad_norm": 16.865236282348633, + "learning_rate": 3.7745879120879126e-05, + "loss": 0.3448, + "step": 8921 + }, + { + "epoch": 24.51098901098901, + "grad_norm": 13.744149208068848, + "learning_rate": 3.7744505494505496e-05, + "loss": 0.7589, + "step": 8922 + }, + { + "epoch": 24.513736263736263, + "grad_norm": 11.137139320373535, + "learning_rate": 3.774313186813187e-05, + "loss": 0.4263, + "step": 8923 + }, + { + "epoch": 24.516483516483518, + "grad_norm": 11.378152847290039, + "learning_rate": 3.774175824175824e-05, + "loss": 0.2936, + "step": 8924 + }, + { + "epoch": 24.51923076923077, + "grad_norm": 11.934825897216797, + "learning_rate": 3.774038461538461e-05, + "loss": 0.785, + "step": 8925 + }, + { + "epoch": 24.521978021978022, + "grad_norm": 12.251779556274414, + "learning_rate": 3.773901098901099e-05, + "loss": 0.5077, + "step": 8926 + }, + { + "epoch": 24.524725274725274, + "grad_norm": 9.79491901397705, + "learning_rate": 3.773763736263736e-05, + "loss": 0.3796, + "step": 8927 + }, + { + "epoch": 24.52747252747253, + "grad_norm": 7.559499263763428, + "learning_rate": 3.773626373626374e-05, + "loss": 0.1535, + "step": 8928 + }, + { + "epoch": 24.53021978021978, + "grad_norm": 14.399822235107422, + "learning_rate": 3.7734890109890114e-05, + "loss": 0.3663, + "step": 8929 + }, + { + "epoch": 24.532967032967033, + "grad_norm": 19.780942916870117, + "learning_rate": 3.7733516483516484e-05, + "loss": 0.5594, + "step": 8930 + }, + { + "epoch": 24.535714285714285, + "grad_norm": 17.411048889160156, + "learning_rate": 3.773214285714286e-05, + "loss": 0.7948, + "step": 8931 + }, + { + "epoch": 24.53846153846154, + "grad_norm": 5.764687538146973, + "learning_rate": 3.773076923076923e-05, + "loss": 0.1276, + "step": 8932 + }, + { + "epoch": 24.541208791208792, + "grad_norm": 10.217782020568848, + "learning_rate": 3.772939560439561e-05, + "loss": 0.1887, + "step": 8933 + }, + { + "epoch": 24.543956043956044, + "grad_norm": 16.708311080932617, + "learning_rate": 3.7728021978021984e-05, + "loss": 0.4579, + "step": 8934 + }, + { + "epoch": 24.546703296703296, + "grad_norm": 12.046256065368652, + "learning_rate": 3.7726648351648354e-05, + "loss": 0.458, + "step": 8935 + }, + { + "epoch": 24.54945054945055, + "grad_norm": 15.624783515930176, + "learning_rate": 3.772527472527473e-05, + "loss": 0.4511, + "step": 8936 + }, + { + "epoch": 24.552197802197803, + "grad_norm": 13.86174488067627, + "learning_rate": 3.77239010989011e-05, + "loss": 0.8845, + "step": 8937 + }, + { + "epoch": 24.554945054945055, + "grad_norm": 14.82982063293457, + "learning_rate": 3.772252747252748e-05, + "loss": 0.2934, + "step": 8938 + }, + { + "epoch": 24.557692307692307, + "grad_norm": 12.978719711303711, + "learning_rate": 3.772115384615385e-05, + "loss": 0.3254, + "step": 8939 + }, + { + "epoch": 24.560439560439562, + "grad_norm": 11.375593185424805, + "learning_rate": 3.771978021978022e-05, + "loss": 0.296, + "step": 8940 + }, + { + "epoch": 24.563186813186814, + "grad_norm": 16.880760192871094, + "learning_rate": 3.7718406593406595e-05, + "loss": 0.6505, + "step": 8941 + }, + { + "epoch": 24.565934065934066, + "grad_norm": 6.321883201599121, + "learning_rate": 3.7717032967032965e-05, + "loss": 0.1353, + "step": 8942 + }, + { + "epoch": 24.568681318681318, + "grad_norm": 12.484282493591309, + "learning_rate": 3.771565934065934e-05, + "loss": 0.3389, + "step": 8943 + }, + { + "epoch": 24.571428571428573, + "grad_norm": 5.715498447418213, + "learning_rate": 3.771428571428572e-05, + "loss": 0.1016, + "step": 8944 + }, + { + "epoch": 24.574175824175825, + "grad_norm": 11.89148235321045, + "learning_rate": 3.771291208791209e-05, + "loss": 0.4238, + "step": 8945 + }, + { + "epoch": 24.576923076923077, + "grad_norm": 9.079180717468262, + "learning_rate": 3.7711538461538465e-05, + "loss": 0.1723, + "step": 8946 + }, + { + "epoch": 24.57967032967033, + "grad_norm": 19.896381378173828, + "learning_rate": 3.7710164835164835e-05, + "loss": 0.5958, + "step": 8947 + }, + { + "epoch": 24.582417582417584, + "grad_norm": 12.853544235229492, + "learning_rate": 3.770879120879121e-05, + "loss": 0.5802, + "step": 8948 + }, + { + "epoch": 24.585164835164836, + "grad_norm": 5.758126735687256, + "learning_rate": 3.770741758241759e-05, + "loss": 0.1319, + "step": 8949 + }, + { + "epoch": 24.587912087912088, + "grad_norm": 4.320342540740967, + "learning_rate": 3.770604395604396e-05, + "loss": 0.0936, + "step": 8950 + }, + { + "epoch": 24.59065934065934, + "grad_norm": 22.855154037475586, + "learning_rate": 3.7704670329670335e-05, + "loss": 0.7902, + "step": 8951 + }, + { + "epoch": 24.593406593406595, + "grad_norm": 11.314411163330078, + "learning_rate": 3.7703296703296705e-05, + "loss": 0.3318, + "step": 8952 + }, + { + "epoch": 24.596153846153847, + "grad_norm": 10.976347923278809, + "learning_rate": 3.770192307692308e-05, + "loss": 0.2199, + "step": 8953 + }, + { + "epoch": 24.5989010989011, + "grad_norm": 8.126609802246094, + "learning_rate": 3.770054945054945e-05, + "loss": 0.2562, + "step": 8954 + }, + { + "epoch": 24.60164835164835, + "grad_norm": 14.536544799804688, + "learning_rate": 3.769917582417582e-05, + "loss": 0.497, + "step": 8955 + }, + { + "epoch": 24.604395604395606, + "grad_norm": 5.860301494598389, + "learning_rate": 3.76978021978022e-05, + "loss": 0.1167, + "step": 8956 + }, + { + "epoch": 24.607142857142858, + "grad_norm": 7.548299312591553, + "learning_rate": 3.769642857142857e-05, + "loss": 0.2812, + "step": 8957 + }, + { + "epoch": 24.60989010989011, + "grad_norm": 22.519628524780273, + "learning_rate": 3.7695054945054946e-05, + "loss": 0.6053, + "step": 8958 + }, + { + "epoch": 24.61263736263736, + "grad_norm": 14.653684616088867, + "learning_rate": 3.769368131868132e-05, + "loss": 0.3416, + "step": 8959 + }, + { + "epoch": 24.615384615384617, + "grad_norm": 22.093578338623047, + "learning_rate": 3.769230769230769e-05, + "loss": 0.5262, + "step": 8960 + }, + { + "epoch": 24.61813186813187, + "grad_norm": 12.601491928100586, + "learning_rate": 3.769093406593407e-05, + "loss": 0.3771, + "step": 8961 + }, + { + "epoch": 24.62087912087912, + "grad_norm": 14.500751495361328, + "learning_rate": 3.768956043956044e-05, + "loss": 0.3744, + "step": 8962 + }, + { + "epoch": 24.623626373626372, + "grad_norm": 14.854469299316406, + "learning_rate": 3.7688186813186816e-05, + "loss": 0.4446, + "step": 8963 + }, + { + "epoch": 24.626373626373628, + "grad_norm": 11.40496826171875, + "learning_rate": 3.768681318681319e-05, + "loss": 0.3135, + "step": 8964 + }, + { + "epoch": 24.62912087912088, + "grad_norm": 17.491409301757812, + "learning_rate": 3.768543956043956e-05, + "loss": 0.5389, + "step": 8965 + }, + { + "epoch": 24.63186813186813, + "grad_norm": 14.241576194763184, + "learning_rate": 3.768406593406594e-05, + "loss": 0.7716, + "step": 8966 + }, + { + "epoch": 24.634615384615383, + "grad_norm": 13.048595428466797, + "learning_rate": 3.768269230769231e-05, + "loss": 0.5442, + "step": 8967 + }, + { + "epoch": 24.63736263736264, + "grad_norm": 16.629125595092773, + "learning_rate": 3.768131868131869e-05, + "loss": 0.6913, + "step": 8968 + }, + { + "epoch": 24.64010989010989, + "grad_norm": 5.3273468017578125, + "learning_rate": 3.767994505494506e-05, + "loss": 0.1158, + "step": 8969 + }, + { + "epoch": 24.642857142857142, + "grad_norm": 7.643458366394043, + "learning_rate": 3.767857142857143e-05, + "loss": 0.1602, + "step": 8970 + }, + { + "epoch": 24.645604395604394, + "grad_norm": 15.964607238769531, + "learning_rate": 3.7677197802197804e-05, + "loss": 0.5435, + "step": 8971 + }, + { + "epoch": 24.64835164835165, + "grad_norm": 13.963533401489258, + "learning_rate": 3.7675824175824174e-05, + "loss": 0.4246, + "step": 8972 + }, + { + "epoch": 24.6510989010989, + "grad_norm": 9.150176048278809, + "learning_rate": 3.767445054945055e-05, + "loss": 0.2921, + "step": 8973 + }, + { + "epoch": 24.653846153846153, + "grad_norm": 17.2174015045166, + "learning_rate": 3.767307692307693e-05, + "loss": 0.7943, + "step": 8974 + }, + { + "epoch": 24.656593406593405, + "grad_norm": 4.344949245452881, + "learning_rate": 3.76717032967033e-05, + "loss": 0.0998, + "step": 8975 + }, + { + "epoch": 24.65934065934066, + "grad_norm": 16.054901123046875, + "learning_rate": 3.7670329670329674e-05, + "loss": 0.5692, + "step": 8976 + }, + { + "epoch": 24.662087912087912, + "grad_norm": 8.65925121307373, + "learning_rate": 3.7668956043956044e-05, + "loss": 0.2087, + "step": 8977 + }, + { + "epoch": 24.664835164835164, + "grad_norm": 10.34578800201416, + "learning_rate": 3.766758241758242e-05, + "loss": 0.4042, + "step": 8978 + }, + { + "epoch": 24.667582417582416, + "grad_norm": 11.285049438476562, + "learning_rate": 3.76662087912088e-05, + "loss": 0.1809, + "step": 8979 + }, + { + "epoch": 24.67032967032967, + "grad_norm": 12.231816291809082, + "learning_rate": 3.766483516483517e-05, + "loss": 0.3657, + "step": 8980 + }, + { + "epoch": 24.673076923076923, + "grad_norm": 11.081371307373047, + "learning_rate": 3.7663461538461544e-05, + "loss": 0.2311, + "step": 8981 + }, + { + "epoch": 24.675824175824175, + "grad_norm": 14.534594535827637, + "learning_rate": 3.7662087912087914e-05, + "loss": 0.5542, + "step": 8982 + }, + { + "epoch": 24.678571428571427, + "grad_norm": 11.15147876739502, + "learning_rate": 3.766071428571429e-05, + "loss": 0.2328, + "step": 8983 + }, + { + "epoch": 24.681318681318682, + "grad_norm": 26.244054794311523, + "learning_rate": 3.765934065934066e-05, + "loss": 1.5102, + "step": 8984 + }, + { + "epoch": 24.684065934065934, + "grad_norm": 16.25495719909668, + "learning_rate": 3.765796703296703e-05, + "loss": 0.6327, + "step": 8985 + }, + { + "epoch": 24.686813186813186, + "grad_norm": 11.141584396362305, + "learning_rate": 3.765659340659341e-05, + "loss": 0.3045, + "step": 8986 + }, + { + "epoch": 24.689560439560438, + "grad_norm": 7.27592658996582, + "learning_rate": 3.765521978021978e-05, + "loss": 0.1256, + "step": 8987 + }, + { + "epoch": 24.692307692307693, + "grad_norm": 15.620270729064941, + "learning_rate": 3.7653846153846155e-05, + "loss": 0.3605, + "step": 8988 + }, + { + "epoch": 24.695054945054945, + "grad_norm": 15.034167289733887, + "learning_rate": 3.7652472527472525e-05, + "loss": 0.3322, + "step": 8989 + }, + { + "epoch": 24.697802197802197, + "grad_norm": 8.397627830505371, + "learning_rate": 3.76510989010989e-05, + "loss": 0.1986, + "step": 8990 + }, + { + "epoch": 24.70054945054945, + "grad_norm": 8.449346542358398, + "learning_rate": 3.764972527472528e-05, + "loss": 0.1674, + "step": 8991 + }, + { + "epoch": 24.703296703296704, + "grad_norm": 18.64571762084961, + "learning_rate": 3.764835164835165e-05, + "loss": 0.5861, + "step": 8992 + }, + { + "epoch": 24.706043956043956, + "grad_norm": 10.455089569091797, + "learning_rate": 3.7646978021978025e-05, + "loss": 0.3866, + "step": 8993 + }, + { + "epoch": 24.708791208791208, + "grad_norm": 10.020354270935059, + "learning_rate": 3.7645604395604395e-05, + "loss": 0.2316, + "step": 8994 + }, + { + "epoch": 24.71153846153846, + "grad_norm": 21.49954605102539, + "learning_rate": 3.764423076923077e-05, + "loss": 0.6897, + "step": 8995 + }, + { + "epoch": 24.714285714285715, + "grad_norm": 16.768413543701172, + "learning_rate": 3.764285714285715e-05, + "loss": 0.5168, + "step": 8996 + }, + { + "epoch": 24.717032967032967, + "grad_norm": 7.351863861083984, + "learning_rate": 3.764148351648352e-05, + "loss": 0.1939, + "step": 8997 + }, + { + "epoch": 24.71978021978022, + "grad_norm": 12.325108528137207, + "learning_rate": 3.7640109890109896e-05, + "loss": 0.1931, + "step": 8998 + }, + { + "epoch": 24.72252747252747, + "grad_norm": 18.022079467773438, + "learning_rate": 3.7638736263736266e-05, + "loss": 0.5666, + "step": 8999 + }, + { + "epoch": 24.725274725274726, + "grad_norm": 10.287960052490234, + "learning_rate": 3.7637362637362636e-05, + "loss": 0.2139, + "step": 9000 + }, + { + "epoch": 24.728021978021978, + "grad_norm": 6.537809371948242, + "learning_rate": 3.763598901098901e-05, + "loss": 0.2456, + "step": 9001 + }, + { + "epoch": 24.73076923076923, + "grad_norm": 12.588159561157227, + "learning_rate": 3.763461538461538e-05, + "loss": 0.4745, + "step": 9002 + }, + { + "epoch": 24.733516483516482, + "grad_norm": 7.304887294769287, + "learning_rate": 3.763324175824176e-05, + "loss": 0.2113, + "step": 9003 + }, + { + "epoch": 24.736263736263737, + "grad_norm": 7.351181507110596, + "learning_rate": 3.763186813186813e-05, + "loss": 0.26, + "step": 9004 + }, + { + "epoch": 24.73901098901099, + "grad_norm": 9.198769569396973, + "learning_rate": 3.7630494505494506e-05, + "loss": 0.3853, + "step": 9005 + }, + { + "epoch": 24.74175824175824, + "grad_norm": 20.87617301940918, + "learning_rate": 3.762912087912088e-05, + "loss": 0.6231, + "step": 9006 + }, + { + "epoch": 24.744505494505496, + "grad_norm": 15.642412185668945, + "learning_rate": 3.762774725274725e-05, + "loss": 0.4007, + "step": 9007 + }, + { + "epoch": 24.747252747252748, + "grad_norm": 12.786712646484375, + "learning_rate": 3.762637362637363e-05, + "loss": 0.5224, + "step": 9008 + }, + { + "epoch": 24.75, + "grad_norm": 10.837982177734375, + "learning_rate": 3.7625e-05, + "loss": 0.4152, + "step": 9009 + }, + { + "epoch": 24.752747252747252, + "grad_norm": 21.842741012573242, + "learning_rate": 3.7623626373626377e-05, + "loss": 0.6031, + "step": 9010 + }, + { + "epoch": 24.755494505494504, + "grad_norm": 14.202892303466797, + "learning_rate": 3.762225274725275e-05, + "loss": 0.4931, + "step": 9011 + }, + { + "epoch": 24.75824175824176, + "grad_norm": 8.480677604675293, + "learning_rate": 3.762087912087912e-05, + "loss": 0.1775, + "step": 9012 + }, + { + "epoch": 24.76098901098901, + "grad_norm": 16.600622177124023, + "learning_rate": 3.76195054945055e-05, + "loss": 0.4879, + "step": 9013 + }, + { + "epoch": 24.763736263736263, + "grad_norm": 20.132530212402344, + "learning_rate": 3.761813186813187e-05, + "loss": 0.6071, + "step": 9014 + }, + { + "epoch": 24.766483516483518, + "grad_norm": 11.60565185546875, + "learning_rate": 3.761675824175824e-05, + "loss": 0.2767, + "step": 9015 + }, + { + "epoch": 24.76923076923077, + "grad_norm": 6.553802013397217, + "learning_rate": 3.761538461538462e-05, + "loss": 0.1032, + "step": 9016 + }, + { + "epoch": 24.771978021978022, + "grad_norm": 9.749641418457031, + "learning_rate": 3.761401098901099e-05, + "loss": 0.2422, + "step": 9017 + }, + { + "epoch": 24.774725274725274, + "grad_norm": 13.733137130737305, + "learning_rate": 3.7612637362637364e-05, + "loss": 0.3964, + "step": 9018 + }, + { + "epoch": 24.77747252747253, + "grad_norm": 16.493759155273438, + "learning_rate": 3.7611263736263734e-05, + "loss": 0.5377, + "step": 9019 + }, + { + "epoch": 24.78021978021978, + "grad_norm": 6.911892414093018, + "learning_rate": 3.760989010989011e-05, + "loss": 0.1839, + "step": 9020 + }, + { + "epoch": 24.782967032967033, + "grad_norm": 3.597158432006836, + "learning_rate": 3.760851648351649e-05, + "loss": 0.0769, + "step": 9021 + }, + { + "epoch": 24.785714285714285, + "grad_norm": 8.561395645141602, + "learning_rate": 3.760714285714286e-05, + "loss": 0.2537, + "step": 9022 + }, + { + "epoch": 24.78846153846154, + "grad_norm": 9.042856216430664, + "learning_rate": 3.7605769230769234e-05, + "loss": 0.2354, + "step": 9023 + }, + { + "epoch": 24.791208791208792, + "grad_norm": 10.62063217163086, + "learning_rate": 3.7604395604395604e-05, + "loss": 0.578, + "step": 9024 + }, + { + "epoch": 24.793956043956044, + "grad_norm": 10.369928359985352, + "learning_rate": 3.760302197802198e-05, + "loss": 0.2846, + "step": 9025 + }, + { + "epoch": 24.796703296703296, + "grad_norm": 12.629817008972168, + "learning_rate": 3.760164835164836e-05, + "loss": 0.4805, + "step": 9026 + }, + { + "epoch": 24.79945054945055, + "grad_norm": 14.872379302978516, + "learning_rate": 3.760027472527473e-05, + "loss": 0.6417, + "step": 9027 + }, + { + "epoch": 24.802197802197803, + "grad_norm": 17.255542755126953, + "learning_rate": 3.7598901098901105e-05, + "loss": 0.5029, + "step": 9028 + }, + { + "epoch": 24.804945054945055, + "grad_norm": 16.081865310668945, + "learning_rate": 3.7597527472527475e-05, + "loss": 0.6191, + "step": 9029 + }, + { + "epoch": 24.807692307692307, + "grad_norm": 16.325599670410156, + "learning_rate": 3.7596153846153845e-05, + "loss": 0.5358, + "step": 9030 + }, + { + "epoch": 24.810439560439562, + "grad_norm": 16.71994972229004, + "learning_rate": 3.759478021978022e-05, + "loss": 0.6659, + "step": 9031 + }, + { + "epoch": 24.813186813186814, + "grad_norm": 9.327435493469238, + "learning_rate": 3.759340659340659e-05, + "loss": 0.1161, + "step": 9032 + }, + { + "epoch": 24.815934065934066, + "grad_norm": 12.6051664352417, + "learning_rate": 3.759203296703297e-05, + "loss": 0.2915, + "step": 9033 + }, + { + "epoch": 24.818681318681318, + "grad_norm": 10.861885070800781, + "learning_rate": 3.759065934065934e-05, + "loss": 0.4732, + "step": 9034 + }, + { + "epoch": 24.821428571428573, + "grad_norm": 12.252662658691406, + "learning_rate": 3.7589285714285715e-05, + "loss": 0.3804, + "step": 9035 + }, + { + "epoch": 24.824175824175825, + "grad_norm": 7.329134941101074, + "learning_rate": 3.758791208791209e-05, + "loss": 0.3427, + "step": 9036 + }, + { + "epoch": 24.826923076923077, + "grad_norm": 16.37885093688965, + "learning_rate": 3.758653846153846e-05, + "loss": 0.5207, + "step": 9037 + }, + { + "epoch": 24.82967032967033, + "grad_norm": 4.887317657470703, + "learning_rate": 3.758516483516484e-05, + "loss": 0.1015, + "step": 9038 + }, + { + "epoch": 24.832417582417584, + "grad_norm": 9.407127380371094, + "learning_rate": 3.758379120879121e-05, + "loss": 0.1997, + "step": 9039 + }, + { + "epoch": 24.835164835164836, + "grad_norm": 9.709733963012695, + "learning_rate": 3.7582417582417586e-05, + "loss": 0.2696, + "step": 9040 + }, + { + "epoch": 24.837912087912088, + "grad_norm": 9.645843505859375, + "learning_rate": 3.758104395604396e-05, + "loss": 0.3795, + "step": 9041 + }, + { + "epoch": 24.84065934065934, + "grad_norm": 10.84106731414795, + "learning_rate": 3.757967032967033e-05, + "loss": 0.3121, + "step": 9042 + }, + { + "epoch": 24.843406593406595, + "grad_norm": 22.859037399291992, + "learning_rate": 3.757829670329671e-05, + "loss": 0.806, + "step": 9043 + }, + { + "epoch": 24.846153846153847, + "grad_norm": 16.814983367919922, + "learning_rate": 3.757692307692308e-05, + "loss": 0.4753, + "step": 9044 + }, + { + "epoch": 24.8489010989011, + "grad_norm": 12.710043907165527, + "learning_rate": 3.757554945054945e-05, + "loss": 0.2079, + "step": 9045 + }, + { + "epoch": 24.85164835164835, + "grad_norm": 10.405463218688965, + "learning_rate": 3.7574175824175826e-05, + "loss": 0.3847, + "step": 9046 + }, + { + "epoch": 24.854395604395606, + "grad_norm": 12.140028953552246, + "learning_rate": 3.7572802197802196e-05, + "loss": 0.4262, + "step": 9047 + }, + { + "epoch": 24.857142857142858, + "grad_norm": 7.319334506988525, + "learning_rate": 3.757142857142857e-05, + "loss": 0.2155, + "step": 9048 + }, + { + "epoch": 24.85989010989011, + "grad_norm": 7.331462860107422, + "learning_rate": 3.757005494505494e-05, + "loss": 0.2292, + "step": 9049 + }, + { + "epoch": 24.86263736263736, + "grad_norm": 14.209742546081543, + "learning_rate": 3.756868131868132e-05, + "loss": 0.346, + "step": 9050 + }, + { + "epoch": 24.865384615384617, + "grad_norm": 13.470032691955566, + "learning_rate": 3.7567307692307696e-05, + "loss": 0.3876, + "step": 9051 + }, + { + "epoch": 24.86813186813187, + "grad_norm": 13.1031494140625, + "learning_rate": 3.7565934065934066e-05, + "loss": 0.4629, + "step": 9052 + }, + { + "epoch": 24.87087912087912, + "grad_norm": 17.37920570373535, + "learning_rate": 3.756456043956044e-05, + "loss": 0.7094, + "step": 9053 + }, + { + "epoch": 24.873626373626372, + "grad_norm": 13.980599403381348, + "learning_rate": 3.756318681318681e-05, + "loss": 0.6273, + "step": 9054 + }, + { + "epoch": 24.876373626373628, + "grad_norm": 13.722429275512695, + "learning_rate": 3.756181318681319e-05, + "loss": 0.3802, + "step": 9055 + }, + { + "epoch": 24.87912087912088, + "grad_norm": 16.905778884887695, + "learning_rate": 3.756043956043957e-05, + "loss": 0.5338, + "step": 9056 + }, + { + "epoch": 24.88186813186813, + "grad_norm": 23.91407585144043, + "learning_rate": 3.755906593406594e-05, + "loss": 0.5862, + "step": 9057 + }, + { + "epoch": 24.884615384615383, + "grad_norm": 8.33436393737793, + "learning_rate": 3.7557692307692314e-05, + "loss": 0.1401, + "step": 9058 + }, + { + "epoch": 24.88736263736264, + "grad_norm": 13.59007740020752, + "learning_rate": 3.7556318681318684e-05, + "loss": 0.349, + "step": 9059 + }, + { + "epoch": 24.89010989010989, + "grad_norm": 12.346296310424805, + "learning_rate": 3.7554945054945054e-05, + "loss": 0.408, + "step": 9060 + }, + { + "epoch": 24.892857142857142, + "grad_norm": 19.39501953125, + "learning_rate": 3.755357142857143e-05, + "loss": 0.7986, + "step": 9061 + }, + { + "epoch": 24.895604395604394, + "grad_norm": 5.999484062194824, + "learning_rate": 3.75521978021978e-05, + "loss": 0.2914, + "step": 9062 + }, + { + "epoch": 24.89835164835165, + "grad_norm": 12.09804916381836, + "learning_rate": 3.755082417582418e-05, + "loss": 0.2975, + "step": 9063 + }, + { + "epoch": 24.9010989010989, + "grad_norm": 13.968620300292969, + "learning_rate": 3.754945054945055e-05, + "loss": 0.3896, + "step": 9064 + }, + { + "epoch": 24.903846153846153, + "grad_norm": 14.695919036865234, + "learning_rate": 3.7548076923076924e-05, + "loss": 0.6122, + "step": 9065 + }, + { + "epoch": 24.906593406593405, + "grad_norm": 17.533496856689453, + "learning_rate": 3.75467032967033e-05, + "loss": 0.6558, + "step": 9066 + }, + { + "epoch": 24.90934065934066, + "grad_norm": 5.985957145690918, + "learning_rate": 3.754532967032967e-05, + "loss": 0.1628, + "step": 9067 + }, + { + "epoch": 24.912087912087912, + "grad_norm": 11.413103103637695, + "learning_rate": 3.754395604395605e-05, + "loss": 0.226, + "step": 9068 + }, + { + "epoch": 24.914835164835164, + "grad_norm": 11.065105438232422, + "learning_rate": 3.754258241758242e-05, + "loss": 0.1935, + "step": 9069 + }, + { + "epoch": 24.917582417582416, + "grad_norm": 17.47427749633789, + "learning_rate": 3.7541208791208795e-05, + "loss": 0.5232, + "step": 9070 + }, + { + "epoch": 24.92032967032967, + "grad_norm": 17.971065521240234, + "learning_rate": 3.753983516483517e-05, + "loss": 0.5599, + "step": 9071 + }, + { + "epoch": 24.923076923076923, + "grad_norm": 17.740684509277344, + "learning_rate": 3.753846153846154e-05, + "loss": 0.5235, + "step": 9072 + }, + { + "epoch": 24.925824175824175, + "grad_norm": 17.932518005371094, + "learning_rate": 3.753708791208792e-05, + "loss": 0.6213, + "step": 9073 + }, + { + "epoch": 24.928571428571427, + "grad_norm": 8.273660659790039, + "learning_rate": 3.753571428571429e-05, + "loss": 0.3612, + "step": 9074 + }, + { + "epoch": 24.931318681318682, + "grad_norm": 10.475858688354492, + "learning_rate": 3.753434065934066e-05, + "loss": 0.2422, + "step": 9075 + }, + { + "epoch": 24.934065934065934, + "grad_norm": 17.365604400634766, + "learning_rate": 3.7532967032967035e-05, + "loss": 0.4696, + "step": 9076 + }, + { + "epoch": 24.936813186813186, + "grad_norm": 9.335957527160645, + "learning_rate": 3.7531593406593405e-05, + "loss": 0.1879, + "step": 9077 + }, + { + "epoch": 24.939560439560438, + "grad_norm": 11.73932933807373, + "learning_rate": 3.753021978021978e-05, + "loss": 0.2812, + "step": 9078 + }, + { + "epoch": 24.942307692307693, + "grad_norm": 19.303630828857422, + "learning_rate": 3.752884615384615e-05, + "loss": 0.7481, + "step": 9079 + }, + { + "epoch": 24.945054945054945, + "grad_norm": 5.881444931030273, + "learning_rate": 3.752747252747253e-05, + "loss": 0.1571, + "step": 9080 + }, + { + "epoch": 24.947802197802197, + "grad_norm": 9.147769927978516, + "learning_rate": 3.7526098901098905e-05, + "loss": 0.2838, + "step": 9081 + }, + { + "epoch": 24.95054945054945, + "grad_norm": 14.282475471496582, + "learning_rate": 3.7524725274725275e-05, + "loss": 0.3534, + "step": 9082 + }, + { + "epoch": 24.953296703296704, + "grad_norm": 12.276284217834473, + "learning_rate": 3.752335164835165e-05, + "loss": 0.3004, + "step": 9083 + }, + { + "epoch": 24.956043956043956, + "grad_norm": 14.929719924926758, + "learning_rate": 3.752197802197802e-05, + "loss": 0.4704, + "step": 9084 + }, + { + "epoch": 24.958791208791208, + "grad_norm": 7.597989082336426, + "learning_rate": 3.75206043956044e-05, + "loss": 0.209, + "step": 9085 + }, + { + "epoch": 24.96153846153846, + "grad_norm": 11.59796142578125, + "learning_rate": 3.7519230769230776e-05, + "loss": 0.3465, + "step": 9086 + }, + { + "epoch": 24.964285714285715, + "grad_norm": 15.813131332397461, + "learning_rate": 3.7517857142857146e-05, + "loss": 0.3935, + "step": 9087 + }, + { + "epoch": 24.967032967032967, + "grad_norm": 11.830594062805176, + "learning_rate": 3.751648351648352e-05, + "loss": 0.2816, + "step": 9088 + }, + { + "epoch": 24.96978021978022, + "grad_norm": 17.271991729736328, + "learning_rate": 3.751510989010989e-05, + "loss": 0.5711, + "step": 9089 + }, + { + "epoch": 24.97252747252747, + "grad_norm": 12.444321632385254, + "learning_rate": 3.751373626373626e-05, + "loss": 0.4093, + "step": 9090 + }, + { + "epoch": 24.975274725274726, + "grad_norm": 11.546058654785156, + "learning_rate": 3.751236263736264e-05, + "loss": 0.3716, + "step": 9091 + }, + { + "epoch": 24.978021978021978, + "grad_norm": 13.14449405670166, + "learning_rate": 3.751098901098901e-05, + "loss": 0.3822, + "step": 9092 + }, + { + "epoch": 24.98076923076923, + "grad_norm": 7.8331170082092285, + "learning_rate": 3.7509615384615386e-05, + "loss": 0.1684, + "step": 9093 + }, + { + "epoch": 24.983516483516482, + "grad_norm": 11.406132698059082, + "learning_rate": 3.7508241758241756e-05, + "loss": 0.2707, + "step": 9094 + }, + { + "epoch": 24.986263736263737, + "grad_norm": 12.450634002685547, + "learning_rate": 3.750686813186813e-05, + "loss": 0.3012, + "step": 9095 + }, + { + "epoch": 24.98901098901099, + "grad_norm": 14.170184135437012, + "learning_rate": 3.750549450549451e-05, + "loss": 0.3602, + "step": 9096 + }, + { + "epoch": 24.99175824175824, + "grad_norm": 9.029106140136719, + "learning_rate": 3.750412087912088e-05, + "loss": 0.197, + "step": 9097 + }, + { + "epoch": 24.994505494505496, + "grad_norm": 4.952722072601318, + "learning_rate": 3.750274725274726e-05, + "loss": 0.09, + "step": 9098 + }, + { + "epoch": 24.997252747252748, + "grad_norm": 21.402456283569336, + "learning_rate": 3.750137362637363e-05, + "loss": 0.8359, + "step": 9099 + }, + { + "epoch": 25.0, + "grad_norm": 46.95182418823242, + "learning_rate": 3.7500000000000003e-05, + "loss": 2.2972, + "step": 9100 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.6432506887052342, + "eval_f1": 0.5977948922808004, + "eval_f1_DuraRiadoRio_64x64": 0.1875, + "eval_f1_Mole_64x64": 0.5779816513761468, + "eval_f1_Quebrado_64x64": 0.8703703703703703, + "eval_f1_RiadoRio_64x64": 0.5667447306791569, + "eval_f1_RioFechado_64x64": 0.7863777089783281, + "eval_loss": 2.4066860675811768, + "eval_precision": 0.7427684286496441, + "eval_precision_DuraRiadoRio_64x64": 0.9375, + "eval_precision_Mole_64x64": 0.8513513513513513, + "eval_precision_Quebrado_64x64": 0.7833333333333333, + "eval_precision_RiadoRio_64x64": 0.44, + "eval_precision_RioFechado_64x64": 0.7016574585635359, + "eval_recall": 0.6422504324190759, + "eval_recall_DuraRiadoRio_64x64": 0.10416666666666667, + "eval_recall_Mole_64x64": 0.4375, + "eval_recall_Quebrado_64x64": 0.9791666666666666, + "eval_recall_RiadoRio_64x64": 0.7960526315789473, + "eval_recall_RioFechado_64x64": 0.8943661971830986, + "eval_runtime": 1.7458, + "eval_samples_per_second": 415.865, + "eval_steps_per_second": 26.35, + "step": 9100 + }, + { + "epoch": 25.002747252747252, + "grad_norm": 13.946184158325195, + "learning_rate": 3.749862637362638e-05, + "loss": 0.3765, + "step": 9101 + }, + { + "epoch": 25.005494505494507, + "grad_norm": 22.892478942871094, + "learning_rate": 3.749725274725275e-05, + "loss": 0.7265, + "step": 9102 + }, + { + "epoch": 25.00824175824176, + "grad_norm": 13.944304466247559, + "learning_rate": 3.749587912087913e-05, + "loss": 0.3897, + "step": 9103 + }, + { + "epoch": 25.01098901098901, + "grad_norm": 9.66353702545166, + "learning_rate": 3.74945054945055e-05, + "loss": 0.1939, + "step": 9104 + }, + { + "epoch": 25.013736263736263, + "grad_norm": 6.507574558258057, + "learning_rate": 3.749313186813187e-05, + "loss": 0.2442, + "step": 9105 + }, + { + "epoch": 25.016483516483518, + "grad_norm": 17.139245986938477, + "learning_rate": 3.7491758241758244e-05, + "loss": 0.3784, + "step": 9106 + }, + { + "epoch": 25.01923076923077, + "grad_norm": 6.740762710571289, + "learning_rate": 3.7490384615384614e-05, + "loss": 0.1572, + "step": 9107 + }, + { + "epoch": 25.021978021978022, + "grad_norm": 5.7969584465026855, + "learning_rate": 3.748901098901099e-05, + "loss": 0.1418, + "step": 9108 + }, + { + "epoch": 25.024725274725274, + "grad_norm": 7.900578498840332, + "learning_rate": 3.748763736263736e-05, + "loss": 0.1619, + "step": 9109 + }, + { + "epoch": 25.02747252747253, + "grad_norm": 15.754951477050781, + "learning_rate": 3.748626373626374e-05, + "loss": 0.5376, + "step": 9110 + }, + { + "epoch": 25.03021978021978, + "grad_norm": 7.138824939727783, + "learning_rate": 3.7484890109890114e-05, + "loss": 0.1353, + "step": 9111 + }, + { + "epoch": 25.032967032967033, + "grad_norm": 11.82419490814209, + "learning_rate": 3.7483516483516484e-05, + "loss": 0.2224, + "step": 9112 + }, + { + "epoch": 25.035714285714285, + "grad_norm": 9.414441108703613, + "learning_rate": 3.748214285714286e-05, + "loss": 0.2862, + "step": 9113 + }, + { + "epoch": 25.03846153846154, + "grad_norm": 14.53856372833252, + "learning_rate": 3.748076923076923e-05, + "loss": 0.3375, + "step": 9114 + }, + { + "epoch": 25.041208791208792, + "grad_norm": 10.471875190734863, + "learning_rate": 3.747939560439561e-05, + "loss": 0.4656, + "step": 9115 + }, + { + "epoch": 25.043956043956044, + "grad_norm": 9.857769966125488, + "learning_rate": 3.7478021978021985e-05, + "loss": 0.3665, + "step": 9116 + }, + { + "epoch": 25.046703296703296, + "grad_norm": 7.556129455566406, + "learning_rate": 3.7476648351648355e-05, + "loss": 0.2147, + "step": 9117 + }, + { + "epoch": 25.04945054945055, + "grad_norm": 12.530426979064941, + "learning_rate": 3.747527472527473e-05, + "loss": 0.4235, + "step": 9118 + }, + { + "epoch": 25.052197802197803, + "grad_norm": 14.788814544677734, + "learning_rate": 3.74739010989011e-05, + "loss": 0.5521, + "step": 9119 + }, + { + "epoch": 25.054945054945055, + "grad_norm": 13.27530574798584, + "learning_rate": 3.747252747252747e-05, + "loss": 0.5047, + "step": 9120 + }, + { + "epoch": 25.057692307692307, + "grad_norm": 6.24655818939209, + "learning_rate": 3.747115384615385e-05, + "loss": 0.1342, + "step": 9121 + }, + { + "epoch": 25.060439560439562, + "grad_norm": 9.0219087600708, + "learning_rate": 3.746978021978022e-05, + "loss": 0.2159, + "step": 9122 + }, + { + "epoch": 25.063186813186814, + "grad_norm": 12.164389610290527, + "learning_rate": 3.7468406593406595e-05, + "loss": 0.2932, + "step": 9123 + }, + { + "epoch": 25.065934065934066, + "grad_norm": 6.9045515060424805, + "learning_rate": 3.7467032967032965e-05, + "loss": 0.1302, + "step": 9124 + }, + { + "epoch": 25.068681318681318, + "grad_norm": 14.511847496032715, + "learning_rate": 3.746565934065934e-05, + "loss": 0.4675, + "step": 9125 + }, + { + "epoch": 25.071428571428573, + "grad_norm": 21.43306541442871, + "learning_rate": 3.746428571428572e-05, + "loss": 0.8678, + "step": 9126 + }, + { + "epoch": 25.074175824175825, + "grad_norm": 10.585012435913086, + "learning_rate": 3.746291208791209e-05, + "loss": 0.2696, + "step": 9127 + }, + { + "epoch": 25.076923076923077, + "grad_norm": 15.97829818725586, + "learning_rate": 3.7461538461538466e-05, + "loss": 0.5372, + "step": 9128 + }, + { + "epoch": 25.07967032967033, + "grad_norm": 5.1274518966674805, + "learning_rate": 3.7460164835164836e-05, + "loss": 0.0885, + "step": 9129 + }, + { + "epoch": 25.082417582417584, + "grad_norm": 13.008062362670898, + "learning_rate": 3.745879120879121e-05, + "loss": 0.2816, + "step": 9130 + }, + { + "epoch": 25.085164835164836, + "grad_norm": 13.325881958007812, + "learning_rate": 3.745741758241759e-05, + "loss": 0.3387, + "step": 9131 + }, + { + "epoch": 25.087912087912088, + "grad_norm": 16.909671783447266, + "learning_rate": 3.745604395604396e-05, + "loss": 0.6527, + "step": 9132 + }, + { + "epoch": 25.09065934065934, + "grad_norm": 10.646294593811035, + "learning_rate": 3.7454670329670336e-05, + "loss": 0.2372, + "step": 9133 + }, + { + "epoch": 25.093406593406595, + "grad_norm": 15.604997634887695, + "learning_rate": 3.7453296703296706e-05, + "loss": 0.5446, + "step": 9134 + }, + { + "epoch": 25.096153846153847, + "grad_norm": 15.865652084350586, + "learning_rate": 3.7451923076923076e-05, + "loss": 0.4917, + "step": 9135 + }, + { + "epoch": 25.0989010989011, + "grad_norm": 7.613763809204102, + "learning_rate": 3.745054945054945e-05, + "loss": 0.2178, + "step": 9136 + }, + { + "epoch": 25.10164835164835, + "grad_norm": 6.668140888214111, + "learning_rate": 3.744917582417582e-05, + "loss": 0.22, + "step": 9137 + }, + { + "epoch": 25.104395604395606, + "grad_norm": 15.911171913146973, + "learning_rate": 3.74478021978022e-05, + "loss": 0.4821, + "step": 9138 + }, + { + "epoch": 25.107142857142858, + "grad_norm": 14.134224891662598, + "learning_rate": 3.744642857142857e-05, + "loss": 0.3784, + "step": 9139 + }, + { + "epoch": 25.10989010989011, + "grad_norm": 12.097954750061035, + "learning_rate": 3.7445054945054947e-05, + "loss": 0.3171, + "step": 9140 + }, + { + "epoch": 25.11263736263736, + "grad_norm": 20.15308952331543, + "learning_rate": 3.744368131868132e-05, + "loss": 0.9714, + "step": 9141 + }, + { + "epoch": 25.115384615384617, + "grad_norm": 15.006136894226074, + "learning_rate": 3.744230769230769e-05, + "loss": 0.4236, + "step": 9142 + }, + { + "epoch": 25.11813186813187, + "grad_norm": 17.559627532958984, + "learning_rate": 3.744093406593407e-05, + "loss": 0.8599, + "step": 9143 + }, + { + "epoch": 25.12087912087912, + "grad_norm": 12.28458309173584, + "learning_rate": 3.743956043956044e-05, + "loss": 0.4421, + "step": 9144 + }, + { + "epoch": 25.123626373626372, + "grad_norm": 6.232069492340088, + "learning_rate": 3.743818681318682e-05, + "loss": 0.2893, + "step": 9145 + }, + { + "epoch": 25.126373626373628, + "grad_norm": 13.0318603515625, + "learning_rate": 3.7436813186813194e-05, + "loss": 0.3312, + "step": 9146 + }, + { + "epoch": 25.12912087912088, + "grad_norm": 3.530388593673706, + "learning_rate": 3.7435439560439564e-05, + "loss": 0.0904, + "step": 9147 + }, + { + "epoch": 25.13186813186813, + "grad_norm": 18.50090217590332, + "learning_rate": 3.743406593406594e-05, + "loss": 0.5719, + "step": 9148 + }, + { + "epoch": 25.134615384615383, + "grad_norm": 8.004965782165527, + "learning_rate": 3.743269230769231e-05, + "loss": 0.2323, + "step": 9149 + }, + { + "epoch": 25.13736263736264, + "grad_norm": 17.97041893005371, + "learning_rate": 3.743131868131868e-05, + "loss": 0.4241, + "step": 9150 + }, + { + "epoch": 25.14010989010989, + "grad_norm": 11.182672500610352, + "learning_rate": 3.742994505494506e-05, + "loss": 0.2178, + "step": 9151 + }, + { + "epoch": 25.142857142857142, + "grad_norm": 12.461216926574707, + "learning_rate": 3.742857142857143e-05, + "loss": 0.2501, + "step": 9152 + }, + { + "epoch": 25.145604395604394, + "grad_norm": 11.080989837646484, + "learning_rate": 3.7427197802197804e-05, + "loss": 0.3468, + "step": 9153 + }, + { + "epoch": 25.14835164835165, + "grad_norm": 12.619038581848145, + "learning_rate": 3.7425824175824174e-05, + "loss": 0.4464, + "step": 9154 + }, + { + "epoch": 25.1510989010989, + "grad_norm": 11.89403247833252, + "learning_rate": 3.742445054945055e-05, + "loss": 0.23, + "step": 9155 + }, + { + "epoch": 25.153846153846153, + "grad_norm": 9.759471893310547, + "learning_rate": 3.742307692307693e-05, + "loss": 0.2412, + "step": 9156 + }, + { + "epoch": 25.156593406593405, + "grad_norm": 16.441652297973633, + "learning_rate": 3.74217032967033e-05, + "loss": 0.5912, + "step": 9157 + }, + { + "epoch": 25.15934065934066, + "grad_norm": 14.97240161895752, + "learning_rate": 3.7420329670329675e-05, + "loss": 0.3742, + "step": 9158 + }, + { + "epoch": 25.162087912087912, + "grad_norm": 13.643247604370117, + "learning_rate": 3.7418956043956045e-05, + "loss": 0.5524, + "step": 9159 + }, + { + "epoch": 25.164835164835164, + "grad_norm": 11.925232887268066, + "learning_rate": 3.741758241758242e-05, + "loss": 0.3698, + "step": 9160 + }, + { + "epoch": 25.167582417582416, + "grad_norm": 15.252153396606445, + "learning_rate": 3.74162087912088e-05, + "loss": 0.6744, + "step": 9161 + }, + { + "epoch": 25.17032967032967, + "grad_norm": 5.958505153656006, + "learning_rate": 3.741483516483517e-05, + "loss": 0.1412, + "step": 9162 + }, + { + "epoch": 25.173076923076923, + "grad_norm": 8.758308410644531, + "learning_rate": 3.741346153846154e-05, + "loss": 0.2177, + "step": 9163 + }, + { + "epoch": 25.175824175824175, + "grad_norm": 6.414635181427002, + "learning_rate": 3.7412087912087915e-05, + "loss": 0.1924, + "step": 9164 + }, + { + "epoch": 25.178571428571427, + "grad_norm": 28.073373794555664, + "learning_rate": 3.7410714285714285e-05, + "loss": 1.2835, + "step": 9165 + }, + { + "epoch": 25.181318681318682, + "grad_norm": 9.696208953857422, + "learning_rate": 3.740934065934066e-05, + "loss": 0.3086, + "step": 9166 + }, + { + "epoch": 25.184065934065934, + "grad_norm": 13.966565132141113, + "learning_rate": 3.740796703296703e-05, + "loss": 0.4172, + "step": 9167 + }, + { + "epoch": 25.186813186813186, + "grad_norm": 13.31496524810791, + "learning_rate": 3.740659340659341e-05, + "loss": 0.3514, + "step": 9168 + }, + { + "epoch": 25.189560439560438, + "grad_norm": 18.110177993774414, + "learning_rate": 3.740521978021978e-05, + "loss": 0.609, + "step": 9169 + }, + { + "epoch": 25.192307692307693, + "grad_norm": 15.31110668182373, + "learning_rate": 3.7403846153846156e-05, + "loss": 0.6055, + "step": 9170 + }, + { + "epoch": 25.195054945054945, + "grad_norm": 11.535323143005371, + "learning_rate": 3.740247252747253e-05, + "loss": 0.3976, + "step": 9171 + }, + { + "epoch": 25.197802197802197, + "grad_norm": 9.28097915649414, + "learning_rate": 3.74010989010989e-05, + "loss": 0.2983, + "step": 9172 + }, + { + "epoch": 25.20054945054945, + "grad_norm": 20.50033187866211, + "learning_rate": 3.739972527472528e-05, + "loss": 0.5405, + "step": 9173 + }, + { + "epoch": 25.203296703296704, + "grad_norm": 9.633137702941895, + "learning_rate": 3.739835164835165e-05, + "loss": 0.2819, + "step": 9174 + }, + { + "epoch": 25.206043956043956, + "grad_norm": 9.08519458770752, + "learning_rate": 3.7396978021978026e-05, + "loss": 0.2066, + "step": 9175 + }, + { + "epoch": 25.208791208791208, + "grad_norm": 12.4362211227417, + "learning_rate": 3.73956043956044e-05, + "loss": 0.3516, + "step": 9176 + }, + { + "epoch": 25.21153846153846, + "grad_norm": 10.900044441223145, + "learning_rate": 3.739423076923077e-05, + "loss": 0.2797, + "step": 9177 + }, + { + "epoch": 25.214285714285715, + "grad_norm": 11.893043518066406, + "learning_rate": 3.739285714285714e-05, + "loss": 0.2891, + "step": 9178 + }, + { + "epoch": 25.217032967032967, + "grad_norm": 9.459686279296875, + "learning_rate": 3.739148351648352e-05, + "loss": 0.2352, + "step": 9179 + }, + { + "epoch": 25.21978021978022, + "grad_norm": 8.58569049835205, + "learning_rate": 3.739010989010989e-05, + "loss": 0.2266, + "step": 9180 + }, + { + "epoch": 25.22252747252747, + "grad_norm": 9.95629596710205, + "learning_rate": 3.7388736263736266e-05, + "loss": 0.1335, + "step": 9181 + }, + { + "epoch": 25.225274725274726, + "grad_norm": 5.111014366149902, + "learning_rate": 3.7387362637362636e-05, + "loss": 0.1429, + "step": 9182 + }, + { + "epoch": 25.228021978021978, + "grad_norm": 6.779106616973877, + "learning_rate": 3.738598901098901e-05, + "loss": 0.204, + "step": 9183 + }, + { + "epoch": 25.23076923076923, + "grad_norm": 10.782526969909668, + "learning_rate": 3.738461538461538e-05, + "loss": 0.3432, + "step": 9184 + }, + { + "epoch": 25.233516483516482, + "grad_norm": 5.744568824768066, + "learning_rate": 3.738324175824176e-05, + "loss": 0.1311, + "step": 9185 + }, + { + "epoch": 25.236263736263737, + "grad_norm": 7.385867118835449, + "learning_rate": 3.738186813186814e-05, + "loss": 0.2237, + "step": 9186 + }, + { + "epoch": 25.23901098901099, + "grad_norm": 10.199136734008789, + "learning_rate": 3.738049450549451e-05, + "loss": 0.298, + "step": 9187 + }, + { + "epoch": 25.24175824175824, + "grad_norm": 9.79828929901123, + "learning_rate": 3.7379120879120884e-05, + "loss": 0.3105, + "step": 9188 + }, + { + "epoch": 25.244505494505493, + "grad_norm": 9.877573013305664, + "learning_rate": 3.7377747252747254e-05, + "loss": 0.3601, + "step": 9189 + }, + { + "epoch": 25.247252747252748, + "grad_norm": 14.078390121459961, + "learning_rate": 3.737637362637363e-05, + "loss": 0.3577, + "step": 9190 + }, + { + "epoch": 25.25, + "grad_norm": 3.986957550048828, + "learning_rate": 3.737500000000001e-05, + "loss": 0.0969, + "step": 9191 + }, + { + "epoch": 25.252747252747252, + "grad_norm": 13.538015365600586, + "learning_rate": 3.737362637362638e-05, + "loss": 0.2981, + "step": 9192 + }, + { + "epoch": 25.255494505494504, + "grad_norm": 11.396666526794434, + "learning_rate": 3.737225274725275e-05, + "loss": 0.2904, + "step": 9193 + }, + { + "epoch": 25.25824175824176, + "grad_norm": 17.924131393432617, + "learning_rate": 3.7370879120879124e-05, + "loss": 0.9159, + "step": 9194 + }, + { + "epoch": 25.26098901098901, + "grad_norm": 12.484707832336426, + "learning_rate": 3.7369505494505494e-05, + "loss": 0.3608, + "step": 9195 + }, + { + "epoch": 25.263736263736263, + "grad_norm": 17.696842193603516, + "learning_rate": 3.736813186813187e-05, + "loss": 0.498, + "step": 9196 + }, + { + "epoch": 25.266483516483518, + "grad_norm": 14.436257362365723, + "learning_rate": 3.736675824175824e-05, + "loss": 0.3478, + "step": 9197 + }, + { + "epoch": 25.26923076923077, + "grad_norm": 17.575448989868164, + "learning_rate": 3.736538461538462e-05, + "loss": 0.4644, + "step": 9198 + }, + { + "epoch": 25.271978021978022, + "grad_norm": 18.24235725402832, + "learning_rate": 3.736401098901099e-05, + "loss": 0.7091, + "step": 9199 + }, + { + "epoch": 25.274725274725274, + "grad_norm": 9.304117202758789, + "learning_rate": 3.7362637362637365e-05, + "loss": 0.1983, + "step": 9200 + }, + { + "epoch": 25.27747252747253, + "grad_norm": 11.611109733581543, + "learning_rate": 3.736126373626374e-05, + "loss": 0.4514, + "step": 9201 + }, + { + "epoch": 25.28021978021978, + "grad_norm": 6.192009925842285, + "learning_rate": 3.735989010989011e-05, + "loss": 0.167, + "step": 9202 + }, + { + "epoch": 25.282967032967033, + "grad_norm": 19.007509231567383, + "learning_rate": 3.735851648351649e-05, + "loss": 0.4344, + "step": 9203 + }, + { + "epoch": 25.285714285714285, + "grad_norm": 16.572507858276367, + "learning_rate": 3.735714285714286e-05, + "loss": 0.4825, + "step": 9204 + }, + { + "epoch": 25.28846153846154, + "grad_norm": 14.403847694396973, + "learning_rate": 3.7355769230769235e-05, + "loss": 0.3632, + "step": 9205 + }, + { + "epoch": 25.291208791208792, + "grad_norm": 10.140050888061523, + "learning_rate": 3.735439560439561e-05, + "loss": 0.3695, + "step": 9206 + }, + { + "epoch": 25.293956043956044, + "grad_norm": 14.571784973144531, + "learning_rate": 3.735302197802198e-05, + "loss": 0.2692, + "step": 9207 + }, + { + "epoch": 25.296703296703296, + "grad_norm": 12.69416618347168, + "learning_rate": 3.735164835164835e-05, + "loss": 0.4131, + "step": 9208 + }, + { + "epoch": 25.29945054945055, + "grad_norm": 6.154922962188721, + "learning_rate": 3.735027472527473e-05, + "loss": 0.1844, + "step": 9209 + }, + { + "epoch": 25.302197802197803, + "grad_norm": 13.137086868286133, + "learning_rate": 3.73489010989011e-05, + "loss": 0.3038, + "step": 9210 + }, + { + "epoch": 25.304945054945055, + "grad_norm": 15.0573148727417, + "learning_rate": 3.7347527472527475e-05, + "loss": 0.3975, + "step": 9211 + }, + { + "epoch": 25.307692307692307, + "grad_norm": 10.619291305541992, + "learning_rate": 3.7346153846153845e-05, + "loss": 0.1979, + "step": 9212 + }, + { + "epoch": 25.310439560439562, + "grad_norm": 12.183173179626465, + "learning_rate": 3.734478021978022e-05, + "loss": 0.4305, + "step": 9213 + }, + { + "epoch": 25.313186813186814, + "grad_norm": 11.63663101196289, + "learning_rate": 3.734340659340659e-05, + "loss": 0.254, + "step": 9214 + }, + { + "epoch": 25.315934065934066, + "grad_norm": 5.535271167755127, + "learning_rate": 3.734203296703297e-05, + "loss": 0.1293, + "step": 9215 + }, + { + "epoch": 25.318681318681318, + "grad_norm": 3.806004285812378, + "learning_rate": 3.734065934065934e-05, + "loss": 0.1096, + "step": 9216 + }, + { + "epoch": 25.321428571428573, + "grad_norm": 8.742783546447754, + "learning_rate": 3.7339285714285716e-05, + "loss": 0.3075, + "step": 9217 + }, + { + "epoch": 25.324175824175825, + "grad_norm": 12.392004013061523, + "learning_rate": 3.733791208791209e-05, + "loss": 0.3436, + "step": 9218 + }, + { + "epoch": 25.326923076923077, + "grad_norm": 16.726200103759766, + "learning_rate": 3.733653846153846e-05, + "loss": 0.4055, + "step": 9219 + }, + { + "epoch": 25.32967032967033, + "grad_norm": 10.272600173950195, + "learning_rate": 3.733516483516484e-05, + "loss": 0.2168, + "step": 9220 + }, + { + "epoch": 25.332417582417584, + "grad_norm": 17.028661727905273, + "learning_rate": 3.733379120879121e-05, + "loss": 0.6169, + "step": 9221 + }, + { + "epoch": 25.335164835164836, + "grad_norm": 8.836112022399902, + "learning_rate": 3.7332417582417586e-05, + "loss": 0.3757, + "step": 9222 + }, + { + "epoch": 25.337912087912088, + "grad_norm": 11.631603240966797, + "learning_rate": 3.7331043956043956e-05, + "loss": 0.3509, + "step": 9223 + }, + { + "epoch": 25.34065934065934, + "grad_norm": 19.00942039489746, + "learning_rate": 3.732967032967033e-05, + "loss": 0.7089, + "step": 9224 + }, + { + "epoch": 25.343406593406595, + "grad_norm": 14.280183792114258, + "learning_rate": 3.73282967032967e-05, + "loss": 0.4638, + "step": 9225 + }, + { + "epoch": 25.346153846153847, + "grad_norm": 10.661148071289062, + "learning_rate": 3.732692307692307e-05, + "loss": 0.4844, + "step": 9226 + }, + { + "epoch": 25.3489010989011, + "grad_norm": 5.751552104949951, + "learning_rate": 3.732554945054945e-05, + "loss": 0.135, + "step": 9227 + }, + { + "epoch": 25.35164835164835, + "grad_norm": 15.79524040222168, + "learning_rate": 3.732417582417583e-05, + "loss": 0.2804, + "step": 9228 + }, + { + "epoch": 25.354395604395606, + "grad_norm": 20.54088020324707, + "learning_rate": 3.73228021978022e-05, + "loss": 0.9958, + "step": 9229 + }, + { + "epoch": 25.357142857142858, + "grad_norm": 11.0092134475708, + "learning_rate": 3.7321428571428573e-05, + "loss": 0.3882, + "step": 9230 + }, + { + "epoch": 25.35989010989011, + "grad_norm": 22.441911697387695, + "learning_rate": 3.7320054945054944e-05, + "loss": 0.7328, + "step": 9231 + }, + { + "epoch": 25.36263736263736, + "grad_norm": 9.23974323272705, + "learning_rate": 3.731868131868132e-05, + "loss": 0.0905, + "step": 9232 + }, + { + "epoch": 25.365384615384617, + "grad_norm": 8.429768562316895, + "learning_rate": 3.73173076923077e-05, + "loss": 0.2327, + "step": 9233 + }, + { + "epoch": 25.36813186813187, + "grad_norm": 10.456599235534668, + "learning_rate": 3.731593406593407e-05, + "loss": 0.1884, + "step": 9234 + }, + { + "epoch": 25.37087912087912, + "grad_norm": 18.0915470123291, + "learning_rate": 3.7314560439560444e-05, + "loss": 0.7365, + "step": 9235 + }, + { + "epoch": 25.373626373626372, + "grad_norm": 5.739102363586426, + "learning_rate": 3.7313186813186814e-05, + "loss": 0.0877, + "step": 9236 + }, + { + "epoch": 25.376373626373628, + "grad_norm": 21.188709259033203, + "learning_rate": 3.731181318681319e-05, + "loss": 0.8327, + "step": 9237 + }, + { + "epoch": 25.37912087912088, + "grad_norm": 12.45578384399414, + "learning_rate": 3.731043956043956e-05, + "loss": 0.281, + "step": 9238 + }, + { + "epoch": 25.38186813186813, + "grad_norm": 14.989973068237305, + "learning_rate": 3.730906593406594e-05, + "loss": 0.566, + "step": 9239 + }, + { + "epoch": 25.384615384615383, + "grad_norm": 17.511991500854492, + "learning_rate": 3.730769230769231e-05, + "loss": 0.5442, + "step": 9240 + }, + { + "epoch": 25.38736263736264, + "grad_norm": 17.541549682617188, + "learning_rate": 3.730631868131868e-05, + "loss": 0.6116, + "step": 9241 + }, + { + "epoch": 25.39010989010989, + "grad_norm": 11.864349365234375, + "learning_rate": 3.7304945054945054e-05, + "loss": 0.2355, + "step": 9242 + }, + { + "epoch": 25.392857142857142, + "grad_norm": 12.791468620300293, + "learning_rate": 3.730357142857143e-05, + "loss": 0.418, + "step": 9243 + }, + { + "epoch": 25.395604395604394, + "grad_norm": 15.769091606140137, + "learning_rate": 3.73021978021978e-05, + "loss": 0.5291, + "step": 9244 + }, + { + "epoch": 25.39835164835165, + "grad_norm": 8.870540618896484, + "learning_rate": 3.730082417582418e-05, + "loss": 0.2441, + "step": 9245 + }, + { + "epoch": 25.4010989010989, + "grad_norm": 15.549786567687988, + "learning_rate": 3.729945054945055e-05, + "loss": 0.4184, + "step": 9246 + }, + { + "epoch": 25.403846153846153, + "grad_norm": 7.012197494506836, + "learning_rate": 3.7298076923076925e-05, + "loss": 0.2397, + "step": 9247 + }, + { + "epoch": 25.406593406593405, + "grad_norm": 7.890620231628418, + "learning_rate": 3.72967032967033e-05, + "loss": 0.2246, + "step": 9248 + }, + { + "epoch": 25.40934065934066, + "grad_norm": 14.866674423217773, + "learning_rate": 3.729532967032967e-05, + "loss": 0.5538, + "step": 9249 + }, + { + "epoch": 25.412087912087912, + "grad_norm": 9.916772842407227, + "learning_rate": 3.729395604395605e-05, + "loss": 0.3634, + "step": 9250 + }, + { + "epoch": 25.414835164835164, + "grad_norm": 16.24110984802246, + "learning_rate": 3.729258241758242e-05, + "loss": 0.3309, + "step": 9251 + }, + { + "epoch": 25.417582417582416, + "grad_norm": 12.722020149230957, + "learning_rate": 3.7291208791208795e-05, + "loss": 0.5318, + "step": 9252 + }, + { + "epoch": 25.42032967032967, + "grad_norm": 23.12980079650879, + "learning_rate": 3.7289835164835165e-05, + "loss": 0.9968, + "step": 9253 + }, + { + "epoch": 25.423076923076923, + "grad_norm": 6.908982276916504, + "learning_rate": 3.728846153846154e-05, + "loss": 0.2274, + "step": 9254 + }, + { + "epoch": 25.425824175824175, + "grad_norm": 7.648162364959717, + "learning_rate": 3.728708791208791e-05, + "loss": 0.1969, + "step": 9255 + }, + { + "epoch": 25.428571428571427, + "grad_norm": 13.379570960998535, + "learning_rate": 3.728571428571428e-05, + "loss": 0.3139, + "step": 9256 + }, + { + "epoch": 25.431318681318682, + "grad_norm": 11.097332954406738, + "learning_rate": 3.728434065934066e-05, + "loss": 0.2122, + "step": 9257 + }, + { + "epoch": 25.434065934065934, + "grad_norm": 11.357673645019531, + "learning_rate": 3.7282967032967036e-05, + "loss": 0.2303, + "step": 9258 + }, + { + "epoch": 25.436813186813186, + "grad_norm": 7.040155410766602, + "learning_rate": 3.7281593406593406e-05, + "loss": 0.1133, + "step": 9259 + }, + { + "epoch": 25.439560439560438, + "grad_norm": 2.2842142581939697, + "learning_rate": 3.728021978021978e-05, + "loss": 0.0479, + "step": 9260 + }, + { + "epoch": 25.442307692307693, + "grad_norm": 5.854620456695557, + "learning_rate": 3.727884615384615e-05, + "loss": 0.2022, + "step": 9261 + }, + { + "epoch": 25.445054945054945, + "grad_norm": 20.46478843688965, + "learning_rate": 3.727747252747253e-05, + "loss": 0.8535, + "step": 9262 + }, + { + "epoch": 25.447802197802197, + "grad_norm": 20.38022804260254, + "learning_rate": 3.7276098901098906e-05, + "loss": 0.6191, + "step": 9263 + }, + { + "epoch": 25.45054945054945, + "grad_norm": 8.603999137878418, + "learning_rate": 3.7274725274725276e-05, + "loss": 0.336, + "step": 9264 + }, + { + "epoch": 25.453296703296704, + "grad_norm": 8.717483520507812, + "learning_rate": 3.727335164835165e-05, + "loss": 0.1685, + "step": 9265 + }, + { + "epoch": 25.456043956043956, + "grad_norm": 9.086076736450195, + "learning_rate": 3.727197802197802e-05, + "loss": 0.2, + "step": 9266 + }, + { + "epoch": 25.458791208791208, + "grad_norm": 17.570846557617188, + "learning_rate": 3.72706043956044e-05, + "loss": 0.4514, + "step": 9267 + }, + { + "epoch": 25.46153846153846, + "grad_norm": 7.965526103973389, + "learning_rate": 3.726923076923077e-05, + "loss": 0.1776, + "step": 9268 + }, + { + "epoch": 25.464285714285715, + "grad_norm": 8.297584533691406, + "learning_rate": 3.7267857142857147e-05, + "loss": 0.162, + "step": 9269 + }, + { + "epoch": 25.467032967032967, + "grad_norm": 12.901081085205078, + "learning_rate": 3.7266483516483517e-05, + "loss": 0.3444, + "step": 9270 + }, + { + "epoch": 25.46978021978022, + "grad_norm": 8.824379920959473, + "learning_rate": 3.7265109890109887e-05, + "loss": 0.2101, + "step": 9271 + }, + { + "epoch": 25.47252747252747, + "grad_norm": 10.369950294494629, + "learning_rate": 3.726373626373626e-05, + "loss": 0.2756, + "step": 9272 + }, + { + "epoch": 25.475274725274726, + "grad_norm": 12.204039573669434, + "learning_rate": 3.726236263736264e-05, + "loss": 0.4064, + "step": 9273 + }, + { + "epoch": 25.478021978021978, + "grad_norm": 12.105179786682129, + "learning_rate": 3.726098901098901e-05, + "loss": 0.5561, + "step": 9274 + }, + { + "epoch": 25.48076923076923, + "grad_norm": 16.761198043823242, + "learning_rate": 3.725961538461539e-05, + "loss": 0.473, + "step": 9275 + }, + { + "epoch": 25.483516483516482, + "grad_norm": 12.739823341369629, + "learning_rate": 3.725824175824176e-05, + "loss": 0.3669, + "step": 9276 + }, + { + "epoch": 25.486263736263737, + "grad_norm": 4.947963237762451, + "learning_rate": 3.7256868131868134e-05, + "loss": 0.1258, + "step": 9277 + }, + { + "epoch": 25.48901098901099, + "grad_norm": 17.73102378845215, + "learning_rate": 3.725549450549451e-05, + "loss": 0.2543, + "step": 9278 + }, + { + "epoch": 25.49175824175824, + "grad_norm": 9.719658851623535, + "learning_rate": 3.725412087912088e-05, + "loss": 0.1765, + "step": 9279 + }, + { + "epoch": 25.494505494505496, + "grad_norm": 14.271537780761719, + "learning_rate": 3.725274725274726e-05, + "loss": 0.3763, + "step": 9280 + }, + { + "epoch": 25.497252747252748, + "grad_norm": 12.165803909301758, + "learning_rate": 3.725137362637363e-05, + "loss": 0.3062, + "step": 9281 + }, + { + "epoch": 25.5, + "grad_norm": 10.268932342529297, + "learning_rate": 3.7250000000000004e-05, + "loss": 0.2992, + "step": 9282 + }, + { + "epoch": 25.502747252747252, + "grad_norm": 18.426774978637695, + "learning_rate": 3.7248626373626374e-05, + "loss": 0.4686, + "step": 9283 + }, + { + "epoch": 25.505494505494504, + "grad_norm": 9.911799430847168, + "learning_rate": 3.724725274725275e-05, + "loss": 0.2565, + "step": 9284 + }, + { + "epoch": 25.50824175824176, + "grad_norm": 10.470175743103027, + "learning_rate": 3.724587912087912e-05, + "loss": 0.2245, + "step": 9285 + }, + { + "epoch": 25.51098901098901, + "grad_norm": 6.828438758850098, + "learning_rate": 3.724450549450549e-05, + "loss": 0.1972, + "step": 9286 + }, + { + "epoch": 25.513736263736263, + "grad_norm": 16.13922882080078, + "learning_rate": 3.724313186813187e-05, + "loss": 0.6631, + "step": 9287 + }, + { + "epoch": 25.516483516483518, + "grad_norm": 19.871286392211914, + "learning_rate": 3.7241758241758245e-05, + "loss": 0.5211, + "step": 9288 + }, + { + "epoch": 25.51923076923077, + "grad_norm": 20.37676429748535, + "learning_rate": 3.7240384615384615e-05, + "loss": 0.6077, + "step": 9289 + }, + { + "epoch": 25.521978021978022, + "grad_norm": 12.370899200439453, + "learning_rate": 3.723901098901099e-05, + "loss": 0.2485, + "step": 9290 + }, + { + "epoch": 25.524725274725274, + "grad_norm": 14.7835054397583, + "learning_rate": 3.723763736263736e-05, + "loss": 0.4505, + "step": 9291 + }, + { + "epoch": 25.52747252747253, + "grad_norm": 11.373558044433594, + "learning_rate": 3.723626373626374e-05, + "loss": 0.2483, + "step": 9292 + }, + { + "epoch": 25.53021978021978, + "grad_norm": 18.153099060058594, + "learning_rate": 3.7234890109890115e-05, + "loss": 0.4356, + "step": 9293 + }, + { + "epoch": 25.532967032967033, + "grad_norm": 5.201816082000732, + "learning_rate": 3.7233516483516485e-05, + "loss": 0.1359, + "step": 9294 + }, + { + "epoch": 25.535714285714285, + "grad_norm": 13.687350273132324, + "learning_rate": 3.723214285714286e-05, + "loss": 0.3477, + "step": 9295 + }, + { + "epoch": 25.53846153846154, + "grad_norm": 11.84277629852295, + "learning_rate": 3.723076923076923e-05, + "loss": 0.2615, + "step": 9296 + }, + { + "epoch": 25.541208791208792, + "grad_norm": 17.01654052734375, + "learning_rate": 3.722939560439561e-05, + "loss": 0.4581, + "step": 9297 + }, + { + "epoch": 25.543956043956044, + "grad_norm": 12.70566177368164, + "learning_rate": 3.722802197802198e-05, + "loss": 0.3028, + "step": 9298 + }, + { + "epoch": 25.546703296703296, + "grad_norm": 15.730944633483887, + "learning_rate": 3.7226648351648355e-05, + "loss": 0.5942, + "step": 9299 + }, + { + "epoch": 25.54945054945055, + "grad_norm": 25.658851623535156, + "learning_rate": 3.7225274725274726e-05, + "loss": 0.9074, + "step": 9300 + }, + { + "epoch": 25.552197802197803, + "grad_norm": 5.337262153625488, + "learning_rate": 3.7223901098901096e-05, + "loss": 0.1481, + "step": 9301 + }, + { + "epoch": 25.554945054945055, + "grad_norm": 10.677544593811035, + "learning_rate": 3.722252747252747e-05, + "loss": 0.2823, + "step": 9302 + }, + { + "epoch": 25.557692307692307, + "grad_norm": 10.50950813293457, + "learning_rate": 3.722115384615385e-05, + "loss": 0.254, + "step": 9303 + }, + { + "epoch": 25.560439560439562, + "grad_norm": 12.936104774475098, + "learning_rate": 3.721978021978022e-05, + "loss": 0.6568, + "step": 9304 + }, + { + "epoch": 25.563186813186814, + "grad_norm": 8.396175384521484, + "learning_rate": 3.7218406593406596e-05, + "loss": 0.2299, + "step": 9305 + }, + { + "epoch": 25.565934065934066, + "grad_norm": 7.077843189239502, + "learning_rate": 3.7217032967032966e-05, + "loss": 0.2241, + "step": 9306 + }, + { + "epoch": 25.568681318681318, + "grad_norm": 10.758414268493652, + "learning_rate": 3.721565934065934e-05, + "loss": 0.2334, + "step": 9307 + }, + { + "epoch": 25.571428571428573, + "grad_norm": 6.087218761444092, + "learning_rate": 3.721428571428572e-05, + "loss": 0.182, + "step": 9308 + }, + { + "epoch": 25.574175824175825, + "grad_norm": 14.081457138061523, + "learning_rate": 3.721291208791209e-05, + "loss": 0.4578, + "step": 9309 + }, + { + "epoch": 25.576923076923077, + "grad_norm": 13.331624031066895, + "learning_rate": 3.7211538461538466e-05, + "loss": 0.51, + "step": 9310 + }, + { + "epoch": 25.57967032967033, + "grad_norm": 6.757822036743164, + "learning_rate": 3.7210164835164836e-05, + "loss": 0.1864, + "step": 9311 + }, + { + "epoch": 25.582417582417584, + "grad_norm": 6.762018203735352, + "learning_rate": 3.720879120879121e-05, + "loss": 0.1782, + "step": 9312 + }, + { + "epoch": 25.585164835164836, + "grad_norm": 4.192893028259277, + "learning_rate": 3.720741758241758e-05, + "loss": 0.0915, + "step": 9313 + }, + { + "epoch": 25.587912087912088, + "grad_norm": 4.536380767822266, + "learning_rate": 3.720604395604396e-05, + "loss": 0.0648, + "step": 9314 + }, + { + "epoch": 25.59065934065934, + "grad_norm": 16.277198791503906, + "learning_rate": 3.720467032967033e-05, + "loss": 0.3978, + "step": 9315 + }, + { + "epoch": 25.593406593406595, + "grad_norm": 13.255393981933594, + "learning_rate": 3.72032967032967e-05, + "loss": 0.3365, + "step": 9316 + }, + { + "epoch": 25.596153846153847, + "grad_norm": 14.28991985321045, + "learning_rate": 3.720192307692308e-05, + "loss": 0.3041, + "step": 9317 + }, + { + "epoch": 25.5989010989011, + "grad_norm": 10.517239570617676, + "learning_rate": 3.7200549450549454e-05, + "loss": 0.2587, + "step": 9318 + }, + { + "epoch": 25.60164835164835, + "grad_norm": 16.305374145507812, + "learning_rate": 3.7199175824175824e-05, + "loss": 0.3953, + "step": 9319 + }, + { + "epoch": 25.604395604395606, + "grad_norm": 8.84156608581543, + "learning_rate": 3.71978021978022e-05, + "loss": 0.2634, + "step": 9320 + }, + { + "epoch": 25.607142857142858, + "grad_norm": 11.339903831481934, + "learning_rate": 3.719642857142857e-05, + "loss": 0.3581, + "step": 9321 + }, + { + "epoch": 25.60989010989011, + "grad_norm": 13.451335906982422, + "learning_rate": 3.719505494505495e-05, + "loss": 0.3991, + "step": 9322 + }, + { + "epoch": 25.61263736263736, + "grad_norm": 12.282387733459473, + "learning_rate": 3.7193681318681324e-05, + "loss": 0.4277, + "step": 9323 + }, + { + "epoch": 25.615384615384617, + "grad_norm": 11.083048820495605, + "learning_rate": 3.7192307692307694e-05, + "loss": 0.2438, + "step": 9324 + }, + { + "epoch": 25.61813186813187, + "grad_norm": 15.442487716674805, + "learning_rate": 3.719093406593407e-05, + "loss": 0.3522, + "step": 9325 + }, + { + "epoch": 25.62087912087912, + "grad_norm": 13.406861305236816, + "learning_rate": 3.718956043956044e-05, + "loss": 0.4184, + "step": 9326 + }, + { + "epoch": 25.623626373626372, + "grad_norm": 14.819930076599121, + "learning_rate": 3.718818681318682e-05, + "loss": 0.4565, + "step": 9327 + }, + { + "epoch": 25.626373626373628, + "grad_norm": 8.410797119140625, + "learning_rate": 3.718681318681319e-05, + "loss": 0.215, + "step": 9328 + }, + { + "epoch": 25.62912087912088, + "grad_norm": 24.089664459228516, + "learning_rate": 3.7185439560439564e-05, + "loss": 0.8783, + "step": 9329 + }, + { + "epoch": 25.63186813186813, + "grad_norm": 24.152565002441406, + "learning_rate": 3.7184065934065934e-05, + "loss": 0.812, + "step": 9330 + }, + { + "epoch": 25.634615384615383, + "grad_norm": 11.486531257629395, + "learning_rate": 3.7182692307692305e-05, + "loss": 0.3071, + "step": 9331 + }, + { + "epoch": 25.63736263736264, + "grad_norm": 23.84818458557129, + "learning_rate": 3.718131868131868e-05, + "loss": 1.0679, + "step": 9332 + }, + { + "epoch": 25.64010989010989, + "grad_norm": 16.802175521850586, + "learning_rate": 3.717994505494506e-05, + "loss": 0.4553, + "step": 9333 + }, + { + "epoch": 25.642857142857142, + "grad_norm": 11.532137870788574, + "learning_rate": 3.717857142857143e-05, + "loss": 0.2959, + "step": 9334 + }, + { + "epoch": 25.645604395604394, + "grad_norm": 6.416680812835693, + "learning_rate": 3.7177197802197805e-05, + "loss": 0.2132, + "step": 9335 + }, + { + "epoch": 25.64835164835165, + "grad_norm": 10.715987205505371, + "learning_rate": 3.7175824175824175e-05, + "loss": 0.3243, + "step": 9336 + }, + { + "epoch": 25.6510989010989, + "grad_norm": 14.099827766418457, + "learning_rate": 3.717445054945055e-05, + "loss": 0.4327, + "step": 9337 + }, + { + "epoch": 25.653846153846153, + "grad_norm": 11.343733787536621, + "learning_rate": 3.717307692307693e-05, + "loss": 0.1921, + "step": 9338 + }, + { + "epoch": 25.656593406593405, + "grad_norm": 14.629898071289062, + "learning_rate": 3.71717032967033e-05, + "loss": 0.4001, + "step": 9339 + }, + { + "epoch": 25.65934065934066, + "grad_norm": 15.610794067382812, + "learning_rate": 3.7170329670329675e-05, + "loss": 0.2953, + "step": 9340 + }, + { + "epoch": 25.662087912087912, + "grad_norm": 17.114404678344727, + "learning_rate": 3.7168956043956045e-05, + "loss": 0.555, + "step": 9341 + }, + { + "epoch": 25.664835164835164, + "grad_norm": 4.484837055206299, + "learning_rate": 3.716758241758242e-05, + "loss": 0.0712, + "step": 9342 + }, + { + "epoch": 25.667582417582416, + "grad_norm": 9.863622665405273, + "learning_rate": 3.716620879120879e-05, + "loss": 0.2351, + "step": 9343 + }, + { + "epoch": 25.67032967032967, + "grad_norm": 12.879068374633789, + "learning_rate": 3.716483516483517e-05, + "loss": 0.2824, + "step": 9344 + }, + { + "epoch": 25.673076923076923, + "grad_norm": 12.674867630004883, + "learning_rate": 3.716346153846154e-05, + "loss": 0.3864, + "step": 9345 + }, + { + "epoch": 25.675824175824175, + "grad_norm": 7.090794086456299, + "learning_rate": 3.716208791208791e-05, + "loss": 0.1137, + "step": 9346 + }, + { + "epoch": 25.678571428571427, + "grad_norm": 12.549978256225586, + "learning_rate": 3.7160714285714286e-05, + "loss": 0.45, + "step": 9347 + }, + { + "epoch": 25.681318681318682, + "grad_norm": 8.904743194580078, + "learning_rate": 3.715934065934066e-05, + "loss": 0.209, + "step": 9348 + }, + { + "epoch": 25.684065934065934, + "grad_norm": 10.113986015319824, + "learning_rate": 3.715796703296703e-05, + "loss": 0.2179, + "step": 9349 + }, + { + "epoch": 25.686813186813186, + "grad_norm": 19.927310943603516, + "learning_rate": 3.715659340659341e-05, + "loss": 0.6105, + "step": 9350 + }, + { + "epoch": 25.689560439560438, + "grad_norm": 13.776759147644043, + "learning_rate": 3.715521978021978e-05, + "loss": 0.3882, + "step": 9351 + }, + { + "epoch": 25.692307692307693, + "grad_norm": 11.718843460083008, + "learning_rate": 3.7153846153846156e-05, + "loss": 0.449, + "step": 9352 + }, + { + "epoch": 25.695054945054945, + "grad_norm": 8.418780326843262, + "learning_rate": 3.715247252747253e-05, + "loss": 0.1479, + "step": 9353 + }, + { + "epoch": 25.697802197802197, + "grad_norm": 9.224392890930176, + "learning_rate": 3.71510989010989e-05, + "loss": 0.2214, + "step": 9354 + }, + { + "epoch": 25.70054945054945, + "grad_norm": 14.227075576782227, + "learning_rate": 3.714972527472528e-05, + "loss": 0.4535, + "step": 9355 + }, + { + "epoch": 25.703296703296704, + "grad_norm": 13.919013977050781, + "learning_rate": 3.714835164835165e-05, + "loss": 0.2463, + "step": 9356 + }, + { + "epoch": 25.706043956043956, + "grad_norm": 13.184746742248535, + "learning_rate": 3.714697802197803e-05, + "loss": 0.2959, + "step": 9357 + }, + { + "epoch": 25.708791208791208, + "grad_norm": 13.327954292297363, + "learning_rate": 3.71456043956044e-05, + "loss": 0.4139, + "step": 9358 + }, + { + "epoch": 25.71153846153846, + "grad_norm": 10.560026168823242, + "learning_rate": 3.714423076923077e-05, + "loss": 0.2882, + "step": 9359 + }, + { + "epoch": 25.714285714285715, + "grad_norm": 9.656006813049316, + "learning_rate": 3.7142857142857143e-05, + "loss": 0.1954, + "step": 9360 + }, + { + "epoch": 25.717032967032967, + "grad_norm": 14.577866554260254, + "learning_rate": 3.7141483516483513e-05, + "loss": 0.4004, + "step": 9361 + }, + { + "epoch": 25.71978021978022, + "grad_norm": 10.327437400817871, + "learning_rate": 3.714010989010989e-05, + "loss": 0.2325, + "step": 9362 + }, + { + "epoch": 25.72252747252747, + "grad_norm": 17.018112182617188, + "learning_rate": 3.713873626373627e-05, + "loss": 0.5062, + "step": 9363 + }, + { + "epoch": 25.725274725274726, + "grad_norm": 14.281503677368164, + "learning_rate": 3.713736263736264e-05, + "loss": 0.3463, + "step": 9364 + }, + { + "epoch": 25.728021978021978, + "grad_norm": 16.680164337158203, + "learning_rate": 3.7135989010989014e-05, + "loss": 0.6538, + "step": 9365 + }, + { + "epoch": 25.73076923076923, + "grad_norm": 12.96855640411377, + "learning_rate": 3.7134615384615384e-05, + "loss": 0.3027, + "step": 9366 + }, + { + "epoch": 25.733516483516482, + "grad_norm": 10.99572467803955, + "learning_rate": 3.713324175824176e-05, + "loss": 0.4858, + "step": 9367 + }, + { + "epoch": 25.736263736263737, + "grad_norm": 7.562969207763672, + "learning_rate": 3.713186813186814e-05, + "loss": 0.2155, + "step": 9368 + }, + { + "epoch": 25.73901098901099, + "grad_norm": 20.482725143432617, + "learning_rate": 3.713049450549451e-05, + "loss": 0.5672, + "step": 9369 + }, + { + "epoch": 25.74175824175824, + "grad_norm": 19.910762786865234, + "learning_rate": 3.7129120879120884e-05, + "loss": 0.6745, + "step": 9370 + }, + { + "epoch": 25.744505494505496, + "grad_norm": 12.202184677124023, + "learning_rate": 3.7127747252747254e-05, + "loss": 0.3024, + "step": 9371 + }, + { + "epoch": 25.747252747252748, + "grad_norm": 19.020915985107422, + "learning_rate": 3.712637362637363e-05, + "loss": 0.891, + "step": 9372 + }, + { + "epoch": 25.75, + "grad_norm": 11.928464889526367, + "learning_rate": 3.7125e-05, + "loss": 0.3186, + "step": 9373 + }, + { + "epoch": 25.752747252747252, + "grad_norm": 11.349554061889648, + "learning_rate": 3.712362637362637e-05, + "loss": 0.3361, + "step": 9374 + }, + { + "epoch": 25.755494505494504, + "grad_norm": 7.639123916625977, + "learning_rate": 3.712225274725275e-05, + "loss": 0.1891, + "step": 9375 + }, + { + "epoch": 25.75824175824176, + "grad_norm": 16.267175674438477, + "learning_rate": 3.712087912087912e-05, + "loss": 0.4343, + "step": 9376 + }, + { + "epoch": 25.76098901098901, + "grad_norm": 8.511448860168457, + "learning_rate": 3.7119505494505495e-05, + "loss": 0.2541, + "step": 9377 + }, + { + "epoch": 25.763736263736263, + "grad_norm": 16.207183837890625, + "learning_rate": 3.711813186813187e-05, + "loss": 0.6383, + "step": 9378 + }, + { + "epoch": 25.766483516483518, + "grad_norm": 10.214344024658203, + "learning_rate": 3.711675824175824e-05, + "loss": 0.3009, + "step": 9379 + }, + { + "epoch": 25.76923076923077, + "grad_norm": 12.595736503601074, + "learning_rate": 3.711538461538462e-05, + "loss": 0.2093, + "step": 9380 + }, + { + "epoch": 25.771978021978022, + "grad_norm": 4.249059200286865, + "learning_rate": 3.711401098901099e-05, + "loss": 0.0819, + "step": 9381 + }, + { + "epoch": 25.774725274725274, + "grad_norm": 7.925235271453857, + "learning_rate": 3.7112637362637365e-05, + "loss": 0.1579, + "step": 9382 + }, + { + "epoch": 25.77747252747253, + "grad_norm": 16.40094566345215, + "learning_rate": 3.711126373626374e-05, + "loss": 0.6029, + "step": 9383 + }, + { + "epoch": 25.78021978021978, + "grad_norm": 11.413153648376465, + "learning_rate": 3.710989010989011e-05, + "loss": 0.2401, + "step": 9384 + }, + { + "epoch": 25.782967032967033, + "grad_norm": 13.787174224853516, + "learning_rate": 3.710851648351649e-05, + "loss": 0.4369, + "step": 9385 + }, + { + "epoch": 25.785714285714285, + "grad_norm": 11.457903861999512, + "learning_rate": 3.710714285714286e-05, + "loss": 0.2505, + "step": 9386 + }, + { + "epoch": 25.78846153846154, + "grad_norm": 22.052560806274414, + "learning_rate": 3.7105769230769236e-05, + "loss": 0.3521, + "step": 9387 + }, + { + "epoch": 25.791208791208792, + "grad_norm": 6.044987678527832, + "learning_rate": 3.7104395604395606e-05, + "loss": 0.1174, + "step": 9388 + }, + { + "epoch": 25.793956043956044, + "grad_norm": 11.07060718536377, + "learning_rate": 3.7103021978021976e-05, + "loss": 0.2823, + "step": 9389 + }, + { + "epoch": 25.796703296703296, + "grad_norm": 4.994873523712158, + "learning_rate": 3.710164835164835e-05, + "loss": 0.1042, + "step": 9390 + }, + { + "epoch": 25.79945054945055, + "grad_norm": 12.382598876953125, + "learning_rate": 3.710027472527472e-05, + "loss": 0.4556, + "step": 9391 + }, + { + "epoch": 25.802197802197803, + "grad_norm": 12.870163917541504, + "learning_rate": 3.70989010989011e-05, + "loss": 0.3584, + "step": 9392 + }, + { + "epoch": 25.804945054945055, + "grad_norm": 16.357118606567383, + "learning_rate": 3.7097527472527476e-05, + "loss": 0.5334, + "step": 9393 + }, + { + "epoch": 25.807692307692307, + "grad_norm": 13.82541275024414, + "learning_rate": 3.7096153846153846e-05, + "loss": 0.378, + "step": 9394 + }, + { + "epoch": 25.810439560439562, + "grad_norm": 9.1154146194458, + "learning_rate": 3.709478021978022e-05, + "loss": 0.209, + "step": 9395 + }, + { + "epoch": 25.813186813186814, + "grad_norm": 16.056312561035156, + "learning_rate": 3.709340659340659e-05, + "loss": 0.6627, + "step": 9396 + }, + { + "epoch": 25.815934065934066, + "grad_norm": 13.49536418914795, + "learning_rate": 3.709203296703297e-05, + "loss": 0.3521, + "step": 9397 + }, + { + "epoch": 25.818681318681318, + "grad_norm": 11.245060920715332, + "learning_rate": 3.7090659340659346e-05, + "loss": 0.2163, + "step": 9398 + }, + { + "epoch": 25.821428571428573, + "grad_norm": 6.182387828826904, + "learning_rate": 3.7089285714285717e-05, + "loss": 0.1499, + "step": 9399 + }, + { + "epoch": 25.824175824175825, + "grad_norm": 13.290282249450684, + "learning_rate": 3.708791208791209e-05, + "loss": 0.3917, + "step": 9400 + }, + { + "epoch": 25.826923076923077, + "grad_norm": 14.941198348999023, + "learning_rate": 3.708653846153846e-05, + "loss": 0.4416, + "step": 9401 + }, + { + "epoch": 25.82967032967033, + "grad_norm": 9.99909496307373, + "learning_rate": 3.708516483516484e-05, + "loss": 0.2574, + "step": 9402 + }, + { + "epoch": 25.832417582417584, + "grad_norm": 9.555607795715332, + "learning_rate": 3.708379120879121e-05, + "loss": 0.3016, + "step": 9403 + }, + { + "epoch": 25.835164835164836, + "grad_norm": 20.454479217529297, + "learning_rate": 3.708241758241758e-05, + "loss": 0.7614, + "step": 9404 + }, + { + "epoch": 25.837912087912088, + "grad_norm": 12.174168586730957, + "learning_rate": 3.708104395604396e-05, + "loss": 0.2256, + "step": 9405 + }, + { + "epoch": 25.84065934065934, + "grad_norm": 12.603070259094238, + "learning_rate": 3.707967032967033e-05, + "loss": 0.284, + "step": 9406 + }, + { + "epoch": 25.843406593406595, + "grad_norm": 9.312495231628418, + "learning_rate": 3.7078296703296704e-05, + "loss": 0.3102, + "step": 9407 + }, + { + "epoch": 25.846153846153847, + "grad_norm": 15.444828987121582, + "learning_rate": 3.707692307692308e-05, + "loss": 0.6678, + "step": 9408 + }, + { + "epoch": 25.8489010989011, + "grad_norm": 6.894623756408691, + "learning_rate": 3.707554945054945e-05, + "loss": 0.0976, + "step": 9409 + }, + { + "epoch": 25.85164835164835, + "grad_norm": 10.654683113098145, + "learning_rate": 3.707417582417583e-05, + "loss": 0.3456, + "step": 9410 + }, + { + "epoch": 25.854395604395606, + "grad_norm": 10.485060691833496, + "learning_rate": 3.70728021978022e-05, + "loss": 0.1933, + "step": 9411 + }, + { + "epoch": 25.857142857142858, + "grad_norm": 11.885894775390625, + "learning_rate": 3.7071428571428574e-05, + "loss": 0.3254, + "step": 9412 + }, + { + "epoch": 25.85989010989011, + "grad_norm": 7.864598751068115, + "learning_rate": 3.707005494505495e-05, + "loss": 0.2164, + "step": 9413 + }, + { + "epoch": 25.86263736263736, + "grad_norm": 9.301809310913086, + "learning_rate": 3.706868131868132e-05, + "loss": 0.2151, + "step": 9414 + }, + { + "epoch": 25.865384615384617, + "grad_norm": 12.216044425964355, + "learning_rate": 3.70673076923077e-05, + "loss": 0.3106, + "step": 9415 + }, + { + "epoch": 25.86813186813187, + "grad_norm": 14.104870796203613, + "learning_rate": 3.706593406593407e-05, + "loss": 0.4773, + "step": 9416 + }, + { + "epoch": 25.87087912087912, + "grad_norm": 13.669697761535645, + "learning_rate": 3.7064560439560445e-05, + "loss": 0.3048, + "step": 9417 + }, + { + "epoch": 25.873626373626372, + "grad_norm": 8.850950241088867, + "learning_rate": 3.7063186813186815e-05, + "loss": 0.2804, + "step": 9418 + }, + { + "epoch": 25.876373626373628, + "grad_norm": 7.399057388305664, + "learning_rate": 3.7061813186813185e-05, + "loss": 0.1711, + "step": 9419 + }, + { + "epoch": 25.87912087912088, + "grad_norm": 8.956089973449707, + "learning_rate": 3.706043956043956e-05, + "loss": 0.1971, + "step": 9420 + }, + { + "epoch": 25.88186813186813, + "grad_norm": 8.932470321655273, + "learning_rate": 3.705906593406593e-05, + "loss": 0.1805, + "step": 9421 + }, + { + "epoch": 25.884615384615383, + "grad_norm": 14.612832069396973, + "learning_rate": 3.705769230769231e-05, + "loss": 0.4645, + "step": 9422 + }, + { + "epoch": 25.88736263736264, + "grad_norm": 10.268270492553711, + "learning_rate": 3.7056318681318685e-05, + "loss": 0.1655, + "step": 9423 + }, + { + "epoch": 25.89010989010989, + "grad_norm": 13.60512924194336, + "learning_rate": 3.7054945054945055e-05, + "loss": 0.3762, + "step": 9424 + }, + { + "epoch": 25.892857142857142, + "grad_norm": 22.011545181274414, + "learning_rate": 3.705357142857143e-05, + "loss": 0.6144, + "step": 9425 + }, + { + "epoch": 25.895604395604394, + "grad_norm": 16.9603271484375, + "learning_rate": 3.70521978021978e-05, + "loss": 0.6382, + "step": 9426 + }, + { + "epoch": 25.89835164835165, + "grad_norm": 10.293010711669922, + "learning_rate": 3.705082417582418e-05, + "loss": 0.3541, + "step": 9427 + }, + { + "epoch": 25.9010989010989, + "grad_norm": 7.919678211212158, + "learning_rate": 3.7049450549450555e-05, + "loss": 0.1755, + "step": 9428 + }, + { + "epoch": 25.903846153846153, + "grad_norm": 8.852709770202637, + "learning_rate": 3.7048076923076925e-05, + "loss": 0.2979, + "step": 9429 + }, + { + "epoch": 25.906593406593405, + "grad_norm": 11.456340789794922, + "learning_rate": 3.70467032967033e-05, + "loss": 0.491, + "step": 9430 + }, + { + "epoch": 25.90934065934066, + "grad_norm": 11.612655639648438, + "learning_rate": 3.704532967032967e-05, + "loss": 0.4537, + "step": 9431 + }, + { + "epoch": 25.912087912087912, + "grad_norm": 8.300247192382812, + "learning_rate": 3.704395604395605e-05, + "loss": 0.1483, + "step": 9432 + }, + { + "epoch": 25.914835164835164, + "grad_norm": 16.739065170288086, + "learning_rate": 3.704258241758242e-05, + "loss": 0.5918, + "step": 9433 + }, + { + "epoch": 25.917582417582416, + "grad_norm": 18.27642059326172, + "learning_rate": 3.704120879120879e-05, + "loss": 0.5672, + "step": 9434 + }, + { + "epoch": 25.92032967032967, + "grad_norm": 14.308303833007812, + "learning_rate": 3.7039835164835166e-05, + "loss": 0.4999, + "step": 9435 + }, + { + "epoch": 25.923076923076923, + "grad_norm": 7.894735813140869, + "learning_rate": 3.7038461538461536e-05, + "loss": 0.2504, + "step": 9436 + }, + { + "epoch": 25.925824175824175, + "grad_norm": 3.0720901489257812, + "learning_rate": 3.703708791208791e-05, + "loss": 0.0574, + "step": 9437 + }, + { + "epoch": 25.928571428571427, + "grad_norm": 6.638164520263672, + "learning_rate": 3.703571428571429e-05, + "loss": 0.2097, + "step": 9438 + }, + { + "epoch": 25.931318681318682, + "grad_norm": 14.479371070861816, + "learning_rate": 3.703434065934066e-05, + "loss": 0.4197, + "step": 9439 + }, + { + "epoch": 25.934065934065934, + "grad_norm": 14.42638111114502, + "learning_rate": 3.7032967032967036e-05, + "loss": 0.4494, + "step": 9440 + }, + { + "epoch": 25.936813186813186, + "grad_norm": 11.734234809875488, + "learning_rate": 3.7031593406593406e-05, + "loss": 0.3563, + "step": 9441 + }, + { + "epoch": 25.939560439560438, + "grad_norm": 3.616157054901123, + "learning_rate": 3.703021978021978e-05, + "loss": 0.0864, + "step": 9442 + }, + { + "epoch": 25.942307692307693, + "grad_norm": 9.868013381958008, + "learning_rate": 3.702884615384615e-05, + "loss": 0.2623, + "step": 9443 + }, + { + "epoch": 25.945054945054945, + "grad_norm": 15.851133346557617, + "learning_rate": 3.702747252747253e-05, + "loss": 0.5026, + "step": 9444 + }, + { + "epoch": 25.947802197802197, + "grad_norm": 9.478490829467773, + "learning_rate": 3.702609890109891e-05, + "loss": 0.2105, + "step": 9445 + }, + { + "epoch": 25.95054945054945, + "grad_norm": 12.333938598632812, + "learning_rate": 3.702472527472528e-05, + "loss": 0.2892, + "step": 9446 + }, + { + "epoch": 25.953296703296704, + "grad_norm": 18.383563995361328, + "learning_rate": 3.7023351648351654e-05, + "loss": 0.7087, + "step": 9447 + }, + { + "epoch": 25.956043956043956, + "grad_norm": 13.794363021850586, + "learning_rate": 3.7021978021978024e-05, + "loss": 0.3553, + "step": 9448 + }, + { + "epoch": 25.958791208791208, + "grad_norm": 7.353118896484375, + "learning_rate": 3.7020604395604394e-05, + "loss": 0.183, + "step": 9449 + }, + { + "epoch": 25.96153846153846, + "grad_norm": 14.718778610229492, + "learning_rate": 3.701923076923077e-05, + "loss": 0.3724, + "step": 9450 + }, + { + "epoch": 25.964285714285715, + "grad_norm": 8.210458755493164, + "learning_rate": 3.701785714285714e-05, + "loss": 0.1425, + "step": 9451 + }, + { + "epoch": 25.967032967032967, + "grad_norm": 24.155967712402344, + "learning_rate": 3.701648351648352e-05, + "loss": 0.8115, + "step": 9452 + }, + { + "epoch": 25.96978021978022, + "grad_norm": 10.791915893554688, + "learning_rate": 3.701510989010989e-05, + "loss": 0.2541, + "step": 9453 + }, + { + "epoch": 25.97252747252747, + "grad_norm": 10.916638374328613, + "learning_rate": 3.7013736263736264e-05, + "loss": 0.2187, + "step": 9454 + }, + { + "epoch": 25.975274725274726, + "grad_norm": 7.249306678771973, + "learning_rate": 3.701236263736264e-05, + "loss": 0.1496, + "step": 9455 + }, + { + "epoch": 25.978021978021978, + "grad_norm": 9.741283416748047, + "learning_rate": 3.701098901098901e-05, + "loss": 0.3553, + "step": 9456 + }, + { + "epoch": 25.98076923076923, + "grad_norm": 13.605823516845703, + "learning_rate": 3.700961538461539e-05, + "loss": 0.3559, + "step": 9457 + }, + { + "epoch": 25.983516483516482, + "grad_norm": 14.525527000427246, + "learning_rate": 3.700824175824176e-05, + "loss": 0.4223, + "step": 9458 + }, + { + "epoch": 25.986263736263737, + "grad_norm": 10.382120132446289, + "learning_rate": 3.7006868131868134e-05, + "loss": 0.2362, + "step": 9459 + }, + { + "epoch": 25.98901098901099, + "grad_norm": 19.34379005432129, + "learning_rate": 3.700549450549451e-05, + "loss": 0.6642, + "step": 9460 + }, + { + "epoch": 25.99175824175824, + "grad_norm": 15.24706745147705, + "learning_rate": 3.700412087912088e-05, + "loss": 0.7072, + "step": 9461 + }, + { + "epoch": 25.994505494505496, + "grad_norm": 13.760215759277344, + "learning_rate": 3.700274725274726e-05, + "loss": 0.3966, + "step": 9462 + }, + { + "epoch": 25.997252747252748, + "grad_norm": 18.529386520385742, + "learning_rate": 3.700137362637363e-05, + "loss": 0.5796, + "step": 9463 + }, + { + "epoch": 26.0, + "grad_norm": 31.58148956298828, + "learning_rate": 3.7e-05, + "loss": 0.7441, + "step": 9464 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.6735537190082644, + "eval_f1": 0.6613360068043009, + "eval_f1_DuraRiadoRio_64x64": 0.75, + "eval_f1_Mole_64x64": 0.2754491017964072, + "eval_f1_Quebrado_64x64": 0.841726618705036, + "eval_f1_RiadoRio_64x64": 0.5654885654885655, + "eval_f1_RioFechado_64x64": 0.8740157480314961, + "eval_loss": 1.559377670288086, + "eval_precision": 0.8148909234224015, + "eval_precision_DuraRiadoRio_64x64": 0.796875, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.8731343283582089, + "eval_precision_RiadoRio_64x64": 0.4133738601823708, + "eval_precision_RioFechado_64x64": 0.9910714285714286, + "eval_recall": 0.6713965077011779, + "eval_recall_DuraRiadoRio_64x64": 0.7083333333333334, + "eval_recall_Mole_64x64": 0.1597222222222222, + "eval_recall_Quebrado_64x64": 0.8125, + "eval_recall_RiadoRio_64x64": 0.8947368421052632, + "eval_recall_RioFechado_64x64": 0.7816901408450704, + "eval_runtime": 1.816, + "eval_samples_per_second": 399.784, + "eval_steps_per_second": 25.331, + "step": 9464 + }, + { + "epoch": 26.002747252747252, + "grad_norm": 11.614012718200684, + "learning_rate": 3.6998626373626375e-05, + "loss": 0.2161, + "step": 9465 + }, + { + "epoch": 26.005494505494507, + "grad_norm": 11.616311073303223, + "learning_rate": 3.6997252747252745e-05, + "loss": 0.4329, + "step": 9466 + }, + { + "epoch": 26.00824175824176, + "grad_norm": 11.448172569274902, + "learning_rate": 3.699587912087912e-05, + "loss": 0.3744, + "step": 9467 + }, + { + "epoch": 26.01098901098901, + "grad_norm": 9.654143333435059, + "learning_rate": 3.699450549450549e-05, + "loss": 0.2845, + "step": 9468 + }, + { + "epoch": 26.013736263736263, + "grad_norm": 5.499485969543457, + "learning_rate": 3.699313186813187e-05, + "loss": 0.0946, + "step": 9469 + }, + { + "epoch": 26.016483516483518, + "grad_norm": 19.572132110595703, + "learning_rate": 3.6991758241758245e-05, + "loss": 0.8031, + "step": 9470 + }, + { + "epoch": 26.01923076923077, + "grad_norm": 13.182259559631348, + "learning_rate": 3.6990384615384615e-05, + "loss": 0.1189, + "step": 9471 + }, + { + "epoch": 26.021978021978022, + "grad_norm": 12.744014739990234, + "learning_rate": 3.698901098901099e-05, + "loss": 0.5009, + "step": 9472 + }, + { + "epoch": 26.024725274725274, + "grad_norm": 10.541091918945312, + "learning_rate": 3.698763736263736e-05, + "loss": 0.336, + "step": 9473 + }, + { + "epoch": 26.02747252747253, + "grad_norm": 6.040704250335693, + "learning_rate": 3.698626373626374e-05, + "loss": 0.1289, + "step": 9474 + }, + { + "epoch": 26.03021978021978, + "grad_norm": 10.69350814819336, + "learning_rate": 3.6984890109890116e-05, + "loss": 0.2502, + "step": 9475 + }, + { + "epoch": 26.032967032967033, + "grad_norm": 11.185173988342285, + "learning_rate": 3.6983516483516486e-05, + "loss": 0.1527, + "step": 9476 + }, + { + "epoch": 26.035714285714285, + "grad_norm": 13.908745765686035, + "learning_rate": 3.698214285714286e-05, + "loss": 0.3146, + "step": 9477 + }, + { + "epoch": 26.03846153846154, + "grad_norm": 16.6208553314209, + "learning_rate": 3.698076923076923e-05, + "loss": 0.7127, + "step": 9478 + }, + { + "epoch": 26.041208791208792, + "grad_norm": 8.618412017822266, + "learning_rate": 3.69793956043956e-05, + "loss": 0.3018, + "step": 9479 + }, + { + "epoch": 26.043956043956044, + "grad_norm": 11.789335250854492, + "learning_rate": 3.697802197802198e-05, + "loss": 0.2447, + "step": 9480 + }, + { + "epoch": 26.046703296703296, + "grad_norm": 16.378541946411133, + "learning_rate": 3.697664835164835e-05, + "loss": 0.3332, + "step": 9481 + }, + { + "epoch": 26.04945054945055, + "grad_norm": 16.987228393554688, + "learning_rate": 3.6975274725274726e-05, + "loss": 0.5682, + "step": 9482 + }, + { + "epoch": 26.052197802197803, + "grad_norm": 10.320233345031738, + "learning_rate": 3.6973901098901096e-05, + "loss": 0.2652, + "step": 9483 + }, + { + "epoch": 26.054945054945055, + "grad_norm": 16.35675811767578, + "learning_rate": 3.697252747252747e-05, + "loss": 0.7317, + "step": 9484 + }, + { + "epoch": 26.057692307692307, + "grad_norm": 9.987165451049805, + "learning_rate": 3.697115384615385e-05, + "loss": 0.1841, + "step": 9485 + }, + { + "epoch": 26.060439560439562, + "grad_norm": 12.572309494018555, + "learning_rate": 3.696978021978022e-05, + "loss": 0.399, + "step": 9486 + }, + { + "epoch": 26.063186813186814, + "grad_norm": 17.48652458190918, + "learning_rate": 3.69684065934066e-05, + "loss": 0.5181, + "step": 9487 + }, + { + "epoch": 26.065934065934066, + "grad_norm": 13.984444618225098, + "learning_rate": 3.696703296703297e-05, + "loss": 0.5123, + "step": 9488 + }, + { + "epoch": 26.068681318681318, + "grad_norm": 18.491779327392578, + "learning_rate": 3.6965659340659343e-05, + "loss": 0.6177, + "step": 9489 + }, + { + "epoch": 26.071428571428573, + "grad_norm": 15.614147186279297, + "learning_rate": 3.696428571428572e-05, + "loss": 0.4915, + "step": 9490 + }, + { + "epoch": 26.074175824175825, + "grad_norm": 11.322050094604492, + "learning_rate": 3.696291208791209e-05, + "loss": 0.387, + "step": 9491 + }, + { + "epoch": 26.076923076923077, + "grad_norm": 17.9296875, + "learning_rate": 3.696153846153847e-05, + "loss": 0.7284, + "step": 9492 + }, + { + "epoch": 26.07967032967033, + "grad_norm": 6.765779972076416, + "learning_rate": 3.696016483516484e-05, + "loss": 0.1806, + "step": 9493 + }, + { + "epoch": 26.082417582417584, + "grad_norm": 9.334267616271973, + "learning_rate": 3.695879120879121e-05, + "loss": 0.3459, + "step": 9494 + }, + { + "epoch": 26.085164835164836, + "grad_norm": 12.629280090332031, + "learning_rate": 3.6957417582417584e-05, + "loss": 0.209, + "step": 9495 + }, + { + "epoch": 26.087912087912088, + "grad_norm": 20.135860443115234, + "learning_rate": 3.6956043956043954e-05, + "loss": 0.7627, + "step": 9496 + }, + { + "epoch": 26.09065934065934, + "grad_norm": 16.21981430053711, + "learning_rate": 3.695467032967033e-05, + "loss": 0.4471, + "step": 9497 + }, + { + "epoch": 26.093406593406595, + "grad_norm": 23.953956604003906, + "learning_rate": 3.69532967032967e-05, + "loss": 0.5698, + "step": 9498 + }, + { + "epoch": 26.096153846153847, + "grad_norm": 3.046037435531616, + "learning_rate": 3.695192307692308e-05, + "loss": 0.0535, + "step": 9499 + }, + { + "epoch": 26.0989010989011, + "grad_norm": 11.148266792297363, + "learning_rate": 3.6950549450549454e-05, + "loss": 0.4185, + "step": 9500 + }, + { + "epoch": 26.10164835164835, + "grad_norm": 6.081242084503174, + "learning_rate": 3.6949175824175824e-05, + "loss": 0.1112, + "step": 9501 + }, + { + "epoch": 26.104395604395606, + "grad_norm": 8.162826538085938, + "learning_rate": 3.69478021978022e-05, + "loss": 0.1471, + "step": 9502 + }, + { + "epoch": 26.107142857142858, + "grad_norm": 6.968388557434082, + "learning_rate": 3.694642857142857e-05, + "loss": 0.1607, + "step": 9503 + }, + { + "epoch": 26.10989010989011, + "grad_norm": 19.85961151123047, + "learning_rate": 3.694505494505495e-05, + "loss": 0.772, + "step": 9504 + }, + { + "epoch": 26.11263736263736, + "grad_norm": 11.887901306152344, + "learning_rate": 3.6943681318681325e-05, + "loss": 0.3173, + "step": 9505 + }, + { + "epoch": 26.115384615384617, + "grad_norm": 18.32550048828125, + "learning_rate": 3.6942307692307695e-05, + "loss": 0.5915, + "step": 9506 + }, + { + "epoch": 26.11813186813187, + "grad_norm": 4.388763427734375, + "learning_rate": 3.694093406593407e-05, + "loss": 0.0748, + "step": 9507 + }, + { + "epoch": 26.12087912087912, + "grad_norm": 16.103288650512695, + "learning_rate": 3.693956043956044e-05, + "loss": 0.4232, + "step": 9508 + }, + { + "epoch": 26.123626373626372, + "grad_norm": 13.264561653137207, + "learning_rate": 3.693818681318681e-05, + "loss": 0.3856, + "step": 9509 + }, + { + "epoch": 26.126373626373628, + "grad_norm": 14.634603500366211, + "learning_rate": 3.693681318681319e-05, + "loss": 0.3745, + "step": 9510 + }, + { + "epoch": 26.12912087912088, + "grad_norm": 12.814756393432617, + "learning_rate": 3.693543956043956e-05, + "loss": 0.3368, + "step": 9511 + }, + { + "epoch": 26.13186813186813, + "grad_norm": 11.415949821472168, + "learning_rate": 3.6934065934065935e-05, + "loss": 0.1181, + "step": 9512 + }, + { + "epoch": 26.134615384615383, + "grad_norm": 13.293095588684082, + "learning_rate": 3.6932692307692305e-05, + "loss": 0.3844, + "step": 9513 + }, + { + "epoch": 26.13736263736264, + "grad_norm": 4.541163921356201, + "learning_rate": 3.693131868131868e-05, + "loss": 0.0673, + "step": 9514 + }, + { + "epoch": 26.14010989010989, + "grad_norm": 12.747525215148926, + "learning_rate": 3.692994505494506e-05, + "loss": 0.3393, + "step": 9515 + }, + { + "epoch": 26.142857142857142, + "grad_norm": 6.416889190673828, + "learning_rate": 3.692857142857143e-05, + "loss": 0.151, + "step": 9516 + }, + { + "epoch": 26.145604395604394, + "grad_norm": 6.736471176147461, + "learning_rate": 3.6927197802197806e-05, + "loss": 0.1208, + "step": 9517 + }, + { + "epoch": 26.14835164835165, + "grad_norm": 15.538418769836426, + "learning_rate": 3.6925824175824176e-05, + "loss": 0.6458, + "step": 9518 + }, + { + "epoch": 26.1510989010989, + "grad_norm": 8.790092468261719, + "learning_rate": 3.692445054945055e-05, + "loss": 0.2106, + "step": 9519 + }, + { + "epoch": 26.153846153846153, + "grad_norm": 5.869418621063232, + "learning_rate": 3.692307692307693e-05, + "loss": 0.1114, + "step": 9520 + }, + { + "epoch": 26.156593406593405, + "grad_norm": 11.276187896728516, + "learning_rate": 3.69217032967033e-05, + "loss": 0.3036, + "step": 9521 + }, + { + "epoch": 26.15934065934066, + "grad_norm": 19.862762451171875, + "learning_rate": 3.6920329670329676e-05, + "loss": 0.5094, + "step": 9522 + }, + { + "epoch": 26.162087912087912, + "grad_norm": 13.818260192871094, + "learning_rate": 3.6918956043956046e-05, + "loss": 0.3661, + "step": 9523 + }, + { + "epoch": 26.164835164835164, + "grad_norm": 20.246042251586914, + "learning_rate": 3.6917582417582416e-05, + "loss": 0.8506, + "step": 9524 + }, + { + "epoch": 26.167582417582416, + "grad_norm": 12.2147216796875, + "learning_rate": 3.691620879120879e-05, + "loss": 0.4401, + "step": 9525 + }, + { + "epoch": 26.17032967032967, + "grad_norm": 11.692352294921875, + "learning_rate": 3.691483516483516e-05, + "loss": 0.2636, + "step": 9526 + }, + { + "epoch": 26.173076923076923, + "grad_norm": 9.364662170410156, + "learning_rate": 3.691346153846154e-05, + "loss": 0.2477, + "step": 9527 + }, + { + "epoch": 26.175824175824175, + "grad_norm": 6.425754070281982, + "learning_rate": 3.691208791208791e-05, + "loss": 0.1802, + "step": 9528 + }, + { + "epoch": 26.178571428571427, + "grad_norm": 18.770662307739258, + "learning_rate": 3.6910714285714286e-05, + "loss": 0.5114, + "step": 9529 + }, + { + "epoch": 26.181318681318682, + "grad_norm": 9.57158088684082, + "learning_rate": 3.690934065934066e-05, + "loss": 0.2263, + "step": 9530 + }, + { + "epoch": 26.184065934065934, + "grad_norm": 21.43385124206543, + "learning_rate": 3.690796703296703e-05, + "loss": 0.9174, + "step": 9531 + }, + { + "epoch": 26.186813186813186, + "grad_norm": 10.16480827331543, + "learning_rate": 3.690659340659341e-05, + "loss": 0.2332, + "step": 9532 + }, + { + "epoch": 26.189560439560438, + "grad_norm": 17.15281105041504, + "learning_rate": 3.690521978021978e-05, + "loss": 0.4418, + "step": 9533 + }, + { + "epoch": 26.192307692307693, + "grad_norm": 23.722822189331055, + "learning_rate": 3.690384615384616e-05, + "loss": 0.8267, + "step": 9534 + }, + { + "epoch": 26.195054945054945, + "grad_norm": 12.046010971069336, + "learning_rate": 3.6902472527472534e-05, + "loss": 0.4008, + "step": 9535 + }, + { + "epoch": 26.197802197802197, + "grad_norm": 6.756278991699219, + "learning_rate": 3.6901098901098904e-05, + "loss": 0.1641, + "step": 9536 + }, + { + "epoch": 26.20054945054945, + "grad_norm": 8.30820083618164, + "learning_rate": 3.689972527472528e-05, + "loss": 0.1432, + "step": 9537 + }, + { + "epoch": 26.203296703296704, + "grad_norm": 15.093056678771973, + "learning_rate": 3.689835164835165e-05, + "loss": 0.4849, + "step": 9538 + }, + { + "epoch": 26.206043956043956, + "grad_norm": 14.596179962158203, + "learning_rate": 3.689697802197802e-05, + "loss": 0.373, + "step": 9539 + }, + { + "epoch": 26.208791208791208, + "grad_norm": 10.828590393066406, + "learning_rate": 3.68956043956044e-05, + "loss": 0.2593, + "step": 9540 + }, + { + "epoch": 26.21153846153846, + "grad_norm": 17.007736206054688, + "learning_rate": 3.689423076923077e-05, + "loss": 0.4464, + "step": 9541 + }, + { + "epoch": 26.214285714285715, + "grad_norm": 10.513142585754395, + "learning_rate": 3.6892857142857144e-05, + "loss": 0.2565, + "step": 9542 + }, + { + "epoch": 26.217032967032967, + "grad_norm": 19.651315689086914, + "learning_rate": 3.6891483516483514e-05, + "loss": 0.4057, + "step": 9543 + }, + { + "epoch": 26.21978021978022, + "grad_norm": 21.3778133392334, + "learning_rate": 3.689010989010989e-05, + "loss": 0.7974, + "step": 9544 + }, + { + "epoch": 26.22252747252747, + "grad_norm": 8.240935325622559, + "learning_rate": 3.688873626373627e-05, + "loss": 0.2174, + "step": 9545 + }, + { + "epoch": 26.225274725274726, + "grad_norm": 18.971328735351562, + "learning_rate": 3.688736263736264e-05, + "loss": 0.5737, + "step": 9546 + }, + { + "epoch": 26.228021978021978, + "grad_norm": 12.411297798156738, + "learning_rate": 3.6885989010989015e-05, + "loss": 0.2855, + "step": 9547 + }, + { + "epoch": 26.23076923076923, + "grad_norm": 6.0794172286987305, + "learning_rate": 3.6884615384615385e-05, + "loss": 0.1726, + "step": 9548 + }, + { + "epoch": 26.233516483516482, + "grad_norm": 8.39115047454834, + "learning_rate": 3.688324175824176e-05, + "loss": 0.2004, + "step": 9549 + }, + { + "epoch": 26.236263736263737, + "grad_norm": 11.293669700622559, + "learning_rate": 3.688186813186814e-05, + "loss": 0.2591, + "step": 9550 + }, + { + "epoch": 26.23901098901099, + "grad_norm": 11.151416778564453, + "learning_rate": 3.688049450549451e-05, + "loss": 0.314, + "step": 9551 + }, + { + "epoch": 26.24175824175824, + "grad_norm": 17.235692977905273, + "learning_rate": 3.6879120879120885e-05, + "loss": 0.4563, + "step": 9552 + }, + { + "epoch": 26.244505494505493, + "grad_norm": 19.26764678955078, + "learning_rate": 3.6877747252747255e-05, + "loss": 0.7723, + "step": 9553 + }, + { + "epoch": 26.247252747252748, + "grad_norm": 11.030932426452637, + "learning_rate": 3.6876373626373625e-05, + "loss": 0.212, + "step": 9554 + }, + { + "epoch": 26.25, + "grad_norm": 13.302947998046875, + "learning_rate": 3.6875e-05, + "loss": 0.3724, + "step": 9555 + }, + { + "epoch": 26.252747252747252, + "grad_norm": 9.462953567504883, + "learning_rate": 3.687362637362637e-05, + "loss": 0.266, + "step": 9556 + }, + { + "epoch": 26.255494505494504, + "grad_norm": 10.243459701538086, + "learning_rate": 3.687225274725275e-05, + "loss": 0.2024, + "step": 9557 + }, + { + "epoch": 26.25824175824176, + "grad_norm": 8.570685386657715, + "learning_rate": 3.687087912087912e-05, + "loss": 0.1606, + "step": 9558 + }, + { + "epoch": 26.26098901098901, + "grad_norm": 7.665686130523682, + "learning_rate": 3.6869505494505495e-05, + "loss": 0.1516, + "step": 9559 + }, + { + "epoch": 26.263736263736263, + "grad_norm": 11.81169319152832, + "learning_rate": 3.686813186813187e-05, + "loss": 0.2757, + "step": 9560 + }, + { + "epoch": 26.266483516483518, + "grad_norm": 15.769100189208984, + "learning_rate": 3.686675824175824e-05, + "loss": 0.7591, + "step": 9561 + }, + { + "epoch": 26.26923076923077, + "grad_norm": 10.862905502319336, + "learning_rate": 3.686538461538462e-05, + "loss": 0.2251, + "step": 9562 + }, + { + "epoch": 26.271978021978022, + "grad_norm": 20.584609985351562, + "learning_rate": 3.686401098901099e-05, + "loss": 0.6229, + "step": 9563 + }, + { + "epoch": 26.274725274725274, + "grad_norm": 5.9545063972473145, + "learning_rate": 3.6862637362637366e-05, + "loss": 0.1213, + "step": 9564 + }, + { + "epoch": 26.27747252747253, + "grad_norm": 5.406290054321289, + "learning_rate": 3.686126373626374e-05, + "loss": 0.1488, + "step": 9565 + }, + { + "epoch": 26.28021978021978, + "grad_norm": 13.298454284667969, + "learning_rate": 3.685989010989011e-05, + "loss": 0.2447, + "step": 9566 + }, + { + "epoch": 26.282967032967033, + "grad_norm": 12.567136764526367, + "learning_rate": 3.685851648351649e-05, + "loss": 0.1997, + "step": 9567 + }, + { + "epoch": 26.285714285714285, + "grad_norm": 26.16644859313965, + "learning_rate": 3.685714285714286e-05, + "loss": 0.7505, + "step": 9568 + }, + { + "epoch": 26.28846153846154, + "grad_norm": 7.330954074859619, + "learning_rate": 3.685576923076923e-05, + "loss": 0.1878, + "step": 9569 + }, + { + "epoch": 26.291208791208792, + "grad_norm": 9.20647144317627, + "learning_rate": 3.6854395604395606e-05, + "loss": 0.2903, + "step": 9570 + }, + { + "epoch": 26.293956043956044, + "grad_norm": 11.708423614501953, + "learning_rate": 3.6853021978021976e-05, + "loss": 0.2399, + "step": 9571 + }, + { + "epoch": 26.296703296703296, + "grad_norm": 9.53999137878418, + "learning_rate": 3.685164835164835e-05, + "loss": 0.2961, + "step": 9572 + }, + { + "epoch": 26.29945054945055, + "grad_norm": 15.034415245056152, + "learning_rate": 3.685027472527472e-05, + "loss": 0.4405, + "step": 9573 + }, + { + "epoch": 26.302197802197803, + "grad_norm": 8.705774307250977, + "learning_rate": 3.68489010989011e-05, + "loss": 0.3729, + "step": 9574 + }, + { + "epoch": 26.304945054945055, + "grad_norm": 22.314834594726562, + "learning_rate": 3.684752747252748e-05, + "loss": 0.6866, + "step": 9575 + }, + { + "epoch": 26.307692307692307, + "grad_norm": 13.586042404174805, + "learning_rate": 3.684615384615385e-05, + "loss": 0.2921, + "step": 9576 + }, + { + "epoch": 26.310439560439562, + "grad_norm": 17.95404052734375, + "learning_rate": 3.6844780219780224e-05, + "loss": 0.6181, + "step": 9577 + }, + { + "epoch": 26.313186813186814, + "grad_norm": 19.82953453063965, + "learning_rate": 3.6843406593406594e-05, + "loss": 1.1497, + "step": 9578 + }, + { + "epoch": 26.315934065934066, + "grad_norm": 13.043920516967773, + "learning_rate": 3.684203296703297e-05, + "loss": 0.2246, + "step": 9579 + }, + { + "epoch": 26.318681318681318, + "grad_norm": 20.8714599609375, + "learning_rate": 3.684065934065935e-05, + "loss": 0.8776, + "step": 9580 + }, + { + "epoch": 26.321428571428573, + "grad_norm": 8.483237266540527, + "learning_rate": 3.683928571428572e-05, + "loss": 0.2155, + "step": 9581 + }, + { + "epoch": 26.324175824175825, + "grad_norm": 12.053481101989746, + "learning_rate": 3.6837912087912094e-05, + "loss": 0.2789, + "step": 9582 + }, + { + "epoch": 26.326923076923077, + "grad_norm": 13.63552188873291, + "learning_rate": 3.6836538461538464e-05, + "loss": 0.29, + "step": 9583 + }, + { + "epoch": 26.32967032967033, + "grad_norm": 16.371740341186523, + "learning_rate": 3.6835164835164834e-05, + "loss": 0.328, + "step": 9584 + }, + { + "epoch": 26.332417582417584, + "grad_norm": 9.946239471435547, + "learning_rate": 3.683379120879121e-05, + "loss": 0.1545, + "step": 9585 + }, + { + "epoch": 26.335164835164836, + "grad_norm": 17.146692276000977, + "learning_rate": 3.683241758241758e-05, + "loss": 0.5257, + "step": 9586 + }, + { + "epoch": 26.337912087912088, + "grad_norm": 8.164093017578125, + "learning_rate": 3.683104395604396e-05, + "loss": 0.1548, + "step": 9587 + }, + { + "epoch": 26.34065934065934, + "grad_norm": 10.502830505371094, + "learning_rate": 3.682967032967033e-05, + "loss": 0.2447, + "step": 9588 + }, + { + "epoch": 26.343406593406595, + "grad_norm": 13.534666061401367, + "learning_rate": 3.6828296703296704e-05, + "loss": 0.8344, + "step": 9589 + }, + { + "epoch": 26.346153846153847, + "grad_norm": 6.7695512771606445, + "learning_rate": 3.682692307692308e-05, + "loss": 0.156, + "step": 9590 + }, + { + "epoch": 26.3489010989011, + "grad_norm": 11.771897315979004, + "learning_rate": 3.682554945054945e-05, + "loss": 0.3961, + "step": 9591 + }, + { + "epoch": 26.35164835164835, + "grad_norm": 11.795360565185547, + "learning_rate": 3.682417582417583e-05, + "loss": 0.2124, + "step": 9592 + }, + { + "epoch": 26.354395604395606, + "grad_norm": 4.480839729309082, + "learning_rate": 3.68228021978022e-05, + "loss": 0.0881, + "step": 9593 + }, + { + "epoch": 26.357142857142858, + "grad_norm": 11.788296699523926, + "learning_rate": 3.6821428571428575e-05, + "loss": 0.3197, + "step": 9594 + }, + { + "epoch": 26.35989010989011, + "grad_norm": 11.824758529663086, + "learning_rate": 3.682005494505495e-05, + "loss": 0.2193, + "step": 9595 + }, + { + "epoch": 26.36263736263736, + "grad_norm": 16.15694808959961, + "learning_rate": 3.681868131868132e-05, + "loss": 0.6441, + "step": 9596 + }, + { + "epoch": 26.365384615384617, + "grad_norm": 11.979642868041992, + "learning_rate": 3.68173076923077e-05, + "loss": 0.2766, + "step": 9597 + }, + { + "epoch": 26.36813186813187, + "grad_norm": 14.420812606811523, + "learning_rate": 3.681593406593407e-05, + "loss": 0.4914, + "step": 9598 + }, + { + "epoch": 26.37087912087912, + "grad_norm": 14.802803993225098, + "learning_rate": 3.681456043956044e-05, + "loss": 0.7473, + "step": 9599 + }, + { + "epoch": 26.373626373626372, + "grad_norm": 16.191604614257812, + "learning_rate": 3.6813186813186815e-05, + "loss": 0.4268, + "step": 9600 + }, + { + "epoch": 26.376373626373628, + "grad_norm": 19.824447631835938, + "learning_rate": 3.6811813186813185e-05, + "loss": 0.7756, + "step": 9601 + }, + { + "epoch": 26.37912087912088, + "grad_norm": 8.10120677947998, + "learning_rate": 3.681043956043956e-05, + "loss": 0.1888, + "step": 9602 + }, + { + "epoch": 26.38186813186813, + "grad_norm": 8.531935691833496, + "learning_rate": 3.680906593406593e-05, + "loss": 0.2216, + "step": 9603 + }, + { + "epoch": 26.384615384615383, + "grad_norm": 19.94138526916504, + "learning_rate": 3.680769230769231e-05, + "loss": 0.8244, + "step": 9604 + }, + { + "epoch": 26.38736263736264, + "grad_norm": 19.58982276916504, + "learning_rate": 3.6806318681318686e-05, + "loss": 0.7287, + "step": 9605 + }, + { + "epoch": 26.39010989010989, + "grad_norm": 6.533220291137695, + "learning_rate": 3.6804945054945056e-05, + "loss": 0.1339, + "step": 9606 + }, + { + "epoch": 26.392857142857142, + "grad_norm": 24.59722137451172, + "learning_rate": 3.680357142857143e-05, + "loss": 1.3803, + "step": 9607 + }, + { + "epoch": 26.395604395604394, + "grad_norm": 8.495579719543457, + "learning_rate": 3.68021978021978e-05, + "loss": 0.2023, + "step": 9608 + }, + { + "epoch": 26.39835164835165, + "grad_norm": 11.373773574829102, + "learning_rate": 3.680082417582418e-05, + "loss": 0.2923, + "step": 9609 + }, + { + "epoch": 26.4010989010989, + "grad_norm": 8.385321617126465, + "learning_rate": 3.6799450549450556e-05, + "loss": 0.1601, + "step": 9610 + }, + { + "epoch": 26.403846153846153, + "grad_norm": 17.733959197998047, + "learning_rate": 3.6798076923076926e-05, + "loss": 0.3841, + "step": 9611 + }, + { + "epoch": 26.406593406593405, + "grad_norm": 18.14854621887207, + "learning_rate": 3.67967032967033e-05, + "loss": 0.3957, + "step": 9612 + }, + { + "epoch": 26.40934065934066, + "grad_norm": 13.384390830993652, + "learning_rate": 3.679532967032967e-05, + "loss": 0.3063, + "step": 9613 + }, + { + "epoch": 26.412087912087912, + "grad_norm": 18.159912109375, + "learning_rate": 3.679395604395604e-05, + "loss": 0.9006, + "step": 9614 + }, + { + "epoch": 26.414835164835164, + "grad_norm": 17.434024810791016, + "learning_rate": 3.679258241758242e-05, + "loss": 0.5458, + "step": 9615 + }, + { + "epoch": 26.417582417582416, + "grad_norm": 6.570836067199707, + "learning_rate": 3.679120879120879e-05, + "loss": 0.1337, + "step": 9616 + }, + { + "epoch": 26.42032967032967, + "grad_norm": 10.244503021240234, + "learning_rate": 3.678983516483517e-05, + "loss": 0.288, + "step": 9617 + }, + { + "epoch": 26.423076923076923, + "grad_norm": 18.56777000427246, + "learning_rate": 3.678846153846154e-05, + "loss": 0.6729, + "step": 9618 + }, + { + "epoch": 26.425824175824175, + "grad_norm": 5.794090747833252, + "learning_rate": 3.6787087912087913e-05, + "loss": 0.1785, + "step": 9619 + }, + { + "epoch": 26.428571428571427, + "grad_norm": 8.134588241577148, + "learning_rate": 3.678571428571429e-05, + "loss": 0.1821, + "step": 9620 + }, + { + "epoch": 26.431318681318682, + "grad_norm": 17.985782623291016, + "learning_rate": 3.678434065934066e-05, + "loss": 0.6022, + "step": 9621 + }, + { + "epoch": 26.434065934065934, + "grad_norm": 10.591775894165039, + "learning_rate": 3.678296703296704e-05, + "loss": 0.2499, + "step": 9622 + }, + { + "epoch": 26.436813186813186, + "grad_norm": 17.476839065551758, + "learning_rate": 3.678159340659341e-05, + "loss": 0.4797, + "step": 9623 + }, + { + "epoch": 26.439560439560438, + "grad_norm": 4.025318622589111, + "learning_rate": 3.6780219780219784e-05, + "loss": 0.106, + "step": 9624 + }, + { + "epoch": 26.442307692307693, + "grad_norm": 11.614422798156738, + "learning_rate": 3.677884615384616e-05, + "loss": 0.3609, + "step": 9625 + }, + { + "epoch": 26.445054945054945, + "grad_norm": 12.438979148864746, + "learning_rate": 3.677747252747253e-05, + "loss": 0.5884, + "step": 9626 + }, + { + "epoch": 26.447802197802197, + "grad_norm": 10.268710136413574, + "learning_rate": 3.677609890109891e-05, + "loss": 0.1988, + "step": 9627 + }, + { + "epoch": 26.45054945054945, + "grad_norm": 8.879971504211426, + "learning_rate": 3.677472527472528e-05, + "loss": 0.258, + "step": 9628 + }, + { + "epoch": 26.453296703296704, + "grad_norm": 11.210281372070312, + "learning_rate": 3.677335164835165e-05, + "loss": 0.2202, + "step": 9629 + }, + { + "epoch": 26.456043956043956, + "grad_norm": 7.398625373840332, + "learning_rate": 3.6771978021978024e-05, + "loss": 0.1651, + "step": 9630 + }, + { + "epoch": 26.458791208791208, + "grad_norm": 14.399060249328613, + "learning_rate": 3.6770604395604394e-05, + "loss": 0.5114, + "step": 9631 + }, + { + "epoch": 26.46153846153846, + "grad_norm": 11.396145820617676, + "learning_rate": 3.676923076923077e-05, + "loss": 0.3073, + "step": 9632 + }, + { + "epoch": 26.464285714285715, + "grad_norm": 19.977258682250977, + "learning_rate": 3.676785714285714e-05, + "loss": 0.4605, + "step": 9633 + }, + { + "epoch": 26.467032967032967, + "grad_norm": 8.980561256408691, + "learning_rate": 3.676648351648352e-05, + "loss": 0.2529, + "step": 9634 + }, + { + "epoch": 26.46978021978022, + "grad_norm": 12.751999855041504, + "learning_rate": 3.6765109890109895e-05, + "loss": 0.1668, + "step": 9635 + }, + { + "epoch": 26.47252747252747, + "grad_norm": 19.827722549438477, + "learning_rate": 3.6763736263736265e-05, + "loss": 0.5767, + "step": 9636 + }, + { + "epoch": 26.475274725274726, + "grad_norm": 10.190296173095703, + "learning_rate": 3.676236263736264e-05, + "loss": 0.4161, + "step": 9637 + }, + { + "epoch": 26.478021978021978, + "grad_norm": 14.66604995727539, + "learning_rate": 3.676098901098901e-05, + "loss": 0.3414, + "step": 9638 + }, + { + "epoch": 26.48076923076923, + "grad_norm": 12.744043350219727, + "learning_rate": 3.675961538461539e-05, + "loss": 0.257, + "step": 9639 + }, + { + "epoch": 26.483516483516482, + "grad_norm": 14.638962745666504, + "learning_rate": 3.6758241758241765e-05, + "loss": 0.2764, + "step": 9640 + }, + { + "epoch": 26.486263736263737, + "grad_norm": 13.9072847366333, + "learning_rate": 3.6756868131868135e-05, + "loss": 0.3346, + "step": 9641 + }, + { + "epoch": 26.48901098901099, + "grad_norm": 8.072287559509277, + "learning_rate": 3.675549450549451e-05, + "loss": 0.2296, + "step": 9642 + }, + { + "epoch": 26.49175824175824, + "grad_norm": 6.871133804321289, + "learning_rate": 3.675412087912088e-05, + "loss": 0.2211, + "step": 9643 + }, + { + "epoch": 26.494505494505496, + "grad_norm": 18.02638816833496, + "learning_rate": 3.675274725274725e-05, + "loss": 0.8111, + "step": 9644 + }, + { + "epoch": 26.497252747252748, + "grad_norm": 9.883481979370117, + "learning_rate": 3.675137362637363e-05, + "loss": 0.2289, + "step": 9645 + }, + { + "epoch": 26.5, + "grad_norm": 10.236937522888184, + "learning_rate": 3.675e-05, + "loss": 0.3413, + "step": 9646 + }, + { + "epoch": 26.502747252747252, + "grad_norm": 14.782581329345703, + "learning_rate": 3.6748626373626376e-05, + "loss": 0.4857, + "step": 9647 + }, + { + "epoch": 26.505494505494504, + "grad_norm": 12.241652488708496, + "learning_rate": 3.6747252747252746e-05, + "loss": 0.3655, + "step": 9648 + }, + { + "epoch": 26.50824175824176, + "grad_norm": 15.55265998840332, + "learning_rate": 3.674587912087912e-05, + "loss": 0.3447, + "step": 9649 + }, + { + "epoch": 26.51098901098901, + "grad_norm": 6.3888139724731445, + "learning_rate": 3.67445054945055e-05, + "loss": 0.1554, + "step": 9650 + }, + { + "epoch": 26.513736263736263, + "grad_norm": 8.404879570007324, + "learning_rate": 3.674313186813187e-05, + "loss": 0.277, + "step": 9651 + }, + { + "epoch": 26.516483516483518, + "grad_norm": 11.284627914428711, + "learning_rate": 3.6741758241758246e-05, + "loss": 0.358, + "step": 9652 + }, + { + "epoch": 26.51923076923077, + "grad_norm": 15.73671817779541, + "learning_rate": 3.6740384615384616e-05, + "loss": 0.4762, + "step": 9653 + }, + { + "epoch": 26.521978021978022, + "grad_norm": 9.352349281311035, + "learning_rate": 3.673901098901099e-05, + "loss": 0.2021, + "step": 9654 + }, + { + "epoch": 26.524725274725274, + "grad_norm": 19.57081413269043, + "learning_rate": 3.673763736263737e-05, + "loss": 0.6185, + "step": 9655 + }, + { + "epoch": 26.52747252747253, + "grad_norm": 15.989470481872559, + "learning_rate": 3.673626373626374e-05, + "loss": 0.38, + "step": 9656 + }, + { + "epoch": 26.53021978021978, + "grad_norm": 20.3962345123291, + "learning_rate": 3.6734890109890116e-05, + "loss": 0.3802, + "step": 9657 + }, + { + "epoch": 26.532967032967033, + "grad_norm": 12.742364883422852, + "learning_rate": 3.6733516483516486e-05, + "loss": 0.2624, + "step": 9658 + }, + { + "epoch": 26.535714285714285, + "grad_norm": 11.458913803100586, + "learning_rate": 3.6732142857142856e-05, + "loss": 0.1983, + "step": 9659 + }, + { + "epoch": 26.53846153846154, + "grad_norm": 13.318756103515625, + "learning_rate": 3.673076923076923e-05, + "loss": 0.4123, + "step": 9660 + }, + { + "epoch": 26.541208791208792, + "grad_norm": 12.718971252441406, + "learning_rate": 3.67293956043956e-05, + "loss": 0.4282, + "step": 9661 + }, + { + "epoch": 26.543956043956044, + "grad_norm": 9.541794776916504, + "learning_rate": 3.672802197802198e-05, + "loss": 0.3309, + "step": 9662 + }, + { + "epoch": 26.546703296703296, + "grad_norm": 16.199785232543945, + "learning_rate": 3.672664835164835e-05, + "loss": 0.4798, + "step": 9663 + }, + { + "epoch": 26.54945054945055, + "grad_norm": 8.351095199584961, + "learning_rate": 3.672527472527473e-05, + "loss": 0.2463, + "step": 9664 + }, + { + "epoch": 26.552197802197803, + "grad_norm": 12.398375511169434, + "learning_rate": 3.6723901098901104e-05, + "loss": 0.3825, + "step": 9665 + }, + { + "epoch": 26.554945054945055, + "grad_norm": 6.475156307220459, + "learning_rate": 3.6722527472527474e-05, + "loss": 0.2523, + "step": 9666 + }, + { + "epoch": 26.557692307692307, + "grad_norm": 15.826150894165039, + "learning_rate": 3.672115384615385e-05, + "loss": 0.5567, + "step": 9667 + }, + { + "epoch": 26.560439560439562, + "grad_norm": 12.619484901428223, + "learning_rate": 3.671978021978022e-05, + "loss": 0.3035, + "step": 9668 + }, + { + "epoch": 26.563186813186814, + "grad_norm": 7.243463039398193, + "learning_rate": 3.67184065934066e-05, + "loss": 0.1508, + "step": 9669 + }, + { + "epoch": 26.565934065934066, + "grad_norm": 5.084860324859619, + "learning_rate": 3.671703296703297e-05, + "loss": 0.131, + "step": 9670 + }, + { + "epoch": 26.568681318681318, + "grad_norm": 11.629416465759277, + "learning_rate": 3.6715659340659344e-05, + "loss": 0.3738, + "step": 9671 + }, + { + "epoch": 26.571428571428573, + "grad_norm": 10.802777290344238, + "learning_rate": 3.671428571428572e-05, + "loss": 0.2283, + "step": 9672 + }, + { + "epoch": 26.574175824175825, + "grad_norm": 9.969077110290527, + "learning_rate": 3.671291208791209e-05, + "loss": 0.1433, + "step": 9673 + }, + { + "epoch": 26.576923076923077, + "grad_norm": 8.954876899719238, + "learning_rate": 3.671153846153846e-05, + "loss": 0.2224, + "step": 9674 + }, + { + "epoch": 26.57967032967033, + "grad_norm": 8.882163047790527, + "learning_rate": 3.671016483516483e-05, + "loss": 0.2205, + "step": 9675 + }, + { + "epoch": 26.582417582417584, + "grad_norm": 18.5499324798584, + "learning_rate": 3.670879120879121e-05, + "loss": 0.598, + "step": 9676 + }, + { + "epoch": 26.585164835164836, + "grad_norm": 12.917468070983887, + "learning_rate": 3.6707417582417585e-05, + "loss": 0.4104, + "step": 9677 + }, + { + "epoch": 26.587912087912088, + "grad_norm": 12.375897407531738, + "learning_rate": 3.6706043956043955e-05, + "loss": 0.4743, + "step": 9678 + }, + { + "epoch": 26.59065934065934, + "grad_norm": 17.204708099365234, + "learning_rate": 3.670467032967033e-05, + "loss": 0.6892, + "step": 9679 + }, + { + "epoch": 26.593406593406595, + "grad_norm": 5.678321361541748, + "learning_rate": 3.67032967032967e-05, + "loss": 0.1825, + "step": 9680 + }, + { + "epoch": 26.596153846153847, + "grad_norm": 10.014814376831055, + "learning_rate": 3.670192307692308e-05, + "loss": 0.1934, + "step": 9681 + }, + { + "epoch": 26.5989010989011, + "grad_norm": 11.028564453125, + "learning_rate": 3.6700549450549455e-05, + "loss": 0.1909, + "step": 9682 + }, + { + "epoch": 26.60164835164835, + "grad_norm": 4.683743000030518, + "learning_rate": 3.6699175824175825e-05, + "loss": 0.1261, + "step": 9683 + }, + { + "epoch": 26.604395604395606, + "grad_norm": 23.061559677124023, + "learning_rate": 3.66978021978022e-05, + "loss": 0.9546, + "step": 9684 + }, + { + "epoch": 26.607142857142858, + "grad_norm": 12.897494316101074, + "learning_rate": 3.669642857142857e-05, + "loss": 0.2111, + "step": 9685 + }, + { + "epoch": 26.60989010989011, + "grad_norm": 13.491900444030762, + "learning_rate": 3.669505494505495e-05, + "loss": 0.2938, + "step": 9686 + }, + { + "epoch": 26.61263736263736, + "grad_norm": 19.772876739501953, + "learning_rate": 3.6693681318681325e-05, + "loss": 0.5639, + "step": 9687 + }, + { + "epoch": 26.615384615384617, + "grad_norm": 6.460422992706299, + "learning_rate": 3.6692307692307695e-05, + "loss": 0.1589, + "step": 9688 + }, + { + "epoch": 26.61813186813187, + "grad_norm": 8.688584327697754, + "learning_rate": 3.6690934065934065e-05, + "loss": 0.2077, + "step": 9689 + }, + { + "epoch": 26.62087912087912, + "grad_norm": 14.722671508789062, + "learning_rate": 3.6689560439560435e-05, + "loss": 0.7799, + "step": 9690 + }, + { + "epoch": 26.623626373626372, + "grad_norm": 8.431962966918945, + "learning_rate": 3.668818681318681e-05, + "loss": 0.129, + "step": 9691 + }, + { + "epoch": 26.626373626373628, + "grad_norm": 14.09415054321289, + "learning_rate": 3.668681318681319e-05, + "loss": 0.4818, + "step": 9692 + }, + { + "epoch": 26.62912087912088, + "grad_norm": 10.893895149230957, + "learning_rate": 3.668543956043956e-05, + "loss": 0.2425, + "step": 9693 + }, + { + "epoch": 26.63186813186813, + "grad_norm": 19.581462860107422, + "learning_rate": 3.6684065934065936e-05, + "loss": 0.6387, + "step": 9694 + }, + { + "epoch": 26.634615384615383, + "grad_norm": 11.694140434265137, + "learning_rate": 3.6682692307692306e-05, + "loss": 0.5602, + "step": 9695 + }, + { + "epoch": 26.63736263736264, + "grad_norm": 9.12775993347168, + "learning_rate": 3.668131868131868e-05, + "loss": 0.2071, + "step": 9696 + }, + { + "epoch": 26.64010989010989, + "grad_norm": 2.4980716705322266, + "learning_rate": 3.667994505494506e-05, + "loss": 0.0521, + "step": 9697 + }, + { + "epoch": 26.642857142857142, + "grad_norm": 10.514328002929688, + "learning_rate": 3.667857142857143e-05, + "loss": 0.278, + "step": 9698 + }, + { + "epoch": 26.645604395604394, + "grad_norm": 9.355535507202148, + "learning_rate": 3.6677197802197806e-05, + "loss": 0.1558, + "step": 9699 + }, + { + "epoch": 26.64835164835165, + "grad_norm": 5.988527297973633, + "learning_rate": 3.6675824175824176e-05, + "loss": 0.1067, + "step": 9700 + }, + { + "epoch": 26.6510989010989, + "grad_norm": 10.987343788146973, + "learning_rate": 3.667445054945055e-05, + "loss": 0.2308, + "step": 9701 + }, + { + "epoch": 26.653846153846153, + "grad_norm": 8.809089660644531, + "learning_rate": 3.667307692307693e-05, + "loss": 0.1804, + "step": 9702 + }, + { + "epoch": 26.656593406593405, + "grad_norm": 20.586688995361328, + "learning_rate": 3.66717032967033e-05, + "loss": 0.5366, + "step": 9703 + }, + { + "epoch": 26.65934065934066, + "grad_norm": 17.04631996154785, + "learning_rate": 3.667032967032967e-05, + "loss": 0.3831, + "step": 9704 + }, + { + "epoch": 26.662087912087912, + "grad_norm": 10.348749160766602, + "learning_rate": 3.666895604395604e-05, + "loss": 0.3106, + "step": 9705 + }, + { + "epoch": 26.664835164835164, + "grad_norm": 5.257486343383789, + "learning_rate": 3.666758241758242e-05, + "loss": 0.0617, + "step": 9706 + }, + { + "epoch": 26.667582417582416, + "grad_norm": 13.05841064453125, + "learning_rate": 3.6666208791208794e-05, + "loss": 0.3532, + "step": 9707 + }, + { + "epoch": 26.67032967032967, + "grad_norm": 5.756591796875, + "learning_rate": 3.6664835164835164e-05, + "loss": 0.1006, + "step": 9708 + }, + { + "epoch": 26.673076923076923, + "grad_norm": 7.1250901222229, + "learning_rate": 3.666346153846154e-05, + "loss": 0.1324, + "step": 9709 + }, + { + "epoch": 26.675824175824175, + "grad_norm": 9.501718521118164, + "learning_rate": 3.666208791208791e-05, + "loss": 0.2027, + "step": 9710 + }, + { + "epoch": 26.678571428571427, + "grad_norm": 9.572122573852539, + "learning_rate": 3.666071428571429e-05, + "loss": 0.2014, + "step": 9711 + }, + { + "epoch": 26.681318681318682, + "grad_norm": 9.07001781463623, + "learning_rate": 3.6659340659340664e-05, + "loss": 0.2545, + "step": 9712 + }, + { + "epoch": 26.684065934065934, + "grad_norm": 8.713043212890625, + "learning_rate": 3.6657967032967034e-05, + "loss": 0.3111, + "step": 9713 + }, + { + "epoch": 26.686813186813186, + "grad_norm": 12.21651554107666, + "learning_rate": 3.665659340659341e-05, + "loss": 0.3808, + "step": 9714 + }, + { + "epoch": 26.689560439560438, + "grad_norm": 18.717735290527344, + "learning_rate": 3.665521978021978e-05, + "loss": 0.4971, + "step": 9715 + }, + { + "epoch": 26.692307692307693, + "grad_norm": 12.019463539123535, + "learning_rate": 3.665384615384616e-05, + "loss": 0.2599, + "step": 9716 + }, + { + "epoch": 26.695054945054945, + "grad_norm": 16.30522346496582, + "learning_rate": 3.6652472527472534e-05, + "loss": 0.5744, + "step": 9717 + }, + { + "epoch": 26.697802197802197, + "grad_norm": 8.606179237365723, + "learning_rate": 3.6651098901098904e-05, + "loss": 0.2114, + "step": 9718 + }, + { + "epoch": 26.70054945054945, + "grad_norm": 8.677001953125, + "learning_rate": 3.6649725274725274e-05, + "loss": 0.1456, + "step": 9719 + }, + { + "epoch": 26.703296703296704, + "grad_norm": 7.331374168395996, + "learning_rate": 3.6648351648351644e-05, + "loss": 0.1403, + "step": 9720 + }, + { + "epoch": 26.706043956043956, + "grad_norm": 10.600261688232422, + "learning_rate": 3.664697802197802e-05, + "loss": 0.3301, + "step": 9721 + }, + { + "epoch": 26.708791208791208, + "grad_norm": 12.439352035522461, + "learning_rate": 3.66456043956044e-05, + "loss": 0.3936, + "step": 9722 + }, + { + "epoch": 26.71153846153846, + "grad_norm": 4.708601474761963, + "learning_rate": 3.664423076923077e-05, + "loss": 0.1363, + "step": 9723 + }, + { + "epoch": 26.714285714285715, + "grad_norm": 23.1015567779541, + "learning_rate": 3.6642857142857145e-05, + "loss": 0.9166, + "step": 9724 + }, + { + "epoch": 26.717032967032967, + "grad_norm": 10.079388618469238, + "learning_rate": 3.6641483516483515e-05, + "loss": 0.3213, + "step": 9725 + }, + { + "epoch": 26.71978021978022, + "grad_norm": 10.167990684509277, + "learning_rate": 3.664010989010989e-05, + "loss": 0.1991, + "step": 9726 + }, + { + "epoch": 26.72252747252747, + "grad_norm": 11.231207847595215, + "learning_rate": 3.663873626373627e-05, + "loss": 0.4451, + "step": 9727 + }, + { + "epoch": 26.725274725274726, + "grad_norm": 5.908658027648926, + "learning_rate": 3.663736263736264e-05, + "loss": 0.1185, + "step": 9728 + }, + { + "epoch": 26.728021978021978, + "grad_norm": 23.237154006958008, + "learning_rate": 3.6635989010989015e-05, + "loss": 0.4319, + "step": 9729 + }, + { + "epoch": 26.73076923076923, + "grad_norm": 13.474909782409668, + "learning_rate": 3.6634615384615385e-05, + "loss": 0.4663, + "step": 9730 + }, + { + "epoch": 26.733516483516482, + "grad_norm": 16.52572250366211, + "learning_rate": 3.663324175824176e-05, + "loss": 0.561, + "step": 9731 + }, + { + "epoch": 26.736263736263737, + "grad_norm": 14.041104316711426, + "learning_rate": 3.663186813186814e-05, + "loss": 0.5893, + "step": 9732 + }, + { + "epoch": 26.73901098901099, + "grad_norm": 14.264930725097656, + "learning_rate": 3.663049450549451e-05, + "loss": 0.3762, + "step": 9733 + }, + { + "epoch": 26.74175824175824, + "grad_norm": 11.937067031860352, + "learning_rate": 3.662912087912088e-05, + "loss": 0.3288, + "step": 9734 + }, + { + "epoch": 26.744505494505496, + "grad_norm": 7.58118200302124, + "learning_rate": 3.662774725274725e-05, + "loss": 0.137, + "step": 9735 + }, + { + "epoch": 26.747252747252748, + "grad_norm": 5.362982749938965, + "learning_rate": 3.6626373626373626e-05, + "loss": 0.0913, + "step": 9736 + }, + { + "epoch": 26.75, + "grad_norm": 19.83641242980957, + "learning_rate": 3.6625e-05, + "loss": 0.678, + "step": 9737 + }, + { + "epoch": 26.752747252747252, + "grad_norm": 16.364803314208984, + "learning_rate": 3.662362637362637e-05, + "loss": 0.4537, + "step": 9738 + }, + { + "epoch": 26.755494505494504, + "grad_norm": 6.483348846435547, + "learning_rate": 3.662225274725275e-05, + "loss": 0.1861, + "step": 9739 + }, + { + "epoch": 26.75824175824176, + "grad_norm": 2.3819127082824707, + "learning_rate": 3.662087912087912e-05, + "loss": 0.0503, + "step": 9740 + }, + { + "epoch": 26.76098901098901, + "grad_norm": 7.382034778594971, + "learning_rate": 3.6619505494505496e-05, + "loss": 0.2445, + "step": 9741 + }, + { + "epoch": 26.763736263736263, + "grad_norm": 10.242380142211914, + "learning_rate": 3.661813186813187e-05, + "loss": 0.2618, + "step": 9742 + }, + { + "epoch": 26.766483516483518, + "grad_norm": 23.908571243286133, + "learning_rate": 3.661675824175824e-05, + "loss": 0.9274, + "step": 9743 + }, + { + "epoch": 26.76923076923077, + "grad_norm": 18.442270278930664, + "learning_rate": 3.661538461538462e-05, + "loss": 0.6009, + "step": 9744 + }, + { + "epoch": 26.771978021978022, + "grad_norm": 20.130168914794922, + "learning_rate": 3.661401098901099e-05, + "loss": 0.8325, + "step": 9745 + }, + { + "epoch": 26.774725274725274, + "grad_norm": 17.873661041259766, + "learning_rate": 3.6612637362637367e-05, + "loss": 0.3052, + "step": 9746 + }, + { + "epoch": 26.77747252747253, + "grad_norm": 8.779824256896973, + "learning_rate": 3.6611263736263743e-05, + "loss": 0.2388, + "step": 9747 + }, + { + "epoch": 26.78021978021978, + "grad_norm": 20.50206184387207, + "learning_rate": 3.6609890109890113e-05, + "loss": 0.7514, + "step": 9748 + }, + { + "epoch": 26.782967032967033, + "grad_norm": 22.29668617248535, + "learning_rate": 3.6608516483516483e-05, + "loss": 0.7491, + "step": 9749 + }, + { + "epoch": 26.785714285714285, + "grad_norm": 16.9957332611084, + "learning_rate": 3.6607142857142853e-05, + "loss": 0.4974, + "step": 9750 + }, + { + "epoch": 26.78846153846154, + "grad_norm": 11.738690376281738, + "learning_rate": 3.660576923076923e-05, + "loss": 0.2705, + "step": 9751 + }, + { + "epoch": 26.791208791208792, + "grad_norm": 9.945929527282715, + "learning_rate": 3.660439560439561e-05, + "loss": 0.1883, + "step": 9752 + }, + { + "epoch": 26.793956043956044, + "grad_norm": 10.007089614868164, + "learning_rate": 3.660302197802198e-05, + "loss": 0.1748, + "step": 9753 + }, + { + "epoch": 26.796703296703296, + "grad_norm": 12.825909614562988, + "learning_rate": 3.6601648351648354e-05, + "loss": 0.4353, + "step": 9754 + }, + { + "epoch": 26.79945054945055, + "grad_norm": 6.320801734924316, + "learning_rate": 3.6600274725274724e-05, + "loss": 0.0869, + "step": 9755 + }, + { + "epoch": 26.802197802197803, + "grad_norm": 16.575532913208008, + "learning_rate": 3.65989010989011e-05, + "loss": 0.5353, + "step": 9756 + }, + { + "epoch": 26.804945054945055, + "grad_norm": 12.297894477844238, + "learning_rate": 3.659752747252748e-05, + "loss": 0.3743, + "step": 9757 + }, + { + "epoch": 26.807692307692307, + "grad_norm": 18.13033103942871, + "learning_rate": 3.659615384615385e-05, + "loss": 0.44, + "step": 9758 + }, + { + "epoch": 26.810439560439562, + "grad_norm": 20.003095626831055, + "learning_rate": 3.6594780219780224e-05, + "loss": 0.5748, + "step": 9759 + }, + { + "epoch": 26.813186813186814, + "grad_norm": 11.076376914978027, + "learning_rate": 3.6593406593406594e-05, + "loss": 0.1649, + "step": 9760 + }, + { + "epoch": 26.815934065934066, + "grad_norm": 12.067994117736816, + "learning_rate": 3.659203296703297e-05, + "loss": 0.2847, + "step": 9761 + }, + { + "epoch": 26.818681318681318, + "grad_norm": 12.329846382141113, + "learning_rate": 3.659065934065935e-05, + "loss": 0.212, + "step": 9762 + }, + { + "epoch": 26.821428571428573, + "grad_norm": 20.406095504760742, + "learning_rate": 3.658928571428572e-05, + "loss": 0.694, + "step": 9763 + }, + { + "epoch": 26.824175824175825, + "grad_norm": 20.39051055908203, + "learning_rate": 3.658791208791209e-05, + "loss": 0.7591, + "step": 9764 + }, + { + "epoch": 26.826923076923077, + "grad_norm": 21.453907012939453, + "learning_rate": 3.658653846153846e-05, + "loss": 0.6883, + "step": 9765 + }, + { + "epoch": 26.82967032967033, + "grad_norm": 17.397314071655273, + "learning_rate": 3.6585164835164835e-05, + "loss": 0.4994, + "step": 9766 + }, + { + "epoch": 26.832417582417584, + "grad_norm": 14.606059074401855, + "learning_rate": 3.658379120879121e-05, + "loss": 0.5043, + "step": 9767 + }, + { + "epoch": 26.835164835164836, + "grad_norm": 12.922459602355957, + "learning_rate": 3.658241758241758e-05, + "loss": 0.2365, + "step": 9768 + }, + { + "epoch": 26.837912087912088, + "grad_norm": 17.67158317565918, + "learning_rate": 3.658104395604396e-05, + "loss": 0.5558, + "step": 9769 + }, + { + "epoch": 26.84065934065934, + "grad_norm": 12.66042709350586, + "learning_rate": 3.657967032967033e-05, + "loss": 0.1779, + "step": 9770 + }, + { + "epoch": 26.843406593406595, + "grad_norm": 12.137431144714355, + "learning_rate": 3.6578296703296705e-05, + "loss": 0.2473, + "step": 9771 + }, + { + "epoch": 26.846153846153847, + "grad_norm": 12.499289512634277, + "learning_rate": 3.657692307692308e-05, + "loss": 0.229, + "step": 9772 + }, + { + "epoch": 26.8489010989011, + "grad_norm": 18.0395565032959, + "learning_rate": 3.657554945054945e-05, + "loss": 0.7016, + "step": 9773 + }, + { + "epoch": 26.85164835164835, + "grad_norm": 13.930984497070312, + "learning_rate": 3.657417582417583e-05, + "loss": 0.3556, + "step": 9774 + }, + { + "epoch": 26.854395604395606, + "grad_norm": 6.242995262145996, + "learning_rate": 3.65728021978022e-05, + "loss": 0.145, + "step": 9775 + }, + { + "epoch": 26.857142857142858, + "grad_norm": 8.540786743164062, + "learning_rate": 3.6571428571428576e-05, + "loss": 0.2412, + "step": 9776 + }, + { + "epoch": 26.85989010989011, + "grad_norm": 6.044703006744385, + "learning_rate": 3.657005494505495e-05, + "loss": 0.1743, + "step": 9777 + }, + { + "epoch": 26.86263736263736, + "grad_norm": 18.337604522705078, + "learning_rate": 3.656868131868132e-05, + "loss": 0.6923, + "step": 9778 + }, + { + "epoch": 26.865384615384617, + "grad_norm": 7.66289758682251, + "learning_rate": 3.656730769230769e-05, + "loss": 0.238, + "step": 9779 + }, + { + "epoch": 26.86813186813187, + "grad_norm": 12.678824424743652, + "learning_rate": 3.656593406593406e-05, + "loss": 0.3262, + "step": 9780 + }, + { + "epoch": 26.87087912087912, + "grad_norm": 10.467549324035645, + "learning_rate": 3.656456043956044e-05, + "loss": 0.4029, + "step": 9781 + }, + { + "epoch": 26.873626373626372, + "grad_norm": 11.870549201965332, + "learning_rate": 3.6563186813186816e-05, + "loss": 0.3871, + "step": 9782 + }, + { + "epoch": 26.876373626373628, + "grad_norm": 17.487499237060547, + "learning_rate": 3.6561813186813186e-05, + "loss": 0.4153, + "step": 9783 + }, + { + "epoch": 26.87912087912088, + "grad_norm": 12.630683898925781, + "learning_rate": 3.656043956043956e-05, + "loss": 0.2303, + "step": 9784 + }, + { + "epoch": 26.88186813186813, + "grad_norm": 11.029541969299316, + "learning_rate": 3.655906593406593e-05, + "loss": 0.2912, + "step": 9785 + }, + { + "epoch": 26.884615384615383, + "grad_norm": 13.04067611694336, + "learning_rate": 3.655769230769231e-05, + "loss": 0.3881, + "step": 9786 + }, + { + "epoch": 26.88736263736264, + "grad_norm": 12.324219703674316, + "learning_rate": 3.6556318681318686e-05, + "loss": 0.4489, + "step": 9787 + }, + { + "epoch": 26.89010989010989, + "grad_norm": 15.023966789245605, + "learning_rate": 3.6554945054945056e-05, + "loss": 0.3334, + "step": 9788 + }, + { + "epoch": 26.892857142857142, + "grad_norm": 12.962600708007812, + "learning_rate": 3.655357142857143e-05, + "loss": 0.3807, + "step": 9789 + }, + { + "epoch": 26.895604395604394, + "grad_norm": 20.120080947875977, + "learning_rate": 3.65521978021978e-05, + "loss": 0.7667, + "step": 9790 + }, + { + "epoch": 26.89835164835165, + "grad_norm": 16.652565002441406, + "learning_rate": 3.655082417582418e-05, + "loss": 0.492, + "step": 9791 + }, + { + "epoch": 26.9010989010989, + "grad_norm": 18.548200607299805, + "learning_rate": 3.654945054945055e-05, + "loss": 0.6333, + "step": 9792 + }, + { + "epoch": 26.903846153846153, + "grad_norm": 20.4060115814209, + "learning_rate": 3.654807692307693e-05, + "loss": 0.6023, + "step": 9793 + }, + { + "epoch": 26.906593406593405, + "grad_norm": 17.580034255981445, + "learning_rate": 3.65467032967033e-05, + "loss": 0.415, + "step": 9794 + }, + { + "epoch": 26.90934065934066, + "grad_norm": 10.496953964233398, + "learning_rate": 3.654532967032967e-05, + "loss": 0.499, + "step": 9795 + }, + { + "epoch": 26.912087912087912, + "grad_norm": 12.590564727783203, + "learning_rate": 3.6543956043956044e-05, + "loss": 0.1555, + "step": 9796 + }, + { + "epoch": 26.914835164835164, + "grad_norm": 11.093110084533691, + "learning_rate": 3.654258241758242e-05, + "loss": 0.3102, + "step": 9797 + }, + { + "epoch": 26.917582417582416, + "grad_norm": 11.032681465148926, + "learning_rate": 3.654120879120879e-05, + "loss": 0.3257, + "step": 9798 + }, + { + "epoch": 26.92032967032967, + "grad_norm": 17.99238395690918, + "learning_rate": 3.653983516483517e-05, + "loss": 0.4541, + "step": 9799 + }, + { + "epoch": 26.923076923076923, + "grad_norm": 17.953781127929688, + "learning_rate": 3.653846153846154e-05, + "loss": 0.7423, + "step": 9800 + }, + { + "epoch": 26.925824175824175, + "grad_norm": 18.876588821411133, + "learning_rate": 3.6537087912087914e-05, + "loss": 0.797, + "step": 9801 + }, + { + "epoch": 26.928571428571427, + "grad_norm": 6.125787258148193, + "learning_rate": 3.653571428571429e-05, + "loss": 0.1768, + "step": 9802 + }, + { + "epoch": 26.931318681318682, + "grad_norm": 12.586727142333984, + "learning_rate": 3.653434065934066e-05, + "loss": 0.2177, + "step": 9803 + }, + { + "epoch": 26.934065934065934, + "grad_norm": 11.69239330291748, + "learning_rate": 3.653296703296704e-05, + "loss": 0.2688, + "step": 9804 + }, + { + "epoch": 26.936813186813186, + "grad_norm": 10.063895225524902, + "learning_rate": 3.653159340659341e-05, + "loss": 0.3434, + "step": 9805 + }, + { + "epoch": 26.939560439560438, + "grad_norm": 23.15831184387207, + "learning_rate": 3.6530219780219785e-05, + "loss": 0.7834, + "step": 9806 + }, + { + "epoch": 26.942307692307693, + "grad_norm": 20.45014190673828, + "learning_rate": 3.6528846153846155e-05, + "loss": 0.7475, + "step": 9807 + }, + { + "epoch": 26.945054945054945, + "grad_norm": 16.84454917907715, + "learning_rate": 3.652747252747253e-05, + "loss": 0.3934, + "step": 9808 + }, + { + "epoch": 26.947802197802197, + "grad_norm": 15.00521183013916, + "learning_rate": 3.65260989010989e-05, + "loss": 0.423, + "step": 9809 + }, + { + "epoch": 26.95054945054945, + "grad_norm": 12.141385078430176, + "learning_rate": 3.652472527472527e-05, + "loss": 0.3529, + "step": 9810 + }, + { + "epoch": 26.953296703296704, + "grad_norm": 9.77743148803711, + "learning_rate": 3.652335164835165e-05, + "loss": 0.2164, + "step": 9811 + }, + { + "epoch": 26.956043956043956, + "grad_norm": 7.438374996185303, + "learning_rate": 3.6521978021978025e-05, + "loss": 0.1236, + "step": 9812 + }, + { + "epoch": 26.958791208791208, + "grad_norm": 16.37738037109375, + "learning_rate": 3.6520604395604395e-05, + "loss": 0.5483, + "step": 9813 + }, + { + "epoch": 26.96153846153846, + "grad_norm": 12.132769584655762, + "learning_rate": 3.651923076923077e-05, + "loss": 0.2879, + "step": 9814 + }, + { + "epoch": 26.964285714285715, + "grad_norm": 12.379446983337402, + "learning_rate": 3.651785714285714e-05, + "loss": 0.2317, + "step": 9815 + }, + { + "epoch": 26.967032967032967, + "grad_norm": 14.835348129272461, + "learning_rate": 3.651648351648352e-05, + "loss": 0.4667, + "step": 9816 + }, + { + "epoch": 26.96978021978022, + "grad_norm": 9.115324974060059, + "learning_rate": 3.6515109890109895e-05, + "loss": 0.2678, + "step": 9817 + }, + { + "epoch": 26.97252747252747, + "grad_norm": 6.252261638641357, + "learning_rate": 3.6513736263736265e-05, + "loss": 0.139, + "step": 9818 + }, + { + "epoch": 26.975274725274726, + "grad_norm": 20.280181884765625, + "learning_rate": 3.651236263736264e-05, + "loss": 0.7202, + "step": 9819 + }, + { + "epoch": 26.978021978021978, + "grad_norm": 9.855993270874023, + "learning_rate": 3.651098901098901e-05, + "loss": 0.1335, + "step": 9820 + }, + { + "epoch": 26.98076923076923, + "grad_norm": 9.974821090698242, + "learning_rate": 3.650961538461539e-05, + "loss": 0.367, + "step": 9821 + }, + { + "epoch": 26.983516483516482, + "grad_norm": 25.096820831298828, + "learning_rate": 3.650824175824176e-05, + "loss": 0.7845, + "step": 9822 + }, + { + "epoch": 26.986263736263737, + "grad_norm": 10.258068084716797, + "learning_rate": 3.6506868131868136e-05, + "loss": 0.3705, + "step": 9823 + }, + { + "epoch": 26.98901098901099, + "grad_norm": 11.733412742614746, + "learning_rate": 3.6505494505494506e-05, + "loss": 0.404, + "step": 9824 + }, + { + "epoch": 26.99175824175824, + "grad_norm": 17.18430519104004, + "learning_rate": 3.6504120879120876e-05, + "loss": 0.6507, + "step": 9825 + }, + { + "epoch": 26.994505494505496, + "grad_norm": 19.293445587158203, + "learning_rate": 3.650274725274725e-05, + "loss": 0.9343, + "step": 9826 + }, + { + "epoch": 26.997252747252748, + "grad_norm": 18.14872169494629, + "learning_rate": 3.650137362637363e-05, + "loss": 0.6712, + "step": 9827 + }, + { + "epoch": 27.0, + "grad_norm": 25.659648895263672, + "learning_rate": 3.65e-05, + "loss": 0.2518, + "step": 9828 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.6955922865013774, + "eval_f1": 0.6803358213674107, + "eval_f1_DuraRiadoRio_64x64": 0.43478260869565216, + "eval_f1_Mole_64x64": 0.704119850187266, + "eval_f1_Quebrado_64x64": 0.8439716312056738, + "eval_f1_RiadoRio_64x64": 0.6066838046272494, + "eval_f1_RioFechado_64x64": 0.8121212121212121, + "eval_loss": 1.4697198867797852, + "eval_precision": 0.7674405471323184, + "eval_precision_DuraRiadoRio_64x64": 1.0, + "eval_precision_Mole_64x64": 0.7642276422764228, + "eval_precision_Quebrado_64x64": 0.8623188405797102, + "eval_precision_RiadoRio_64x64": 0.4978902953586498, + "eval_precision_RioFechado_64x64": 0.7127659574468085, + "eval_recall": 0.6953844411498229, + "eval_recall_DuraRiadoRio_64x64": 0.2777777777777778, + "eval_recall_Mole_64x64": 0.6527777777777778, + "eval_recall_Quebrado_64x64": 0.8263888888888888, + "eval_recall_RiadoRio_64x64": 0.7763157894736842, + "eval_recall_RioFechado_64x64": 0.9436619718309859, + "eval_runtime": 1.7442, + "eval_samples_per_second": 416.247, + "eval_steps_per_second": 26.374, + "step": 9828 + }, + { + "epoch": 27.002747252747252, + "grad_norm": 7.732112884521484, + "learning_rate": 3.6498626373626376e-05, + "loss": 0.3467, + "step": 9829 + }, + { + "epoch": 27.005494505494507, + "grad_norm": 3.5897562503814697, + "learning_rate": 3.6497252747252746e-05, + "loss": 0.0657, + "step": 9830 + }, + { + "epoch": 27.00824175824176, + "grad_norm": 6.333550930023193, + "learning_rate": 3.649587912087912e-05, + "loss": 0.1363, + "step": 9831 + }, + { + "epoch": 27.01098901098901, + "grad_norm": 14.959059715270996, + "learning_rate": 3.64945054945055e-05, + "loss": 0.3609, + "step": 9832 + }, + { + "epoch": 27.013736263736263, + "grad_norm": 9.99619197845459, + "learning_rate": 3.649313186813187e-05, + "loss": 0.2461, + "step": 9833 + }, + { + "epoch": 27.016483516483518, + "grad_norm": 7.3258748054504395, + "learning_rate": 3.649175824175825e-05, + "loss": 0.1476, + "step": 9834 + }, + { + "epoch": 27.01923076923077, + "grad_norm": 8.106319427490234, + "learning_rate": 3.649038461538462e-05, + "loss": 0.175, + "step": 9835 + }, + { + "epoch": 27.021978021978022, + "grad_norm": 18.922142028808594, + "learning_rate": 3.6489010989010994e-05, + "loss": 0.5703, + "step": 9836 + }, + { + "epoch": 27.024725274725274, + "grad_norm": 13.1036958694458, + "learning_rate": 3.6487637362637364e-05, + "loss": 0.2976, + "step": 9837 + }, + { + "epoch": 27.02747252747253, + "grad_norm": 11.808691024780273, + "learning_rate": 3.648626373626374e-05, + "loss": 0.4283, + "step": 9838 + }, + { + "epoch": 27.03021978021978, + "grad_norm": 4.500726222991943, + "learning_rate": 3.648489010989011e-05, + "loss": 0.0727, + "step": 9839 + }, + { + "epoch": 27.032967032967033, + "grad_norm": 21.705921173095703, + "learning_rate": 3.648351648351648e-05, + "loss": 0.5277, + "step": 9840 + }, + { + "epoch": 27.035714285714285, + "grad_norm": 13.627009391784668, + "learning_rate": 3.648214285714286e-05, + "loss": 0.2811, + "step": 9841 + }, + { + "epoch": 27.03846153846154, + "grad_norm": 4.704741477966309, + "learning_rate": 3.6480769230769234e-05, + "loss": 0.1281, + "step": 9842 + }, + { + "epoch": 27.041208791208792, + "grad_norm": 7.416924953460693, + "learning_rate": 3.6479395604395604e-05, + "loss": 0.1396, + "step": 9843 + }, + { + "epoch": 27.043956043956044, + "grad_norm": 10.57283878326416, + "learning_rate": 3.647802197802198e-05, + "loss": 0.3271, + "step": 9844 + }, + { + "epoch": 27.046703296703296, + "grad_norm": 9.653741836547852, + "learning_rate": 3.647664835164835e-05, + "loss": 0.3051, + "step": 9845 + }, + { + "epoch": 27.04945054945055, + "grad_norm": 8.692473411560059, + "learning_rate": 3.647527472527473e-05, + "loss": 0.2462, + "step": 9846 + }, + { + "epoch": 27.052197802197803, + "grad_norm": 13.358376502990723, + "learning_rate": 3.6473901098901104e-05, + "loss": 0.6323, + "step": 9847 + }, + { + "epoch": 27.054945054945055, + "grad_norm": 8.538630485534668, + "learning_rate": 3.6472527472527474e-05, + "loss": 0.2375, + "step": 9848 + }, + { + "epoch": 27.057692307692307, + "grad_norm": 15.717301368713379, + "learning_rate": 3.647115384615385e-05, + "loss": 0.4273, + "step": 9849 + }, + { + "epoch": 27.060439560439562, + "grad_norm": 8.260815620422363, + "learning_rate": 3.646978021978022e-05, + "loss": 0.2837, + "step": 9850 + }, + { + "epoch": 27.063186813186814, + "grad_norm": 1.963890790939331, + "learning_rate": 3.64684065934066e-05, + "loss": 0.0448, + "step": 9851 + }, + { + "epoch": 27.065934065934066, + "grad_norm": 10.215816497802734, + "learning_rate": 3.646703296703297e-05, + "loss": 0.2623, + "step": 9852 + }, + { + "epoch": 27.068681318681318, + "grad_norm": 12.303175926208496, + "learning_rate": 3.6465659340659345e-05, + "loss": 0.3098, + "step": 9853 + }, + { + "epoch": 27.071428571428573, + "grad_norm": 8.350207328796387, + "learning_rate": 3.6464285714285715e-05, + "loss": 0.1734, + "step": 9854 + }, + { + "epoch": 27.074175824175825, + "grad_norm": 10.37121295928955, + "learning_rate": 3.6462912087912085e-05, + "loss": 0.2316, + "step": 9855 + }, + { + "epoch": 27.076923076923077, + "grad_norm": 5.530529975891113, + "learning_rate": 3.646153846153846e-05, + "loss": 0.1651, + "step": 9856 + }, + { + "epoch": 27.07967032967033, + "grad_norm": 10.30085563659668, + "learning_rate": 3.646016483516484e-05, + "loss": 0.247, + "step": 9857 + }, + { + "epoch": 27.082417582417584, + "grad_norm": 14.974800109863281, + "learning_rate": 3.645879120879121e-05, + "loss": 0.2627, + "step": 9858 + }, + { + "epoch": 27.085164835164836, + "grad_norm": 15.718395233154297, + "learning_rate": 3.6457417582417585e-05, + "loss": 0.2845, + "step": 9859 + }, + { + "epoch": 27.087912087912088, + "grad_norm": 6.826552867889404, + "learning_rate": 3.6456043956043955e-05, + "loss": 0.1346, + "step": 9860 + }, + { + "epoch": 27.09065934065934, + "grad_norm": 17.127470016479492, + "learning_rate": 3.645467032967033e-05, + "loss": 0.4826, + "step": 9861 + }, + { + "epoch": 27.093406593406595, + "grad_norm": 25.27817726135254, + "learning_rate": 3.645329670329671e-05, + "loss": 0.8207, + "step": 9862 + }, + { + "epoch": 27.096153846153847, + "grad_norm": 12.662299156188965, + "learning_rate": 3.645192307692308e-05, + "loss": 0.2711, + "step": 9863 + }, + { + "epoch": 27.0989010989011, + "grad_norm": 19.80123519897461, + "learning_rate": 3.6450549450549456e-05, + "loss": 0.5547, + "step": 9864 + }, + { + "epoch": 27.10164835164835, + "grad_norm": 6.979974269866943, + "learning_rate": 3.6449175824175826e-05, + "loss": 0.1876, + "step": 9865 + }, + { + "epoch": 27.104395604395606, + "grad_norm": 8.795106887817383, + "learning_rate": 3.64478021978022e-05, + "loss": 0.2057, + "step": 9866 + }, + { + "epoch": 27.107142857142858, + "grad_norm": 6.82505989074707, + "learning_rate": 3.644642857142857e-05, + "loss": 0.1384, + "step": 9867 + }, + { + "epoch": 27.10989010989011, + "grad_norm": 5.657000541687012, + "learning_rate": 3.644505494505495e-05, + "loss": 0.2021, + "step": 9868 + }, + { + "epoch": 27.11263736263736, + "grad_norm": 8.378275871276855, + "learning_rate": 3.644368131868132e-05, + "loss": 0.1428, + "step": 9869 + }, + { + "epoch": 27.115384615384617, + "grad_norm": 19.563892364501953, + "learning_rate": 3.644230769230769e-05, + "loss": 0.579, + "step": 9870 + }, + { + "epoch": 27.11813186813187, + "grad_norm": 12.364229202270508, + "learning_rate": 3.6440934065934066e-05, + "loss": 0.2555, + "step": 9871 + }, + { + "epoch": 27.12087912087912, + "grad_norm": 11.68818473815918, + "learning_rate": 3.643956043956044e-05, + "loss": 0.3885, + "step": 9872 + }, + { + "epoch": 27.123626373626372, + "grad_norm": 7.710341930389404, + "learning_rate": 3.643818681318681e-05, + "loss": 0.1453, + "step": 9873 + }, + { + "epoch": 27.126373626373628, + "grad_norm": 14.758988380432129, + "learning_rate": 3.643681318681319e-05, + "loss": 0.4339, + "step": 9874 + }, + { + "epoch": 27.12912087912088, + "grad_norm": 5.636462688446045, + "learning_rate": 3.643543956043956e-05, + "loss": 0.1218, + "step": 9875 + }, + { + "epoch": 27.13186813186813, + "grad_norm": 6.262296676635742, + "learning_rate": 3.6434065934065937e-05, + "loss": 0.1612, + "step": 9876 + }, + { + "epoch": 27.134615384615383, + "grad_norm": 8.894691467285156, + "learning_rate": 3.643269230769231e-05, + "loss": 0.1468, + "step": 9877 + }, + { + "epoch": 27.13736263736264, + "grad_norm": 8.158132553100586, + "learning_rate": 3.6431318681318683e-05, + "loss": 0.1421, + "step": 9878 + }, + { + "epoch": 27.14010989010989, + "grad_norm": 12.047856330871582, + "learning_rate": 3.642994505494506e-05, + "loss": 0.491, + "step": 9879 + }, + { + "epoch": 27.142857142857142, + "grad_norm": 8.008456230163574, + "learning_rate": 3.642857142857143e-05, + "loss": 0.1528, + "step": 9880 + }, + { + "epoch": 27.145604395604394, + "grad_norm": 9.789299011230469, + "learning_rate": 3.642719780219781e-05, + "loss": 0.3505, + "step": 9881 + }, + { + "epoch": 27.14835164835165, + "grad_norm": 7.3201584815979, + "learning_rate": 3.642582417582418e-05, + "loss": 0.1372, + "step": 9882 + }, + { + "epoch": 27.1510989010989, + "grad_norm": 9.824567794799805, + "learning_rate": 3.6424450549450554e-05, + "loss": 0.2469, + "step": 9883 + }, + { + "epoch": 27.153846153846153, + "grad_norm": 5.414333820343018, + "learning_rate": 3.6423076923076924e-05, + "loss": 0.1172, + "step": 9884 + }, + { + "epoch": 27.156593406593405, + "grad_norm": 13.342567443847656, + "learning_rate": 3.6421703296703294e-05, + "loss": 0.3995, + "step": 9885 + }, + { + "epoch": 27.15934065934066, + "grad_norm": 3.0885672569274902, + "learning_rate": 3.642032967032967e-05, + "loss": 0.0789, + "step": 9886 + }, + { + "epoch": 27.162087912087912, + "grad_norm": 17.171098709106445, + "learning_rate": 3.641895604395605e-05, + "loss": 0.41, + "step": 9887 + }, + { + "epoch": 27.164835164835164, + "grad_norm": 12.276874542236328, + "learning_rate": 3.641758241758242e-05, + "loss": 0.2356, + "step": 9888 + }, + { + "epoch": 27.167582417582416, + "grad_norm": 10.54737377166748, + "learning_rate": 3.6416208791208794e-05, + "loss": 0.2042, + "step": 9889 + }, + { + "epoch": 27.17032967032967, + "grad_norm": 10.832208633422852, + "learning_rate": 3.6414835164835164e-05, + "loss": 0.2629, + "step": 9890 + }, + { + "epoch": 27.173076923076923, + "grad_norm": 11.150413513183594, + "learning_rate": 3.641346153846154e-05, + "loss": 0.1945, + "step": 9891 + }, + { + "epoch": 27.175824175824175, + "grad_norm": 17.15532684326172, + "learning_rate": 3.641208791208792e-05, + "loss": 0.5201, + "step": 9892 + }, + { + "epoch": 27.178571428571427, + "grad_norm": 11.366924285888672, + "learning_rate": 3.641071428571429e-05, + "loss": 0.4362, + "step": 9893 + }, + { + "epoch": 27.181318681318682, + "grad_norm": 6.754191875457764, + "learning_rate": 3.6409340659340665e-05, + "loss": 0.1645, + "step": 9894 + }, + { + "epoch": 27.184065934065934, + "grad_norm": 12.312421798706055, + "learning_rate": 3.6407967032967035e-05, + "loss": 0.4034, + "step": 9895 + }, + { + "epoch": 27.186813186813186, + "grad_norm": 14.191339492797852, + "learning_rate": 3.640659340659341e-05, + "loss": 0.3513, + "step": 9896 + }, + { + "epoch": 27.189560439560438, + "grad_norm": 8.926496505737305, + "learning_rate": 3.640521978021978e-05, + "loss": 0.2015, + "step": 9897 + }, + { + "epoch": 27.192307692307693, + "grad_norm": 14.383660316467285, + "learning_rate": 3.640384615384616e-05, + "loss": 0.4406, + "step": 9898 + }, + { + "epoch": 27.195054945054945, + "grad_norm": 11.966470718383789, + "learning_rate": 3.640247252747253e-05, + "loss": 0.2133, + "step": 9899 + }, + { + "epoch": 27.197802197802197, + "grad_norm": 7.880778789520264, + "learning_rate": 3.64010989010989e-05, + "loss": 0.2503, + "step": 9900 + }, + { + "epoch": 27.20054945054945, + "grad_norm": 12.097557067871094, + "learning_rate": 3.6399725274725275e-05, + "loss": 0.2314, + "step": 9901 + }, + { + "epoch": 27.203296703296704, + "grad_norm": 11.577656745910645, + "learning_rate": 3.6398351648351645e-05, + "loss": 0.2333, + "step": 9902 + }, + { + "epoch": 27.206043956043956, + "grad_norm": 23.190906524658203, + "learning_rate": 3.639697802197802e-05, + "loss": 1.1939, + "step": 9903 + }, + { + "epoch": 27.208791208791208, + "grad_norm": 7.948437690734863, + "learning_rate": 3.63956043956044e-05, + "loss": 0.2092, + "step": 9904 + }, + { + "epoch": 27.21153846153846, + "grad_norm": 12.883320808410645, + "learning_rate": 3.639423076923077e-05, + "loss": 0.406, + "step": 9905 + }, + { + "epoch": 27.214285714285715, + "grad_norm": 14.560802459716797, + "learning_rate": 3.6392857142857146e-05, + "loss": 0.5538, + "step": 9906 + }, + { + "epoch": 27.217032967032967, + "grad_norm": 15.632973670959473, + "learning_rate": 3.6391483516483516e-05, + "loss": 0.4007, + "step": 9907 + }, + { + "epoch": 27.21978021978022, + "grad_norm": 15.168334007263184, + "learning_rate": 3.639010989010989e-05, + "loss": 0.8015, + "step": 9908 + }, + { + "epoch": 27.22252747252747, + "grad_norm": 8.189483642578125, + "learning_rate": 3.638873626373627e-05, + "loss": 0.1131, + "step": 9909 + }, + { + "epoch": 27.225274725274726, + "grad_norm": 10.923386573791504, + "learning_rate": 3.638736263736264e-05, + "loss": 0.327, + "step": 9910 + }, + { + "epoch": 27.228021978021978, + "grad_norm": 14.093984603881836, + "learning_rate": 3.6385989010989016e-05, + "loss": 0.3458, + "step": 9911 + }, + { + "epoch": 27.23076923076923, + "grad_norm": 17.7584171295166, + "learning_rate": 3.6384615384615386e-05, + "loss": 0.5516, + "step": 9912 + }, + { + "epoch": 27.233516483516482, + "grad_norm": 8.962573051452637, + "learning_rate": 3.638324175824176e-05, + "loss": 0.1546, + "step": 9913 + }, + { + "epoch": 27.236263736263737, + "grad_norm": 9.48843765258789, + "learning_rate": 3.638186813186813e-05, + "loss": 0.2469, + "step": 9914 + }, + { + "epoch": 27.23901098901099, + "grad_norm": 11.262405395507812, + "learning_rate": 3.63804945054945e-05, + "loss": 0.2426, + "step": 9915 + }, + { + "epoch": 27.24175824175824, + "grad_norm": 8.77823257446289, + "learning_rate": 3.637912087912088e-05, + "loss": 0.2117, + "step": 9916 + }, + { + "epoch": 27.244505494505493, + "grad_norm": 15.034404754638672, + "learning_rate": 3.637774725274725e-05, + "loss": 0.4072, + "step": 9917 + }, + { + "epoch": 27.247252747252748, + "grad_norm": 22.196243286132812, + "learning_rate": 3.6376373626373626e-05, + "loss": 0.8213, + "step": 9918 + }, + { + "epoch": 27.25, + "grad_norm": 14.984221458435059, + "learning_rate": 3.6375e-05, + "loss": 0.553, + "step": 9919 + }, + { + "epoch": 27.252747252747252, + "grad_norm": 6.059372425079346, + "learning_rate": 3.637362637362637e-05, + "loss": 0.0715, + "step": 9920 + }, + { + "epoch": 27.255494505494504, + "grad_norm": 8.028680801391602, + "learning_rate": 3.637225274725275e-05, + "loss": 0.1433, + "step": 9921 + }, + { + "epoch": 27.25824175824176, + "grad_norm": 14.461670875549316, + "learning_rate": 3.637087912087912e-05, + "loss": 0.4835, + "step": 9922 + }, + { + "epoch": 27.26098901098901, + "grad_norm": 12.327629089355469, + "learning_rate": 3.63695054945055e-05, + "loss": 0.307, + "step": 9923 + }, + { + "epoch": 27.263736263736263, + "grad_norm": 8.970982551574707, + "learning_rate": 3.6368131868131874e-05, + "loss": 0.1208, + "step": 9924 + }, + { + "epoch": 27.266483516483518, + "grad_norm": 16.0908203125, + "learning_rate": 3.6366758241758244e-05, + "loss": 0.9535, + "step": 9925 + }, + { + "epoch": 27.26923076923077, + "grad_norm": 8.814960479736328, + "learning_rate": 3.636538461538462e-05, + "loss": 0.185, + "step": 9926 + }, + { + "epoch": 27.271978021978022, + "grad_norm": 9.019630432128906, + "learning_rate": 3.636401098901099e-05, + "loss": 0.3384, + "step": 9927 + }, + { + "epoch": 27.274725274725274, + "grad_norm": 11.407771110534668, + "learning_rate": 3.636263736263737e-05, + "loss": 0.3191, + "step": 9928 + }, + { + "epoch": 27.27747252747253, + "grad_norm": 14.611776351928711, + "learning_rate": 3.636126373626374e-05, + "loss": 0.4329, + "step": 9929 + }, + { + "epoch": 27.28021978021978, + "grad_norm": 15.214702606201172, + "learning_rate": 3.635989010989011e-05, + "loss": 0.3891, + "step": 9930 + }, + { + "epoch": 27.282967032967033, + "grad_norm": 10.276212692260742, + "learning_rate": 3.6358516483516484e-05, + "loss": 0.204, + "step": 9931 + }, + { + "epoch": 27.285714285714285, + "grad_norm": 15.685917854309082, + "learning_rate": 3.6357142857142854e-05, + "loss": 0.5183, + "step": 9932 + }, + { + "epoch": 27.28846153846154, + "grad_norm": 10.778517723083496, + "learning_rate": 3.635576923076923e-05, + "loss": 0.2766, + "step": 9933 + }, + { + "epoch": 27.291208791208792, + "grad_norm": 12.225455284118652, + "learning_rate": 3.635439560439561e-05, + "loss": 0.3077, + "step": 9934 + }, + { + "epoch": 27.293956043956044, + "grad_norm": 14.595208168029785, + "learning_rate": 3.635302197802198e-05, + "loss": 0.5702, + "step": 9935 + }, + { + "epoch": 27.296703296703296, + "grad_norm": 14.796758651733398, + "learning_rate": 3.6351648351648355e-05, + "loss": 0.4697, + "step": 9936 + }, + { + "epoch": 27.29945054945055, + "grad_norm": 12.786432266235352, + "learning_rate": 3.6350274725274725e-05, + "loss": 0.3069, + "step": 9937 + }, + { + "epoch": 27.302197802197803, + "grad_norm": 13.827219009399414, + "learning_rate": 3.63489010989011e-05, + "loss": 0.3389, + "step": 9938 + }, + { + "epoch": 27.304945054945055, + "grad_norm": 13.839727401733398, + "learning_rate": 3.634752747252748e-05, + "loss": 0.6664, + "step": 9939 + }, + { + "epoch": 27.307692307692307, + "grad_norm": 21.923049926757812, + "learning_rate": 3.634615384615385e-05, + "loss": 0.5368, + "step": 9940 + }, + { + "epoch": 27.310439560439562, + "grad_norm": 16.234569549560547, + "learning_rate": 3.6344780219780225e-05, + "loss": 0.4774, + "step": 9941 + }, + { + "epoch": 27.313186813186814, + "grad_norm": 13.760459899902344, + "learning_rate": 3.6343406593406595e-05, + "loss": 0.3543, + "step": 9942 + }, + { + "epoch": 27.315934065934066, + "grad_norm": 12.565333366394043, + "learning_rate": 3.634203296703297e-05, + "loss": 0.4544, + "step": 9943 + }, + { + "epoch": 27.318681318681318, + "grad_norm": 20.783767700195312, + "learning_rate": 3.634065934065934e-05, + "loss": 0.8711, + "step": 9944 + }, + { + "epoch": 27.321428571428573, + "grad_norm": 21.864503860473633, + "learning_rate": 3.633928571428571e-05, + "loss": 0.6113, + "step": 9945 + }, + { + "epoch": 27.324175824175825, + "grad_norm": 9.215608596801758, + "learning_rate": 3.633791208791209e-05, + "loss": 0.2071, + "step": 9946 + }, + { + "epoch": 27.326923076923077, + "grad_norm": 8.942501068115234, + "learning_rate": 3.633653846153846e-05, + "loss": 0.1834, + "step": 9947 + }, + { + "epoch": 27.32967032967033, + "grad_norm": 11.388312339782715, + "learning_rate": 3.6335164835164835e-05, + "loss": 0.254, + "step": 9948 + }, + { + "epoch": 27.332417582417584, + "grad_norm": 21.435523986816406, + "learning_rate": 3.633379120879121e-05, + "loss": 0.6048, + "step": 9949 + }, + { + "epoch": 27.335164835164836, + "grad_norm": 15.456206321716309, + "learning_rate": 3.633241758241758e-05, + "loss": 0.5621, + "step": 9950 + }, + { + "epoch": 27.337912087912088, + "grad_norm": 12.374911308288574, + "learning_rate": 3.633104395604396e-05, + "loss": 0.415, + "step": 9951 + }, + { + "epoch": 27.34065934065934, + "grad_norm": 7.3902907371521, + "learning_rate": 3.632967032967033e-05, + "loss": 0.2636, + "step": 9952 + }, + { + "epoch": 27.343406593406595, + "grad_norm": 9.959351539611816, + "learning_rate": 3.6328296703296706e-05, + "loss": 0.2866, + "step": 9953 + }, + { + "epoch": 27.346153846153847, + "grad_norm": 9.410449028015137, + "learning_rate": 3.632692307692308e-05, + "loss": 0.2635, + "step": 9954 + }, + { + "epoch": 27.3489010989011, + "grad_norm": 23.955698013305664, + "learning_rate": 3.632554945054945e-05, + "loss": 0.9474, + "step": 9955 + }, + { + "epoch": 27.35164835164835, + "grad_norm": 16.19575309753418, + "learning_rate": 3.632417582417583e-05, + "loss": 0.3593, + "step": 9956 + }, + { + "epoch": 27.354395604395606, + "grad_norm": 13.397210121154785, + "learning_rate": 3.63228021978022e-05, + "loss": 0.5298, + "step": 9957 + }, + { + "epoch": 27.357142857142858, + "grad_norm": 7.037095069885254, + "learning_rate": 3.6321428571428576e-05, + "loss": 0.1503, + "step": 9958 + }, + { + "epoch": 27.35989010989011, + "grad_norm": 15.616595268249512, + "learning_rate": 3.6320054945054946e-05, + "loss": 0.4598, + "step": 9959 + }, + { + "epoch": 27.36263736263736, + "grad_norm": 9.767714500427246, + "learning_rate": 3.6318681318681316e-05, + "loss": 0.2463, + "step": 9960 + }, + { + "epoch": 27.365384615384617, + "grad_norm": 16.47960090637207, + "learning_rate": 3.631730769230769e-05, + "loss": 0.4146, + "step": 9961 + }, + { + "epoch": 27.36813186813187, + "grad_norm": 11.843270301818848, + "learning_rate": 3.631593406593406e-05, + "loss": 0.2917, + "step": 9962 + }, + { + "epoch": 27.37087912087912, + "grad_norm": 14.98618221282959, + "learning_rate": 3.631456043956044e-05, + "loss": 0.3383, + "step": 9963 + }, + { + "epoch": 27.373626373626372, + "grad_norm": 15.678271293640137, + "learning_rate": 3.631318681318682e-05, + "loss": 0.361, + "step": 9964 + }, + { + "epoch": 27.376373626373628, + "grad_norm": 12.268863677978516, + "learning_rate": 3.631181318681319e-05, + "loss": 0.2898, + "step": 9965 + }, + { + "epoch": 27.37912087912088, + "grad_norm": 10.24158763885498, + "learning_rate": 3.6310439560439564e-05, + "loss": 0.2154, + "step": 9966 + }, + { + "epoch": 27.38186813186813, + "grad_norm": 12.915182113647461, + "learning_rate": 3.6309065934065934e-05, + "loss": 0.2367, + "step": 9967 + }, + { + "epoch": 27.384615384615383, + "grad_norm": 11.135976791381836, + "learning_rate": 3.630769230769231e-05, + "loss": 0.4029, + "step": 9968 + }, + { + "epoch": 27.38736263736264, + "grad_norm": 14.566719055175781, + "learning_rate": 3.630631868131869e-05, + "loss": 0.4013, + "step": 9969 + }, + { + "epoch": 27.39010989010989, + "grad_norm": 20.682313919067383, + "learning_rate": 3.630494505494506e-05, + "loss": 0.4458, + "step": 9970 + }, + { + "epoch": 27.392857142857142, + "grad_norm": 18.70482635498047, + "learning_rate": 3.6303571428571434e-05, + "loss": 0.5854, + "step": 9971 + }, + { + "epoch": 27.395604395604394, + "grad_norm": 16.11227798461914, + "learning_rate": 3.6302197802197804e-05, + "loss": 0.4348, + "step": 9972 + }, + { + "epoch": 27.39835164835165, + "grad_norm": 19.495750427246094, + "learning_rate": 3.6300824175824174e-05, + "loss": 0.7923, + "step": 9973 + }, + { + "epoch": 27.4010989010989, + "grad_norm": 16.825767517089844, + "learning_rate": 3.629945054945055e-05, + "loss": 0.4765, + "step": 9974 + }, + { + "epoch": 27.403846153846153, + "grad_norm": 7.529753684997559, + "learning_rate": 3.629807692307692e-05, + "loss": 0.1796, + "step": 9975 + }, + { + "epoch": 27.406593406593405, + "grad_norm": 16.038747787475586, + "learning_rate": 3.62967032967033e-05, + "loss": 0.3314, + "step": 9976 + }, + { + "epoch": 27.40934065934066, + "grad_norm": 17.39542579650879, + "learning_rate": 3.629532967032967e-05, + "loss": 0.3994, + "step": 9977 + }, + { + "epoch": 27.412087912087912, + "grad_norm": 14.987398147583008, + "learning_rate": 3.6293956043956044e-05, + "loss": 0.3266, + "step": 9978 + }, + { + "epoch": 27.414835164835164, + "grad_norm": 10.96994686126709, + "learning_rate": 3.629258241758242e-05, + "loss": 0.1833, + "step": 9979 + }, + { + "epoch": 27.417582417582416, + "grad_norm": 18.00509262084961, + "learning_rate": 3.629120879120879e-05, + "loss": 0.4606, + "step": 9980 + }, + { + "epoch": 27.42032967032967, + "grad_norm": 11.284506797790527, + "learning_rate": 3.628983516483517e-05, + "loss": 0.309, + "step": 9981 + }, + { + "epoch": 27.423076923076923, + "grad_norm": 17.828350067138672, + "learning_rate": 3.628846153846154e-05, + "loss": 0.5803, + "step": 9982 + }, + { + "epoch": 27.425824175824175, + "grad_norm": 16.61498260498047, + "learning_rate": 3.6287087912087915e-05, + "loss": 0.5005, + "step": 9983 + }, + { + "epoch": 27.428571428571427, + "grad_norm": 8.597978591918945, + "learning_rate": 3.628571428571429e-05, + "loss": 0.1755, + "step": 9984 + }, + { + "epoch": 27.431318681318682, + "grad_norm": 8.656704902648926, + "learning_rate": 3.628434065934066e-05, + "loss": 0.1534, + "step": 9985 + }, + { + "epoch": 27.434065934065934, + "grad_norm": 12.609801292419434, + "learning_rate": 3.628296703296704e-05, + "loss": 0.2257, + "step": 9986 + }, + { + "epoch": 27.436813186813186, + "grad_norm": 11.226791381835938, + "learning_rate": 3.628159340659341e-05, + "loss": 0.3106, + "step": 9987 + }, + { + "epoch": 27.439560439560438, + "grad_norm": 11.238381385803223, + "learning_rate": 3.628021978021978e-05, + "loss": 0.38, + "step": 9988 + }, + { + "epoch": 27.442307692307693, + "grad_norm": 11.287888526916504, + "learning_rate": 3.6278846153846155e-05, + "loss": 0.368, + "step": 9989 + }, + { + "epoch": 27.445054945054945, + "grad_norm": 11.031997680664062, + "learning_rate": 3.6277472527472525e-05, + "loss": 0.2732, + "step": 9990 + }, + { + "epoch": 27.447802197802197, + "grad_norm": 7.19204044342041, + "learning_rate": 3.62760989010989e-05, + "loss": 0.1909, + "step": 9991 + }, + { + "epoch": 27.45054945054945, + "grad_norm": 16.194787979125977, + "learning_rate": 3.627472527472527e-05, + "loss": 0.686, + "step": 9992 + }, + { + "epoch": 27.453296703296704, + "grad_norm": 9.318824768066406, + "learning_rate": 3.627335164835165e-05, + "loss": 0.2048, + "step": 9993 + }, + { + "epoch": 27.456043956043956, + "grad_norm": 10.65080738067627, + "learning_rate": 3.6271978021978026e-05, + "loss": 0.5437, + "step": 9994 + }, + { + "epoch": 27.458791208791208, + "grad_norm": 7.722966194152832, + "learning_rate": 3.6270604395604396e-05, + "loss": 0.1553, + "step": 9995 + }, + { + "epoch": 27.46153846153846, + "grad_norm": 8.179861068725586, + "learning_rate": 3.626923076923077e-05, + "loss": 0.317, + "step": 9996 + }, + { + "epoch": 27.464285714285715, + "grad_norm": 9.257987022399902, + "learning_rate": 3.626785714285714e-05, + "loss": 0.3147, + "step": 9997 + }, + { + "epoch": 27.467032967032967, + "grad_norm": 10.158096313476562, + "learning_rate": 3.626648351648352e-05, + "loss": 0.2652, + "step": 9998 + }, + { + "epoch": 27.46978021978022, + "grad_norm": 18.779420852661133, + "learning_rate": 3.6265109890109896e-05, + "loss": 0.6624, + "step": 9999 + }, + { + "epoch": 27.47252747252747, + "grad_norm": 13.577601432800293, + "learning_rate": 3.6263736263736266e-05, + "loss": 0.2857, + "step": 10000 + }, + { + "epoch": 27.475274725274726, + "grad_norm": 11.258530616760254, + "learning_rate": 3.626236263736264e-05, + "loss": 0.4128, + "step": 10001 + }, + { + "epoch": 27.478021978021978, + "grad_norm": 6.9757490158081055, + "learning_rate": 3.626098901098901e-05, + "loss": 0.1146, + "step": 10002 + }, + { + "epoch": 27.48076923076923, + "grad_norm": 12.926072120666504, + "learning_rate": 3.625961538461538e-05, + "loss": 0.5932, + "step": 10003 + }, + { + "epoch": 27.483516483516482, + "grad_norm": 6.667939186096191, + "learning_rate": 3.625824175824176e-05, + "loss": 0.1619, + "step": 10004 + }, + { + "epoch": 27.486263736263737, + "grad_norm": 12.37983226776123, + "learning_rate": 3.625686813186813e-05, + "loss": 0.5849, + "step": 10005 + }, + { + "epoch": 27.48901098901099, + "grad_norm": 16.109838485717773, + "learning_rate": 3.6255494505494507e-05, + "loss": 0.42, + "step": 10006 + }, + { + "epoch": 27.49175824175824, + "grad_norm": 6.608904838562012, + "learning_rate": 3.6254120879120877e-05, + "loss": 0.1722, + "step": 10007 + }, + { + "epoch": 27.494505494505496, + "grad_norm": 9.461627006530762, + "learning_rate": 3.6252747252747253e-05, + "loss": 0.2417, + "step": 10008 + }, + { + "epoch": 27.497252747252748, + "grad_norm": 9.473798751831055, + "learning_rate": 3.625137362637363e-05, + "loss": 0.1806, + "step": 10009 + }, + { + "epoch": 27.5, + "grad_norm": 10.805136680603027, + "learning_rate": 3.625e-05, + "loss": 0.2371, + "step": 10010 + }, + { + "epoch": 27.502747252747252, + "grad_norm": 15.079334259033203, + "learning_rate": 3.624862637362638e-05, + "loss": 0.3999, + "step": 10011 + }, + { + "epoch": 27.505494505494504, + "grad_norm": 12.1066312789917, + "learning_rate": 3.624725274725275e-05, + "loss": 0.33, + "step": 10012 + }, + { + "epoch": 27.50824175824176, + "grad_norm": 8.338591575622559, + "learning_rate": 3.6245879120879124e-05, + "loss": 0.1795, + "step": 10013 + }, + { + "epoch": 27.51098901098901, + "grad_norm": 13.672590255737305, + "learning_rate": 3.62445054945055e-05, + "loss": 0.5481, + "step": 10014 + }, + { + "epoch": 27.513736263736263, + "grad_norm": 14.714829444885254, + "learning_rate": 3.624313186813187e-05, + "loss": 0.4842, + "step": 10015 + }, + { + "epoch": 27.516483516483518, + "grad_norm": 10.065131187438965, + "learning_rate": 3.624175824175825e-05, + "loss": 0.2122, + "step": 10016 + }, + { + "epoch": 27.51923076923077, + "grad_norm": 17.64122200012207, + "learning_rate": 3.624038461538462e-05, + "loss": 0.6217, + "step": 10017 + }, + { + "epoch": 27.521978021978022, + "grad_norm": 5.3669538497924805, + "learning_rate": 3.623901098901099e-05, + "loss": 0.1459, + "step": 10018 + }, + { + "epoch": 27.524725274725274, + "grad_norm": 3.280381917953491, + "learning_rate": 3.6237637362637364e-05, + "loss": 0.0849, + "step": 10019 + }, + { + "epoch": 27.52747252747253, + "grad_norm": 11.954460144042969, + "learning_rate": 3.6236263736263734e-05, + "loss": 0.4149, + "step": 10020 + }, + { + "epoch": 27.53021978021978, + "grad_norm": 4.567395210266113, + "learning_rate": 3.623489010989011e-05, + "loss": 0.1533, + "step": 10021 + }, + { + "epoch": 27.532967032967033, + "grad_norm": 14.154289245605469, + "learning_rate": 3.623351648351648e-05, + "loss": 0.4677, + "step": 10022 + }, + { + "epoch": 27.535714285714285, + "grad_norm": 4.6236677169799805, + "learning_rate": 3.623214285714286e-05, + "loss": 0.0875, + "step": 10023 + }, + { + "epoch": 27.53846153846154, + "grad_norm": 14.07481575012207, + "learning_rate": 3.6230769230769235e-05, + "loss": 0.4411, + "step": 10024 + }, + { + "epoch": 27.541208791208792, + "grad_norm": 13.715203285217285, + "learning_rate": 3.6229395604395605e-05, + "loss": 0.3268, + "step": 10025 + }, + { + "epoch": 27.543956043956044, + "grad_norm": 14.611964225769043, + "learning_rate": 3.622802197802198e-05, + "loss": 0.6599, + "step": 10026 + }, + { + "epoch": 27.546703296703296, + "grad_norm": 7.133979320526123, + "learning_rate": 3.622664835164835e-05, + "loss": 0.1227, + "step": 10027 + }, + { + "epoch": 27.54945054945055, + "grad_norm": 13.421699523925781, + "learning_rate": 3.622527472527473e-05, + "loss": 0.372, + "step": 10028 + }, + { + "epoch": 27.552197802197803, + "grad_norm": 16.134456634521484, + "learning_rate": 3.6223901098901105e-05, + "loss": 0.2517, + "step": 10029 + }, + { + "epoch": 27.554945054945055, + "grad_norm": 18.63653564453125, + "learning_rate": 3.6222527472527475e-05, + "loss": 0.4949, + "step": 10030 + }, + { + "epoch": 27.557692307692307, + "grad_norm": 12.074524879455566, + "learning_rate": 3.622115384615385e-05, + "loss": 0.1959, + "step": 10031 + }, + { + "epoch": 27.560439560439562, + "grad_norm": 16.742324829101562, + "learning_rate": 3.621978021978022e-05, + "loss": 0.4669, + "step": 10032 + }, + { + "epoch": 27.563186813186814, + "grad_norm": 15.519390106201172, + "learning_rate": 3.621840659340659e-05, + "loss": 0.652, + "step": 10033 + }, + { + "epoch": 27.565934065934066, + "grad_norm": 15.736527442932129, + "learning_rate": 3.621703296703297e-05, + "loss": 0.4512, + "step": 10034 + }, + { + "epoch": 27.568681318681318, + "grad_norm": 19.356035232543945, + "learning_rate": 3.621565934065934e-05, + "loss": 0.6523, + "step": 10035 + }, + { + "epoch": 27.571428571428573, + "grad_norm": 4.688735008239746, + "learning_rate": 3.6214285714285716e-05, + "loss": 0.1011, + "step": 10036 + }, + { + "epoch": 27.574175824175825, + "grad_norm": 7.19173526763916, + "learning_rate": 3.6212912087912086e-05, + "loss": 0.1234, + "step": 10037 + }, + { + "epoch": 27.576923076923077, + "grad_norm": 9.028497695922852, + "learning_rate": 3.621153846153846e-05, + "loss": 0.1891, + "step": 10038 + }, + { + "epoch": 27.57967032967033, + "grad_norm": 4.006039619445801, + "learning_rate": 3.621016483516484e-05, + "loss": 0.1071, + "step": 10039 + }, + { + "epoch": 27.582417582417584, + "grad_norm": 13.14933967590332, + "learning_rate": 3.620879120879121e-05, + "loss": 0.2296, + "step": 10040 + }, + { + "epoch": 27.585164835164836, + "grad_norm": 7.1783037185668945, + "learning_rate": 3.6207417582417586e-05, + "loss": 0.2704, + "step": 10041 + }, + { + "epoch": 27.587912087912088, + "grad_norm": 18.67154312133789, + "learning_rate": 3.6206043956043956e-05, + "loss": 0.6124, + "step": 10042 + }, + { + "epoch": 27.59065934065934, + "grad_norm": 9.546162605285645, + "learning_rate": 3.620467032967033e-05, + "loss": 0.3236, + "step": 10043 + }, + { + "epoch": 27.593406593406595, + "grad_norm": 11.361262321472168, + "learning_rate": 3.620329670329671e-05, + "loss": 0.2313, + "step": 10044 + }, + { + "epoch": 27.596153846153847, + "grad_norm": 14.64783763885498, + "learning_rate": 3.620192307692308e-05, + "loss": 0.6966, + "step": 10045 + }, + { + "epoch": 27.5989010989011, + "grad_norm": 5.980258941650391, + "learning_rate": 3.6200549450549456e-05, + "loss": 0.233, + "step": 10046 + }, + { + "epoch": 27.60164835164835, + "grad_norm": 4.883533477783203, + "learning_rate": 3.6199175824175826e-05, + "loss": 0.0904, + "step": 10047 + }, + { + "epoch": 27.604395604395606, + "grad_norm": 13.867695808410645, + "learning_rate": 3.6197802197802196e-05, + "loss": 0.4111, + "step": 10048 + }, + { + "epoch": 27.607142857142858, + "grad_norm": 14.33853530883789, + "learning_rate": 3.619642857142857e-05, + "loss": 0.4503, + "step": 10049 + }, + { + "epoch": 27.60989010989011, + "grad_norm": 19.227649688720703, + "learning_rate": 3.619505494505494e-05, + "loss": 0.6227, + "step": 10050 + }, + { + "epoch": 27.61263736263736, + "grad_norm": 7.351199626922607, + "learning_rate": 3.619368131868132e-05, + "loss": 0.1453, + "step": 10051 + }, + { + "epoch": 27.615384615384617, + "grad_norm": 13.119966506958008, + "learning_rate": 3.619230769230769e-05, + "loss": 0.4165, + "step": 10052 + }, + { + "epoch": 27.61813186813187, + "grad_norm": 8.31117057800293, + "learning_rate": 3.619093406593407e-05, + "loss": 0.2285, + "step": 10053 + }, + { + "epoch": 27.62087912087912, + "grad_norm": 15.237289428710938, + "learning_rate": 3.6189560439560444e-05, + "loss": 0.6379, + "step": 10054 + }, + { + "epoch": 27.623626373626372, + "grad_norm": 14.777719497680664, + "learning_rate": 3.6188186813186814e-05, + "loss": 0.3801, + "step": 10055 + }, + { + "epoch": 27.626373626373628, + "grad_norm": 12.30472469329834, + "learning_rate": 3.618681318681319e-05, + "loss": 0.4106, + "step": 10056 + }, + { + "epoch": 27.62912087912088, + "grad_norm": 7.3325958251953125, + "learning_rate": 3.618543956043956e-05, + "loss": 0.2241, + "step": 10057 + }, + { + "epoch": 27.63186813186813, + "grad_norm": 18.050546646118164, + "learning_rate": 3.618406593406594e-05, + "loss": 0.7137, + "step": 10058 + }, + { + "epoch": 27.634615384615383, + "grad_norm": 15.495357513427734, + "learning_rate": 3.6182692307692314e-05, + "loss": 0.5024, + "step": 10059 + }, + { + "epoch": 27.63736263736264, + "grad_norm": 10.313262939453125, + "learning_rate": 3.6181318681318684e-05, + "loss": 0.3084, + "step": 10060 + }, + { + "epoch": 27.64010989010989, + "grad_norm": 16.238513946533203, + "learning_rate": 3.617994505494506e-05, + "loss": 0.5016, + "step": 10061 + }, + { + "epoch": 27.642857142857142, + "grad_norm": 11.9266996383667, + "learning_rate": 3.617857142857143e-05, + "loss": 0.2423, + "step": 10062 + }, + { + "epoch": 27.645604395604394, + "grad_norm": 14.70141315460205, + "learning_rate": 3.61771978021978e-05, + "loss": 0.3804, + "step": 10063 + }, + { + "epoch": 27.64835164835165, + "grad_norm": 7.393425941467285, + "learning_rate": 3.617582417582418e-05, + "loss": 0.1756, + "step": 10064 + }, + { + "epoch": 27.6510989010989, + "grad_norm": 15.452702522277832, + "learning_rate": 3.617445054945055e-05, + "loss": 0.5081, + "step": 10065 + }, + { + "epoch": 27.653846153846153, + "grad_norm": 18.076427459716797, + "learning_rate": 3.6173076923076925e-05, + "loss": 0.7199, + "step": 10066 + }, + { + "epoch": 27.656593406593405, + "grad_norm": 20.868080139160156, + "learning_rate": 3.6171703296703295e-05, + "loss": 1.074, + "step": 10067 + }, + { + "epoch": 27.65934065934066, + "grad_norm": 10.21838092803955, + "learning_rate": 3.617032967032967e-05, + "loss": 0.2596, + "step": 10068 + }, + { + "epoch": 27.662087912087912, + "grad_norm": 13.015653610229492, + "learning_rate": 3.616895604395605e-05, + "loss": 0.4705, + "step": 10069 + }, + { + "epoch": 27.664835164835164, + "grad_norm": 17.543922424316406, + "learning_rate": 3.616758241758242e-05, + "loss": 0.4578, + "step": 10070 + }, + { + "epoch": 27.667582417582416, + "grad_norm": 8.33901309967041, + "learning_rate": 3.6166208791208795e-05, + "loss": 0.1864, + "step": 10071 + }, + { + "epoch": 27.67032967032967, + "grad_norm": 3.0980052947998047, + "learning_rate": 3.6164835164835165e-05, + "loss": 0.0529, + "step": 10072 + }, + { + "epoch": 27.673076923076923, + "grad_norm": 10.81517219543457, + "learning_rate": 3.616346153846154e-05, + "loss": 0.3113, + "step": 10073 + }, + { + "epoch": 27.675824175824175, + "grad_norm": 9.595523834228516, + "learning_rate": 3.616208791208792e-05, + "loss": 0.2358, + "step": 10074 + }, + { + "epoch": 27.678571428571427, + "grad_norm": 10.418200492858887, + "learning_rate": 3.616071428571429e-05, + "loss": 0.2375, + "step": 10075 + }, + { + "epoch": 27.681318681318682, + "grad_norm": 12.30338191986084, + "learning_rate": 3.6159340659340665e-05, + "loss": 0.3585, + "step": 10076 + }, + { + "epoch": 27.684065934065934, + "grad_norm": 5.04302453994751, + "learning_rate": 3.6157967032967035e-05, + "loss": 0.1229, + "step": 10077 + }, + { + "epoch": 27.686813186813186, + "grad_norm": 12.881863594055176, + "learning_rate": 3.6156593406593405e-05, + "loss": 0.3999, + "step": 10078 + }, + { + "epoch": 27.689560439560438, + "grad_norm": 8.385943412780762, + "learning_rate": 3.615521978021978e-05, + "loss": 0.3903, + "step": 10079 + }, + { + "epoch": 27.692307692307693, + "grad_norm": 15.348808288574219, + "learning_rate": 3.615384615384615e-05, + "loss": 0.3467, + "step": 10080 + }, + { + "epoch": 27.695054945054945, + "grad_norm": 16.59235191345215, + "learning_rate": 3.615247252747253e-05, + "loss": 0.6731, + "step": 10081 + }, + { + "epoch": 27.697802197802197, + "grad_norm": 10.336043357849121, + "learning_rate": 3.61510989010989e-05, + "loss": 0.3187, + "step": 10082 + }, + { + "epoch": 27.70054945054945, + "grad_norm": 13.704445838928223, + "learning_rate": 3.6149725274725276e-05, + "loss": 0.3382, + "step": 10083 + }, + { + "epoch": 27.703296703296704, + "grad_norm": 11.895407676696777, + "learning_rate": 3.614835164835165e-05, + "loss": 0.2901, + "step": 10084 + }, + { + "epoch": 27.706043956043956, + "grad_norm": 17.668895721435547, + "learning_rate": 3.614697802197802e-05, + "loss": 0.5659, + "step": 10085 + }, + { + "epoch": 27.708791208791208, + "grad_norm": 15.378302574157715, + "learning_rate": 3.61456043956044e-05, + "loss": 0.4682, + "step": 10086 + }, + { + "epoch": 27.71153846153846, + "grad_norm": 9.425179481506348, + "learning_rate": 3.614423076923077e-05, + "loss": 0.3072, + "step": 10087 + }, + { + "epoch": 27.714285714285715, + "grad_norm": 6.895470142364502, + "learning_rate": 3.6142857142857146e-05, + "loss": 0.1587, + "step": 10088 + }, + { + "epoch": 27.717032967032967, + "grad_norm": 12.980820655822754, + "learning_rate": 3.614148351648352e-05, + "loss": 0.3433, + "step": 10089 + }, + { + "epoch": 27.71978021978022, + "grad_norm": 5.5646233558654785, + "learning_rate": 3.614010989010989e-05, + "loss": 0.144, + "step": 10090 + }, + { + "epoch": 27.72252747252747, + "grad_norm": 13.987099647521973, + "learning_rate": 3.613873626373627e-05, + "loss": 0.2873, + "step": 10091 + }, + { + "epoch": 27.725274725274726, + "grad_norm": 6.8032307624816895, + "learning_rate": 3.613736263736264e-05, + "loss": 0.1501, + "step": 10092 + }, + { + "epoch": 27.728021978021978, + "grad_norm": 11.40967845916748, + "learning_rate": 3.613598901098901e-05, + "loss": 0.3809, + "step": 10093 + }, + { + "epoch": 27.73076923076923, + "grad_norm": 6.706883430480957, + "learning_rate": 3.613461538461539e-05, + "loss": 0.2681, + "step": 10094 + }, + { + "epoch": 27.733516483516482, + "grad_norm": 21.021900177001953, + "learning_rate": 3.613324175824176e-05, + "loss": 0.8666, + "step": 10095 + }, + { + "epoch": 27.736263736263737, + "grad_norm": 19.95192527770996, + "learning_rate": 3.6131868131868134e-05, + "loss": 0.4261, + "step": 10096 + }, + { + "epoch": 27.73901098901099, + "grad_norm": 10.979415893554688, + "learning_rate": 3.6130494505494504e-05, + "loss": 0.3636, + "step": 10097 + }, + { + "epoch": 27.74175824175824, + "grad_norm": 17.23504638671875, + "learning_rate": 3.612912087912088e-05, + "loss": 0.5659, + "step": 10098 + }, + { + "epoch": 27.744505494505496, + "grad_norm": 9.563322067260742, + "learning_rate": 3.612774725274726e-05, + "loss": 0.252, + "step": 10099 + }, + { + "epoch": 27.747252747252748, + "grad_norm": 13.677831649780273, + "learning_rate": 3.612637362637363e-05, + "loss": 0.3349, + "step": 10100 + }, + { + "epoch": 27.75, + "grad_norm": 12.877453804016113, + "learning_rate": 3.6125000000000004e-05, + "loss": 0.3468, + "step": 10101 + }, + { + "epoch": 27.752747252747252, + "grad_norm": 6.789028167724609, + "learning_rate": 3.6123626373626374e-05, + "loss": 0.214, + "step": 10102 + }, + { + "epoch": 27.755494505494504, + "grad_norm": 26.709341049194336, + "learning_rate": 3.612225274725275e-05, + "loss": 0.9037, + "step": 10103 + }, + { + "epoch": 27.75824175824176, + "grad_norm": 5.0395283699035645, + "learning_rate": 3.612087912087913e-05, + "loss": 0.1136, + "step": 10104 + }, + { + "epoch": 27.76098901098901, + "grad_norm": 10.69231128692627, + "learning_rate": 3.61195054945055e-05, + "loss": 0.4383, + "step": 10105 + }, + { + "epoch": 27.763736263736263, + "grad_norm": 8.99402141571045, + "learning_rate": 3.6118131868131874e-05, + "loss": 0.1711, + "step": 10106 + }, + { + "epoch": 27.766483516483518, + "grad_norm": 14.128033638000488, + "learning_rate": 3.6116758241758244e-05, + "loss": 0.4791, + "step": 10107 + }, + { + "epoch": 27.76923076923077, + "grad_norm": 16.62894058227539, + "learning_rate": 3.6115384615384614e-05, + "loss": 0.8267, + "step": 10108 + }, + { + "epoch": 27.771978021978022, + "grad_norm": 5.706470966339111, + "learning_rate": 3.611401098901099e-05, + "loss": 0.1822, + "step": 10109 + }, + { + "epoch": 27.774725274725274, + "grad_norm": 17.44046974182129, + "learning_rate": 3.611263736263736e-05, + "loss": 0.6738, + "step": 10110 + }, + { + "epoch": 27.77747252747253, + "grad_norm": 9.632387161254883, + "learning_rate": 3.611126373626374e-05, + "loss": 0.3324, + "step": 10111 + }, + { + "epoch": 27.78021978021978, + "grad_norm": 8.643752098083496, + "learning_rate": 3.610989010989011e-05, + "loss": 0.2941, + "step": 10112 + }, + { + "epoch": 27.782967032967033, + "grad_norm": 12.602219581604004, + "learning_rate": 3.6108516483516485e-05, + "loss": 0.3434, + "step": 10113 + }, + { + "epoch": 27.785714285714285, + "grad_norm": 5.233205318450928, + "learning_rate": 3.610714285714286e-05, + "loss": 0.1132, + "step": 10114 + }, + { + "epoch": 27.78846153846154, + "grad_norm": 14.015498161315918, + "learning_rate": 3.610576923076923e-05, + "loss": 0.4216, + "step": 10115 + }, + { + "epoch": 27.791208791208792, + "grad_norm": 6.217955112457275, + "learning_rate": 3.610439560439561e-05, + "loss": 0.1133, + "step": 10116 + }, + { + "epoch": 27.793956043956044, + "grad_norm": 12.592963218688965, + "learning_rate": 3.610302197802198e-05, + "loss": 0.4672, + "step": 10117 + }, + { + "epoch": 27.796703296703296, + "grad_norm": 9.676267623901367, + "learning_rate": 3.6101648351648355e-05, + "loss": 0.2477, + "step": 10118 + }, + { + "epoch": 27.79945054945055, + "grad_norm": 13.383903503417969, + "learning_rate": 3.610027472527473e-05, + "loss": 0.4259, + "step": 10119 + }, + { + "epoch": 27.802197802197803, + "grad_norm": 14.832396507263184, + "learning_rate": 3.60989010989011e-05, + "loss": 0.7875, + "step": 10120 + }, + { + "epoch": 27.804945054945055, + "grad_norm": 10.7505464553833, + "learning_rate": 3.609752747252748e-05, + "loss": 0.4134, + "step": 10121 + }, + { + "epoch": 27.807692307692307, + "grad_norm": 19.12865447998047, + "learning_rate": 3.609615384615385e-05, + "loss": 0.8411, + "step": 10122 + }, + { + "epoch": 27.810439560439562, + "grad_norm": 8.57884693145752, + "learning_rate": 3.609478021978022e-05, + "loss": 0.1934, + "step": 10123 + }, + { + "epoch": 27.813186813186814, + "grad_norm": 13.793776512145996, + "learning_rate": 3.6093406593406596e-05, + "loss": 0.3413, + "step": 10124 + }, + { + "epoch": 27.815934065934066, + "grad_norm": 8.325299263000488, + "learning_rate": 3.6092032967032966e-05, + "loss": 0.2739, + "step": 10125 + }, + { + "epoch": 27.818681318681318, + "grad_norm": 7.235463619232178, + "learning_rate": 3.609065934065934e-05, + "loss": 0.0859, + "step": 10126 + }, + { + "epoch": 27.821428571428573, + "grad_norm": 10.763140678405762, + "learning_rate": 3.608928571428571e-05, + "loss": 0.3412, + "step": 10127 + }, + { + "epoch": 27.824175824175825, + "grad_norm": 7.095037460327148, + "learning_rate": 3.608791208791209e-05, + "loss": 0.3172, + "step": 10128 + }, + { + "epoch": 27.826923076923077, + "grad_norm": 15.248198509216309, + "learning_rate": 3.608653846153846e-05, + "loss": 0.5171, + "step": 10129 + }, + { + "epoch": 27.82967032967033, + "grad_norm": 9.270771980285645, + "learning_rate": 3.6085164835164836e-05, + "loss": 0.2488, + "step": 10130 + }, + { + "epoch": 27.832417582417584, + "grad_norm": 12.188980102539062, + "learning_rate": 3.608379120879121e-05, + "loss": 0.4463, + "step": 10131 + }, + { + "epoch": 27.835164835164836, + "grad_norm": 21.718320846557617, + "learning_rate": 3.608241758241758e-05, + "loss": 1.0437, + "step": 10132 + }, + { + "epoch": 27.837912087912088, + "grad_norm": 10.24265193939209, + "learning_rate": 3.608104395604396e-05, + "loss": 0.2879, + "step": 10133 + }, + { + "epoch": 27.84065934065934, + "grad_norm": 9.351962089538574, + "learning_rate": 3.607967032967033e-05, + "loss": 0.2577, + "step": 10134 + }, + { + "epoch": 27.843406593406595, + "grad_norm": 5.6029815673828125, + "learning_rate": 3.6078296703296707e-05, + "loss": 0.1302, + "step": 10135 + }, + { + "epoch": 27.846153846153847, + "grad_norm": 16.177427291870117, + "learning_rate": 3.607692307692308e-05, + "loss": 0.5365, + "step": 10136 + }, + { + "epoch": 27.8489010989011, + "grad_norm": 11.005950927734375, + "learning_rate": 3.607554945054945e-05, + "loss": 0.2032, + "step": 10137 + }, + { + "epoch": 27.85164835164835, + "grad_norm": 8.820962905883789, + "learning_rate": 3.6074175824175823e-05, + "loss": 0.1853, + "step": 10138 + }, + { + "epoch": 27.854395604395606, + "grad_norm": 14.788429260253906, + "learning_rate": 3.60728021978022e-05, + "loss": 0.5508, + "step": 10139 + }, + { + "epoch": 27.857142857142858, + "grad_norm": 11.496435165405273, + "learning_rate": 3.607142857142857e-05, + "loss": 0.3563, + "step": 10140 + }, + { + "epoch": 27.85989010989011, + "grad_norm": 17.57969856262207, + "learning_rate": 3.607005494505495e-05, + "loss": 0.4366, + "step": 10141 + }, + { + "epoch": 27.86263736263736, + "grad_norm": 9.184833526611328, + "learning_rate": 3.606868131868132e-05, + "loss": 0.1873, + "step": 10142 + }, + { + "epoch": 27.865384615384617, + "grad_norm": 7.069273471832275, + "learning_rate": 3.6067307692307694e-05, + "loss": 0.2493, + "step": 10143 + }, + { + "epoch": 27.86813186813187, + "grad_norm": 8.357882499694824, + "learning_rate": 3.6065934065934064e-05, + "loss": 0.2375, + "step": 10144 + }, + { + "epoch": 27.87087912087912, + "grad_norm": 5.751255035400391, + "learning_rate": 3.606456043956044e-05, + "loss": 0.2032, + "step": 10145 + }, + { + "epoch": 27.873626373626372, + "grad_norm": 8.91563892364502, + "learning_rate": 3.606318681318682e-05, + "loss": 0.2264, + "step": 10146 + }, + { + "epoch": 27.876373626373628, + "grad_norm": 8.069158554077148, + "learning_rate": 3.606181318681319e-05, + "loss": 0.2252, + "step": 10147 + }, + { + "epoch": 27.87912087912088, + "grad_norm": 21.778779983520508, + "learning_rate": 3.6060439560439564e-05, + "loss": 0.8717, + "step": 10148 + }, + { + "epoch": 27.88186813186813, + "grad_norm": 13.398590087890625, + "learning_rate": 3.6059065934065934e-05, + "loss": 0.539, + "step": 10149 + }, + { + "epoch": 27.884615384615383, + "grad_norm": 7.117788791656494, + "learning_rate": 3.605769230769231e-05, + "loss": 0.1359, + "step": 10150 + }, + { + "epoch": 27.88736263736264, + "grad_norm": 13.801362037658691, + "learning_rate": 3.605631868131869e-05, + "loss": 0.2981, + "step": 10151 + }, + { + "epoch": 27.89010989010989, + "grad_norm": 10.35262393951416, + "learning_rate": 3.605494505494506e-05, + "loss": 0.2431, + "step": 10152 + }, + { + "epoch": 27.892857142857142, + "grad_norm": 13.705507278442383, + "learning_rate": 3.605357142857143e-05, + "loss": 0.5395, + "step": 10153 + }, + { + "epoch": 27.895604395604394, + "grad_norm": 17.594398498535156, + "learning_rate": 3.6052197802197805e-05, + "loss": 0.5797, + "step": 10154 + }, + { + "epoch": 27.89835164835165, + "grad_norm": 16.703630447387695, + "learning_rate": 3.6050824175824175e-05, + "loss": 0.4018, + "step": 10155 + }, + { + "epoch": 27.9010989010989, + "grad_norm": 21.502456665039062, + "learning_rate": 3.604945054945055e-05, + "loss": 0.7933, + "step": 10156 + }, + { + "epoch": 27.903846153846153, + "grad_norm": 20.494020462036133, + "learning_rate": 3.604807692307692e-05, + "loss": 0.6741, + "step": 10157 + }, + { + "epoch": 27.906593406593405, + "grad_norm": 11.090108871459961, + "learning_rate": 3.60467032967033e-05, + "loss": 0.157, + "step": 10158 + }, + { + "epoch": 27.90934065934066, + "grad_norm": 10.028225898742676, + "learning_rate": 3.604532967032967e-05, + "loss": 0.1732, + "step": 10159 + }, + { + "epoch": 27.912087912087912, + "grad_norm": 9.992424011230469, + "learning_rate": 3.6043956043956045e-05, + "loss": 0.1881, + "step": 10160 + }, + { + "epoch": 27.914835164835164, + "grad_norm": 19.129364013671875, + "learning_rate": 3.604258241758242e-05, + "loss": 0.8483, + "step": 10161 + }, + { + "epoch": 27.917582417582416, + "grad_norm": 15.24349594116211, + "learning_rate": 3.604120879120879e-05, + "loss": 0.5839, + "step": 10162 + }, + { + "epoch": 27.92032967032967, + "grad_norm": 8.973248481750488, + "learning_rate": 3.603983516483517e-05, + "loss": 0.3122, + "step": 10163 + }, + { + "epoch": 27.923076923076923, + "grad_norm": 2.5595908164978027, + "learning_rate": 3.603846153846154e-05, + "loss": 0.0564, + "step": 10164 + }, + { + "epoch": 27.925824175824175, + "grad_norm": 16.311595916748047, + "learning_rate": 3.6037087912087916e-05, + "loss": 0.6461, + "step": 10165 + }, + { + "epoch": 27.928571428571427, + "grad_norm": 10.349066734313965, + "learning_rate": 3.603571428571429e-05, + "loss": 0.2408, + "step": 10166 + }, + { + "epoch": 27.931318681318682, + "grad_norm": 16.931419372558594, + "learning_rate": 3.603434065934066e-05, + "loss": 0.5569, + "step": 10167 + }, + { + "epoch": 27.934065934065934, + "grad_norm": 14.325125694274902, + "learning_rate": 3.603296703296703e-05, + "loss": 0.3052, + "step": 10168 + }, + { + "epoch": 27.936813186813186, + "grad_norm": 11.594461441040039, + "learning_rate": 3.60315934065934e-05, + "loss": 0.3082, + "step": 10169 + }, + { + "epoch": 27.939560439560438, + "grad_norm": 19.92701530456543, + "learning_rate": 3.603021978021978e-05, + "loss": 0.7988, + "step": 10170 + }, + { + "epoch": 27.942307692307693, + "grad_norm": 21.369260787963867, + "learning_rate": 3.6028846153846156e-05, + "loss": 0.601, + "step": 10171 + }, + { + "epoch": 27.945054945054945, + "grad_norm": 13.331512451171875, + "learning_rate": 3.6027472527472526e-05, + "loss": 0.4485, + "step": 10172 + }, + { + "epoch": 27.947802197802197, + "grad_norm": 9.262471199035645, + "learning_rate": 3.60260989010989e-05, + "loss": 0.2629, + "step": 10173 + }, + { + "epoch": 27.95054945054945, + "grad_norm": 7.881640434265137, + "learning_rate": 3.602472527472527e-05, + "loss": 0.1286, + "step": 10174 + }, + { + "epoch": 27.953296703296704, + "grad_norm": 7.9886155128479, + "learning_rate": 3.602335164835165e-05, + "loss": 0.1397, + "step": 10175 + }, + { + "epoch": 27.956043956043956, + "grad_norm": 9.21322250366211, + "learning_rate": 3.6021978021978026e-05, + "loss": 0.2547, + "step": 10176 + }, + { + "epoch": 27.958791208791208, + "grad_norm": 8.844331741333008, + "learning_rate": 3.6020604395604396e-05, + "loss": 0.1668, + "step": 10177 + }, + { + "epoch": 27.96153846153846, + "grad_norm": 9.689526557922363, + "learning_rate": 3.601923076923077e-05, + "loss": 0.2149, + "step": 10178 + }, + { + "epoch": 27.964285714285715, + "grad_norm": 8.749919891357422, + "learning_rate": 3.601785714285714e-05, + "loss": 0.2183, + "step": 10179 + }, + { + "epoch": 27.967032967032967, + "grad_norm": 22.443147659301758, + "learning_rate": 3.601648351648352e-05, + "loss": 0.8507, + "step": 10180 + }, + { + "epoch": 27.96978021978022, + "grad_norm": 9.643106460571289, + "learning_rate": 3.60151098901099e-05, + "loss": 0.2019, + "step": 10181 + }, + { + "epoch": 27.97252747252747, + "grad_norm": 8.445186614990234, + "learning_rate": 3.601373626373627e-05, + "loss": 0.1814, + "step": 10182 + }, + { + "epoch": 27.975274725274726, + "grad_norm": 3.702328681945801, + "learning_rate": 3.601236263736264e-05, + "loss": 0.0606, + "step": 10183 + }, + { + "epoch": 27.978021978021978, + "grad_norm": 10.424860000610352, + "learning_rate": 3.601098901098901e-05, + "loss": 0.3361, + "step": 10184 + }, + { + "epoch": 27.98076923076923, + "grad_norm": 6.862601280212402, + "learning_rate": 3.6009615384615384e-05, + "loss": 0.1974, + "step": 10185 + }, + { + "epoch": 27.983516483516482, + "grad_norm": 17.559255599975586, + "learning_rate": 3.600824175824176e-05, + "loss": 0.4892, + "step": 10186 + }, + { + "epoch": 27.986263736263737, + "grad_norm": 9.681272506713867, + "learning_rate": 3.600686813186813e-05, + "loss": 0.3645, + "step": 10187 + }, + { + "epoch": 27.98901098901099, + "grad_norm": 12.495352745056152, + "learning_rate": 3.600549450549451e-05, + "loss": 0.3716, + "step": 10188 + }, + { + "epoch": 27.99175824175824, + "grad_norm": 10.993497848510742, + "learning_rate": 3.600412087912088e-05, + "loss": 0.2225, + "step": 10189 + }, + { + "epoch": 27.994505494505496, + "grad_norm": 10.64615249633789, + "learning_rate": 3.6002747252747254e-05, + "loss": 0.2572, + "step": 10190 + }, + { + "epoch": 27.997252747252748, + "grad_norm": 6.703449249267578, + "learning_rate": 3.600137362637363e-05, + "loss": 0.2068, + "step": 10191 + }, + { + "epoch": 28.0, + "grad_norm": 104.1155014038086, + "learning_rate": 3.6e-05, + "loss": 0.8784, + "step": 10192 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.6391184573002755, + "eval_f1": 0.6291502789908836, + "eval_f1_DuraRiadoRio_64x64": 0.41304347826086957, + "eval_f1_Mole_64x64": 0.5943775100401606, + "eval_f1_Quebrado_64x64": 0.8231884057971014, + "eval_f1_RiadoRio_64x64": 0.5314685314685315, + "eval_f1_RioFechado_64x64": 0.7836734693877551, + "eval_loss": 1.6700365543365479, + "eval_precision": 0.7409641495950601, + "eval_precision_DuraRiadoRio_64x64": 0.95, + "eval_precision_Mole_64x64": 0.7047619047619048, + "eval_precision_Quebrado_64x64": 0.7064676616915423, + "eval_precision_RiadoRio_64x64": 0.41155234657039713, + "eval_precision_RioFechado_64x64": 0.9320388349514563, + "eval_recall": 0.6379890453834116, + "eval_recall_DuraRiadoRio_64x64": 0.2638888888888889, + "eval_recall_Mole_64x64": 0.5138888888888888, + "eval_recall_Quebrado_64x64": 0.9861111111111112, + "eval_recall_RiadoRio_64x64": 0.75, + "eval_recall_RioFechado_64x64": 0.676056338028169, + "eval_runtime": 1.7473, + "eval_samples_per_second": 415.499, + "eval_steps_per_second": 26.326, + "step": 10192 + }, + { + "epoch": 28.002747252747252, + "grad_norm": 14.620924949645996, + "learning_rate": 3.599862637362638e-05, + "loss": 0.5463, + "step": 10193 + }, + { + "epoch": 28.005494505494507, + "grad_norm": 5.221594333648682, + "learning_rate": 3.599725274725275e-05, + "loss": 0.1138, + "step": 10194 + }, + { + "epoch": 28.00824175824176, + "grad_norm": 12.069748878479004, + "learning_rate": 3.5995879120879125e-05, + "loss": 0.2417, + "step": 10195 + }, + { + "epoch": 28.01098901098901, + "grad_norm": 15.550763130187988, + "learning_rate": 3.59945054945055e-05, + "loss": 0.5616, + "step": 10196 + }, + { + "epoch": 28.013736263736263, + "grad_norm": 7.8197407722473145, + "learning_rate": 3.599313186813187e-05, + "loss": 0.3201, + "step": 10197 + }, + { + "epoch": 28.016483516483518, + "grad_norm": 19.83554458618164, + "learning_rate": 3.599175824175824e-05, + "loss": 0.7504, + "step": 10198 + }, + { + "epoch": 28.01923076923077, + "grad_norm": 8.817326545715332, + "learning_rate": 3.599038461538461e-05, + "loss": 0.3717, + "step": 10199 + }, + { + "epoch": 28.021978021978022, + "grad_norm": 18.18480110168457, + "learning_rate": 3.598901098901099e-05, + "loss": 0.5856, + "step": 10200 + }, + { + "epoch": 28.024725274725274, + "grad_norm": 6.225010395050049, + "learning_rate": 3.5987637362637365e-05, + "loss": 0.1816, + "step": 10201 + }, + { + "epoch": 28.02747252747253, + "grad_norm": 17.669403076171875, + "learning_rate": 3.5986263736263735e-05, + "loss": 0.5405, + "step": 10202 + }, + { + "epoch": 28.03021978021978, + "grad_norm": 7.265518665313721, + "learning_rate": 3.598489010989011e-05, + "loss": 0.2536, + "step": 10203 + }, + { + "epoch": 28.032967032967033, + "grad_norm": 12.367027282714844, + "learning_rate": 3.598351648351648e-05, + "loss": 0.2814, + "step": 10204 + }, + { + "epoch": 28.035714285714285, + "grad_norm": 17.880399703979492, + "learning_rate": 3.598214285714286e-05, + "loss": 0.7257, + "step": 10205 + }, + { + "epoch": 28.03846153846154, + "grad_norm": 3.7378578186035156, + "learning_rate": 3.5980769230769235e-05, + "loss": 0.0668, + "step": 10206 + }, + { + "epoch": 28.041208791208792, + "grad_norm": 12.042914390563965, + "learning_rate": 3.5979395604395605e-05, + "loss": 0.3364, + "step": 10207 + }, + { + "epoch": 28.043956043956044, + "grad_norm": 5.383286476135254, + "learning_rate": 3.597802197802198e-05, + "loss": 0.1465, + "step": 10208 + }, + { + "epoch": 28.046703296703296, + "grad_norm": 15.738519668579102, + "learning_rate": 3.597664835164835e-05, + "loss": 0.3836, + "step": 10209 + }, + { + "epoch": 28.04945054945055, + "grad_norm": 11.854079246520996, + "learning_rate": 3.597527472527473e-05, + "loss": 0.6326, + "step": 10210 + }, + { + "epoch": 28.052197802197803, + "grad_norm": 12.852325439453125, + "learning_rate": 3.5973901098901106e-05, + "loss": 0.2829, + "step": 10211 + }, + { + "epoch": 28.054945054945055, + "grad_norm": 11.99965763092041, + "learning_rate": 3.5972527472527476e-05, + "loss": 0.2954, + "step": 10212 + }, + { + "epoch": 28.057692307692307, + "grad_norm": 10.625, + "learning_rate": 3.5971153846153846e-05, + "loss": 0.3459, + "step": 10213 + }, + { + "epoch": 28.060439560439562, + "grad_norm": 8.084039688110352, + "learning_rate": 3.5969780219780216e-05, + "loss": 0.1656, + "step": 10214 + }, + { + "epoch": 28.063186813186814, + "grad_norm": 11.286232948303223, + "learning_rate": 3.596840659340659e-05, + "loss": 0.2717, + "step": 10215 + }, + { + "epoch": 28.065934065934066, + "grad_norm": 11.697070121765137, + "learning_rate": 3.596703296703297e-05, + "loss": 0.2818, + "step": 10216 + }, + { + "epoch": 28.068681318681318, + "grad_norm": 6.001651763916016, + "learning_rate": 3.596565934065934e-05, + "loss": 0.1524, + "step": 10217 + }, + { + "epoch": 28.071428571428573, + "grad_norm": 15.282734870910645, + "learning_rate": 3.5964285714285716e-05, + "loss": 0.6117, + "step": 10218 + }, + { + "epoch": 28.074175824175825, + "grad_norm": 7.188813209533691, + "learning_rate": 3.5962912087912086e-05, + "loss": 0.1613, + "step": 10219 + }, + { + "epoch": 28.076923076923077, + "grad_norm": 16.851659774780273, + "learning_rate": 3.596153846153846e-05, + "loss": 0.4312, + "step": 10220 + }, + { + "epoch": 28.07967032967033, + "grad_norm": 14.885961532592773, + "learning_rate": 3.596016483516484e-05, + "loss": 0.7119, + "step": 10221 + }, + { + "epoch": 28.082417582417584, + "grad_norm": 16.429841995239258, + "learning_rate": 3.595879120879121e-05, + "loss": 0.5912, + "step": 10222 + }, + { + "epoch": 28.085164835164836, + "grad_norm": 15.125107765197754, + "learning_rate": 3.595741758241759e-05, + "loss": 0.4028, + "step": 10223 + }, + { + "epoch": 28.087912087912088, + "grad_norm": 13.150558471679688, + "learning_rate": 3.595604395604396e-05, + "loss": 0.4079, + "step": 10224 + }, + { + "epoch": 28.09065934065934, + "grad_norm": 15.038056373596191, + "learning_rate": 3.5954670329670334e-05, + "loss": 0.4621, + "step": 10225 + }, + { + "epoch": 28.093406593406595, + "grad_norm": 18.534543991088867, + "learning_rate": 3.595329670329671e-05, + "loss": 0.3191, + "step": 10226 + }, + { + "epoch": 28.096153846153847, + "grad_norm": 11.345908164978027, + "learning_rate": 3.595192307692308e-05, + "loss": 0.302, + "step": 10227 + }, + { + "epoch": 28.0989010989011, + "grad_norm": 12.200845718383789, + "learning_rate": 3.595054945054945e-05, + "loss": 0.2167, + "step": 10228 + }, + { + "epoch": 28.10164835164835, + "grad_norm": 12.297248840332031, + "learning_rate": 3.594917582417582e-05, + "loss": 0.565, + "step": 10229 + }, + { + "epoch": 28.104395604395606, + "grad_norm": 9.460098266601562, + "learning_rate": 3.59478021978022e-05, + "loss": 0.1697, + "step": 10230 + }, + { + "epoch": 28.107142857142858, + "grad_norm": 13.3514986038208, + "learning_rate": 3.5946428571428574e-05, + "loss": 0.2992, + "step": 10231 + }, + { + "epoch": 28.10989010989011, + "grad_norm": 9.961359977722168, + "learning_rate": 3.5945054945054944e-05, + "loss": 0.2317, + "step": 10232 + }, + { + "epoch": 28.11263736263736, + "grad_norm": 12.363043785095215, + "learning_rate": 3.594368131868132e-05, + "loss": 0.4018, + "step": 10233 + }, + { + "epoch": 28.115384615384617, + "grad_norm": 7.421289443969727, + "learning_rate": 3.594230769230769e-05, + "loss": 0.1759, + "step": 10234 + }, + { + "epoch": 28.11813186813187, + "grad_norm": 15.013051986694336, + "learning_rate": 3.594093406593407e-05, + "loss": 0.6732, + "step": 10235 + }, + { + "epoch": 28.12087912087912, + "grad_norm": 14.420844078063965, + "learning_rate": 3.5939560439560444e-05, + "loss": 0.4841, + "step": 10236 + }, + { + "epoch": 28.123626373626372, + "grad_norm": 10.200533866882324, + "learning_rate": 3.5938186813186814e-05, + "loss": 0.2048, + "step": 10237 + }, + { + "epoch": 28.126373626373628, + "grad_norm": 17.085012435913086, + "learning_rate": 3.593681318681319e-05, + "loss": 0.6629, + "step": 10238 + }, + { + "epoch": 28.12912087912088, + "grad_norm": 10.073315620422363, + "learning_rate": 3.593543956043956e-05, + "loss": 0.1906, + "step": 10239 + }, + { + "epoch": 28.13186813186813, + "grad_norm": 9.11343002319336, + "learning_rate": 3.593406593406594e-05, + "loss": 0.2599, + "step": 10240 + }, + { + "epoch": 28.134615384615383, + "grad_norm": 23.191682815551758, + "learning_rate": 3.5932692307692315e-05, + "loss": 1.2556, + "step": 10241 + }, + { + "epoch": 28.13736263736264, + "grad_norm": 10.530037879943848, + "learning_rate": 3.5931318681318685e-05, + "loss": 0.2019, + "step": 10242 + }, + { + "epoch": 28.14010989010989, + "grad_norm": 8.592596054077148, + "learning_rate": 3.5929945054945055e-05, + "loss": 0.2323, + "step": 10243 + }, + { + "epoch": 28.142857142857142, + "grad_norm": 8.492413520812988, + "learning_rate": 3.5928571428571425e-05, + "loss": 0.2499, + "step": 10244 + }, + { + "epoch": 28.145604395604394, + "grad_norm": 6.654016494750977, + "learning_rate": 3.59271978021978e-05, + "loss": 0.1987, + "step": 10245 + }, + { + "epoch": 28.14835164835165, + "grad_norm": 9.836711883544922, + "learning_rate": 3.592582417582418e-05, + "loss": 0.2408, + "step": 10246 + }, + { + "epoch": 28.1510989010989, + "grad_norm": 4.970877170562744, + "learning_rate": 3.592445054945055e-05, + "loss": 0.1245, + "step": 10247 + }, + { + "epoch": 28.153846153846153, + "grad_norm": 23.551868438720703, + "learning_rate": 3.5923076923076925e-05, + "loss": 1.0037, + "step": 10248 + }, + { + "epoch": 28.156593406593405, + "grad_norm": 12.661401748657227, + "learning_rate": 3.5921703296703295e-05, + "loss": 0.4658, + "step": 10249 + }, + { + "epoch": 28.15934065934066, + "grad_norm": 22.887407302856445, + "learning_rate": 3.592032967032967e-05, + "loss": 0.6334, + "step": 10250 + }, + { + "epoch": 28.162087912087912, + "grad_norm": 14.57048225402832, + "learning_rate": 3.591895604395605e-05, + "loss": 0.4962, + "step": 10251 + }, + { + "epoch": 28.164835164835164, + "grad_norm": 13.196683883666992, + "learning_rate": 3.591758241758242e-05, + "loss": 0.4628, + "step": 10252 + }, + { + "epoch": 28.167582417582416, + "grad_norm": 12.172648429870605, + "learning_rate": 3.5916208791208796e-05, + "loss": 0.41, + "step": 10253 + }, + { + "epoch": 28.17032967032967, + "grad_norm": 10.082924842834473, + "learning_rate": 3.5914835164835166e-05, + "loss": 0.2414, + "step": 10254 + }, + { + "epoch": 28.173076923076923, + "grad_norm": 7.663496017456055, + "learning_rate": 3.591346153846154e-05, + "loss": 0.2105, + "step": 10255 + }, + { + "epoch": 28.175824175824175, + "grad_norm": 10.9874906539917, + "learning_rate": 3.591208791208792e-05, + "loss": 0.2198, + "step": 10256 + }, + { + "epoch": 28.178571428571427, + "grad_norm": 13.386527061462402, + "learning_rate": 3.591071428571429e-05, + "loss": 0.2991, + "step": 10257 + }, + { + "epoch": 28.181318681318682, + "grad_norm": 11.075968742370605, + "learning_rate": 3.590934065934066e-05, + "loss": 0.2955, + "step": 10258 + }, + { + "epoch": 28.184065934065934, + "grad_norm": 12.172162055969238, + "learning_rate": 3.590796703296703e-05, + "loss": 0.4473, + "step": 10259 + }, + { + "epoch": 28.186813186813186, + "grad_norm": 17.17937660217285, + "learning_rate": 3.5906593406593406e-05, + "loss": 0.6459, + "step": 10260 + }, + { + "epoch": 28.189560439560438, + "grad_norm": 5.818181991577148, + "learning_rate": 3.590521978021978e-05, + "loss": 0.1264, + "step": 10261 + }, + { + "epoch": 28.192307692307693, + "grad_norm": 7.239465236663818, + "learning_rate": 3.590384615384615e-05, + "loss": 0.189, + "step": 10262 + }, + { + "epoch": 28.195054945054945, + "grad_norm": 11.360658645629883, + "learning_rate": 3.590247252747253e-05, + "loss": 0.2828, + "step": 10263 + }, + { + "epoch": 28.197802197802197, + "grad_norm": 14.637805938720703, + "learning_rate": 3.59010989010989e-05, + "loss": 0.434, + "step": 10264 + }, + { + "epoch": 28.20054945054945, + "grad_norm": 5.860729694366455, + "learning_rate": 3.5899725274725277e-05, + "loss": 0.1423, + "step": 10265 + }, + { + "epoch": 28.203296703296704, + "grad_norm": 10.006720542907715, + "learning_rate": 3.589835164835165e-05, + "loss": 0.1965, + "step": 10266 + }, + { + "epoch": 28.206043956043956, + "grad_norm": 14.415104866027832, + "learning_rate": 3.589697802197802e-05, + "loss": 0.3433, + "step": 10267 + }, + { + "epoch": 28.208791208791208, + "grad_norm": 4.811302185058594, + "learning_rate": 3.58956043956044e-05, + "loss": 0.1207, + "step": 10268 + }, + { + "epoch": 28.21153846153846, + "grad_norm": 13.284889221191406, + "learning_rate": 3.589423076923077e-05, + "loss": 0.5352, + "step": 10269 + }, + { + "epoch": 28.214285714285715, + "grad_norm": 8.936640739440918, + "learning_rate": 3.589285714285715e-05, + "loss": 0.1439, + "step": 10270 + }, + { + "epoch": 28.217032967032967, + "grad_norm": 15.797677040100098, + "learning_rate": 3.5891483516483524e-05, + "loss": 0.8341, + "step": 10271 + }, + { + "epoch": 28.21978021978022, + "grad_norm": 9.002609252929688, + "learning_rate": 3.5890109890109894e-05, + "loss": 0.3296, + "step": 10272 + }, + { + "epoch": 28.22252747252747, + "grad_norm": 12.97247314453125, + "learning_rate": 3.5888736263736264e-05, + "loss": 0.5298, + "step": 10273 + }, + { + "epoch": 28.225274725274726, + "grad_norm": 11.819692611694336, + "learning_rate": 3.5887362637362634e-05, + "loss": 0.2093, + "step": 10274 + }, + { + "epoch": 28.228021978021978, + "grad_norm": 7.131650924682617, + "learning_rate": 3.588598901098901e-05, + "loss": 0.251, + "step": 10275 + }, + { + "epoch": 28.23076923076923, + "grad_norm": 12.354632377624512, + "learning_rate": 3.588461538461539e-05, + "loss": 0.348, + "step": 10276 + }, + { + "epoch": 28.233516483516482, + "grad_norm": 10.923727035522461, + "learning_rate": 3.588324175824176e-05, + "loss": 0.3876, + "step": 10277 + }, + { + "epoch": 28.236263736263737, + "grad_norm": 10.836634635925293, + "learning_rate": 3.5881868131868134e-05, + "loss": 0.2024, + "step": 10278 + }, + { + "epoch": 28.23901098901099, + "grad_norm": 16.07442283630371, + "learning_rate": 3.5880494505494504e-05, + "loss": 0.4498, + "step": 10279 + }, + { + "epoch": 28.24175824175824, + "grad_norm": 8.586912155151367, + "learning_rate": 3.587912087912088e-05, + "loss": 0.199, + "step": 10280 + }, + { + "epoch": 28.244505494505493, + "grad_norm": 8.819646835327148, + "learning_rate": 3.587774725274726e-05, + "loss": 0.3306, + "step": 10281 + }, + { + "epoch": 28.247252747252748, + "grad_norm": 15.173517227172852, + "learning_rate": 3.587637362637363e-05, + "loss": 0.5569, + "step": 10282 + }, + { + "epoch": 28.25, + "grad_norm": 18.259679794311523, + "learning_rate": 3.5875000000000005e-05, + "loss": 0.3692, + "step": 10283 + }, + { + "epoch": 28.252747252747252, + "grad_norm": 8.021675109863281, + "learning_rate": 3.5873626373626375e-05, + "loss": 0.0964, + "step": 10284 + }, + { + "epoch": 28.255494505494504, + "grad_norm": 19.810312271118164, + "learning_rate": 3.587225274725275e-05, + "loss": 0.4639, + "step": 10285 + }, + { + "epoch": 28.25824175824176, + "grad_norm": 9.904889106750488, + "learning_rate": 3.587087912087913e-05, + "loss": 0.2096, + "step": 10286 + }, + { + "epoch": 28.26098901098901, + "grad_norm": 10.436646461486816, + "learning_rate": 3.58695054945055e-05, + "loss": 0.2898, + "step": 10287 + }, + { + "epoch": 28.263736263736263, + "grad_norm": 11.544940948486328, + "learning_rate": 3.586813186813187e-05, + "loss": 0.3618, + "step": 10288 + }, + { + "epoch": 28.266483516483518, + "grad_norm": 20.115737915039062, + "learning_rate": 3.586675824175824e-05, + "loss": 0.5271, + "step": 10289 + }, + { + "epoch": 28.26923076923077, + "grad_norm": 19.771936416625977, + "learning_rate": 3.5865384615384615e-05, + "loss": 0.2299, + "step": 10290 + }, + { + "epoch": 28.271978021978022, + "grad_norm": 7.870462417602539, + "learning_rate": 3.586401098901099e-05, + "loss": 0.294, + "step": 10291 + }, + { + "epoch": 28.274725274725274, + "grad_norm": 7.03557014465332, + "learning_rate": 3.586263736263736e-05, + "loss": 0.263, + "step": 10292 + }, + { + "epoch": 28.27747252747253, + "grad_norm": 12.928913116455078, + "learning_rate": 3.586126373626374e-05, + "loss": 0.5916, + "step": 10293 + }, + { + "epoch": 28.28021978021978, + "grad_norm": 13.974520683288574, + "learning_rate": 3.585989010989011e-05, + "loss": 0.2386, + "step": 10294 + }, + { + "epoch": 28.282967032967033, + "grad_norm": 5.864034652709961, + "learning_rate": 3.5858516483516486e-05, + "loss": 0.1149, + "step": 10295 + }, + { + "epoch": 28.285714285714285, + "grad_norm": 10.671887397766113, + "learning_rate": 3.585714285714286e-05, + "loss": 0.1716, + "step": 10296 + }, + { + "epoch": 28.28846153846154, + "grad_norm": 7.74149227142334, + "learning_rate": 3.585576923076923e-05, + "loss": 0.147, + "step": 10297 + }, + { + "epoch": 28.291208791208792, + "grad_norm": 16.53226089477539, + "learning_rate": 3.585439560439561e-05, + "loss": 0.4431, + "step": 10298 + }, + { + "epoch": 28.293956043956044, + "grad_norm": 19.265037536621094, + "learning_rate": 3.585302197802198e-05, + "loss": 0.6684, + "step": 10299 + }, + { + "epoch": 28.296703296703296, + "grad_norm": 3.6794986724853516, + "learning_rate": 3.5851648351648356e-05, + "loss": 0.0493, + "step": 10300 + }, + { + "epoch": 28.29945054945055, + "grad_norm": 6.901312828063965, + "learning_rate": 3.585027472527473e-05, + "loss": 0.1065, + "step": 10301 + }, + { + "epoch": 28.302197802197803, + "grad_norm": 7.159487724304199, + "learning_rate": 3.58489010989011e-05, + "loss": 0.1048, + "step": 10302 + }, + { + "epoch": 28.304945054945055, + "grad_norm": 15.979450225830078, + "learning_rate": 3.584752747252747e-05, + "loss": 0.4609, + "step": 10303 + }, + { + "epoch": 28.307692307692307, + "grad_norm": 5.813874244689941, + "learning_rate": 3.584615384615384e-05, + "loss": 0.1017, + "step": 10304 + }, + { + "epoch": 28.310439560439562, + "grad_norm": 10.842218399047852, + "learning_rate": 3.584478021978022e-05, + "loss": 0.3568, + "step": 10305 + }, + { + "epoch": 28.313186813186814, + "grad_norm": 20.555816650390625, + "learning_rate": 3.5843406593406596e-05, + "loss": 0.5386, + "step": 10306 + }, + { + "epoch": 28.315934065934066, + "grad_norm": 11.526387214660645, + "learning_rate": 3.5842032967032966e-05, + "loss": 0.2732, + "step": 10307 + }, + { + "epoch": 28.318681318681318, + "grad_norm": 7.186716079711914, + "learning_rate": 3.584065934065934e-05, + "loss": 0.17, + "step": 10308 + }, + { + "epoch": 28.321428571428573, + "grad_norm": 10.937382698059082, + "learning_rate": 3.583928571428571e-05, + "loss": 0.3436, + "step": 10309 + }, + { + "epoch": 28.324175824175825, + "grad_norm": 8.812371253967285, + "learning_rate": 3.583791208791209e-05, + "loss": 0.4591, + "step": 10310 + }, + { + "epoch": 28.326923076923077, + "grad_norm": 6.509864807128906, + "learning_rate": 3.583653846153847e-05, + "loss": 0.1294, + "step": 10311 + }, + { + "epoch": 28.32967032967033, + "grad_norm": 14.091096878051758, + "learning_rate": 3.583516483516484e-05, + "loss": 0.442, + "step": 10312 + }, + { + "epoch": 28.332417582417584, + "grad_norm": 5.223963260650635, + "learning_rate": 3.5833791208791214e-05, + "loss": 0.108, + "step": 10313 + }, + { + "epoch": 28.335164835164836, + "grad_norm": 15.431097984313965, + "learning_rate": 3.5832417582417584e-05, + "loss": 0.3657, + "step": 10314 + }, + { + "epoch": 28.337912087912088, + "grad_norm": 11.322135925292969, + "learning_rate": 3.583104395604396e-05, + "loss": 0.323, + "step": 10315 + }, + { + "epoch": 28.34065934065934, + "grad_norm": 4.767606735229492, + "learning_rate": 3.582967032967034e-05, + "loss": 0.1056, + "step": 10316 + }, + { + "epoch": 28.343406593406595, + "grad_norm": 20.399410247802734, + "learning_rate": 3.582829670329671e-05, + "loss": 0.9065, + "step": 10317 + }, + { + "epoch": 28.346153846153847, + "grad_norm": 18.435340881347656, + "learning_rate": 3.582692307692308e-05, + "loss": 0.6909, + "step": 10318 + }, + { + "epoch": 28.3489010989011, + "grad_norm": 23.899450302124023, + "learning_rate": 3.582554945054945e-05, + "loss": 0.9542, + "step": 10319 + }, + { + "epoch": 28.35164835164835, + "grad_norm": 13.7052640914917, + "learning_rate": 3.5824175824175824e-05, + "loss": 0.4419, + "step": 10320 + }, + { + "epoch": 28.354395604395606, + "grad_norm": 11.924777030944824, + "learning_rate": 3.58228021978022e-05, + "loss": 0.3143, + "step": 10321 + }, + { + "epoch": 28.357142857142858, + "grad_norm": 8.881939888000488, + "learning_rate": 3.582142857142857e-05, + "loss": 0.2857, + "step": 10322 + }, + { + "epoch": 28.35989010989011, + "grad_norm": 7.03204345703125, + "learning_rate": 3.582005494505495e-05, + "loss": 0.2219, + "step": 10323 + }, + { + "epoch": 28.36263736263736, + "grad_norm": 16.521602630615234, + "learning_rate": 3.581868131868132e-05, + "loss": 0.5556, + "step": 10324 + }, + { + "epoch": 28.365384615384617, + "grad_norm": 12.867427825927734, + "learning_rate": 3.5817307692307695e-05, + "loss": 0.3584, + "step": 10325 + }, + { + "epoch": 28.36813186813187, + "grad_norm": 7.314250469207764, + "learning_rate": 3.581593406593407e-05, + "loss": 0.1699, + "step": 10326 + }, + { + "epoch": 28.37087912087912, + "grad_norm": 10.894787788391113, + "learning_rate": 3.581456043956044e-05, + "loss": 0.3837, + "step": 10327 + }, + { + "epoch": 28.373626373626372, + "grad_norm": 15.633135795593262, + "learning_rate": 3.581318681318682e-05, + "loss": 0.4105, + "step": 10328 + }, + { + "epoch": 28.376373626373628, + "grad_norm": 11.388879776000977, + "learning_rate": 3.581181318681319e-05, + "loss": 0.2015, + "step": 10329 + }, + { + "epoch": 28.37912087912088, + "grad_norm": 13.80054759979248, + "learning_rate": 3.5810439560439565e-05, + "loss": 0.2684, + "step": 10330 + }, + { + "epoch": 28.38186813186813, + "grad_norm": 21.629579544067383, + "learning_rate": 3.580906593406594e-05, + "loss": 0.7974, + "step": 10331 + }, + { + "epoch": 28.384615384615383, + "grad_norm": 6.529715538024902, + "learning_rate": 3.580769230769231e-05, + "loss": 0.1544, + "step": 10332 + }, + { + "epoch": 28.38736263736264, + "grad_norm": 12.582356452941895, + "learning_rate": 3.580631868131868e-05, + "loss": 0.2848, + "step": 10333 + }, + { + "epoch": 28.39010989010989, + "grad_norm": 21.808700561523438, + "learning_rate": 3.580494505494505e-05, + "loss": 0.8469, + "step": 10334 + }, + { + "epoch": 28.392857142857142, + "grad_norm": 16.33551025390625, + "learning_rate": 3.580357142857143e-05, + "loss": 0.5497, + "step": 10335 + }, + { + "epoch": 28.395604395604394, + "grad_norm": 17.986778259277344, + "learning_rate": 3.5802197802197805e-05, + "loss": 0.3972, + "step": 10336 + }, + { + "epoch": 28.39835164835165, + "grad_norm": 10.639017105102539, + "learning_rate": 3.5800824175824175e-05, + "loss": 0.3567, + "step": 10337 + }, + { + "epoch": 28.4010989010989, + "grad_norm": 9.342557907104492, + "learning_rate": 3.579945054945055e-05, + "loss": 0.2621, + "step": 10338 + }, + { + "epoch": 28.403846153846153, + "grad_norm": 10.496455192565918, + "learning_rate": 3.579807692307692e-05, + "loss": 0.2601, + "step": 10339 + }, + { + "epoch": 28.406593406593405, + "grad_norm": 7.571717262268066, + "learning_rate": 3.57967032967033e-05, + "loss": 0.1588, + "step": 10340 + }, + { + "epoch": 28.40934065934066, + "grad_norm": 5.620889186859131, + "learning_rate": 3.5795329670329676e-05, + "loss": 0.1502, + "step": 10341 + }, + { + "epoch": 28.412087912087912, + "grad_norm": 15.519218444824219, + "learning_rate": 3.5793956043956046e-05, + "loss": 0.37, + "step": 10342 + }, + { + "epoch": 28.414835164835164, + "grad_norm": 5.310215473175049, + "learning_rate": 3.579258241758242e-05, + "loss": 0.1371, + "step": 10343 + }, + { + "epoch": 28.417582417582416, + "grad_norm": 12.552594184875488, + "learning_rate": 3.579120879120879e-05, + "loss": 0.2527, + "step": 10344 + }, + { + "epoch": 28.42032967032967, + "grad_norm": 13.31509780883789, + "learning_rate": 3.578983516483517e-05, + "loss": 0.2999, + "step": 10345 + }, + { + "epoch": 28.423076923076923, + "grad_norm": 5.385605335235596, + "learning_rate": 3.5788461538461546e-05, + "loss": 0.143, + "step": 10346 + }, + { + "epoch": 28.425824175824175, + "grad_norm": 14.268280982971191, + "learning_rate": 3.5787087912087916e-05, + "loss": 0.5718, + "step": 10347 + }, + { + "epoch": 28.428571428571427, + "grad_norm": 21.77399444580078, + "learning_rate": 3.5785714285714286e-05, + "loss": 0.5221, + "step": 10348 + }, + { + "epoch": 28.431318681318682, + "grad_norm": 12.143479347229004, + "learning_rate": 3.5784340659340656e-05, + "loss": 0.388, + "step": 10349 + }, + { + "epoch": 28.434065934065934, + "grad_norm": 16.095849990844727, + "learning_rate": 3.578296703296703e-05, + "loss": 0.3471, + "step": 10350 + }, + { + "epoch": 28.436813186813186, + "grad_norm": 15.555103302001953, + "learning_rate": 3.578159340659341e-05, + "loss": 0.4157, + "step": 10351 + }, + { + "epoch": 28.439560439560438, + "grad_norm": 8.481298446655273, + "learning_rate": 3.578021978021978e-05, + "loss": 0.1805, + "step": 10352 + }, + { + "epoch": 28.442307692307693, + "grad_norm": 9.760584831237793, + "learning_rate": 3.577884615384616e-05, + "loss": 0.1948, + "step": 10353 + }, + { + "epoch": 28.445054945054945, + "grad_norm": 12.716856956481934, + "learning_rate": 3.577747252747253e-05, + "loss": 0.2859, + "step": 10354 + }, + { + "epoch": 28.447802197802197, + "grad_norm": 19.379592895507812, + "learning_rate": 3.5776098901098903e-05, + "loss": 0.4964, + "step": 10355 + }, + { + "epoch": 28.45054945054945, + "grad_norm": 14.041483879089355, + "learning_rate": 3.5774725274725274e-05, + "loss": 0.7664, + "step": 10356 + }, + { + "epoch": 28.453296703296704, + "grad_norm": 9.533820152282715, + "learning_rate": 3.577335164835165e-05, + "loss": 0.3194, + "step": 10357 + }, + { + "epoch": 28.456043956043956, + "grad_norm": 9.763248443603516, + "learning_rate": 3.577197802197803e-05, + "loss": 0.2463, + "step": 10358 + }, + { + "epoch": 28.458791208791208, + "grad_norm": 6.975632667541504, + "learning_rate": 3.57706043956044e-05, + "loss": 0.146, + "step": 10359 + }, + { + "epoch": 28.46153846153846, + "grad_norm": 12.816400527954102, + "learning_rate": 3.5769230769230774e-05, + "loss": 0.4438, + "step": 10360 + }, + { + "epoch": 28.464285714285715, + "grad_norm": 19.732772827148438, + "learning_rate": 3.5767857142857144e-05, + "loss": 0.8274, + "step": 10361 + }, + { + "epoch": 28.467032967032967, + "grad_norm": 16.19778060913086, + "learning_rate": 3.576648351648352e-05, + "loss": 0.3043, + "step": 10362 + }, + { + "epoch": 28.46978021978022, + "grad_norm": 5.107842922210693, + "learning_rate": 3.576510989010989e-05, + "loss": 0.1171, + "step": 10363 + }, + { + "epoch": 28.47252747252747, + "grad_norm": 5.585415840148926, + "learning_rate": 3.576373626373626e-05, + "loss": 0.1115, + "step": 10364 + }, + { + "epoch": 28.475274725274726, + "grad_norm": 6.122196197509766, + "learning_rate": 3.576236263736264e-05, + "loss": 0.1238, + "step": 10365 + }, + { + "epoch": 28.478021978021978, + "grad_norm": 5.372692584991455, + "learning_rate": 3.576098901098901e-05, + "loss": 0.114, + "step": 10366 + }, + { + "epoch": 28.48076923076923, + "grad_norm": 9.785601615905762, + "learning_rate": 3.5759615384615384e-05, + "loss": 0.216, + "step": 10367 + }, + { + "epoch": 28.483516483516482, + "grad_norm": 6.69095516204834, + "learning_rate": 3.575824175824176e-05, + "loss": 0.2357, + "step": 10368 + }, + { + "epoch": 28.486263736263737, + "grad_norm": 11.227344512939453, + "learning_rate": 3.575686813186813e-05, + "loss": 0.2587, + "step": 10369 + }, + { + "epoch": 28.48901098901099, + "grad_norm": 13.020112037658691, + "learning_rate": 3.575549450549451e-05, + "loss": 0.2828, + "step": 10370 + }, + { + "epoch": 28.49175824175824, + "grad_norm": 9.570430755615234, + "learning_rate": 3.575412087912088e-05, + "loss": 0.2484, + "step": 10371 + }, + { + "epoch": 28.494505494505496, + "grad_norm": 10.691680908203125, + "learning_rate": 3.5752747252747255e-05, + "loss": 0.3278, + "step": 10372 + }, + { + "epoch": 28.497252747252748, + "grad_norm": 16.004623413085938, + "learning_rate": 3.575137362637363e-05, + "loss": 0.5015, + "step": 10373 + }, + { + "epoch": 28.5, + "grad_norm": 13.245401382446289, + "learning_rate": 3.575e-05, + "loss": 0.3344, + "step": 10374 + }, + { + "epoch": 28.502747252747252, + "grad_norm": 9.343924522399902, + "learning_rate": 3.574862637362638e-05, + "loss": 0.3114, + "step": 10375 + }, + { + "epoch": 28.505494505494504, + "grad_norm": 13.066669464111328, + "learning_rate": 3.574725274725275e-05, + "loss": 0.3232, + "step": 10376 + }, + { + "epoch": 28.50824175824176, + "grad_norm": 10.595155715942383, + "learning_rate": 3.5745879120879125e-05, + "loss": 0.2927, + "step": 10377 + }, + { + "epoch": 28.51098901098901, + "grad_norm": 16.435941696166992, + "learning_rate": 3.5744505494505495e-05, + "loss": 0.8607, + "step": 10378 + }, + { + "epoch": 28.513736263736263, + "grad_norm": 19.70235824584961, + "learning_rate": 3.5743131868131865e-05, + "loss": 0.5783, + "step": 10379 + }, + { + "epoch": 28.516483516483518, + "grad_norm": 6.563827037811279, + "learning_rate": 3.574175824175824e-05, + "loss": 0.1226, + "step": 10380 + }, + { + "epoch": 28.51923076923077, + "grad_norm": 7.8632121086120605, + "learning_rate": 3.574038461538461e-05, + "loss": 0.1064, + "step": 10381 + }, + { + "epoch": 28.521978021978022, + "grad_norm": 13.108734130859375, + "learning_rate": 3.573901098901099e-05, + "loss": 0.2877, + "step": 10382 + }, + { + "epoch": 28.524725274725274, + "grad_norm": 12.496960639953613, + "learning_rate": 3.5737637362637366e-05, + "loss": 0.4363, + "step": 10383 + }, + { + "epoch": 28.52747252747253, + "grad_norm": 20.126882553100586, + "learning_rate": 3.5736263736263736e-05, + "loss": 0.6434, + "step": 10384 + }, + { + "epoch": 28.53021978021978, + "grad_norm": 9.709920883178711, + "learning_rate": 3.573489010989011e-05, + "loss": 0.175, + "step": 10385 + }, + { + "epoch": 28.532967032967033, + "grad_norm": 11.334794044494629, + "learning_rate": 3.573351648351648e-05, + "loss": 0.3455, + "step": 10386 + }, + { + "epoch": 28.535714285714285, + "grad_norm": 21.891860961914062, + "learning_rate": 3.573214285714286e-05, + "loss": 0.5543, + "step": 10387 + }, + { + "epoch": 28.53846153846154, + "grad_norm": 15.107514381408691, + "learning_rate": 3.5730769230769236e-05, + "loss": 0.3217, + "step": 10388 + }, + { + "epoch": 28.541208791208792, + "grad_norm": 10.170684814453125, + "learning_rate": 3.5729395604395606e-05, + "loss": 0.2764, + "step": 10389 + }, + { + "epoch": 28.543956043956044, + "grad_norm": 7.041165351867676, + "learning_rate": 3.572802197802198e-05, + "loss": 0.1957, + "step": 10390 + }, + { + "epoch": 28.546703296703296, + "grad_norm": 15.74996280670166, + "learning_rate": 3.572664835164835e-05, + "loss": 0.3719, + "step": 10391 + }, + { + "epoch": 28.54945054945055, + "grad_norm": 13.8483304977417, + "learning_rate": 3.572527472527473e-05, + "loss": 0.3699, + "step": 10392 + }, + { + "epoch": 28.552197802197803, + "grad_norm": 11.25035285949707, + "learning_rate": 3.57239010989011e-05, + "loss": 0.2277, + "step": 10393 + }, + { + "epoch": 28.554945054945055, + "grad_norm": 13.742706298828125, + "learning_rate": 3.572252747252747e-05, + "loss": 0.3915, + "step": 10394 + }, + { + "epoch": 28.557692307692307, + "grad_norm": 17.578516006469727, + "learning_rate": 3.5721153846153847e-05, + "loss": 0.6454, + "step": 10395 + }, + { + "epoch": 28.560439560439562, + "grad_norm": 7.857970237731934, + "learning_rate": 3.5719780219780217e-05, + "loss": 0.1978, + "step": 10396 + }, + { + "epoch": 28.563186813186814, + "grad_norm": 8.840560913085938, + "learning_rate": 3.571840659340659e-05, + "loss": 0.3711, + "step": 10397 + }, + { + "epoch": 28.565934065934066, + "grad_norm": 23.854246139526367, + "learning_rate": 3.571703296703297e-05, + "loss": 1.2595, + "step": 10398 + }, + { + "epoch": 28.568681318681318, + "grad_norm": 16.776348114013672, + "learning_rate": 3.571565934065934e-05, + "loss": 0.4144, + "step": 10399 + }, + { + "epoch": 28.571428571428573, + "grad_norm": 8.066123962402344, + "learning_rate": 3.571428571428572e-05, + "loss": 0.2471, + "step": 10400 + }, + { + "epoch": 28.574175824175825, + "grad_norm": 23.277416229248047, + "learning_rate": 3.571291208791209e-05, + "loss": 0.825, + "step": 10401 + }, + { + "epoch": 28.576923076923077, + "grad_norm": 4.9232707023620605, + "learning_rate": 3.5711538461538464e-05, + "loss": 0.0626, + "step": 10402 + }, + { + "epoch": 28.57967032967033, + "grad_norm": 10.75732135772705, + "learning_rate": 3.571016483516484e-05, + "loss": 0.2441, + "step": 10403 + }, + { + "epoch": 28.582417582417584, + "grad_norm": 10.332547187805176, + "learning_rate": 3.570879120879121e-05, + "loss": 0.5149, + "step": 10404 + }, + { + "epoch": 28.585164835164836, + "grad_norm": 11.31084156036377, + "learning_rate": 3.570741758241759e-05, + "loss": 0.3169, + "step": 10405 + }, + { + "epoch": 28.587912087912088, + "grad_norm": 11.820323944091797, + "learning_rate": 3.570604395604396e-05, + "loss": 0.3111, + "step": 10406 + }, + { + "epoch": 28.59065934065934, + "grad_norm": 9.703631401062012, + "learning_rate": 3.5704670329670334e-05, + "loss": 0.3992, + "step": 10407 + }, + { + "epoch": 28.593406593406595, + "grad_norm": 13.925765037536621, + "learning_rate": 3.5703296703296704e-05, + "loss": 0.4801, + "step": 10408 + }, + { + "epoch": 28.596153846153847, + "grad_norm": 9.759407043457031, + "learning_rate": 3.5701923076923074e-05, + "loss": 0.2652, + "step": 10409 + }, + { + "epoch": 28.5989010989011, + "grad_norm": 16.600788116455078, + "learning_rate": 3.570054945054945e-05, + "loss": 0.4596, + "step": 10410 + }, + { + "epoch": 28.60164835164835, + "grad_norm": 19.647817611694336, + "learning_rate": 3.569917582417582e-05, + "loss": 0.3895, + "step": 10411 + }, + { + "epoch": 28.604395604395606, + "grad_norm": 10.094442367553711, + "learning_rate": 3.56978021978022e-05, + "loss": 0.2119, + "step": 10412 + }, + { + "epoch": 28.607142857142858, + "grad_norm": 8.562753677368164, + "learning_rate": 3.5696428571428575e-05, + "loss": 0.2565, + "step": 10413 + }, + { + "epoch": 28.60989010989011, + "grad_norm": 14.079272270202637, + "learning_rate": 3.5695054945054945e-05, + "loss": 0.3706, + "step": 10414 + }, + { + "epoch": 28.61263736263736, + "grad_norm": 4.757502555847168, + "learning_rate": 3.569368131868132e-05, + "loss": 0.1053, + "step": 10415 + }, + { + "epoch": 28.615384615384617, + "grad_norm": 24.10370635986328, + "learning_rate": 3.569230769230769e-05, + "loss": 0.8085, + "step": 10416 + }, + { + "epoch": 28.61813186813187, + "grad_norm": 7.615590572357178, + "learning_rate": 3.569093406593407e-05, + "loss": 0.1685, + "step": 10417 + }, + { + "epoch": 28.62087912087912, + "grad_norm": 16.352596282958984, + "learning_rate": 3.5689560439560445e-05, + "loss": 0.75, + "step": 10418 + }, + { + "epoch": 28.623626373626372, + "grad_norm": 15.134358406066895, + "learning_rate": 3.5688186813186815e-05, + "loss": 0.535, + "step": 10419 + }, + { + "epoch": 28.626373626373628, + "grad_norm": 2.4748830795288086, + "learning_rate": 3.568681318681319e-05, + "loss": 0.0537, + "step": 10420 + }, + { + "epoch": 28.62912087912088, + "grad_norm": 12.319172859191895, + "learning_rate": 3.568543956043956e-05, + "loss": 0.3985, + "step": 10421 + }, + { + "epoch": 28.63186813186813, + "grad_norm": 13.29865837097168, + "learning_rate": 3.568406593406594e-05, + "loss": 0.447, + "step": 10422 + }, + { + "epoch": 28.634615384615383, + "grad_norm": 12.248481750488281, + "learning_rate": 3.568269230769231e-05, + "loss": 0.3083, + "step": 10423 + }, + { + "epoch": 28.63736263736264, + "grad_norm": 7.872884273529053, + "learning_rate": 3.568131868131868e-05, + "loss": 0.2289, + "step": 10424 + }, + { + "epoch": 28.64010989010989, + "grad_norm": 16.16680908203125, + "learning_rate": 3.5679945054945056e-05, + "loss": 0.6863, + "step": 10425 + }, + { + "epoch": 28.642857142857142, + "grad_norm": 11.797372817993164, + "learning_rate": 3.5678571428571426e-05, + "loss": 0.4709, + "step": 10426 + }, + { + "epoch": 28.645604395604394, + "grad_norm": 9.94303035736084, + "learning_rate": 3.56771978021978e-05, + "loss": 0.2632, + "step": 10427 + }, + { + "epoch": 28.64835164835165, + "grad_norm": 6.852181434631348, + "learning_rate": 3.567582417582418e-05, + "loss": 0.1459, + "step": 10428 + }, + { + "epoch": 28.6510989010989, + "grad_norm": 12.20930004119873, + "learning_rate": 3.567445054945055e-05, + "loss": 0.2483, + "step": 10429 + }, + { + "epoch": 28.653846153846153, + "grad_norm": 15.345052719116211, + "learning_rate": 3.5673076923076926e-05, + "loss": 0.3955, + "step": 10430 + }, + { + "epoch": 28.656593406593405, + "grad_norm": 8.971004486083984, + "learning_rate": 3.5671703296703296e-05, + "loss": 0.1667, + "step": 10431 + }, + { + "epoch": 28.65934065934066, + "grad_norm": 15.762032508850098, + "learning_rate": 3.567032967032967e-05, + "loss": 0.3766, + "step": 10432 + }, + { + "epoch": 28.662087912087912, + "grad_norm": 4.885357856750488, + "learning_rate": 3.566895604395605e-05, + "loss": 0.0791, + "step": 10433 + }, + { + "epoch": 28.664835164835164, + "grad_norm": 11.465980529785156, + "learning_rate": 3.566758241758242e-05, + "loss": 0.3556, + "step": 10434 + }, + { + "epoch": 28.667582417582416, + "grad_norm": 23.137248992919922, + "learning_rate": 3.5666208791208796e-05, + "loss": 0.9165, + "step": 10435 + }, + { + "epoch": 28.67032967032967, + "grad_norm": 9.09575080871582, + "learning_rate": 3.5664835164835166e-05, + "loss": 0.2461, + "step": 10436 + }, + { + "epoch": 28.673076923076923, + "grad_norm": 11.983473777770996, + "learning_rate": 3.566346153846154e-05, + "loss": 0.3537, + "step": 10437 + }, + { + "epoch": 28.675824175824175, + "grad_norm": 16.53988265991211, + "learning_rate": 3.566208791208791e-05, + "loss": 0.6224, + "step": 10438 + }, + { + "epoch": 28.678571428571427, + "grad_norm": 24.898330688476562, + "learning_rate": 3.566071428571428e-05, + "loss": 0.8969, + "step": 10439 + }, + { + "epoch": 28.681318681318682, + "grad_norm": 5.0682053565979, + "learning_rate": 3.565934065934066e-05, + "loss": 0.1028, + "step": 10440 + }, + { + "epoch": 28.684065934065934, + "grad_norm": 13.523824691772461, + "learning_rate": 3.565796703296703e-05, + "loss": 0.4609, + "step": 10441 + }, + { + "epoch": 28.686813186813186, + "grad_norm": 15.808809280395508, + "learning_rate": 3.565659340659341e-05, + "loss": 0.564, + "step": 10442 + }, + { + "epoch": 28.689560439560438, + "grad_norm": 22.14812660217285, + "learning_rate": 3.5655219780219784e-05, + "loss": 0.6365, + "step": 10443 + }, + { + "epoch": 28.692307692307693, + "grad_norm": 10.546874046325684, + "learning_rate": 3.5653846153846154e-05, + "loss": 0.3335, + "step": 10444 + }, + { + "epoch": 28.695054945054945, + "grad_norm": 14.342085838317871, + "learning_rate": 3.565247252747253e-05, + "loss": 0.4089, + "step": 10445 + }, + { + "epoch": 28.697802197802197, + "grad_norm": 7.285012245178223, + "learning_rate": 3.56510989010989e-05, + "loss": 0.2146, + "step": 10446 + }, + { + "epoch": 28.70054945054945, + "grad_norm": 11.570984840393066, + "learning_rate": 3.564972527472528e-05, + "loss": 0.2327, + "step": 10447 + }, + { + "epoch": 28.703296703296704, + "grad_norm": 11.687742233276367, + "learning_rate": 3.5648351648351654e-05, + "loss": 0.2928, + "step": 10448 + }, + { + "epoch": 28.706043956043956, + "grad_norm": 9.97496509552002, + "learning_rate": 3.5646978021978024e-05, + "loss": 0.1082, + "step": 10449 + }, + { + "epoch": 28.708791208791208, + "grad_norm": 12.504546165466309, + "learning_rate": 3.56456043956044e-05, + "loss": 0.3378, + "step": 10450 + }, + { + "epoch": 28.71153846153846, + "grad_norm": 7.02359676361084, + "learning_rate": 3.564423076923077e-05, + "loss": 0.1982, + "step": 10451 + }, + { + "epoch": 28.714285714285715, + "grad_norm": 14.807124137878418, + "learning_rate": 3.564285714285715e-05, + "loss": 0.3875, + "step": 10452 + }, + { + "epoch": 28.717032967032967, + "grad_norm": 14.048354148864746, + "learning_rate": 3.564148351648352e-05, + "loss": 0.4339, + "step": 10453 + }, + { + "epoch": 28.71978021978022, + "grad_norm": 7.670397758483887, + "learning_rate": 3.564010989010989e-05, + "loss": 0.2439, + "step": 10454 + }, + { + "epoch": 28.72252747252747, + "grad_norm": 7.074214935302734, + "learning_rate": 3.5638736263736265e-05, + "loss": 0.1415, + "step": 10455 + }, + { + "epoch": 28.725274725274726, + "grad_norm": 14.177484512329102, + "learning_rate": 3.5637362637362635e-05, + "loss": 0.4097, + "step": 10456 + }, + { + "epoch": 28.728021978021978, + "grad_norm": 7.748316287994385, + "learning_rate": 3.563598901098901e-05, + "loss": 0.1411, + "step": 10457 + }, + { + "epoch": 28.73076923076923, + "grad_norm": 7.338047027587891, + "learning_rate": 3.563461538461539e-05, + "loss": 0.1904, + "step": 10458 + }, + { + "epoch": 28.733516483516482, + "grad_norm": 17.346006393432617, + "learning_rate": 3.563324175824176e-05, + "loss": 0.5553, + "step": 10459 + }, + { + "epoch": 28.736263736263737, + "grad_norm": 7.88538932800293, + "learning_rate": 3.5631868131868135e-05, + "loss": 0.272, + "step": 10460 + }, + { + "epoch": 28.73901098901099, + "grad_norm": 17.032594680786133, + "learning_rate": 3.5630494505494505e-05, + "loss": 0.5265, + "step": 10461 + }, + { + "epoch": 28.74175824175824, + "grad_norm": 13.662667274475098, + "learning_rate": 3.562912087912088e-05, + "loss": 0.4295, + "step": 10462 + }, + { + "epoch": 28.744505494505496, + "grad_norm": 12.01977825164795, + "learning_rate": 3.562774725274726e-05, + "loss": 0.2314, + "step": 10463 + }, + { + "epoch": 28.747252747252748, + "grad_norm": 12.729268074035645, + "learning_rate": 3.562637362637363e-05, + "loss": 0.3346, + "step": 10464 + }, + { + "epoch": 28.75, + "grad_norm": 14.619013786315918, + "learning_rate": 3.5625000000000005e-05, + "loss": 0.4725, + "step": 10465 + }, + { + "epoch": 28.752747252747252, + "grad_norm": 15.528177261352539, + "learning_rate": 3.5623626373626375e-05, + "loss": 0.4014, + "step": 10466 + }, + { + "epoch": 28.755494505494504, + "grad_norm": 11.9705810546875, + "learning_rate": 3.562225274725275e-05, + "loss": 0.2808, + "step": 10467 + }, + { + "epoch": 28.75824175824176, + "grad_norm": 6.91110372543335, + "learning_rate": 3.562087912087912e-05, + "loss": 0.1564, + "step": 10468 + }, + { + "epoch": 28.76098901098901, + "grad_norm": 13.175763130187988, + "learning_rate": 3.561950549450549e-05, + "loss": 0.4725, + "step": 10469 + }, + { + "epoch": 28.763736263736263, + "grad_norm": 9.763299942016602, + "learning_rate": 3.561813186813187e-05, + "loss": 0.234, + "step": 10470 + }, + { + "epoch": 28.766483516483518, + "grad_norm": 17.038110733032227, + "learning_rate": 3.561675824175824e-05, + "loss": 0.3052, + "step": 10471 + }, + { + "epoch": 28.76923076923077, + "grad_norm": 15.535163879394531, + "learning_rate": 3.5615384615384616e-05, + "loss": 0.5054, + "step": 10472 + }, + { + "epoch": 28.771978021978022, + "grad_norm": 18.053640365600586, + "learning_rate": 3.561401098901099e-05, + "loss": 0.4568, + "step": 10473 + }, + { + "epoch": 28.774725274725274, + "grad_norm": 15.82010269165039, + "learning_rate": 3.561263736263736e-05, + "loss": 0.6443, + "step": 10474 + }, + { + "epoch": 28.77747252747253, + "grad_norm": 7.19470739364624, + "learning_rate": 3.561126373626374e-05, + "loss": 0.1717, + "step": 10475 + }, + { + "epoch": 28.78021978021978, + "grad_norm": 7.652091026306152, + "learning_rate": 3.560989010989011e-05, + "loss": 0.2318, + "step": 10476 + }, + { + "epoch": 28.782967032967033, + "grad_norm": 8.362250328063965, + "learning_rate": 3.5608516483516486e-05, + "loss": 0.1913, + "step": 10477 + }, + { + "epoch": 28.785714285714285, + "grad_norm": 20.384235382080078, + "learning_rate": 3.560714285714286e-05, + "loss": 0.9365, + "step": 10478 + }, + { + "epoch": 28.78846153846154, + "grad_norm": 12.941890716552734, + "learning_rate": 3.560576923076923e-05, + "loss": 0.2943, + "step": 10479 + }, + { + "epoch": 28.791208791208792, + "grad_norm": 8.825536727905273, + "learning_rate": 3.560439560439561e-05, + "loss": 0.4321, + "step": 10480 + }, + { + "epoch": 28.793956043956044, + "grad_norm": 14.134605407714844, + "learning_rate": 3.560302197802198e-05, + "loss": 0.4256, + "step": 10481 + }, + { + "epoch": 28.796703296703296, + "grad_norm": 10.221481323242188, + "learning_rate": 3.560164835164836e-05, + "loss": 0.2709, + "step": 10482 + }, + { + "epoch": 28.79945054945055, + "grad_norm": 17.04119110107422, + "learning_rate": 3.560027472527473e-05, + "loss": 0.5893, + "step": 10483 + }, + { + "epoch": 28.802197802197803, + "grad_norm": 6.183649063110352, + "learning_rate": 3.55989010989011e-05, + "loss": 0.1384, + "step": 10484 + }, + { + "epoch": 28.804945054945055, + "grad_norm": 13.289567947387695, + "learning_rate": 3.5597527472527473e-05, + "loss": 0.4269, + "step": 10485 + }, + { + "epoch": 28.807692307692307, + "grad_norm": 11.1367769241333, + "learning_rate": 3.5596153846153844e-05, + "loss": 0.2699, + "step": 10486 + }, + { + "epoch": 28.810439560439562, + "grad_norm": 3.296126365661621, + "learning_rate": 3.559478021978022e-05, + "loss": 0.0657, + "step": 10487 + }, + { + "epoch": 28.813186813186814, + "grad_norm": 12.967156410217285, + "learning_rate": 3.55934065934066e-05, + "loss": 0.2564, + "step": 10488 + }, + { + "epoch": 28.815934065934066, + "grad_norm": 9.623468399047852, + "learning_rate": 3.559203296703297e-05, + "loss": 0.1902, + "step": 10489 + }, + { + "epoch": 28.818681318681318, + "grad_norm": 10.024198532104492, + "learning_rate": 3.5590659340659344e-05, + "loss": 0.1696, + "step": 10490 + }, + { + "epoch": 28.821428571428573, + "grad_norm": 10.73037052154541, + "learning_rate": 3.5589285714285714e-05, + "loss": 0.4698, + "step": 10491 + }, + { + "epoch": 28.824175824175825, + "grad_norm": 15.486330032348633, + "learning_rate": 3.558791208791209e-05, + "loss": 0.5431, + "step": 10492 + }, + { + "epoch": 28.826923076923077, + "grad_norm": 8.480756759643555, + "learning_rate": 3.558653846153847e-05, + "loss": 0.2445, + "step": 10493 + }, + { + "epoch": 28.82967032967033, + "grad_norm": 10.75734806060791, + "learning_rate": 3.558516483516484e-05, + "loss": 0.1879, + "step": 10494 + }, + { + "epoch": 28.832417582417584, + "grad_norm": 13.30953311920166, + "learning_rate": 3.5583791208791214e-05, + "loss": 0.4534, + "step": 10495 + }, + { + "epoch": 28.835164835164836, + "grad_norm": 11.69851016998291, + "learning_rate": 3.5582417582417584e-05, + "loss": 0.193, + "step": 10496 + }, + { + "epoch": 28.837912087912088, + "grad_norm": 6.810169219970703, + "learning_rate": 3.558104395604396e-05, + "loss": 0.201, + "step": 10497 + }, + { + "epoch": 28.84065934065934, + "grad_norm": 11.869829177856445, + "learning_rate": 3.557967032967033e-05, + "loss": 0.2368, + "step": 10498 + }, + { + "epoch": 28.843406593406595, + "grad_norm": 14.028100967407227, + "learning_rate": 3.55782967032967e-05, + "loss": 0.3029, + "step": 10499 + }, + { + "epoch": 28.846153846153847, + "grad_norm": 12.903777122497559, + "learning_rate": 3.557692307692308e-05, + "loss": 0.2244, + "step": 10500 + }, + { + "epoch": 28.8489010989011, + "grad_norm": 12.756452560424805, + "learning_rate": 3.557554945054945e-05, + "loss": 0.5839, + "step": 10501 + }, + { + "epoch": 28.85164835164835, + "grad_norm": 17.870738983154297, + "learning_rate": 3.5574175824175825e-05, + "loss": 0.4174, + "step": 10502 + }, + { + "epoch": 28.854395604395606, + "grad_norm": 14.554814338684082, + "learning_rate": 3.55728021978022e-05, + "loss": 0.4422, + "step": 10503 + }, + { + "epoch": 28.857142857142858, + "grad_norm": 19.451244354248047, + "learning_rate": 3.557142857142857e-05, + "loss": 0.5336, + "step": 10504 + }, + { + "epoch": 28.85989010989011, + "grad_norm": 15.524282455444336, + "learning_rate": 3.557005494505495e-05, + "loss": 0.4097, + "step": 10505 + }, + { + "epoch": 28.86263736263736, + "grad_norm": 16.23615264892578, + "learning_rate": 3.556868131868132e-05, + "loss": 0.4554, + "step": 10506 + }, + { + "epoch": 28.865384615384617, + "grad_norm": 11.487441062927246, + "learning_rate": 3.5567307692307695e-05, + "loss": 0.3311, + "step": 10507 + }, + { + "epoch": 28.86813186813187, + "grad_norm": 16.925127029418945, + "learning_rate": 3.556593406593407e-05, + "loss": 0.3562, + "step": 10508 + }, + { + "epoch": 28.87087912087912, + "grad_norm": 7.080856800079346, + "learning_rate": 3.556456043956044e-05, + "loss": 0.2844, + "step": 10509 + }, + { + "epoch": 28.873626373626372, + "grad_norm": 8.900337219238281, + "learning_rate": 3.556318681318682e-05, + "loss": 0.2165, + "step": 10510 + }, + { + "epoch": 28.876373626373628, + "grad_norm": 14.627874374389648, + "learning_rate": 3.556181318681319e-05, + "loss": 0.4795, + "step": 10511 + }, + { + "epoch": 28.87912087912088, + "grad_norm": 9.432846069335938, + "learning_rate": 3.5560439560439566e-05, + "loss": 0.3311, + "step": 10512 + }, + { + "epoch": 28.88186813186813, + "grad_norm": 14.395547866821289, + "learning_rate": 3.5559065934065936e-05, + "loss": 0.37, + "step": 10513 + }, + { + "epoch": 28.884615384615383, + "grad_norm": 14.184243202209473, + "learning_rate": 3.5557692307692306e-05, + "loss": 0.3809, + "step": 10514 + }, + { + "epoch": 28.88736263736264, + "grad_norm": 15.74634075164795, + "learning_rate": 3.555631868131868e-05, + "loss": 0.3743, + "step": 10515 + }, + { + "epoch": 28.89010989010989, + "grad_norm": 10.696695327758789, + "learning_rate": 3.555494505494505e-05, + "loss": 0.4174, + "step": 10516 + }, + { + "epoch": 28.892857142857142, + "grad_norm": 16.012157440185547, + "learning_rate": 3.555357142857143e-05, + "loss": 0.5714, + "step": 10517 + }, + { + "epoch": 28.895604395604394, + "grad_norm": 11.078714370727539, + "learning_rate": 3.5552197802197806e-05, + "loss": 0.2933, + "step": 10518 + }, + { + "epoch": 28.89835164835165, + "grad_norm": 5.936433792114258, + "learning_rate": 3.5550824175824176e-05, + "loss": 0.1195, + "step": 10519 + }, + { + "epoch": 28.9010989010989, + "grad_norm": 17.095619201660156, + "learning_rate": 3.554945054945055e-05, + "loss": 0.5901, + "step": 10520 + }, + { + "epoch": 28.903846153846153, + "grad_norm": 12.2312593460083, + "learning_rate": 3.554807692307692e-05, + "loss": 0.4318, + "step": 10521 + }, + { + "epoch": 28.906593406593405, + "grad_norm": 13.252703666687012, + "learning_rate": 3.55467032967033e-05, + "loss": 0.3592, + "step": 10522 + }, + { + "epoch": 28.90934065934066, + "grad_norm": 13.534839630126953, + "learning_rate": 3.5545329670329676e-05, + "loss": 0.3233, + "step": 10523 + }, + { + "epoch": 28.912087912087912, + "grad_norm": 12.594785690307617, + "learning_rate": 3.5543956043956047e-05, + "loss": 0.2461, + "step": 10524 + }, + { + "epoch": 28.914835164835164, + "grad_norm": 5.349110126495361, + "learning_rate": 3.554258241758242e-05, + "loss": 0.1374, + "step": 10525 + }, + { + "epoch": 28.917582417582416, + "grad_norm": 15.611513137817383, + "learning_rate": 3.554120879120879e-05, + "loss": 0.5245, + "step": 10526 + }, + { + "epoch": 28.92032967032967, + "grad_norm": 14.289298057556152, + "learning_rate": 3.553983516483517e-05, + "loss": 0.551, + "step": 10527 + }, + { + "epoch": 28.923076923076923, + "grad_norm": 11.42285442352295, + "learning_rate": 3.553846153846154e-05, + "loss": 0.2272, + "step": 10528 + }, + { + "epoch": 28.925824175824175, + "grad_norm": 20.7437744140625, + "learning_rate": 3.553708791208791e-05, + "loss": 0.7172, + "step": 10529 + }, + { + "epoch": 28.928571428571427, + "grad_norm": 15.556753158569336, + "learning_rate": 3.553571428571429e-05, + "loss": 0.5453, + "step": 10530 + }, + { + "epoch": 28.931318681318682, + "grad_norm": 11.271993637084961, + "learning_rate": 3.553434065934066e-05, + "loss": 0.2968, + "step": 10531 + }, + { + "epoch": 28.934065934065934, + "grad_norm": 15.845796585083008, + "learning_rate": 3.5532967032967034e-05, + "loss": 0.5328, + "step": 10532 + }, + { + "epoch": 28.936813186813186, + "grad_norm": 5.964591026306152, + "learning_rate": 3.553159340659341e-05, + "loss": 0.109, + "step": 10533 + }, + { + "epoch": 28.939560439560438, + "grad_norm": 8.060961723327637, + "learning_rate": 3.553021978021978e-05, + "loss": 0.1521, + "step": 10534 + }, + { + "epoch": 28.942307692307693, + "grad_norm": 14.708818435668945, + "learning_rate": 3.552884615384616e-05, + "loss": 0.305, + "step": 10535 + }, + { + "epoch": 28.945054945054945, + "grad_norm": 15.604682922363281, + "learning_rate": 3.552747252747253e-05, + "loss": 0.4807, + "step": 10536 + }, + { + "epoch": 28.947802197802197, + "grad_norm": 13.620099067687988, + "learning_rate": 3.5526098901098904e-05, + "loss": 0.3479, + "step": 10537 + }, + { + "epoch": 28.95054945054945, + "grad_norm": 9.34813404083252, + "learning_rate": 3.552472527472528e-05, + "loss": 0.2118, + "step": 10538 + }, + { + "epoch": 28.953296703296704, + "grad_norm": 10.386201858520508, + "learning_rate": 3.552335164835165e-05, + "loss": 0.307, + "step": 10539 + }, + { + "epoch": 28.956043956043956, + "grad_norm": 18.836559295654297, + "learning_rate": 3.552197802197803e-05, + "loss": 0.7773, + "step": 10540 + }, + { + "epoch": 28.958791208791208, + "grad_norm": 7.132941246032715, + "learning_rate": 3.55206043956044e-05, + "loss": 0.1649, + "step": 10541 + }, + { + "epoch": 28.96153846153846, + "grad_norm": 11.141457557678223, + "learning_rate": 3.5519230769230775e-05, + "loss": 0.1869, + "step": 10542 + }, + { + "epoch": 28.964285714285715, + "grad_norm": 12.343927383422852, + "learning_rate": 3.5517857142857145e-05, + "loss": 0.2496, + "step": 10543 + }, + { + "epoch": 28.967032967032967, + "grad_norm": 7.843756198883057, + "learning_rate": 3.5516483516483515e-05, + "loss": 0.167, + "step": 10544 + }, + { + "epoch": 28.96978021978022, + "grad_norm": 7.820218086242676, + "learning_rate": 3.551510989010989e-05, + "loss": 0.1791, + "step": 10545 + }, + { + "epoch": 28.97252747252747, + "grad_norm": 16.531160354614258, + "learning_rate": 3.551373626373626e-05, + "loss": 0.4531, + "step": 10546 + }, + { + "epoch": 28.975274725274726, + "grad_norm": 14.061238288879395, + "learning_rate": 3.551236263736264e-05, + "loss": 0.3451, + "step": 10547 + }, + { + "epoch": 28.978021978021978, + "grad_norm": 16.397308349609375, + "learning_rate": 3.5510989010989015e-05, + "loss": 0.453, + "step": 10548 + }, + { + "epoch": 28.98076923076923, + "grad_norm": 10.657305717468262, + "learning_rate": 3.5509615384615385e-05, + "loss": 0.1698, + "step": 10549 + }, + { + "epoch": 28.983516483516482, + "grad_norm": 15.202415466308594, + "learning_rate": 3.550824175824176e-05, + "loss": 0.3048, + "step": 10550 + }, + { + "epoch": 28.986263736263737, + "grad_norm": 8.681804656982422, + "learning_rate": 3.550686813186813e-05, + "loss": 0.3587, + "step": 10551 + }, + { + "epoch": 28.98901098901099, + "grad_norm": 9.065377235412598, + "learning_rate": 3.550549450549451e-05, + "loss": 0.2504, + "step": 10552 + }, + { + "epoch": 28.99175824175824, + "grad_norm": 16.930923461914062, + "learning_rate": 3.5504120879120885e-05, + "loss": 0.5855, + "step": 10553 + }, + { + "epoch": 28.994505494505496, + "grad_norm": 14.282243728637695, + "learning_rate": 3.5502747252747255e-05, + "loss": 0.3207, + "step": 10554 + }, + { + "epoch": 28.997252747252748, + "grad_norm": 9.864242553710938, + "learning_rate": 3.550137362637363e-05, + "loss": 0.2775, + "step": 10555 + }, + { + "epoch": 29.0, + "grad_norm": 40.66219711303711, + "learning_rate": 3.55e-05, + "loss": 1.1915, + "step": 10556 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.7975206611570248, + "eval_f1": 0.7918716184453233, + "eval_f1_DuraRiadoRio_64x64": 0.6515837104072398, + "eval_f1_Mole_64x64": 0.8865248226950354, + "eval_f1_Quebrado_64x64": 0.8788927335640139, + "eval_f1_RiadoRio_64x64": 0.7318611987381703, + "eval_f1_RioFechado_64x64": 0.8104956268221575, + "eval_loss": 0.9770141839981079, + "eval_precision": 0.8222593394134489, + "eval_precision_DuraRiadoRio_64x64": 0.935064935064935, + "eval_precision_Mole_64x64": 0.9057971014492754, + "eval_precision_Quebrado_64x64": 0.8758620689655172, + "eval_precision_RiadoRio_64x64": 0.703030303030303, + "eval_precision_RioFechado_64x64": 0.6915422885572139, + "eval_recall": 0.7984062268346923, + "eval_recall_DuraRiadoRio_64x64": 0.5, + "eval_recall_Mole_64x64": 0.8680555555555556, + "eval_recall_Quebrado_64x64": 0.8819444444444444, + "eval_recall_RiadoRio_64x64": 0.7631578947368421, + "eval_recall_RioFechado_64x64": 0.9788732394366197, + "eval_runtime": 1.7245, + "eval_samples_per_second": 420.984, + "eval_steps_per_second": 26.674, + "step": 10556 + }, + { + "epoch": 29.002747252747252, + "grad_norm": 4.464418888092041, + "learning_rate": 3.549862637362638e-05, + "loss": 0.1078, + "step": 10557 + }, + { + "epoch": 29.005494505494507, + "grad_norm": 9.228832244873047, + "learning_rate": 3.549725274725275e-05, + "loss": 0.1978, + "step": 10558 + }, + { + "epoch": 29.00824175824176, + "grad_norm": 23.136314392089844, + "learning_rate": 3.549587912087912e-05, + "loss": 0.609, + "step": 10559 + }, + { + "epoch": 29.01098901098901, + "grad_norm": 12.627542495727539, + "learning_rate": 3.5494505494505496e-05, + "loss": 0.3356, + "step": 10560 + }, + { + "epoch": 29.013736263736263, + "grad_norm": 10.58580207824707, + "learning_rate": 3.5493131868131866e-05, + "loss": 0.2497, + "step": 10561 + }, + { + "epoch": 29.016483516483518, + "grad_norm": 18.540294647216797, + "learning_rate": 3.549175824175824e-05, + "loss": 0.4438, + "step": 10562 + }, + { + "epoch": 29.01923076923077, + "grad_norm": 10.943746566772461, + "learning_rate": 3.549038461538462e-05, + "loss": 0.2995, + "step": 10563 + }, + { + "epoch": 29.021978021978022, + "grad_norm": 6.571731090545654, + "learning_rate": 3.548901098901099e-05, + "loss": 0.1244, + "step": 10564 + }, + { + "epoch": 29.024725274725274, + "grad_norm": 7.7304816246032715, + "learning_rate": 3.5487637362637366e-05, + "loss": 0.1857, + "step": 10565 + }, + { + "epoch": 29.02747252747253, + "grad_norm": 6.896875858306885, + "learning_rate": 3.5486263736263736e-05, + "loss": 0.3581, + "step": 10566 + }, + { + "epoch": 29.03021978021978, + "grad_norm": 1.5141987800598145, + "learning_rate": 3.548489010989011e-05, + "loss": 0.0392, + "step": 10567 + }, + { + "epoch": 29.032967032967033, + "grad_norm": 9.961859703063965, + "learning_rate": 3.548351648351649e-05, + "loss": 0.3087, + "step": 10568 + }, + { + "epoch": 29.035714285714285, + "grad_norm": 13.966564178466797, + "learning_rate": 3.548214285714286e-05, + "loss": 0.3186, + "step": 10569 + }, + { + "epoch": 29.03846153846154, + "grad_norm": 10.005434036254883, + "learning_rate": 3.548076923076924e-05, + "loss": 0.1893, + "step": 10570 + }, + { + "epoch": 29.041208791208792, + "grad_norm": 7.1640095710754395, + "learning_rate": 3.547939560439561e-05, + "loss": 0.2552, + "step": 10571 + }, + { + "epoch": 29.043956043956044, + "grad_norm": 7.39711856842041, + "learning_rate": 3.5478021978021984e-05, + "loss": 0.1407, + "step": 10572 + }, + { + "epoch": 29.046703296703296, + "grad_norm": 12.756369590759277, + "learning_rate": 3.5476648351648354e-05, + "loss": 0.2897, + "step": 10573 + }, + { + "epoch": 29.04945054945055, + "grad_norm": 14.252293586730957, + "learning_rate": 3.5475274725274724e-05, + "loss": 0.4606, + "step": 10574 + }, + { + "epoch": 29.052197802197803, + "grad_norm": 12.362367630004883, + "learning_rate": 3.54739010989011e-05, + "loss": 0.4776, + "step": 10575 + }, + { + "epoch": 29.054945054945055, + "grad_norm": 6.811641693115234, + "learning_rate": 3.547252747252747e-05, + "loss": 0.1834, + "step": 10576 + }, + { + "epoch": 29.057692307692307, + "grad_norm": 17.409425735473633, + "learning_rate": 3.547115384615385e-05, + "loss": 0.4346, + "step": 10577 + }, + { + "epoch": 29.060439560439562, + "grad_norm": 14.886682510375977, + "learning_rate": 3.5469780219780224e-05, + "loss": 0.4619, + "step": 10578 + }, + { + "epoch": 29.063186813186814, + "grad_norm": 8.98622989654541, + "learning_rate": 3.5468406593406594e-05, + "loss": 0.175, + "step": 10579 + }, + { + "epoch": 29.065934065934066, + "grad_norm": 15.61092758178711, + "learning_rate": 3.546703296703297e-05, + "loss": 0.4339, + "step": 10580 + }, + { + "epoch": 29.068681318681318, + "grad_norm": 13.916132926940918, + "learning_rate": 3.546565934065934e-05, + "loss": 0.2548, + "step": 10581 + }, + { + "epoch": 29.071428571428573, + "grad_norm": 14.100622177124023, + "learning_rate": 3.546428571428572e-05, + "loss": 0.4511, + "step": 10582 + }, + { + "epoch": 29.074175824175825, + "grad_norm": 18.559335708618164, + "learning_rate": 3.546291208791209e-05, + "loss": 0.5144, + "step": 10583 + }, + { + "epoch": 29.076923076923077, + "grad_norm": 11.76855182647705, + "learning_rate": 3.5461538461538464e-05, + "loss": 0.256, + "step": 10584 + }, + { + "epoch": 29.07967032967033, + "grad_norm": 16.125137329101562, + "learning_rate": 3.546016483516484e-05, + "loss": 0.3642, + "step": 10585 + }, + { + "epoch": 29.082417582417584, + "grad_norm": 9.792278289794922, + "learning_rate": 3.545879120879121e-05, + "loss": 0.2768, + "step": 10586 + }, + { + "epoch": 29.085164835164836, + "grad_norm": 2.2209675312042236, + "learning_rate": 3.545741758241759e-05, + "loss": 0.0385, + "step": 10587 + }, + { + "epoch": 29.087912087912088, + "grad_norm": 13.251919746398926, + "learning_rate": 3.545604395604396e-05, + "loss": 0.2075, + "step": 10588 + }, + { + "epoch": 29.09065934065934, + "grad_norm": 7.918696880340576, + "learning_rate": 3.545467032967033e-05, + "loss": 0.1634, + "step": 10589 + }, + { + "epoch": 29.093406593406595, + "grad_norm": 19.499876022338867, + "learning_rate": 3.5453296703296705e-05, + "loss": 0.7926, + "step": 10590 + }, + { + "epoch": 29.096153846153847, + "grad_norm": 16.108386993408203, + "learning_rate": 3.5451923076923075e-05, + "loss": 0.656, + "step": 10591 + }, + { + "epoch": 29.0989010989011, + "grad_norm": 17.199268341064453, + "learning_rate": 3.545054945054945e-05, + "loss": 0.319, + "step": 10592 + }, + { + "epoch": 29.10164835164835, + "grad_norm": 13.845513343811035, + "learning_rate": 3.544917582417582e-05, + "loss": 0.4096, + "step": 10593 + }, + { + "epoch": 29.104395604395606, + "grad_norm": 12.381011962890625, + "learning_rate": 3.54478021978022e-05, + "loss": 0.2685, + "step": 10594 + }, + { + "epoch": 29.107142857142858, + "grad_norm": 8.622177124023438, + "learning_rate": 3.5446428571428575e-05, + "loss": 0.2205, + "step": 10595 + }, + { + "epoch": 29.10989010989011, + "grad_norm": 5.060256004333496, + "learning_rate": 3.5445054945054945e-05, + "loss": 0.1371, + "step": 10596 + }, + { + "epoch": 29.11263736263736, + "grad_norm": 11.92040729522705, + "learning_rate": 3.544368131868132e-05, + "loss": 0.4188, + "step": 10597 + }, + { + "epoch": 29.115384615384617, + "grad_norm": 22.801122665405273, + "learning_rate": 3.544230769230769e-05, + "loss": 0.8453, + "step": 10598 + }, + { + "epoch": 29.11813186813187, + "grad_norm": 13.563448905944824, + "learning_rate": 3.544093406593407e-05, + "loss": 0.6777, + "step": 10599 + }, + { + "epoch": 29.12087912087912, + "grad_norm": 8.808320045471191, + "learning_rate": 3.5439560439560446e-05, + "loss": 0.1476, + "step": 10600 + }, + { + "epoch": 29.123626373626372, + "grad_norm": 26.619626998901367, + "learning_rate": 3.5438186813186816e-05, + "loss": 1.038, + "step": 10601 + }, + { + "epoch": 29.126373626373628, + "grad_norm": 16.750669479370117, + "learning_rate": 3.5436813186813186e-05, + "loss": 0.5721, + "step": 10602 + }, + { + "epoch": 29.12912087912088, + "grad_norm": 15.36121940612793, + "learning_rate": 3.543543956043956e-05, + "loss": 0.6497, + "step": 10603 + }, + { + "epoch": 29.13186813186813, + "grad_norm": 7.935070514678955, + "learning_rate": 3.543406593406593e-05, + "loss": 0.2866, + "step": 10604 + }, + { + "epoch": 29.134615384615383, + "grad_norm": 2.3729770183563232, + "learning_rate": 3.543269230769231e-05, + "loss": 0.0632, + "step": 10605 + }, + { + "epoch": 29.13736263736264, + "grad_norm": 13.867218971252441, + "learning_rate": 3.543131868131868e-05, + "loss": 0.3626, + "step": 10606 + }, + { + "epoch": 29.14010989010989, + "grad_norm": 6.777134418487549, + "learning_rate": 3.5429945054945056e-05, + "loss": 0.2086, + "step": 10607 + }, + { + "epoch": 29.142857142857142, + "grad_norm": 8.790812492370605, + "learning_rate": 3.5428571428571426e-05, + "loss": 0.1207, + "step": 10608 + }, + { + "epoch": 29.145604395604394, + "grad_norm": 12.03464412689209, + "learning_rate": 3.54271978021978e-05, + "loss": 0.2442, + "step": 10609 + }, + { + "epoch": 29.14835164835165, + "grad_norm": 12.389961242675781, + "learning_rate": 3.542582417582418e-05, + "loss": 0.3253, + "step": 10610 + }, + { + "epoch": 29.1510989010989, + "grad_norm": 6.868706703186035, + "learning_rate": 3.542445054945055e-05, + "loss": 0.194, + "step": 10611 + }, + { + "epoch": 29.153846153846153, + "grad_norm": 15.4539155960083, + "learning_rate": 3.542307692307693e-05, + "loss": 0.5546, + "step": 10612 + }, + { + "epoch": 29.156593406593405, + "grad_norm": 8.356386184692383, + "learning_rate": 3.54217032967033e-05, + "loss": 0.1607, + "step": 10613 + }, + { + "epoch": 29.15934065934066, + "grad_norm": 10.215996742248535, + "learning_rate": 3.5420329670329673e-05, + "loss": 0.4252, + "step": 10614 + }, + { + "epoch": 29.162087912087912, + "grad_norm": 12.893928527832031, + "learning_rate": 3.541895604395605e-05, + "loss": 0.2692, + "step": 10615 + }, + { + "epoch": 29.164835164835164, + "grad_norm": 7.980828285217285, + "learning_rate": 3.541758241758242e-05, + "loss": 0.1613, + "step": 10616 + }, + { + "epoch": 29.167582417582416, + "grad_norm": 15.60495662689209, + "learning_rate": 3.541620879120879e-05, + "loss": 0.5609, + "step": 10617 + }, + { + "epoch": 29.17032967032967, + "grad_norm": 11.217308044433594, + "learning_rate": 3.541483516483517e-05, + "loss": 0.178, + "step": 10618 + }, + { + "epoch": 29.173076923076923, + "grad_norm": 14.38393783569336, + "learning_rate": 3.541346153846154e-05, + "loss": 0.587, + "step": 10619 + }, + { + "epoch": 29.175824175824175, + "grad_norm": 16.29197120666504, + "learning_rate": 3.5412087912087914e-05, + "loss": 0.3597, + "step": 10620 + }, + { + "epoch": 29.178571428571427, + "grad_norm": 15.149770736694336, + "learning_rate": 3.5410714285714284e-05, + "loss": 0.4461, + "step": 10621 + }, + { + "epoch": 29.181318681318682, + "grad_norm": 14.87645149230957, + "learning_rate": 3.540934065934066e-05, + "loss": 0.4953, + "step": 10622 + }, + { + "epoch": 29.184065934065934, + "grad_norm": 9.467523574829102, + "learning_rate": 3.540796703296703e-05, + "loss": 0.3323, + "step": 10623 + }, + { + "epoch": 29.186813186813186, + "grad_norm": 12.543142318725586, + "learning_rate": 3.540659340659341e-05, + "loss": 0.2652, + "step": 10624 + }, + { + "epoch": 29.189560439560438, + "grad_norm": 15.615084648132324, + "learning_rate": 3.5405219780219784e-05, + "loss": 0.4092, + "step": 10625 + }, + { + "epoch": 29.192307692307693, + "grad_norm": 13.148530006408691, + "learning_rate": 3.5403846153846154e-05, + "loss": 0.2407, + "step": 10626 + }, + { + "epoch": 29.195054945054945, + "grad_norm": 15.855469703674316, + "learning_rate": 3.540247252747253e-05, + "loss": 0.5892, + "step": 10627 + }, + { + "epoch": 29.197802197802197, + "grad_norm": 15.222050666809082, + "learning_rate": 3.54010989010989e-05, + "loss": 0.4652, + "step": 10628 + }, + { + "epoch": 29.20054945054945, + "grad_norm": 12.107172012329102, + "learning_rate": 3.539972527472528e-05, + "loss": 0.3721, + "step": 10629 + }, + { + "epoch": 29.203296703296704, + "grad_norm": 9.697047233581543, + "learning_rate": 3.5398351648351655e-05, + "loss": 0.1826, + "step": 10630 + }, + { + "epoch": 29.206043956043956, + "grad_norm": 11.940791130065918, + "learning_rate": 3.5396978021978025e-05, + "loss": 0.3641, + "step": 10631 + }, + { + "epoch": 29.208791208791208, + "grad_norm": 13.69371509552002, + "learning_rate": 3.5395604395604395e-05, + "loss": 0.2788, + "step": 10632 + }, + { + "epoch": 29.21153846153846, + "grad_norm": 6.237361907958984, + "learning_rate": 3.539423076923077e-05, + "loss": 0.1841, + "step": 10633 + }, + { + "epoch": 29.214285714285715, + "grad_norm": 11.924322128295898, + "learning_rate": 3.539285714285714e-05, + "loss": 0.2552, + "step": 10634 + }, + { + "epoch": 29.217032967032967, + "grad_norm": 8.357821464538574, + "learning_rate": 3.539148351648352e-05, + "loss": 0.1762, + "step": 10635 + }, + { + "epoch": 29.21978021978022, + "grad_norm": 6.47006893157959, + "learning_rate": 3.539010989010989e-05, + "loss": 0.1108, + "step": 10636 + }, + { + "epoch": 29.22252747252747, + "grad_norm": 8.761185646057129, + "learning_rate": 3.5388736263736265e-05, + "loss": 0.2433, + "step": 10637 + }, + { + "epoch": 29.225274725274726, + "grad_norm": 17.200458526611328, + "learning_rate": 3.5387362637362635e-05, + "loss": 0.4037, + "step": 10638 + }, + { + "epoch": 29.228021978021978, + "grad_norm": 10.42752742767334, + "learning_rate": 3.538598901098901e-05, + "loss": 0.383, + "step": 10639 + }, + { + "epoch": 29.23076923076923, + "grad_norm": 13.238073348999023, + "learning_rate": 3.538461538461539e-05, + "loss": 0.5722, + "step": 10640 + }, + { + "epoch": 29.233516483516482, + "grad_norm": 13.964885711669922, + "learning_rate": 3.538324175824176e-05, + "loss": 0.4165, + "step": 10641 + }, + { + "epoch": 29.236263736263737, + "grad_norm": 15.359546661376953, + "learning_rate": 3.5381868131868136e-05, + "loss": 0.3378, + "step": 10642 + }, + { + "epoch": 29.23901098901099, + "grad_norm": 13.886012077331543, + "learning_rate": 3.5380494505494506e-05, + "loss": 0.3649, + "step": 10643 + }, + { + "epoch": 29.24175824175824, + "grad_norm": 8.765462875366211, + "learning_rate": 3.537912087912088e-05, + "loss": 0.2374, + "step": 10644 + }, + { + "epoch": 29.244505494505493, + "grad_norm": 12.759795188903809, + "learning_rate": 3.537774725274726e-05, + "loss": 0.2553, + "step": 10645 + }, + { + "epoch": 29.247252747252748, + "grad_norm": 10.092424392700195, + "learning_rate": 3.537637362637363e-05, + "loss": 0.1917, + "step": 10646 + }, + { + "epoch": 29.25, + "grad_norm": 5.123648166656494, + "learning_rate": 3.5375e-05, + "loss": 0.1112, + "step": 10647 + }, + { + "epoch": 29.252747252747252, + "grad_norm": 11.532416343688965, + "learning_rate": 3.5373626373626376e-05, + "loss": 0.2498, + "step": 10648 + }, + { + "epoch": 29.255494505494504, + "grad_norm": 13.648117065429688, + "learning_rate": 3.5372252747252746e-05, + "loss": 0.4561, + "step": 10649 + }, + { + "epoch": 29.25824175824176, + "grad_norm": 11.639540672302246, + "learning_rate": 3.537087912087912e-05, + "loss": 0.3711, + "step": 10650 + }, + { + "epoch": 29.26098901098901, + "grad_norm": 19.948524475097656, + "learning_rate": 3.536950549450549e-05, + "loss": 0.7117, + "step": 10651 + }, + { + "epoch": 29.263736263736263, + "grad_norm": 16.5645809173584, + "learning_rate": 3.536813186813187e-05, + "loss": 0.6512, + "step": 10652 + }, + { + "epoch": 29.266483516483518, + "grad_norm": 11.761702537536621, + "learning_rate": 3.536675824175824e-05, + "loss": 0.302, + "step": 10653 + }, + { + "epoch": 29.26923076923077, + "grad_norm": 13.860200881958008, + "learning_rate": 3.5365384615384617e-05, + "loss": 0.2902, + "step": 10654 + }, + { + "epoch": 29.271978021978022, + "grad_norm": 12.942048072814941, + "learning_rate": 3.536401098901099e-05, + "loss": 0.3861, + "step": 10655 + }, + { + "epoch": 29.274725274725274, + "grad_norm": 12.63498306274414, + "learning_rate": 3.536263736263736e-05, + "loss": 0.3894, + "step": 10656 + }, + { + "epoch": 29.27747252747253, + "grad_norm": 8.870488166809082, + "learning_rate": 3.536126373626374e-05, + "loss": 0.1458, + "step": 10657 + }, + { + "epoch": 29.28021978021978, + "grad_norm": 16.342500686645508, + "learning_rate": 3.535989010989011e-05, + "loss": 0.2869, + "step": 10658 + }, + { + "epoch": 29.282967032967033, + "grad_norm": 12.314007759094238, + "learning_rate": 3.535851648351649e-05, + "loss": 0.3799, + "step": 10659 + }, + { + "epoch": 29.285714285714285, + "grad_norm": 5.375731945037842, + "learning_rate": 3.5357142857142864e-05, + "loss": 0.0765, + "step": 10660 + }, + { + "epoch": 29.28846153846154, + "grad_norm": 10.529191017150879, + "learning_rate": 3.5355769230769234e-05, + "loss": 0.3583, + "step": 10661 + }, + { + "epoch": 29.291208791208792, + "grad_norm": 7.768020153045654, + "learning_rate": 3.5354395604395604e-05, + "loss": 0.2701, + "step": 10662 + }, + { + "epoch": 29.293956043956044, + "grad_norm": 8.746942520141602, + "learning_rate": 3.535302197802198e-05, + "loss": 0.2476, + "step": 10663 + }, + { + "epoch": 29.296703296703296, + "grad_norm": 7.916710376739502, + "learning_rate": 3.535164835164835e-05, + "loss": 0.2007, + "step": 10664 + }, + { + "epoch": 29.29945054945055, + "grad_norm": 8.668041229248047, + "learning_rate": 3.535027472527473e-05, + "loss": 0.1545, + "step": 10665 + }, + { + "epoch": 29.302197802197803, + "grad_norm": 2.5734355449676514, + "learning_rate": 3.53489010989011e-05, + "loss": 0.0564, + "step": 10666 + }, + { + "epoch": 29.304945054945055, + "grad_norm": 15.29533863067627, + "learning_rate": 3.5347527472527474e-05, + "loss": 0.4519, + "step": 10667 + }, + { + "epoch": 29.307692307692307, + "grad_norm": 8.105647087097168, + "learning_rate": 3.5346153846153844e-05, + "loss": 0.1914, + "step": 10668 + }, + { + "epoch": 29.310439560439562, + "grad_norm": 12.088258743286133, + "learning_rate": 3.534478021978022e-05, + "loss": 0.2801, + "step": 10669 + }, + { + "epoch": 29.313186813186814, + "grad_norm": 15.733712196350098, + "learning_rate": 3.53434065934066e-05, + "loss": 0.3994, + "step": 10670 + }, + { + "epoch": 29.315934065934066, + "grad_norm": 14.475716590881348, + "learning_rate": 3.534203296703297e-05, + "loss": 0.5507, + "step": 10671 + }, + { + "epoch": 29.318681318681318, + "grad_norm": 11.807915687561035, + "learning_rate": 3.5340659340659345e-05, + "loss": 0.3971, + "step": 10672 + }, + { + "epoch": 29.321428571428573, + "grad_norm": 12.904603958129883, + "learning_rate": 3.5339285714285715e-05, + "loss": 0.3792, + "step": 10673 + }, + { + "epoch": 29.324175824175825, + "grad_norm": 9.513238906860352, + "learning_rate": 3.533791208791209e-05, + "loss": 0.2841, + "step": 10674 + }, + { + "epoch": 29.326923076923077, + "grad_norm": 15.139432907104492, + "learning_rate": 3.533653846153847e-05, + "loss": 0.5128, + "step": 10675 + }, + { + "epoch": 29.32967032967033, + "grad_norm": 4.9465179443359375, + "learning_rate": 3.533516483516484e-05, + "loss": 0.0636, + "step": 10676 + }, + { + "epoch": 29.332417582417584, + "grad_norm": 8.17524242401123, + "learning_rate": 3.533379120879121e-05, + "loss": 0.1509, + "step": 10677 + }, + { + "epoch": 29.335164835164836, + "grad_norm": 13.148690223693848, + "learning_rate": 3.5332417582417585e-05, + "loss": 0.329, + "step": 10678 + }, + { + "epoch": 29.337912087912088, + "grad_norm": 19.593036651611328, + "learning_rate": 3.5331043956043955e-05, + "loss": 0.4111, + "step": 10679 + }, + { + "epoch": 29.34065934065934, + "grad_norm": 20.06221580505371, + "learning_rate": 3.532967032967033e-05, + "loss": 0.7453, + "step": 10680 + }, + { + "epoch": 29.343406593406595, + "grad_norm": 9.96251106262207, + "learning_rate": 3.53282967032967e-05, + "loss": 0.3077, + "step": 10681 + }, + { + "epoch": 29.346153846153847, + "grad_norm": 9.967569351196289, + "learning_rate": 3.532692307692308e-05, + "loss": 0.2961, + "step": 10682 + }, + { + "epoch": 29.3489010989011, + "grad_norm": 14.39439582824707, + "learning_rate": 3.532554945054945e-05, + "loss": 0.3335, + "step": 10683 + }, + { + "epoch": 29.35164835164835, + "grad_norm": 9.239517211914062, + "learning_rate": 3.5324175824175825e-05, + "loss": 0.1722, + "step": 10684 + }, + { + "epoch": 29.354395604395606, + "grad_norm": 9.353896141052246, + "learning_rate": 3.53228021978022e-05, + "loss": 0.2945, + "step": 10685 + }, + { + "epoch": 29.357142857142858, + "grad_norm": 8.610001564025879, + "learning_rate": 3.532142857142857e-05, + "loss": 0.1757, + "step": 10686 + }, + { + "epoch": 29.35989010989011, + "grad_norm": 12.69340991973877, + "learning_rate": 3.532005494505495e-05, + "loss": 0.3079, + "step": 10687 + }, + { + "epoch": 29.36263736263736, + "grad_norm": 10.487902641296387, + "learning_rate": 3.531868131868132e-05, + "loss": 0.4404, + "step": 10688 + }, + { + "epoch": 29.365384615384617, + "grad_norm": 11.501195907592773, + "learning_rate": 3.5317307692307696e-05, + "loss": 0.2571, + "step": 10689 + }, + { + "epoch": 29.36813186813187, + "grad_norm": 6.569517135620117, + "learning_rate": 3.531593406593407e-05, + "loss": 0.0877, + "step": 10690 + }, + { + "epoch": 29.37087912087912, + "grad_norm": 11.862688064575195, + "learning_rate": 3.531456043956044e-05, + "loss": 0.1905, + "step": 10691 + }, + { + "epoch": 29.373626373626372, + "grad_norm": 14.407082557678223, + "learning_rate": 3.531318681318681e-05, + "loss": 0.4721, + "step": 10692 + }, + { + "epoch": 29.376373626373628, + "grad_norm": 9.807633399963379, + "learning_rate": 3.531181318681319e-05, + "loss": 0.1951, + "step": 10693 + }, + { + "epoch": 29.37912087912088, + "grad_norm": 7.752314567565918, + "learning_rate": 3.531043956043956e-05, + "loss": 0.1572, + "step": 10694 + }, + { + "epoch": 29.38186813186813, + "grad_norm": 13.969419479370117, + "learning_rate": 3.5309065934065936e-05, + "loss": 0.33, + "step": 10695 + }, + { + "epoch": 29.384615384615383, + "grad_norm": 22.520597457885742, + "learning_rate": 3.5307692307692306e-05, + "loss": 0.7409, + "step": 10696 + }, + { + "epoch": 29.38736263736264, + "grad_norm": 19.322717666625977, + "learning_rate": 3.530631868131868e-05, + "loss": 0.7176, + "step": 10697 + }, + { + "epoch": 29.39010989010989, + "grad_norm": 8.699832916259766, + "learning_rate": 3.530494505494505e-05, + "loss": 0.273, + "step": 10698 + }, + { + "epoch": 29.392857142857142, + "grad_norm": 11.711991310119629, + "learning_rate": 3.530357142857143e-05, + "loss": 0.2693, + "step": 10699 + }, + { + "epoch": 29.395604395604394, + "grad_norm": 11.860918998718262, + "learning_rate": 3.530219780219781e-05, + "loss": 0.2286, + "step": 10700 + }, + { + "epoch": 29.39835164835165, + "grad_norm": 13.978584289550781, + "learning_rate": 3.530082417582418e-05, + "loss": 0.4219, + "step": 10701 + }, + { + "epoch": 29.4010989010989, + "grad_norm": 18.589811325073242, + "learning_rate": 3.5299450549450554e-05, + "loss": 0.4035, + "step": 10702 + }, + { + "epoch": 29.403846153846153, + "grad_norm": 18.15064239501953, + "learning_rate": 3.5298076923076924e-05, + "loss": 0.4764, + "step": 10703 + }, + { + "epoch": 29.406593406593405, + "grad_norm": 2.425471544265747, + "learning_rate": 3.52967032967033e-05, + "loss": 0.0543, + "step": 10704 + }, + { + "epoch": 29.40934065934066, + "grad_norm": 10.684260368347168, + "learning_rate": 3.529532967032968e-05, + "loss": 0.2237, + "step": 10705 + }, + { + "epoch": 29.412087912087912, + "grad_norm": 7.095716953277588, + "learning_rate": 3.529395604395605e-05, + "loss": 0.1706, + "step": 10706 + }, + { + "epoch": 29.414835164835164, + "grad_norm": 18.481040954589844, + "learning_rate": 3.529258241758242e-05, + "loss": 0.52, + "step": 10707 + }, + { + "epoch": 29.417582417582416, + "grad_norm": 15.211716651916504, + "learning_rate": 3.5291208791208794e-05, + "loss": 0.4777, + "step": 10708 + }, + { + "epoch": 29.42032967032967, + "grad_norm": 13.773792266845703, + "learning_rate": 3.5289835164835164e-05, + "loss": 0.4701, + "step": 10709 + }, + { + "epoch": 29.423076923076923, + "grad_norm": 6.117178916931152, + "learning_rate": 3.528846153846154e-05, + "loss": 0.1234, + "step": 10710 + }, + { + "epoch": 29.425824175824175, + "grad_norm": 15.368403434753418, + "learning_rate": 3.528708791208791e-05, + "loss": 0.6296, + "step": 10711 + }, + { + "epoch": 29.428571428571427, + "grad_norm": 12.493841171264648, + "learning_rate": 3.528571428571429e-05, + "loss": 0.2996, + "step": 10712 + }, + { + "epoch": 29.431318681318682, + "grad_norm": 9.135194778442383, + "learning_rate": 3.528434065934066e-05, + "loss": 0.1285, + "step": 10713 + }, + { + "epoch": 29.434065934065934, + "grad_norm": 6.418280124664307, + "learning_rate": 3.5282967032967034e-05, + "loss": 0.1035, + "step": 10714 + }, + { + "epoch": 29.436813186813186, + "grad_norm": 16.231271743774414, + "learning_rate": 3.528159340659341e-05, + "loss": 0.408, + "step": 10715 + }, + { + "epoch": 29.439560439560438, + "grad_norm": 12.362152099609375, + "learning_rate": 3.528021978021978e-05, + "loss": 0.3497, + "step": 10716 + }, + { + "epoch": 29.442307692307693, + "grad_norm": 13.075590133666992, + "learning_rate": 3.527884615384616e-05, + "loss": 0.5345, + "step": 10717 + }, + { + "epoch": 29.445054945054945, + "grad_norm": 12.927835464477539, + "learning_rate": 3.527747252747253e-05, + "loss": 0.36, + "step": 10718 + }, + { + "epoch": 29.447802197802197, + "grad_norm": 10.49449348449707, + "learning_rate": 3.5276098901098905e-05, + "loss": 0.3559, + "step": 10719 + }, + { + "epoch": 29.45054945054945, + "grad_norm": 14.600639343261719, + "learning_rate": 3.527472527472528e-05, + "loss": 0.2495, + "step": 10720 + }, + { + "epoch": 29.453296703296704, + "grad_norm": 5.465023994445801, + "learning_rate": 3.527335164835165e-05, + "loss": 0.09, + "step": 10721 + }, + { + "epoch": 29.456043956043956, + "grad_norm": 17.707965850830078, + "learning_rate": 3.527197802197802e-05, + "loss": 0.5609, + "step": 10722 + }, + { + "epoch": 29.458791208791208, + "grad_norm": 6.111660957336426, + "learning_rate": 3.52706043956044e-05, + "loss": 0.128, + "step": 10723 + }, + { + "epoch": 29.46153846153846, + "grad_norm": 8.198175430297852, + "learning_rate": 3.526923076923077e-05, + "loss": 0.1726, + "step": 10724 + }, + { + "epoch": 29.464285714285715, + "grad_norm": 11.341503143310547, + "learning_rate": 3.5267857142857145e-05, + "loss": 0.3825, + "step": 10725 + }, + { + "epoch": 29.467032967032967, + "grad_norm": 8.188448905944824, + "learning_rate": 3.5266483516483515e-05, + "loss": 0.1723, + "step": 10726 + }, + { + "epoch": 29.46978021978022, + "grad_norm": 8.390528678894043, + "learning_rate": 3.526510989010989e-05, + "loss": 0.2335, + "step": 10727 + }, + { + "epoch": 29.47252747252747, + "grad_norm": 3.8720743656158447, + "learning_rate": 3.526373626373626e-05, + "loss": 0.0954, + "step": 10728 + }, + { + "epoch": 29.475274725274726, + "grad_norm": 14.669024467468262, + "learning_rate": 3.526236263736264e-05, + "loss": 0.6079, + "step": 10729 + }, + { + "epoch": 29.478021978021978, + "grad_norm": 16.07501983642578, + "learning_rate": 3.5260989010989016e-05, + "loss": 0.4053, + "step": 10730 + }, + { + "epoch": 29.48076923076923, + "grad_norm": 9.359905242919922, + "learning_rate": 3.5259615384615386e-05, + "loss": 0.1979, + "step": 10731 + }, + { + "epoch": 29.483516483516482, + "grad_norm": 10.705523490905762, + "learning_rate": 3.525824175824176e-05, + "loss": 0.3784, + "step": 10732 + }, + { + "epoch": 29.486263736263737, + "grad_norm": 9.674184799194336, + "learning_rate": 3.525686813186813e-05, + "loss": 0.4126, + "step": 10733 + }, + { + "epoch": 29.48901098901099, + "grad_norm": 20.451387405395508, + "learning_rate": 3.525549450549451e-05, + "loss": 0.6701, + "step": 10734 + }, + { + "epoch": 29.49175824175824, + "grad_norm": 14.600456237792969, + "learning_rate": 3.5254120879120886e-05, + "loss": 0.3379, + "step": 10735 + }, + { + "epoch": 29.494505494505496, + "grad_norm": 7.006515026092529, + "learning_rate": 3.5252747252747256e-05, + "loss": 0.1689, + "step": 10736 + }, + { + "epoch": 29.497252747252748, + "grad_norm": 18.29808235168457, + "learning_rate": 3.5251373626373626e-05, + "loss": 0.6224, + "step": 10737 + }, + { + "epoch": 29.5, + "grad_norm": 17.91287612915039, + "learning_rate": 3.525e-05, + "loss": 0.5147, + "step": 10738 + }, + { + "epoch": 29.502747252747252, + "grad_norm": 10.897430419921875, + "learning_rate": 3.524862637362637e-05, + "loss": 0.3758, + "step": 10739 + }, + { + "epoch": 29.505494505494504, + "grad_norm": 18.107053756713867, + "learning_rate": 3.524725274725275e-05, + "loss": 0.707, + "step": 10740 + }, + { + "epoch": 29.50824175824176, + "grad_norm": 4.510420322418213, + "learning_rate": 3.524587912087912e-05, + "loss": 0.1093, + "step": 10741 + }, + { + "epoch": 29.51098901098901, + "grad_norm": 16.776639938354492, + "learning_rate": 3.52445054945055e-05, + "loss": 0.488, + "step": 10742 + }, + { + "epoch": 29.513736263736263, + "grad_norm": 6.277314186096191, + "learning_rate": 3.524313186813187e-05, + "loss": 0.1123, + "step": 10743 + }, + { + "epoch": 29.516483516483518, + "grad_norm": 10.015104293823242, + "learning_rate": 3.5241758241758243e-05, + "loss": 0.1975, + "step": 10744 + }, + { + "epoch": 29.51923076923077, + "grad_norm": 6.182105541229248, + "learning_rate": 3.524038461538462e-05, + "loss": 0.1588, + "step": 10745 + }, + { + "epoch": 29.521978021978022, + "grad_norm": 11.212302207946777, + "learning_rate": 3.523901098901099e-05, + "loss": 0.484, + "step": 10746 + }, + { + "epoch": 29.524725274725274, + "grad_norm": 13.346944808959961, + "learning_rate": 3.523763736263737e-05, + "loss": 0.3457, + "step": 10747 + }, + { + "epoch": 29.52747252747253, + "grad_norm": 11.941611289978027, + "learning_rate": 3.523626373626374e-05, + "loss": 0.4887, + "step": 10748 + }, + { + "epoch": 29.53021978021978, + "grad_norm": 9.387246131896973, + "learning_rate": 3.5234890109890114e-05, + "loss": 0.4259, + "step": 10749 + }, + { + "epoch": 29.532967032967033, + "grad_norm": 10.87243366241455, + "learning_rate": 3.523351648351649e-05, + "loss": 0.3344, + "step": 10750 + }, + { + "epoch": 29.535714285714285, + "grad_norm": 24.627351760864258, + "learning_rate": 3.523214285714286e-05, + "loss": 0.6107, + "step": 10751 + }, + { + "epoch": 29.53846153846154, + "grad_norm": 23.467979431152344, + "learning_rate": 3.523076923076923e-05, + "loss": 0.8763, + "step": 10752 + }, + { + "epoch": 29.541208791208792, + "grad_norm": 11.542584419250488, + "learning_rate": 3.522939560439561e-05, + "loss": 0.3109, + "step": 10753 + }, + { + "epoch": 29.543956043956044, + "grad_norm": 11.718144416809082, + "learning_rate": 3.522802197802198e-05, + "loss": 0.2457, + "step": 10754 + }, + { + "epoch": 29.546703296703296, + "grad_norm": 7.0138020515441895, + "learning_rate": 3.5226648351648354e-05, + "loss": 0.1299, + "step": 10755 + }, + { + "epoch": 29.54945054945055, + "grad_norm": 19.337379455566406, + "learning_rate": 3.5225274725274724e-05, + "loss": 0.6813, + "step": 10756 + }, + { + "epoch": 29.552197802197803, + "grad_norm": 8.715737342834473, + "learning_rate": 3.52239010989011e-05, + "loss": 0.1689, + "step": 10757 + }, + { + "epoch": 29.554945054945055, + "grad_norm": 12.024962425231934, + "learning_rate": 3.522252747252747e-05, + "loss": 0.4024, + "step": 10758 + }, + { + "epoch": 29.557692307692307, + "grad_norm": 16.957931518554688, + "learning_rate": 3.522115384615385e-05, + "loss": 0.6807, + "step": 10759 + }, + { + "epoch": 29.560439560439562, + "grad_norm": 4.249305725097656, + "learning_rate": 3.5219780219780225e-05, + "loss": 0.0685, + "step": 10760 + }, + { + "epoch": 29.563186813186814, + "grad_norm": 14.910334587097168, + "learning_rate": 3.5218406593406595e-05, + "loss": 0.4455, + "step": 10761 + }, + { + "epoch": 29.565934065934066, + "grad_norm": 12.892979621887207, + "learning_rate": 3.521703296703297e-05, + "loss": 0.3271, + "step": 10762 + }, + { + "epoch": 29.568681318681318, + "grad_norm": 18.704448699951172, + "learning_rate": 3.521565934065934e-05, + "loss": 0.5894, + "step": 10763 + }, + { + "epoch": 29.571428571428573, + "grad_norm": 7.979905605316162, + "learning_rate": 3.521428571428572e-05, + "loss": 0.2698, + "step": 10764 + }, + { + "epoch": 29.574175824175825, + "grad_norm": 20.319581985473633, + "learning_rate": 3.5212912087912095e-05, + "loss": 0.4899, + "step": 10765 + }, + { + "epoch": 29.576923076923077, + "grad_norm": 12.102848052978516, + "learning_rate": 3.5211538461538465e-05, + "loss": 0.2864, + "step": 10766 + }, + { + "epoch": 29.57967032967033, + "grad_norm": 18.639793395996094, + "learning_rate": 3.5210164835164835e-05, + "loss": 0.4762, + "step": 10767 + }, + { + "epoch": 29.582417582417584, + "grad_norm": 8.104732513427734, + "learning_rate": 3.520879120879121e-05, + "loss": 0.3075, + "step": 10768 + }, + { + "epoch": 29.585164835164836, + "grad_norm": 15.43366813659668, + "learning_rate": 3.520741758241758e-05, + "loss": 0.608, + "step": 10769 + }, + { + "epoch": 29.587912087912088, + "grad_norm": 11.59858512878418, + "learning_rate": 3.520604395604396e-05, + "loss": 0.2788, + "step": 10770 + }, + { + "epoch": 29.59065934065934, + "grad_norm": 11.636454582214355, + "learning_rate": 3.520467032967033e-05, + "loss": 0.3488, + "step": 10771 + }, + { + "epoch": 29.593406593406595, + "grad_norm": 8.55263614654541, + "learning_rate": 3.5203296703296706e-05, + "loss": 0.3503, + "step": 10772 + }, + { + "epoch": 29.596153846153847, + "grad_norm": 13.09726619720459, + "learning_rate": 3.5201923076923076e-05, + "loss": 0.2344, + "step": 10773 + }, + { + "epoch": 29.5989010989011, + "grad_norm": 10.094452857971191, + "learning_rate": 3.520054945054945e-05, + "loss": 0.2466, + "step": 10774 + }, + { + "epoch": 29.60164835164835, + "grad_norm": 12.028865814208984, + "learning_rate": 3.519917582417583e-05, + "loss": 0.3093, + "step": 10775 + }, + { + "epoch": 29.604395604395606, + "grad_norm": 3.3225810527801514, + "learning_rate": 3.51978021978022e-05, + "loss": 0.0461, + "step": 10776 + }, + { + "epoch": 29.607142857142858, + "grad_norm": 17.774860382080078, + "learning_rate": 3.5196428571428576e-05, + "loss": 0.3626, + "step": 10777 + }, + { + "epoch": 29.60989010989011, + "grad_norm": 8.368439674377441, + "learning_rate": 3.5195054945054946e-05, + "loss": 0.188, + "step": 10778 + }, + { + "epoch": 29.61263736263736, + "grad_norm": 18.456897735595703, + "learning_rate": 3.519368131868132e-05, + "loss": 0.6157, + "step": 10779 + }, + { + "epoch": 29.615384615384617, + "grad_norm": 12.151803016662598, + "learning_rate": 3.51923076923077e-05, + "loss": 0.375, + "step": 10780 + }, + { + "epoch": 29.61813186813187, + "grad_norm": 15.154497146606445, + "learning_rate": 3.519093406593407e-05, + "loss": 0.6241, + "step": 10781 + }, + { + "epoch": 29.62087912087912, + "grad_norm": 10.651102066040039, + "learning_rate": 3.518956043956044e-05, + "loss": 0.3961, + "step": 10782 + }, + { + "epoch": 29.623626373626372, + "grad_norm": 8.968478202819824, + "learning_rate": 3.5188186813186816e-05, + "loss": 0.1959, + "step": 10783 + }, + { + "epoch": 29.626373626373628, + "grad_norm": 9.44990348815918, + "learning_rate": 3.5186813186813187e-05, + "loss": 0.2312, + "step": 10784 + }, + { + "epoch": 29.62912087912088, + "grad_norm": 13.928367614746094, + "learning_rate": 3.518543956043956e-05, + "loss": 0.6156, + "step": 10785 + }, + { + "epoch": 29.63186813186813, + "grad_norm": 13.0881986618042, + "learning_rate": 3.518406593406593e-05, + "loss": 0.57, + "step": 10786 + }, + { + "epoch": 29.634615384615383, + "grad_norm": 14.610142707824707, + "learning_rate": 3.518269230769231e-05, + "loss": 0.3485, + "step": 10787 + }, + { + "epoch": 29.63736263736264, + "grad_norm": 14.371976852416992, + "learning_rate": 3.518131868131868e-05, + "loss": 0.5606, + "step": 10788 + }, + { + "epoch": 29.64010989010989, + "grad_norm": 2.173356771469116, + "learning_rate": 3.517994505494506e-05, + "loss": 0.0413, + "step": 10789 + }, + { + "epoch": 29.642857142857142, + "grad_norm": 10.079916000366211, + "learning_rate": 3.5178571428571434e-05, + "loss": 0.1613, + "step": 10790 + }, + { + "epoch": 29.645604395604394, + "grad_norm": 12.06900405883789, + "learning_rate": 3.5177197802197804e-05, + "loss": 0.4981, + "step": 10791 + }, + { + "epoch": 29.64835164835165, + "grad_norm": 16.875436782836914, + "learning_rate": 3.517582417582418e-05, + "loss": 0.4355, + "step": 10792 + }, + { + "epoch": 29.6510989010989, + "grad_norm": 13.34266471862793, + "learning_rate": 3.517445054945055e-05, + "loss": 0.5209, + "step": 10793 + }, + { + "epoch": 29.653846153846153, + "grad_norm": 14.727327346801758, + "learning_rate": 3.517307692307693e-05, + "loss": 0.4323, + "step": 10794 + }, + { + "epoch": 29.656593406593405, + "grad_norm": 21.45436668395996, + "learning_rate": 3.5171703296703304e-05, + "loss": 0.6535, + "step": 10795 + }, + { + "epoch": 29.65934065934066, + "grad_norm": 7.172440052032471, + "learning_rate": 3.5170329670329674e-05, + "loss": 0.1244, + "step": 10796 + }, + { + "epoch": 29.662087912087912, + "grad_norm": 10.309618949890137, + "learning_rate": 3.5168956043956044e-05, + "loss": 0.3073, + "step": 10797 + }, + { + "epoch": 29.664835164835164, + "grad_norm": 12.601900100708008, + "learning_rate": 3.5167582417582414e-05, + "loss": 0.3029, + "step": 10798 + }, + { + "epoch": 29.667582417582416, + "grad_norm": 17.067481994628906, + "learning_rate": 3.516620879120879e-05, + "loss": 0.3739, + "step": 10799 + }, + { + "epoch": 29.67032967032967, + "grad_norm": 6.172388553619385, + "learning_rate": 3.516483516483517e-05, + "loss": 0.0947, + "step": 10800 + }, + { + "epoch": 29.673076923076923, + "grad_norm": 6.682220458984375, + "learning_rate": 3.516346153846154e-05, + "loss": 0.1797, + "step": 10801 + }, + { + "epoch": 29.675824175824175, + "grad_norm": 12.296648979187012, + "learning_rate": 3.5162087912087915e-05, + "loss": 0.2818, + "step": 10802 + }, + { + "epoch": 29.678571428571427, + "grad_norm": 22.446609497070312, + "learning_rate": 3.5160714285714285e-05, + "loss": 0.6666, + "step": 10803 + }, + { + "epoch": 29.681318681318682, + "grad_norm": 10.846141815185547, + "learning_rate": 3.515934065934066e-05, + "loss": 0.559, + "step": 10804 + }, + { + "epoch": 29.684065934065934, + "grad_norm": 15.946527481079102, + "learning_rate": 3.515796703296704e-05, + "loss": 0.3341, + "step": 10805 + }, + { + "epoch": 29.686813186813186, + "grad_norm": 6.092608451843262, + "learning_rate": 3.515659340659341e-05, + "loss": 0.1736, + "step": 10806 + }, + { + "epoch": 29.689560439560438, + "grad_norm": 15.262859344482422, + "learning_rate": 3.5155219780219785e-05, + "loss": 0.4007, + "step": 10807 + }, + { + "epoch": 29.692307692307693, + "grad_norm": 18.762971878051758, + "learning_rate": 3.5153846153846155e-05, + "loss": 0.4981, + "step": 10808 + }, + { + "epoch": 29.695054945054945, + "grad_norm": 17.723514556884766, + "learning_rate": 3.515247252747253e-05, + "loss": 0.7223, + "step": 10809 + }, + { + "epoch": 29.697802197802197, + "grad_norm": 14.611397743225098, + "learning_rate": 3.51510989010989e-05, + "loss": 0.2963, + "step": 10810 + }, + { + "epoch": 29.70054945054945, + "grad_norm": 10.205466270446777, + "learning_rate": 3.514972527472528e-05, + "loss": 0.2764, + "step": 10811 + }, + { + "epoch": 29.703296703296704, + "grad_norm": 20.90593910217285, + "learning_rate": 3.514835164835165e-05, + "loss": 0.5828, + "step": 10812 + }, + { + "epoch": 29.706043956043956, + "grad_norm": 7.813502311706543, + "learning_rate": 3.514697802197802e-05, + "loss": 0.2927, + "step": 10813 + }, + { + "epoch": 29.708791208791208, + "grad_norm": 14.505744934082031, + "learning_rate": 3.5145604395604395e-05, + "loss": 0.4026, + "step": 10814 + }, + { + "epoch": 29.71153846153846, + "grad_norm": 10.986089706420898, + "learning_rate": 3.5144230769230766e-05, + "loss": 0.3163, + "step": 10815 + }, + { + "epoch": 29.714285714285715, + "grad_norm": 16.684541702270508, + "learning_rate": 3.514285714285714e-05, + "loss": 0.2842, + "step": 10816 + }, + { + "epoch": 29.717032967032967, + "grad_norm": 13.305952072143555, + "learning_rate": 3.514148351648352e-05, + "loss": 0.4452, + "step": 10817 + }, + { + "epoch": 29.71978021978022, + "grad_norm": 21.67743682861328, + "learning_rate": 3.514010989010989e-05, + "loss": 0.5387, + "step": 10818 + }, + { + "epoch": 29.72252747252747, + "grad_norm": 10.147220611572266, + "learning_rate": 3.5138736263736266e-05, + "loss": 0.2555, + "step": 10819 + }, + { + "epoch": 29.725274725274726, + "grad_norm": 13.873034477233887, + "learning_rate": 3.5137362637362636e-05, + "loss": 0.3803, + "step": 10820 + }, + { + "epoch": 29.728021978021978, + "grad_norm": 11.784366607666016, + "learning_rate": 3.513598901098901e-05, + "loss": 0.1677, + "step": 10821 + }, + { + "epoch": 29.73076923076923, + "grad_norm": 13.734302520751953, + "learning_rate": 3.513461538461539e-05, + "loss": 0.4196, + "step": 10822 + }, + { + "epoch": 29.733516483516482, + "grad_norm": 8.89484691619873, + "learning_rate": 3.513324175824176e-05, + "loss": 0.2055, + "step": 10823 + }, + { + "epoch": 29.736263736263737, + "grad_norm": 14.527422904968262, + "learning_rate": 3.5131868131868136e-05, + "loss": 0.3265, + "step": 10824 + }, + { + "epoch": 29.73901098901099, + "grad_norm": 10.55474853515625, + "learning_rate": 3.5130494505494506e-05, + "loss": 0.244, + "step": 10825 + }, + { + "epoch": 29.74175824175824, + "grad_norm": 9.772065162658691, + "learning_rate": 3.512912087912088e-05, + "loss": 0.2609, + "step": 10826 + }, + { + "epoch": 29.744505494505496, + "grad_norm": 12.362428665161133, + "learning_rate": 3.512774725274725e-05, + "loss": 0.5436, + "step": 10827 + }, + { + "epoch": 29.747252747252748, + "grad_norm": 11.38137149810791, + "learning_rate": 3.512637362637362e-05, + "loss": 0.282, + "step": 10828 + }, + { + "epoch": 29.75, + "grad_norm": 11.744131088256836, + "learning_rate": 3.5125e-05, + "loss": 0.2332, + "step": 10829 + }, + { + "epoch": 29.752747252747252, + "grad_norm": 7.543670654296875, + "learning_rate": 3.512362637362637e-05, + "loss": 0.1914, + "step": 10830 + }, + { + "epoch": 29.755494505494504, + "grad_norm": 21.35871124267578, + "learning_rate": 3.512225274725275e-05, + "loss": 0.798, + "step": 10831 + }, + { + "epoch": 29.75824175824176, + "grad_norm": 19.926191329956055, + "learning_rate": 3.5120879120879124e-05, + "loss": 0.6044, + "step": 10832 + }, + { + "epoch": 29.76098901098901, + "grad_norm": 4.800719738006592, + "learning_rate": 3.5119505494505494e-05, + "loss": 0.1112, + "step": 10833 + }, + { + "epoch": 29.763736263736263, + "grad_norm": 16.10402488708496, + "learning_rate": 3.511813186813187e-05, + "loss": 0.3751, + "step": 10834 + }, + { + "epoch": 29.766483516483518, + "grad_norm": 13.113394737243652, + "learning_rate": 3.511675824175824e-05, + "loss": 0.2454, + "step": 10835 + }, + { + "epoch": 29.76923076923077, + "grad_norm": 15.997601509094238, + "learning_rate": 3.511538461538462e-05, + "loss": 0.5245, + "step": 10836 + }, + { + "epoch": 29.771978021978022, + "grad_norm": 7.4840545654296875, + "learning_rate": 3.5114010989010994e-05, + "loss": 0.217, + "step": 10837 + }, + { + "epoch": 29.774725274725274, + "grad_norm": 4.897477626800537, + "learning_rate": 3.5112637362637364e-05, + "loss": 0.1124, + "step": 10838 + }, + { + "epoch": 29.77747252747253, + "grad_norm": 11.444011688232422, + "learning_rate": 3.511126373626374e-05, + "loss": 0.3156, + "step": 10839 + }, + { + "epoch": 29.78021978021978, + "grad_norm": 18.71800422668457, + "learning_rate": 3.510989010989011e-05, + "loss": 0.5513, + "step": 10840 + }, + { + "epoch": 29.782967032967033, + "grad_norm": 19.20260238647461, + "learning_rate": 3.510851648351649e-05, + "loss": 0.2643, + "step": 10841 + }, + { + "epoch": 29.785714285714285, + "grad_norm": 14.058330535888672, + "learning_rate": 3.510714285714286e-05, + "loss": 0.2874, + "step": 10842 + }, + { + "epoch": 29.78846153846154, + "grad_norm": 4.101003170013428, + "learning_rate": 3.510576923076923e-05, + "loss": 0.1369, + "step": 10843 + }, + { + "epoch": 29.791208791208792, + "grad_norm": 15.659346580505371, + "learning_rate": 3.5104395604395604e-05, + "loss": 0.4272, + "step": 10844 + }, + { + "epoch": 29.793956043956044, + "grad_norm": 10.232623100280762, + "learning_rate": 3.5103021978021974e-05, + "loss": 0.2976, + "step": 10845 + }, + { + "epoch": 29.796703296703296, + "grad_norm": 13.945834159851074, + "learning_rate": 3.510164835164835e-05, + "loss": 0.3053, + "step": 10846 + }, + { + "epoch": 29.79945054945055, + "grad_norm": 15.962982177734375, + "learning_rate": 3.510027472527473e-05, + "loss": 0.4764, + "step": 10847 + }, + { + "epoch": 29.802197802197803, + "grad_norm": 11.827461242675781, + "learning_rate": 3.50989010989011e-05, + "loss": 0.1934, + "step": 10848 + }, + { + "epoch": 29.804945054945055, + "grad_norm": 5.0607075691223145, + "learning_rate": 3.5097527472527475e-05, + "loss": 0.0751, + "step": 10849 + }, + { + "epoch": 29.807692307692307, + "grad_norm": 15.817804336547852, + "learning_rate": 3.5096153846153845e-05, + "loss": 0.537, + "step": 10850 + }, + { + "epoch": 29.810439560439562, + "grad_norm": 15.757708549499512, + "learning_rate": 3.509478021978022e-05, + "loss": 0.3494, + "step": 10851 + }, + { + "epoch": 29.813186813186814, + "grad_norm": 14.789507865905762, + "learning_rate": 3.50934065934066e-05, + "loss": 0.5268, + "step": 10852 + }, + { + "epoch": 29.815934065934066, + "grad_norm": 19.317026138305664, + "learning_rate": 3.509203296703297e-05, + "loss": 0.7797, + "step": 10853 + }, + { + "epoch": 29.818681318681318, + "grad_norm": 11.402050971984863, + "learning_rate": 3.5090659340659345e-05, + "loss": 0.2802, + "step": 10854 + }, + { + "epoch": 29.821428571428573, + "grad_norm": 11.677906036376953, + "learning_rate": 3.5089285714285715e-05, + "loss": 0.3974, + "step": 10855 + }, + { + "epoch": 29.824175824175825, + "grad_norm": 16.756427764892578, + "learning_rate": 3.508791208791209e-05, + "loss": 0.3797, + "step": 10856 + }, + { + "epoch": 29.826923076923077, + "grad_norm": 14.646648406982422, + "learning_rate": 3.508653846153846e-05, + "loss": 0.3919, + "step": 10857 + }, + { + "epoch": 29.82967032967033, + "grad_norm": 13.594890594482422, + "learning_rate": 3.508516483516483e-05, + "loss": 0.3979, + "step": 10858 + }, + { + "epoch": 29.832417582417584, + "grad_norm": 15.72681713104248, + "learning_rate": 3.508379120879121e-05, + "loss": 0.3873, + "step": 10859 + }, + { + "epoch": 29.835164835164836, + "grad_norm": 13.15699577331543, + "learning_rate": 3.508241758241758e-05, + "loss": 0.3282, + "step": 10860 + }, + { + "epoch": 29.837912087912088, + "grad_norm": 14.709176063537598, + "learning_rate": 3.5081043956043956e-05, + "loss": 0.3486, + "step": 10861 + }, + { + "epoch": 29.84065934065934, + "grad_norm": 8.905892372131348, + "learning_rate": 3.507967032967033e-05, + "loss": 0.1713, + "step": 10862 + }, + { + "epoch": 29.843406593406595, + "grad_norm": 7.984985828399658, + "learning_rate": 3.50782967032967e-05, + "loss": 0.1335, + "step": 10863 + }, + { + "epoch": 29.846153846153847, + "grad_norm": 13.90587043762207, + "learning_rate": 3.507692307692308e-05, + "loss": 0.4228, + "step": 10864 + }, + { + "epoch": 29.8489010989011, + "grad_norm": 8.727433204650879, + "learning_rate": 3.507554945054945e-05, + "loss": 0.2736, + "step": 10865 + }, + { + "epoch": 29.85164835164835, + "grad_norm": 12.808616638183594, + "learning_rate": 3.5074175824175826e-05, + "loss": 0.5446, + "step": 10866 + }, + { + "epoch": 29.854395604395606, + "grad_norm": 3.3204774856567383, + "learning_rate": 3.50728021978022e-05, + "loss": 0.0609, + "step": 10867 + }, + { + "epoch": 29.857142857142858, + "grad_norm": 11.570809364318848, + "learning_rate": 3.507142857142857e-05, + "loss": 0.2623, + "step": 10868 + }, + { + "epoch": 29.85989010989011, + "grad_norm": 14.206377983093262, + "learning_rate": 3.507005494505495e-05, + "loss": 0.3436, + "step": 10869 + }, + { + "epoch": 29.86263736263736, + "grad_norm": 12.252996444702148, + "learning_rate": 3.506868131868132e-05, + "loss": 0.3561, + "step": 10870 + }, + { + "epoch": 29.865384615384617, + "grad_norm": 20.445180892944336, + "learning_rate": 3.5067307692307697e-05, + "loss": 0.974, + "step": 10871 + }, + { + "epoch": 29.86813186813187, + "grad_norm": 6.513149261474609, + "learning_rate": 3.506593406593407e-05, + "loss": 0.1421, + "step": 10872 + }, + { + "epoch": 29.87087912087912, + "grad_norm": 9.854540824890137, + "learning_rate": 3.506456043956044e-05, + "loss": 0.3119, + "step": 10873 + }, + { + "epoch": 29.873626373626372, + "grad_norm": 16.12850570678711, + "learning_rate": 3.5063186813186813e-05, + "loss": 0.4154, + "step": 10874 + }, + { + "epoch": 29.876373626373628, + "grad_norm": 9.960714340209961, + "learning_rate": 3.5061813186813183e-05, + "loss": 0.2094, + "step": 10875 + }, + { + "epoch": 29.87912087912088, + "grad_norm": 14.716809272766113, + "learning_rate": 3.506043956043956e-05, + "loss": 0.3093, + "step": 10876 + }, + { + "epoch": 29.88186813186813, + "grad_norm": 18.62384033203125, + "learning_rate": 3.505906593406594e-05, + "loss": 0.4293, + "step": 10877 + }, + { + "epoch": 29.884615384615383, + "grad_norm": 11.105476379394531, + "learning_rate": 3.505769230769231e-05, + "loss": 0.2951, + "step": 10878 + }, + { + "epoch": 29.88736263736264, + "grad_norm": 18.67582130432129, + "learning_rate": 3.5056318681318684e-05, + "loss": 0.444, + "step": 10879 + }, + { + "epoch": 29.89010989010989, + "grad_norm": 12.826360702514648, + "learning_rate": 3.5054945054945054e-05, + "loss": 0.246, + "step": 10880 + }, + { + "epoch": 29.892857142857142, + "grad_norm": 19.561166763305664, + "learning_rate": 3.505357142857143e-05, + "loss": 0.4958, + "step": 10881 + }, + { + "epoch": 29.895604395604394, + "grad_norm": 4.381964683532715, + "learning_rate": 3.505219780219781e-05, + "loss": 0.098, + "step": 10882 + }, + { + "epoch": 29.89835164835165, + "grad_norm": 3.987844944000244, + "learning_rate": 3.505082417582418e-05, + "loss": 0.0716, + "step": 10883 + }, + { + "epoch": 29.9010989010989, + "grad_norm": 3.872309923171997, + "learning_rate": 3.5049450549450554e-05, + "loss": 0.0604, + "step": 10884 + }, + { + "epoch": 29.903846153846153, + "grad_norm": 19.576438903808594, + "learning_rate": 3.5048076923076924e-05, + "loss": 0.8725, + "step": 10885 + }, + { + "epoch": 29.906593406593405, + "grad_norm": 14.40080738067627, + "learning_rate": 3.50467032967033e-05, + "loss": 0.3845, + "step": 10886 + }, + { + "epoch": 29.90934065934066, + "grad_norm": 17.000120162963867, + "learning_rate": 3.504532967032967e-05, + "loss": 0.7092, + "step": 10887 + }, + { + "epoch": 29.912087912087912, + "grad_norm": 8.024467468261719, + "learning_rate": 3.504395604395604e-05, + "loss": 0.1835, + "step": 10888 + }, + { + "epoch": 29.914835164835164, + "grad_norm": 11.496853828430176, + "learning_rate": 3.504258241758242e-05, + "loss": 0.2769, + "step": 10889 + }, + { + "epoch": 29.917582417582416, + "grad_norm": 13.242227554321289, + "learning_rate": 3.504120879120879e-05, + "loss": 0.2104, + "step": 10890 + }, + { + "epoch": 29.92032967032967, + "grad_norm": 18.399995803833008, + "learning_rate": 3.5039835164835165e-05, + "loss": 0.6698, + "step": 10891 + }, + { + "epoch": 29.923076923076923, + "grad_norm": 11.380895614624023, + "learning_rate": 3.503846153846154e-05, + "loss": 0.4436, + "step": 10892 + }, + { + "epoch": 29.925824175824175, + "grad_norm": 2.003525495529175, + "learning_rate": 3.503708791208791e-05, + "loss": 0.0439, + "step": 10893 + }, + { + "epoch": 29.928571428571427, + "grad_norm": 16.157556533813477, + "learning_rate": 3.503571428571429e-05, + "loss": 0.3466, + "step": 10894 + }, + { + "epoch": 29.931318681318682, + "grad_norm": 10.876185417175293, + "learning_rate": 3.503434065934066e-05, + "loss": 0.3109, + "step": 10895 + }, + { + "epoch": 29.934065934065934, + "grad_norm": 17.554393768310547, + "learning_rate": 3.5032967032967035e-05, + "loss": 0.6126, + "step": 10896 + }, + { + "epoch": 29.936813186813186, + "grad_norm": 5.181750297546387, + "learning_rate": 3.503159340659341e-05, + "loss": 0.0948, + "step": 10897 + }, + { + "epoch": 29.939560439560438, + "grad_norm": 7.153624534606934, + "learning_rate": 3.503021978021978e-05, + "loss": 0.1424, + "step": 10898 + }, + { + "epoch": 29.942307692307693, + "grad_norm": 13.93542766571045, + "learning_rate": 3.502884615384616e-05, + "loss": 0.2975, + "step": 10899 + }, + { + "epoch": 29.945054945054945, + "grad_norm": 7.43884801864624, + "learning_rate": 3.502747252747253e-05, + "loss": 0.2438, + "step": 10900 + }, + { + "epoch": 29.947802197802197, + "grad_norm": 11.603653907775879, + "learning_rate": 3.5026098901098906e-05, + "loss": 0.4481, + "step": 10901 + }, + { + "epoch": 29.95054945054945, + "grad_norm": 13.55140495300293, + "learning_rate": 3.5024725274725276e-05, + "loss": 0.3297, + "step": 10902 + }, + { + "epoch": 29.953296703296704, + "grad_norm": 16.8746280670166, + "learning_rate": 3.5023351648351646e-05, + "loss": 0.4661, + "step": 10903 + }, + { + "epoch": 29.956043956043956, + "grad_norm": 12.855469703674316, + "learning_rate": 3.502197802197802e-05, + "loss": 0.4565, + "step": 10904 + }, + { + "epoch": 29.958791208791208, + "grad_norm": 5.314962863922119, + "learning_rate": 3.502060439560439e-05, + "loss": 0.1183, + "step": 10905 + }, + { + "epoch": 29.96153846153846, + "grad_norm": 13.9564790725708, + "learning_rate": 3.501923076923077e-05, + "loss": 0.305, + "step": 10906 + }, + { + "epoch": 29.964285714285715, + "grad_norm": 9.425177574157715, + "learning_rate": 3.5017857142857146e-05, + "loss": 0.1698, + "step": 10907 + }, + { + "epoch": 29.967032967032967, + "grad_norm": 7.553582191467285, + "learning_rate": 3.5016483516483516e-05, + "loss": 0.1989, + "step": 10908 + }, + { + "epoch": 29.96978021978022, + "grad_norm": 14.06213092803955, + "learning_rate": 3.501510989010989e-05, + "loss": 0.3618, + "step": 10909 + }, + { + "epoch": 29.97252747252747, + "grad_norm": 9.128642082214355, + "learning_rate": 3.501373626373626e-05, + "loss": 0.1669, + "step": 10910 + }, + { + "epoch": 29.975274725274726, + "grad_norm": 12.287586212158203, + "learning_rate": 3.501236263736264e-05, + "loss": 0.248, + "step": 10911 + }, + { + "epoch": 29.978021978021978, + "grad_norm": 9.862515449523926, + "learning_rate": 3.5010989010989016e-05, + "loss": 0.1797, + "step": 10912 + }, + { + "epoch": 29.98076923076923, + "grad_norm": 10.583330154418945, + "learning_rate": 3.5009615384615386e-05, + "loss": 0.3213, + "step": 10913 + }, + { + "epoch": 29.983516483516482, + "grad_norm": 14.264408111572266, + "learning_rate": 3.500824175824176e-05, + "loss": 0.5587, + "step": 10914 + }, + { + "epoch": 29.986263736263737, + "grad_norm": 18.22830581665039, + "learning_rate": 3.500686813186813e-05, + "loss": 0.4962, + "step": 10915 + }, + { + "epoch": 29.98901098901099, + "grad_norm": 21.266815185546875, + "learning_rate": 3.500549450549451e-05, + "loss": 0.5907, + "step": 10916 + }, + { + "epoch": 29.99175824175824, + "grad_norm": 5.639019966125488, + "learning_rate": 3.500412087912088e-05, + "loss": 0.1071, + "step": 10917 + }, + { + "epoch": 29.994505494505496, + "grad_norm": 19.42885971069336, + "learning_rate": 3.500274725274725e-05, + "loss": 0.3268, + "step": 10918 + }, + { + "epoch": 29.997252747252748, + "grad_norm": 4.948127746582031, + "learning_rate": 3.500137362637363e-05, + "loss": 0.0788, + "step": 10919 + }, + { + "epoch": 30.0, + "grad_norm": 20.96906089782715, + "learning_rate": 3.5e-05, + "loss": 0.2653, + "step": 10920 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.849862258953168, + "eval_f1": 0.8497488059574403, + "eval_f1_DuraRiadoRio_64x64": 0.8313725490196079, + "eval_f1_Mole_64x64": 0.8788927335640139, + "eval_f1_Quebrado_64x64": 0.8666666666666667, + "eval_f1_RiadoRio_64x64": 0.7718120805369127, + "eval_f1_RioFechado_64x64": 0.9, + "eval_loss": 0.6579229235649109, + "eval_precision": 0.8600697878718258, + "eval_precision_DuraRiadoRio_64x64": 0.954954954954955, + "eval_precision_Mole_64x64": 0.8758620689655172, + "eval_precision_Quebrado_64x64": 0.7688172043010753, + "eval_precision_RiadoRio_64x64": 0.7876712328767124, + "eval_precision_RioFechado_64x64": 0.9130434782608695, + "eval_recall": 0.8510028004283008, + "eval_recall_DuraRiadoRio_64x64": 0.7361111111111112, + "eval_recall_Mole_64x64": 0.8819444444444444, + "eval_recall_Quebrado_64x64": 0.9930555555555556, + "eval_recall_RiadoRio_64x64": 0.756578947368421, + "eval_recall_RioFechado_64x64": 0.8873239436619719, + "eval_runtime": 1.7665, + "eval_samples_per_second": 410.979, + "eval_steps_per_second": 26.04, + "step": 10920 + }, + { + "epoch": 30.002747252747252, + "grad_norm": 12.505105018615723, + "learning_rate": 3.4998626373626374e-05, + "loss": 0.2156, + "step": 10921 + }, + { + "epoch": 30.005494505494507, + "grad_norm": 10.448545455932617, + "learning_rate": 3.499725274725275e-05, + "loss": 0.345, + "step": 10922 + }, + { + "epoch": 30.00824175824176, + "grad_norm": 7.210360527038574, + "learning_rate": 3.499587912087912e-05, + "loss": 0.1413, + "step": 10923 + }, + { + "epoch": 30.01098901098901, + "grad_norm": 3.5572328567504883, + "learning_rate": 3.49945054945055e-05, + "loss": 0.0839, + "step": 10924 + }, + { + "epoch": 30.013736263736263, + "grad_norm": 11.571868896484375, + "learning_rate": 3.499313186813187e-05, + "loss": 0.2839, + "step": 10925 + }, + { + "epoch": 30.016483516483518, + "grad_norm": 3.48010516166687, + "learning_rate": 3.4991758241758244e-05, + "loss": 0.0801, + "step": 10926 + }, + { + "epoch": 30.01923076923077, + "grad_norm": 17.917070388793945, + "learning_rate": 3.499038461538462e-05, + "loss": 0.4924, + "step": 10927 + }, + { + "epoch": 30.021978021978022, + "grad_norm": 20.733346939086914, + "learning_rate": 3.498901098901099e-05, + "loss": 0.6826, + "step": 10928 + }, + { + "epoch": 30.024725274725274, + "grad_norm": 13.277337074279785, + "learning_rate": 3.498763736263737e-05, + "loss": 0.4677, + "step": 10929 + }, + { + "epoch": 30.02747252747253, + "grad_norm": 10.545591354370117, + "learning_rate": 3.498626373626374e-05, + "loss": 0.2463, + "step": 10930 + }, + { + "epoch": 30.03021978021978, + "grad_norm": 22.648357391357422, + "learning_rate": 3.4984890109890115e-05, + "loss": 0.738, + "step": 10931 + }, + { + "epoch": 30.032967032967033, + "grad_norm": 17.042503356933594, + "learning_rate": 3.4983516483516485e-05, + "loss": 0.5333, + "step": 10932 + }, + { + "epoch": 30.035714285714285, + "grad_norm": 12.384988784790039, + "learning_rate": 3.4982142857142855e-05, + "loss": 0.3045, + "step": 10933 + }, + { + "epoch": 30.03846153846154, + "grad_norm": 15.984342575073242, + "learning_rate": 3.498076923076923e-05, + "loss": 0.3384, + "step": 10934 + }, + { + "epoch": 30.041208791208792, + "grad_norm": 13.6542387008667, + "learning_rate": 3.49793956043956e-05, + "loss": 0.5388, + "step": 10935 + }, + { + "epoch": 30.043956043956044, + "grad_norm": 13.35523509979248, + "learning_rate": 3.497802197802198e-05, + "loss": 0.329, + "step": 10936 + }, + { + "epoch": 30.046703296703296, + "grad_norm": 15.389416694641113, + "learning_rate": 3.4976648351648355e-05, + "loss": 0.6614, + "step": 10937 + }, + { + "epoch": 30.04945054945055, + "grad_norm": 13.347268104553223, + "learning_rate": 3.4975274725274725e-05, + "loss": 0.3617, + "step": 10938 + }, + { + "epoch": 30.052197802197803, + "grad_norm": 9.09624195098877, + "learning_rate": 3.49739010989011e-05, + "loss": 0.1641, + "step": 10939 + }, + { + "epoch": 30.054945054945055, + "grad_norm": 15.308429718017578, + "learning_rate": 3.497252747252747e-05, + "loss": 0.5154, + "step": 10940 + }, + { + "epoch": 30.057692307692307, + "grad_norm": 19.5158634185791, + "learning_rate": 3.497115384615385e-05, + "loss": 0.8425, + "step": 10941 + }, + { + "epoch": 30.060439560439562, + "grad_norm": 12.095861434936523, + "learning_rate": 3.4969780219780225e-05, + "loss": 0.2782, + "step": 10942 + }, + { + "epoch": 30.063186813186814, + "grad_norm": 10.378972053527832, + "learning_rate": 3.4968406593406595e-05, + "loss": 0.1908, + "step": 10943 + }, + { + "epoch": 30.065934065934066, + "grad_norm": 2.735468626022339, + "learning_rate": 3.496703296703297e-05, + "loss": 0.0595, + "step": 10944 + }, + { + "epoch": 30.068681318681318, + "grad_norm": 5.777840614318848, + "learning_rate": 3.496565934065934e-05, + "loss": 0.2634, + "step": 10945 + }, + { + "epoch": 30.071428571428573, + "grad_norm": 15.293113708496094, + "learning_rate": 3.496428571428572e-05, + "loss": 0.6395, + "step": 10946 + }, + { + "epoch": 30.074175824175825, + "grad_norm": 5.62087345123291, + "learning_rate": 3.496291208791209e-05, + "loss": 0.0785, + "step": 10947 + }, + { + "epoch": 30.076923076923077, + "grad_norm": 9.168070793151855, + "learning_rate": 3.496153846153846e-05, + "loss": 0.1976, + "step": 10948 + }, + { + "epoch": 30.07967032967033, + "grad_norm": 6.600368499755859, + "learning_rate": 3.4960164835164836e-05, + "loss": 0.1657, + "step": 10949 + }, + { + "epoch": 30.082417582417584, + "grad_norm": 8.893754959106445, + "learning_rate": 3.4958791208791206e-05, + "loss": 0.1852, + "step": 10950 + }, + { + "epoch": 30.085164835164836, + "grad_norm": 17.264623641967773, + "learning_rate": 3.495741758241758e-05, + "loss": 0.4812, + "step": 10951 + }, + { + "epoch": 30.087912087912088, + "grad_norm": 18.246213912963867, + "learning_rate": 3.495604395604396e-05, + "loss": 0.5812, + "step": 10952 + }, + { + "epoch": 30.09065934065934, + "grad_norm": 16.930984497070312, + "learning_rate": 3.495467032967033e-05, + "loss": 0.564, + "step": 10953 + }, + { + "epoch": 30.093406593406595, + "grad_norm": 17.056550979614258, + "learning_rate": 3.4953296703296706e-05, + "loss": 0.5384, + "step": 10954 + }, + { + "epoch": 30.096153846153847, + "grad_norm": 13.03264331817627, + "learning_rate": 3.4951923076923076e-05, + "loss": 0.2675, + "step": 10955 + }, + { + "epoch": 30.0989010989011, + "grad_norm": 11.078925132751465, + "learning_rate": 3.495054945054945e-05, + "loss": 0.2078, + "step": 10956 + }, + { + "epoch": 30.10164835164835, + "grad_norm": 10.285768508911133, + "learning_rate": 3.494917582417583e-05, + "loss": 0.6236, + "step": 10957 + }, + { + "epoch": 30.104395604395606, + "grad_norm": 13.305620193481445, + "learning_rate": 3.49478021978022e-05, + "loss": 0.4097, + "step": 10958 + }, + { + "epoch": 30.107142857142858, + "grad_norm": 9.607389450073242, + "learning_rate": 3.494642857142858e-05, + "loss": 0.3193, + "step": 10959 + }, + { + "epoch": 30.10989010989011, + "grad_norm": 14.2442626953125, + "learning_rate": 3.494505494505495e-05, + "loss": 0.4486, + "step": 10960 + }, + { + "epoch": 30.11263736263736, + "grad_norm": 13.083149909973145, + "learning_rate": 3.4943681318681324e-05, + "loss": 0.4455, + "step": 10961 + }, + { + "epoch": 30.115384615384617, + "grad_norm": 15.446331024169922, + "learning_rate": 3.4942307692307694e-05, + "loss": 0.5831, + "step": 10962 + }, + { + "epoch": 30.11813186813187, + "grad_norm": 35.240848541259766, + "learning_rate": 3.4940934065934064e-05, + "loss": 1.6734, + "step": 10963 + }, + { + "epoch": 30.12087912087912, + "grad_norm": 7.177986145019531, + "learning_rate": 3.493956043956044e-05, + "loss": 0.267, + "step": 10964 + }, + { + "epoch": 30.123626373626372, + "grad_norm": 9.755541801452637, + "learning_rate": 3.493818681318681e-05, + "loss": 0.3403, + "step": 10965 + }, + { + "epoch": 30.126373626373628, + "grad_norm": 13.63244342803955, + "learning_rate": 3.493681318681319e-05, + "loss": 0.364, + "step": 10966 + }, + { + "epoch": 30.12912087912088, + "grad_norm": 0.8877682685852051, + "learning_rate": 3.4935439560439564e-05, + "loss": 0.0171, + "step": 10967 + }, + { + "epoch": 30.13186813186813, + "grad_norm": 11.7391357421875, + "learning_rate": 3.4934065934065934e-05, + "loss": 0.3335, + "step": 10968 + }, + { + "epoch": 30.134615384615383, + "grad_norm": 10.054487228393555, + "learning_rate": 3.493269230769231e-05, + "loss": 0.2847, + "step": 10969 + }, + { + "epoch": 30.13736263736264, + "grad_norm": 15.644569396972656, + "learning_rate": 3.493131868131868e-05, + "loss": 0.4307, + "step": 10970 + }, + { + "epoch": 30.14010989010989, + "grad_norm": 9.631339073181152, + "learning_rate": 3.492994505494506e-05, + "loss": 0.2855, + "step": 10971 + }, + { + "epoch": 30.142857142857142, + "grad_norm": 11.307550430297852, + "learning_rate": 3.4928571428571434e-05, + "loss": 0.2391, + "step": 10972 + }, + { + "epoch": 30.145604395604394, + "grad_norm": 7.134273529052734, + "learning_rate": 3.4927197802197804e-05, + "loss": 0.1663, + "step": 10973 + }, + { + "epoch": 30.14835164835165, + "grad_norm": 9.537833213806152, + "learning_rate": 3.492582417582418e-05, + "loss": 0.2079, + "step": 10974 + }, + { + "epoch": 30.1510989010989, + "grad_norm": 18.46743392944336, + "learning_rate": 3.492445054945055e-05, + "loss": 0.5593, + "step": 10975 + }, + { + "epoch": 30.153846153846153, + "grad_norm": 8.600448608398438, + "learning_rate": 3.492307692307693e-05, + "loss": 0.258, + "step": 10976 + }, + { + "epoch": 30.156593406593405, + "grad_norm": 11.332592964172363, + "learning_rate": 3.49217032967033e-05, + "loss": 0.2843, + "step": 10977 + }, + { + "epoch": 30.15934065934066, + "grad_norm": 11.39692497253418, + "learning_rate": 3.492032967032967e-05, + "loss": 0.1932, + "step": 10978 + }, + { + "epoch": 30.162087912087912, + "grad_norm": 11.627215385437012, + "learning_rate": 3.4918956043956045e-05, + "loss": 0.2068, + "step": 10979 + }, + { + "epoch": 30.164835164835164, + "grad_norm": 9.390952110290527, + "learning_rate": 3.4917582417582415e-05, + "loss": 0.3729, + "step": 10980 + }, + { + "epoch": 30.167582417582416, + "grad_norm": 9.767297744750977, + "learning_rate": 3.491620879120879e-05, + "loss": 0.2909, + "step": 10981 + }, + { + "epoch": 30.17032967032967, + "grad_norm": 12.151765823364258, + "learning_rate": 3.491483516483517e-05, + "loss": 0.2865, + "step": 10982 + }, + { + "epoch": 30.173076923076923, + "grad_norm": 10.235280990600586, + "learning_rate": 3.491346153846154e-05, + "loss": 0.2984, + "step": 10983 + }, + { + "epoch": 30.175824175824175, + "grad_norm": 15.073369026184082, + "learning_rate": 3.4912087912087915e-05, + "loss": 0.2659, + "step": 10984 + }, + { + "epoch": 30.178571428571427, + "grad_norm": 18.786998748779297, + "learning_rate": 3.4910714285714285e-05, + "loss": 0.8322, + "step": 10985 + }, + { + "epoch": 30.181318681318682, + "grad_norm": 11.568017959594727, + "learning_rate": 3.490934065934066e-05, + "loss": 0.2385, + "step": 10986 + }, + { + "epoch": 30.184065934065934, + "grad_norm": 12.520367622375488, + "learning_rate": 3.490796703296704e-05, + "loss": 0.3277, + "step": 10987 + }, + { + "epoch": 30.186813186813186, + "grad_norm": 6.453204154968262, + "learning_rate": 3.490659340659341e-05, + "loss": 0.1732, + "step": 10988 + }, + { + "epoch": 30.189560439560438, + "grad_norm": 19.843584060668945, + "learning_rate": 3.4905219780219786e-05, + "loss": 0.7867, + "step": 10989 + }, + { + "epoch": 30.192307692307693, + "grad_norm": 17.42611312866211, + "learning_rate": 3.4903846153846156e-05, + "loss": 0.5106, + "step": 10990 + }, + { + "epoch": 30.195054945054945, + "grad_norm": 14.37937068939209, + "learning_rate": 3.490247252747253e-05, + "loss": 0.3796, + "step": 10991 + }, + { + "epoch": 30.197802197802197, + "grad_norm": 6.89988899230957, + "learning_rate": 3.49010989010989e-05, + "loss": 0.1461, + "step": 10992 + }, + { + "epoch": 30.20054945054945, + "grad_norm": 8.924034118652344, + "learning_rate": 3.489972527472527e-05, + "loss": 0.185, + "step": 10993 + }, + { + "epoch": 30.203296703296704, + "grad_norm": 20.25672149658203, + "learning_rate": 3.489835164835165e-05, + "loss": 0.6498, + "step": 10994 + }, + { + "epoch": 30.206043956043956, + "grad_norm": 7.583448886871338, + "learning_rate": 3.489697802197802e-05, + "loss": 0.2152, + "step": 10995 + }, + { + "epoch": 30.208791208791208, + "grad_norm": 12.01803970336914, + "learning_rate": 3.4895604395604396e-05, + "loss": 0.2327, + "step": 10996 + }, + { + "epoch": 30.21153846153846, + "grad_norm": 13.771952629089355, + "learning_rate": 3.489423076923077e-05, + "loss": 0.2569, + "step": 10997 + }, + { + "epoch": 30.214285714285715, + "grad_norm": 24.762948989868164, + "learning_rate": 3.489285714285714e-05, + "loss": 0.9929, + "step": 10998 + }, + { + "epoch": 30.217032967032967, + "grad_norm": 13.680848121643066, + "learning_rate": 3.489148351648352e-05, + "loss": 0.333, + "step": 10999 + }, + { + "epoch": 30.21978021978022, + "grad_norm": 16.566463470458984, + "learning_rate": 3.489010989010989e-05, + "loss": 0.4548, + "step": 11000 + }, + { + "epoch": 30.22252747252747, + "grad_norm": 22.441089630126953, + "learning_rate": 3.4888736263736267e-05, + "loss": 1.1964, + "step": 11001 + }, + { + "epoch": 30.225274725274726, + "grad_norm": 18.60015869140625, + "learning_rate": 3.4887362637362643e-05, + "loss": 0.4119, + "step": 11002 + }, + { + "epoch": 30.228021978021978, + "grad_norm": 12.26176643371582, + "learning_rate": 3.4885989010989013e-05, + "loss": 0.2723, + "step": 11003 + }, + { + "epoch": 30.23076923076923, + "grad_norm": 3.7561287879943848, + "learning_rate": 3.488461538461539e-05, + "loss": 0.0643, + "step": 11004 + }, + { + "epoch": 30.233516483516482, + "grad_norm": 18.652807235717773, + "learning_rate": 3.488324175824176e-05, + "loss": 0.5259, + "step": 11005 + }, + { + "epoch": 30.236263736263737, + "grad_norm": 13.202984809875488, + "learning_rate": 3.488186813186814e-05, + "loss": 0.3142, + "step": 11006 + }, + { + "epoch": 30.23901098901099, + "grad_norm": 10.473856925964355, + "learning_rate": 3.488049450549451e-05, + "loss": 0.1961, + "step": 11007 + }, + { + "epoch": 30.24175824175824, + "grad_norm": 19.620573043823242, + "learning_rate": 3.487912087912088e-05, + "loss": 0.8422, + "step": 11008 + }, + { + "epoch": 30.244505494505493, + "grad_norm": 15.101003646850586, + "learning_rate": 3.4877747252747254e-05, + "loss": 0.4278, + "step": 11009 + }, + { + "epoch": 30.247252747252748, + "grad_norm": 9.255276679992676, + "learning_rate": 3.4876373626373624e-05, + "loss": 0.2057, + "step": 11010 + }, + { + "epoch": 30.25, + "grad_norm": 9.736119270324707, + "learning_rate": 3.4875e-05, + "loss": 0.1815, + "step": 11011 + }, + { + "epoch": 30.252747252747252, + "grad_norm": 25.27741050720215, + "learning_rate": 3.487362637362638e-05, + "loss": 0.8712, + "step": 11012 + }, + { + "epoch": 30.255494505494504, + "grad_norm": 11.263948440551758, + "learning_rate": 3.487225274725275e-05, + "loss": 0.1595, + "step": 11013 + }, + { + "epoch": 30.25824175824176, + "grad_norm": 10.236783027648926, + "learning_rate": 3.4870879120879124e-05, + "loss": 0.1538, + "step": 11014 + }, + { + "epoch": 30.26098901098901, + "grad_norm": 7.639524936676025, + "learning_rate": 3.4869505494505494e-05, + "loss": 0.1565, + "step": 11015 + }, + { + "epoch": 30.263736263736263, + "grad_norm": 20.742694854736328, + "learning_rate": 3.486813186813187e-05, + "loss": 0.8793, + "step": 11016 + }, + { + "epoch": 30.266483516483518, + "grad_norm": 14.44222640991211, + "learning_rate": 3.486675824175825e-05, + "loss": 0.4228, + "step": 11017 + }, + { + "epoch": 30.26923076923077, + "grad_norm": 9.048202514648438, + "learning_rate": 3.486538461538462e-05, + "loss": 0.3068, + "step": 11018 + }, + { + "epoch": 30.271978021978022, + "grad_norm": 20.06578254699707, + "learning_rate": 3.4864010989010995e-05, + "loss": 0.764, + "step": 11019 + }, + { + "epoch": 30.274725274725274, + "grad_norm": 17.47702407836914, + "learning_rate": 3.4862637362637365e-05, + "loss": 0.4903, + "step": 11020 + }, + { + "epoch": 30.27747252747253, + "grad_norm": 14.494457244873047, + "learning_rate": 3.486126373626374e-05, + "loss": 0.3978, + "step": 11021 + }, + { + "epoch": 30.28021978021978, + "grad_norm": 9.252839088439941, + "learning_rate": 3.485989010989011e-05, + "loss": 0.2096, + "step": 11022 + }, + { + "epoch": 30.282967032967033, + "grad_norm": 10.816250801086426, + "learning_rate": 3.485851648351648e-05, + "loss": 0.279, + "step": 11023 + }, + { + "epoch": 30.285714285714285, + "grad_norm": 20.03217315673828, + "learning_rate": 3.485714285714286e-05, + "loss": 0.8493, + "step": 11024 + }, + { + "epoch": 30.28846153846154, + "grad_norm": 7.459322929382324, + "learning_rate": 3.485576923076923e-05, + "loss": 0.2635, + "step": 11025 + }, + { + "epoch": 30.291208791208792, + "grad_norm": 16.593656539916992, + "learning_rate": 3.4854395604395605e-05, + "loss": 0.5451, + "step": 11026 + }, + { + "epoch": 30.293956043956044, + "grad_norm": 15.774765968322754, + "learning_rate": 3.485302197802198e-05, + "loss": 0.4003, + "step": 11027 + }, + { + "epoch": 30.296703296703296, + "grad_norm": 10.588067054748535, + "learning_rate": 3.485164835164835e-05, + "loss": 0.3164, + "step": 11028 + }, + { + "epoch": 30.29945054945055, + "grad_norm": 21.77603530883789, + "learning_rate": 3.485027472527473e-05, + "loss": 0.7444, + "step": 11029 + }, + { + "epoch": 30.302197802197803, + "grad_norm": 8.549182891845703, + "learning_rate": 3.48489010989011e-05, + "loss": 0.2739, + "step": 11030 + }, + { + "epoch": 30.304945054945055, + "grad_norm": 11.016077995300293, + "learning_rate": 3.4847527472527476e-05, + "loss": 0.335, + "step": 11031 + }, + { + "epoch": 30.307692307692307, + "grad_norm": 14.19854736328125, + "learning_rate": 3.484615384615385e-05, + "loss": 0.3703, + "step": 11032 + }, + { + "epoch": 30.310439560439562, + "grad_norm": 8.357723236083984, + "learning_rate": 3.484478021978022e-05, + "loss": 0.246, + "step": 11033 + }, + { + "epoch": 30.313186813186814, + "grad_norm": 7.405043125152588, + "learning_rate": 3.48434065934066e-05, + "loss": 0.1896, + "step": 11034 + }, + { + "epoch": 30.315934065934066, + "grad_norm": 15.775193214416504, + "learning_rate": 3.484203296703297e-05, + "loss": 0.4528, + "step": 11035 + }, + { + "epoch": 30.318681318681318, + "grad_norm": 11.271181106567383, + "learning_rate": 3.4840659340659346e-05, + "loss": 0.2341, + "step": 11036 + }, + { + "epoch": 30.321428571428573, + "grad_norm": 16.7396183013916, + "learning_rate": 3.4839285714285716e-05, + "loss": 0.5233, + "step": 11037 + }, + { + "epoch": 30.324175824175825, + "grad_norm": 15.405074119567871, + "learning_rate": 3.4837912087912086e-05, + "loss": 0.5518, + "step": 11038 + }, + { + "epoch": 30.326923076923077, + "grad_norm": 19.31755828857422, + "learning_rate": 3.483653846153846e-05, + "loss": 0.7981, + "step": 11039 + }, + { + "epoch": 30.32967032967033, + "grad_norm": 10.625210762023926, + "learning_rate": 3.483516483516483e-05, + "loss": 0.4209, + "step": 11040 + }, + { + "epoch": 30.332417582417584, + "grad_norm": 13.365303993225098, + "learning_rate": 3.483379120879121e-05, + "loss": 0.4315, + "step": 11041 + }, + { + "epoch": 30.335164835164836, + "grad_norm": 10.475086212158203, + "learning_rate": 3.483241758241758e-05, + "loss": 0.2917, + "step": 11042 + }, + { + "epoch": 30.337912087912088, + "grad_norm": 13.01634407043457, + "learning_rate": 3.4831043956043956e-05, + "loss": 0.4064, + "step": 11043 + }, + { + "epoch": 30.34065934065934, + "grad_norm": 6.322328090667725, + "learning_rate": 3.482967032967033e-05, + "loss": 0.1159, + "step": 11044 + }, + { + "epoch": 30.343406593406595, + "grad_norm": 15.75405216217041, + "learning_rate": 3.48282967032967e-05, + "loss": 0.4256, + "step": 11045 + }, + { + "epoch": 30.346153846153847, + "grad_norm": 15.183435440063477, + "learning_rate": 3.482692307692308e-05, + "loss": 0.3275, + "step": 11046 + }, + { + "epoch": 30.3489010989011, + "grad_norm": 11.267377853393555, + "learning_rate": 3.482554945054945e-05, + "loss": 0.3125, + "step": 11047 + }, + { + "epoch": 30.35164835164835, + "grad_norm": 13.197650909423828, + "learning_rate": 3.482417582417583e-05, + "loss": 0.264, + "step": 11048 + }, + { + "epoch": 30.354395604395606, + "grad_norm": 17.39563751220703, + "learning_rate": 3.4822802197802204e-05, + "loss": 0.5523, + "step": 11049 + }, + { + "epoch": 30.357142857142858, + "grad_norm": 13.509737014770508, + "learning_rate": 3.4821428571428574e-05, + "loss": 0.3549, + "step": 11050 + }, + { + "epoch": 30.35989010989011, + "grad_norm": 8.778366088867188, + "learning_rate": 3.482005494505495e-05, + "loss": 0.178, + "step": 11051 + }, + { + "epoch": 30.36263736263736, + "grad_norm": 13.636995315551758, + "learning_rate": 3.481868131868132e-05, + "loss": 0.3865, + "step": 11052 + }, + { + "epoch": 30.365384615384617, + "grad_norm": 5.843183517456055, + "learning_rate": 3.481730769230769e-05, + "loss": 0.1184, + "step": 11053 + }, + { + "epoch": 30.36813186813187, + "grad_norm": 12.976984024047852, + "learning_rate": 3.481593406593407e-05, + "loss": 0.2684, + "step": 11054 + }, + { + "epoch": 30.37087912087912, + "grad_norm": 5.115318775177002, + "learning_rate": 3.481456043956044e-05, + "loss": 0.0706, + "step": 11055 + }, + { + "epoch": 30.373626373626372, + "grad_norm": 10.83515739440918, + "learning_rate": 3.4813186813186814e-05, + "loss": 0.3393, + "step": 11056 + }, + { + "epoch": 30.376373626373628, + "grad_norm": 18.156015396118164, + "learning_rate": 3.4811813186813184e-05, + "loss": 0.382, + "step": 11057 + }, + { + "epoch": 30.37912087912088, + "grad_norm": 16.501087188720703, + "learning_rate": 3.481043956043956e-05, + "loss": 0.3675, + "step": 11058 + }, + { + "epoch": 30.38186813186813, + "grad_norm": 9.45564079284668, + "learning_rate": 3.480906593406594e-05, + "loss": 0.1435, + "step": 11059 + }, + { + "epoch": 30.384615384615383, + "grad_norm": 6.3091254234313965, + "learning_rate": 3.480769230769231e-05, + "loss": 0.1464, + "step": 11060 + }, + { + "epoch": 30.38736263736264, + "grad_norm": 10.060282707214355, + "learning_rate": 3.4806318681318685e-05, + "loss": 0.2961, + "step": 11061 + }, + { + "epoch": 30.39010989010989, + "grad_norm": 12.322399139404297, + "learning_rate": 3.4804945054945055e-05, + "loss": 0.2389, + "step": 11062 + }, + { + "epoch": 30.392857142857142, + "grad_norm": 16.511493682861328, + "learning_rate": 3.480357142857143e-05, + "loss": 0.4022, + "step": 11063 + }, + { + "epoch": 30.395604395604394, + "grad_norm": 10.140795707702637, + "learning_rate": 3.480219780219781e-05, + "loss": 0.2929, + "step": 11064 + }, + { + "epoch": 30.39835164835165, + "grad_norm": 14.707538604736328, + "learning_rate": 3.480082417582418e-05, + "loss": 0.3344, + "step": 11065 + }, + { + "epoch": 30.4010989010989, + "grad_norm": 7.39268684387207, + "learning_rate": 3.4799450549450555e-05, + "loss": 0.2184, + "step": 11066 + }, + { + "epoch": 30.403846153846153, + "grad_norm": 4.951560974121094, + "learning_rate": 3.4798076923076925e-05, + "loss": 0.1274, + "step": 11067 + }, + { + "epoch": 30.406593406593405, + "grad_norm": 16.089492797851562, + "learning_rate": 3.4796703296703295e-05, + "loss": 0.3937, + "step": 11068 + }, + { + "epoch": 30.40934065934066, + "grad_norm": 9.5402250289917, + "learning_rate": 3.479532967032967e-05, + "loss": 0.2167, + "step": 11069 + }, + { + "epoch": 30.412087912087912, + "grad_norm": 15.718558311462402, + "learning_rate": 3.479395604395604e-05, + "loss": 0.5258, + "step": 11070 + }, + { + "epoch": 30.414835164835164, + "grad_norm": 16.029874801635742, + "learning_rate": 3.479258241758242e-05, + "loss": 0.6392, + "step": 11071 + }, + { + "epoch": 30.417582417582416, + "grad_norm": 15.926236152648926, + "learning_rate": 3.479120879120879e-05, + "loss": 0.3101, + "step": 11072 + }, + { + "epoch": 30.42032967032967, + "grad_norm": 10.228272438049316, + "learning_rate": 3.4789835164835165e-05, + "loss": 0.2732, + "step": 11073 + }, + { + "epoch": 30.423076923076923, + "grad_norm": 6.862594127655029, + "learning_rate": 3.478846153846154e-05, + "loss": 0.1375, + "step": 11074 + }, + { + "epoch": 30.425824175824175, + "grad_norm": 10.119827270507812, + "learning_rate": 3.478708791208791e-05, + "loss": 0.4222, + "step": 11075 + }, + { + "epoch": 30.428571428571427, + "grad_norm": 10.040789604187012, + "learning_rate": 3.478571428571429e-05, + "loss": 0.1668, + "step": 11076 + }, + { + "epoch": 30.431318681318682, + "grad_norm": 9.83909797668457, + "learning_rate": 3.478434065934066e-05, + "loss": 0.2625, + "step": 11077 + }, + { + "epoch": 30.434065934065934, + "grad_norm": 20.631757736206055, + "learning_rate": 3.4782967032967036e-05, + "loss": 0.4692, + "step": 11078 + }, + { + "epoch": 30.436813186813186, + "grad_norm": 8.522043228149414, + "learning_rate": 3.478159340659341e-05, + "loss": 0.2744, + "step": 11079 + }, + { + "epoch": 30.439560439560438, + "grad_norm": 15.760698318481445, + "learning_rate": 3.478021978021978e-05, + "loss": 0.4519, + "step": 11080 + }, + { + "epoch": 30.442307692307693, + "grad_norm": 16.052734375, + "learning_rate": 3.477884615384616e-05, + "loss": 0.5171, + "step": 11081 + }, + { + "epoch": 30.445054945054945, + "grad_norm": 4.531693458557129, + "learning_rate": 3.477747252747253e-05, + "loss": 0.1134, + "step": 11082 + }, + { + "epoch": 30.447802197802197, + "grad_norm": 7.948779106140137, + "learning_rate": 3.47760989010989e-05, + "loss": 0.1568, + "step": 11083 + }, + { + "epoch": 30.45054945054945, + "grad_norm": 6.76233434677124, + "learning_rate": 3.4774725274725276e-05, + "loss": 0.1169, + "step": 11084 + }, + { + "epoch": 30.453296703296704, + "grad_norm": 9.911728858947754, + "learning_rate": 3.4773351648351646e-05, + "loss": 0.3275, + "step": 11085 + }, + { + "epoch": 30.456043956043956, + "grad_norm": 6.519381046295166, + "learning_rate": 3.477197802197802e-05, + "loss": 0.1996, + "step": 11086 + }, + { + "epoch": 30.458791208791208, + "grad_norm": 17.395275115966797, + "learning_rate": 3.477060439560439e-05, + "loss": 0.4825, + "step": 11087 + }, + { + "epoch": 30.46153846153846, + "grad_norm": 12.6050386428833, + "learning_rate": 3.476923076923077e-05, + "loss": 0.3309, + "step": 11088 + }, + { + "epoch": 30.464285714285715, + "grad_norm": 12.996097564697266, + "learning_rate": 3.476785714285715e-05, + "loss": 0.3831, + "step": 11089 + }, + { + "epoch": 30.467032967032967, + "grad_norm": 18.24796485900879, + "learning_rate": 3.476648351648352e-05, + "loss": 0.4536, + "step": 11090 + }, + { + "epoch": 30.46978021978022, + "grad_norm": 11.706250190734863, + "learning_rate": 3.4765109890109894e-05, + "loss": 0.3868, + "step": 11091 + }, + { + "epoch": 30.47252747252747, + "grad_norm": 11.063957214355469, + "learning_rate": 3.4763736263736264e-05, + "loss": 0.3305, + "step": 11092 + }, + { + "epoch": 30.475274725274726, + "grad_norm": 15.695023536682129, + "learning_rate": 3.476236263736264e-05, + "loss": 0.321, + "step": 11093 + }, + { + "epoch": 30.478021978021978, + "grad_norm": 8.098947525024414, + "learning_rate": 3.476098901098902e-05, + "loss": 0.2042, + "step": 11094 + }, + { + "epoch": 30.48076923076923, + "grad_norm": 3.567282199859619, + "learning_rate": 3.475961538461539e-05, + "loss": 0.0638, + "step": 11095 + }, + { + "epoch": 30.483516483516482, + "grad_norm": 11.401910781860352, + "learning_rate": 3.4758241758241764e-05, + "loss": 0.2905, + "step": 11096 + }, + { + "epoch": 30.486263736263737, + "grad_norm": 3.990879535675049, + "learning_rate": 3.4756868131868134e-05, + "loss": 0.0774, + "step": 11097 + }, + { + "epoch": 30.48901098901099, + "grad_norm": 13.721891403198242, + "learning_rate": 3.4755494505494504e-05, + "loss": 0.4395, + "step": 11098 + }, + { + "epoch": 30.49175824175824, + "grad_norm": 12.101624488830566, + "learning_rate": 3.475412087912088e-05, + "loss": 0.4142, + "step": 11099 + }, + { + "epoch": 30.494505494505496, + "grad_norm": 6.504311561584473, + "learning_rate": 3.475274725274725e-05, + "loss": 0.1692, + "step": 11100 + }, + { + "epoch": 30.497252747252748, + "grad_norm": 13.370790481567383, + "learning_rate": 3.475137362637363e-05, + "loss": 0.3257, + "step": 11101 + }, + { + "epoch": 30.5, + "grad_norm": 11.905180931091309, + "learning_rate": 3.475e-05, + "loss": 0.289, + "step": 11102 + }, + { + "epoch": 30.502747252747252, + "grad_norm": 13.47840690612793, + "learning_rate": 3.4748626373626374e-05, + "loss": 0.5026, + "step": 11103 + }, + { + "epoch": 30.505494505494504, + "grad_norm": 6.817751407623291, + "learning_rate": 3.474725274725275e-05, + "loss": 0.1157, + "step": 11104 + }, + { + "epoch": 30.50824175824176, + "grad_norm": 17.195837020874023, + "learning_rate": 3.474587912087912e-05, + "loss": 0.323, + "step": 11105 + }, + { + "epoch": 30.51098901098901, + "grad_norm": 3.731401205062866, + "learning_rate": 3.47445054945055e-05, + "loss": 0.0623, + "step": 11106 + }, + { + "epoch": 30.513736263736263, + "grad_norm": 7.590793132781982, + "learning_rate": 3.474313186813187e-05, + "loss": 0.3179, + "step": 11107 + }, + { + "epoch": 30.516483516483518, + "grad_norm": 14.382518768310547, + "learning_rate": 3.4741758241758245e-05, + "loss": 0.4172, + "step": 11108 + }, + { + "epoch": 30.51923076923077, + "grad_norm": 13.309700012207031, + "learning_rate": 3.474038461538462e-05, + "loss": 0.214, + "step": 11109 + }, + { + "epoch": 30.521978021978022, + "grad_norm": 2.263139009475708, + "learning_rate": 3.473901098901099e-05, + "loss": 0.0561, + "step": 11110 + }, + { + "epoch": 30.524725274725274, + "grad_norm": 9.08709716796875, + "learning_rate": 3.473763736263737e-05, + "loss": 0.2236, + "step": 11111 + }, + { + "epoch": 30.52747252747253, + "grad_norm": 7.873963356018066, + "learning_rate": 3.473626373626374e-05, + "loss": 0.2136, + "step": 11112 + }, + { + "epoch": 30.53021978021978, + "grad_norm": 15.232053756713867, + "learning_rate": 3.473489010989011e-05, + "loss": 0.4466, + "step": 11113 + }, + { + "epoch": 30.532967032967033, + "grad_norm": 15.801607131958008, + "learning_rate": 3.4733516483516485e-05, + "loss": 0.4402, + "step": 11114 + }, + { + "epoch": 30.535714285714285, + "grad_norm": 14.655984878540039, + "learning_rate": 3.4732142857142855e-05, + "loss": 0.4602, + "step": 11115 + }, + { + "epoch": 30.53846153846154, + "grad_norm": 2.7850687503814697, + "learning_rate": 3.473076923076923e-05, + "loss": 0.0316, + "step": 11116 + }, + { + "epoch": 30.541208791208792, + "grad_norm": 9.714925765991211, + "learning_rate": 3.47293956043956e-05, + "loss": 0.2406, + "step": 11117 + }, + { + "epoch": 30.543956043956044, + "grad_norm": 9.013264656066895, + "learning_rate": 3.472802197802198e-05, + "loss": 0.1846, + "step": 11118 + }, + { + "epoch": 30.546703296703296, + "grad_norm": 4.474470138549805, + "learning_rate": 3.4726648351648356e-05, + "loss": 0.1068, + "step": 11119 + }, + { + "epoch": 30.54945054945055, + "grad_norm": 11.259261131286621, + "learning_rate": 3.4725274725274726e-05, + "loss": 0.1776, + "step": 11120 + }, + { + "epoch": 30.552197802197803, + "grad_norm": 4.974644660949707, + "learning_rate": 3.47239010989011e-05, + "loss": 0.1143, + "step": 11121 + }, + { + "epoch": 30.554945054945055, + "grad_norm": 11.526151657104492, + "learning_rate": 3.472252747252747e-05, + "loss": 0.4104, + "step": 11122 + }, + { + "epoch": 30.557692307692307, + "grad_norm": 16.008148193359375, + "learning_rate": 3.472115384615385e-05, + "loss": 0.6611, + "step": 11123 + }, + { + "epoch": 30.560439560439562, + "grad_norm": 10.853814125061035, + "learning_rate": 3.4719780219780226e-05, + "loss": 0.3765, + "step": 11124 + }, + { + "epoch": 30.563186813186814, + "grad_norm": 16.492822647094727, + "learning_rate": 3.4718406593406596e-05, + "loss": 0.5718, + "step": 11125 + }, + { + "epoch": 30.565934065934066, + "grad_norm": 12.103324890136719, + "learning_rate": 3.471703296703297e-05, + "loss": 0.3393, + "step": 11126 + }, + { + "epoch": 30.568681318681318, + "grad_norm": 11.119363784790039, + "learning_rate": 3.471565934065934e-05, + "loss": 0.2068, + "step": 11127 + }, + { + "epoch": 30.571428571428573, + "grad_norm": 23.18115997314453, + "learning_rate": 3.471428571428571e-05, + "loss": 0.7659, + "step": 11128 + }, + { + "epoch": 30.574175824175825, + "grad_norm": 11.832428932189941, + "learning_rate": 3.471291208791209e-05, + "loss": 0.4182, + "step": 11129 + }, + { + "epoch": 30.576923076923077, + "grad_norm": 12.255874633789062, + "learning_rate": 3.471153846153846e-05, + "loss": 0.2021, + "step": 11130 + }, + { + "epoch": 30.57967032967033, + "grad_norm": 12.972878456115723, + "learning_rate": 3.4710164835164837e-05, + "loss": 0.2335, + "step": 11131 + }, + { + "epoch": 30.582417582417584, + "grad_norm": 17.324630737304688, + "learning_rate": 3.470879120879121e-05, + "loss": 0.4357, + "step": 11132 + }, + { + "epoch": 30.585164835164836, + "grad_norm": 18.285158157348633, + "learning_rate": 3.4707417582417583e-05, + "loss": 0.54, + "step": 11133 + }, + { + "epoch": 30.587912087912088, + "grad_norm": 11.082871437072754, + "learning_rate": 3.470604395604396e-05, + "loss": 0.1706, + "step": 11134 + }, + { + "epoch": 30.59065934065934, + "grad_norm": 18.4466552734375, + "learning_rate": 3.470467032967033e-05, + "loss": 0.4903, + "step": 11135 + }, + { + "epoch": 30.593406593406595, + "grad_norm": 17.241334915161133, + "learning_rate": 3.470329670329671e-05, + "loss": 0.3451, + "step": 11136 + }, + { + "epoch": 30.596153846153847, + "grad_norm": 7.902100563049316, + "learning_rate": 3.470192307692308e-05, + "loss": 0.0909, + "step": 11137 + }, + { + "epoch": 30.5989010989011, + "grad_norm": 6.421151161193848, + "learning_rate": 3.4700549450549454e-05, + "loss": 0.0898, + "step": 11138 + }, + { + "epoch": 30.60164835164835, + "grad_norm": 8.429384231567383, + "learning_rate": 3.469917582417583e-05, + "loss": 0.1715, + "step": 11139 + }, + { + "epoch": 30.604395604395606, + "grad_norm": 17.35369873046875, + "learning_rate": 3.46978021978022e-05, + "loss": 0.2841, + "step": 11140 + }, + { + "epoch": 30.607142857142858, + "grad_norm": 15.475602149963379, + "learning_rate": 3.469642857142858e-05, + "loss": 0.3278, + "step": 11141 + }, + { + "epoch": 30.60989010989011, + "grad_norm": 23.028385162353516, + "learning_rate": 3.469505494505495e-05, + "loss": 1.0277, + "step": 11142 + }, + { + "epoch": 30.61263736263736, + "grad_norm": 16.095739364624023, + "learning_rate": 3.469368131868132e-05, + "loss": 0.5779, + "step": 11143 + }, + { + "epoch": 30.615384615384617, + "grad_norm": 15.594164848327637, + "learning_rate": 3.4692307692307694e-05, + "loss": 0.6123, + "step": 11144 + }, + { + "epoch": 30.61813186813187, + "grad_norm": 6.396407127380371, + "learning_rate": 3.4690934065934064e-05, + "loss": 0.1306, + "step": 11145 + }, + { + "epoch": 30.62087912087912, + "grad_norm": 10.56943416595459, + "learning_rate": 3.468956043956044e-05, + "loss": 0.2699, + "step": 11146 + }, + { + "epoch": 30.623626373626372, + "grad_norm": 16.595266342163086, + "learning_rate": 3.468818681318681e-05, + "loss": 0.3273, + "step": 11147 + }, + { + "epoch": 30.626373626373628, + "grad_norm": 13.473780632019043, + "learning_rate": 3.468681318681319e-05, + "loss": 0.492, + "step": 11148 + }, + { + "epoch": 30.62912087912088, + "grad_norm": 11.491007804870605, + "learning_rate": 3.4685439560439565e-05, + "loss": 0.2511, + "step": 11149 + }, + { + "epoch": 30.63186813186813, + "grad_norm": 5.8520331382751465, + "learning_rate": 3.4684065934065935e-05, + "loss": 0.1136, + "step": 11150 + }, + { + "epoch": 30.634615384615383, + "grad_norm": 17.082996368408203, + "learning_rate": 3.468269230769231e-05, + "loss": 0.6014, + "step": 11151 + }, + { + "epoch": 30.63736263736264, + "grad_norm": 7.758815288543701, + "learning_rate": 3.468131868131868e-05, + "loss": 0.175, + "step": 11152 + }, + { + "epoch": 30.64010989010989, + "grad_norm": 13.589462280273438, + "learning_rate": 3.467994505494506e-05, + "loss": 0.3829, + "step": 11153 + }, + { + "epoch": 30.642857142857142, + "grad_norm": 13.34172248840332, + "learning_rate": 3.4678571428571435e-05, + "loss": 0.5413, + "step": 11154 + }, + { + "epoch": 30.645604395604394, + "grad_norm": 11.763595581054688, + "learning_rate": 3.4677197802197805e-05, + "loss": 0.1976, + "step": 11155 + }, + { + "epoch": 30.64835164835165, + "grad_norm": 10.964696884155273, + "learning_rate": 3.467582417582418e-05, + "loss": 0.1879, + "step": 11156 + }, + { + "epoch": 30.6510989010989, + "grad_norm": 12.919031143188477, + "learning_rate": 3.467445054945055e-05, + "loss": 0.3443, + "step": 11157 + }, + { + "epoch": 30.653846153846153, + "grad_norm": 10.178139686584473, + "learning_rate": 3.467307692307692e-05, + "loss": 0.1773, + "step": 11158 + }, + { + "epoch": 30.656593406593405, + "grad_norm": 11.366050720214844, + "learning_rate": 3.46717032967033e-05, + "loss": 0.2772, + "step": 11159 + }, + { + "epoch": 30.65934065934066, + "grad_norm": 19.494840621948242, + "learning_rate": 3.467032967032967e-05, + "loss": 0.4789, + "step": 11160 + }, + { + "epoch": 30.662087912087912, + "grad_norm": 16.544208526611328, + "learning_rate": 3.4668956043956046e-05, + "loss": 0.4677, + "step": 11161 + }, + { + "epoch": 30.664835164835164, + "grad_norm": 16.122791290283203, + "learning_rate": 3.4667582417582416e-05, + "loss": 0.3935, + "step": 11162 + }, + { + "epoch": 30.667582417582416, + "grad_norm": 18.723196029663086, + "learning_rate": 3.466620879120879e-05, + "loss": 0.7342, + "step": 11163 + }, + { + "epoch": 30.67032967032967, + "grad_norm": 17.378578186035156, + "learning_rate": 3.466483516483517e-05, + "loss": 0.7665, + "step": 11164 + }, + { + "epoch": 30.673076923076923, + "grad_norm": 13.117871284484863, + "learning_rate": 3.466346153846154e-05, + "loss": 0.6856, + "step": 11165 + }, + { + "epoch": 30.675824175824175, + "grad_norm": 7.125353813171387, + "learning_rate": 3.4662087912087916e-05, + "loss": 0.1401, + "step": 11166 + }, + { + "epoch": 30.678571428571427, + "grad_norm": 5.635830402374268, + "learning_rate": 3.4660714285714286e-05, + "loss": 0.0895, + "step": 11167 + }, + { + "epoch": 30.681318681318682, + "grad_norm": 17.1468505859375, + "learning_rate": 3.465934065934066e-05, + "loss": 0.3962, + "step": 11168 + }, + { + "epoch": 30.684065934065934, + "grad_norm": 18.354751586914062, + "learning_rate": 3.465796703296704e-05, + "loss": 0.2869, + "step": 11169 + }, + { + "epoch": 30.686813186813186, + "grad_norm": 6.991373538970947, + "learning_rate": 3.465659340659341e-05, + "loss": 0.1698, + "step": 11170 + }, + { + "epoch": 30.689560439560438, + "grad_norm": 13.6869535446167, + "learning_rate": 3.4655219780219786e-05, + "loss": 0.3831, + "step": 11171 + }, + { + "epoch": 30.692307692307693, + "grad_norm": 10.878397941589355, + "learning_rate": 3.4653846153846156e-05, + "loss": 0.3243, + "step": 11172 + }, + { + "epoch": 30.695054945054945, + "grad_norm": 14.037612915039062, + "learning_rate": 3.4652472527472526e-05, + "loss": 0.5859, + "step": 11173 + }, + { + "epoch": 30.697802197802197, + "grad_norm": 10.115572929382324, + "learning_rate": 3.46510989010989e-05, + "loss": 0.2951, + "step": 11174 + }, + { + "epoch": 30.70054945054945, + "grad_norm": 16.47182273864746, + "learning_rate": 3.464972527472527e-05, + "loss": 0.5033, + "step": 11175 + }, + { + "epoch": 30.703296703296704, + "grad_norm": 20.11480140686035, + "learning_rate": 3.464835164835165e-05, + "loss": 0.3925, + "step": 11176 + }, + { + "epoch": 30.706043956043956, + "grad_norm": 13.243176460266113, + "learning_rate": 3.464697802197802e-05, + "loss": 0.2118, + "step": 11177 + }, + { + "epoch": 30.708791208791208, + "grad_norm": 8.881611824035645, + "learning_rate": 3.46456043956044e-05, + "loss": 0.2782, + "step": 11178 + }, + { + "epoch": 30.71153846153846, + "grad_norm": 13.093754768371582, + "learning_rate": 3.4644230769230774e-05, + "loss": 0.2464, + "step": 11179 + }, + { + "epoch": 30.714285714285715, + "grad_norm": 15.809271812438965, + "learning_rate": 3.4642857142857144e-05, + "loss": 0.3245, + "step": 11180 + }, + { + "epoch": 30.717032967032967, + "grad_norm": 12.697880744934082, + "learning_rate": 3.464148351648352e-05, + "loss": 0.2322, + "step": 11181 + }, + { + "epoch": 30.71978021978022, + "grad_norm": 14.464016914367676, + "learning_rate": 3.464010989010989e-05, + "loss": 0.3246, + "step": 11182 + }, + { + "epoch": 30.72252747252747, + "grad_norm": 12.41308879852295, + "learning_rate": 3.463873626373627e-05, + "loss": 0.2786, + "step": 11183 + }, + { + "epoch": 30.725274725274726, + "grad_norm": 18.404769897460938, + "learning_rate": 3.4637362637362644e-05, + "loss": 0.6267, + "step": 11184 + }, + { + "epoch": 30.728021978021978, + "grad_norm": 12.678821563720703, + "learning_rate": 3.4635989010989014e-05, + "loss": 0.3132, + "step": 11185 + }, + { + "epoch": 30.73076923076923, + "grad_norm": 24.27678871154785, + "learning_rate": 3.463461538461539e-05, + "loss": 0.7812, + "step": 11186 + }, + { + "epoch": 30.733516483516482, + "grad_norm": 9.320977210998535, + "learning_rate": 3.463324175824176e-05, + "loss": 0.1218, + "step": 11187 + }, + { + "epoch": 30.736263736263737, + "grad_norm": 20.054235458374023, + "learning_rate": 3.463186813186813e-05, + "loss": 0.5399, + "step": 11188 + }, + { + "epoch": 30.73901098901099, + "grad_norm": 12.599203109741211, + "learning_rate": 3.463049450549451e-05, + "loss": 0.6823, + "step": 11189 + }, + { + "epoch": 30.74175824175824, + "grad_norm": 13.059915542602539, + "learning_rate": 3.462912087912088e-05, + "loss": 0.2411, + "step": 11190 + }, + { + "epoch": 30.744505494505496, + "grad_norm": 4.174944877624512, + "learning_rate": 3.4627747252747255e-05, + "loss": 0.0634, + "step": 11191 + }, + { + "epoch": 30.747252747252748, + "grad_norm": 8.187372207641602, + "learning_rate": 3.4626373626373625e-05, + "loss": 0.1981, + "step": 11192 + }, + { + "epoch": 30.75, + "grad_norm": 10.939840316772461, + "learning_rate": 3.4625e-05, + "loss": 0.1901, + "step": 11193 + }, + { + "epoch": 30.752747252747252, + "grad_norm": 9.272440910339355, + "learning_rate": 3.462362637362638e-05, + "loss": 0.2155, + "step": 11194 + }, + { + "epoch": 30.755494505494504, + "grad_norm": 10.883528709411621, + "learning_rate": 3.462225274725275e-05, + "loss": 0.1835, + "step": 11195 + }, + { + "epoch": 30.75824175824176, + "grad_norm": 18.19970703125, + "learning_rate": 3.4620879120879125e-05, + "loss": 0.6362, + "step": 11196 + }, + { + "epoch": 30.76098901098901, + "grad_norm": 17.264076232910156, + "learning_rate": 3.4619505494505495e-05, + "loss": 0.4408, + "step": 11197 + }, + { + "epoch": 30.763736263736263, + "grad_norm": 11.27853012084961, + "learning_rate": 3.461813186813187e-05, + "loss": 0.3635, + "step": 11198 + }, + { + "epoch": 30.766483516483518, + "grad_norm": 11.522461891174316, + "learning_rate": 3.461675824175825e-05, + "loss": 0.467, + "step": 11199 + }, + { + "epoch": 30.76923076923077, + "grad_norm": 17.398042678833008, + "learning_rate": 3.461538461538462e-05, + "loss": 0.3967, + "step": 11200 + }, + { + "epoch": 30.771978021978022, + "grad_norm": 10.255925178527832, + "learning_rate": 3.4614010989010995e-05, + "loss": 0.1669, + "step": 11201 + }, + { + "epoch": 30.774725274725274, + "grad_norm": 17.05012321472168, + "learning_rate": 3.4612637362637365e-05, + "loss": 0.5444, + "step": 11202 + }, + { + "epoch": 30.77747252747253, + "grad_norm": 9.339542388916016, + "learning_rate": 3.4611263736263735e-05, + "loss": 0.226, + "step": 11203 + }, + { + "epoch": 30.78021978021978, + "grad_norm": 19.530885696411133, + "learning_rate": 3.460989010989011e-05, + "loss": 0.9341, + "step": 11204 + }, + { + "epoch": 30.782967032967033, + "grad_norm": 12.815225601196289, + "learning_rate": 3.460851648351648e-05, + "loss": 0.3341, + "step": 11205 + }, + { + "epoch": 30.785714285714285, + "grad_norm": 20.25084114074707, + "learning_rate": 3.460714285714286e-05, + "loss": 0.4429, + "step": 11206 + }, + { + "epoch": 30.78846153846154, + "grad_norm": 14.915027618408203, + "learning_rate": 3.460576923076923e-05, + "loss": 0.5982, + "step": 11207 + }, + { + "epoch": 30.791208791208792, + "grad_norm": 17.397825241088867, + "learning_rate": 3.4604395604395606e-05, + "loss": 0.7325, + "step": 11208 + }, + { + "epoch": 30.793956043956044, + "grad_norm": 7.8422722816467285, + "learning_rate": 3.460302197802198e-05, + "loss": 0.2465, + "step": 11209 + }, + { + "epoch": 30.796703296703296, + "grad_norm": 11.521726608276367, + "learning_rate": 3.460164835164835e-05, + "loss": 0.331, + "step": 11210 + }, + { + "epoch": 30.79945054945055, + "grad_norm": 16.218400955200195, + "learning_rate": 3.460027472527473e-05, + "loss": 0.5356, + "step": 11211 + }, + { + "epoch": 30.802197802197803, + "grad_norm": 11.335447311401367, + "learning_rate": 3.45989010989011e-05, + "loss": 0.3154, + "step": 11212 + }, + { + "epoch": 30.804945054945055, + "grad_norm": 17.78098487854004, + "learning_rate": 3.4597527472527476e-05, + "loss": 0.8251, + "step": 11213 + }, + { + "epoch": 30.807692307692307, + "grad_norm": 8.028852462768555, + "learning_rate": 3.459615384615385e-05, + "loss": 0.1229, + "step": 11214 + }, + { + "epoch": 30.810439560439562, + "grad_norm": 7.844306468963623, + "learning_rate": 3.459478021978022e-05, + "loss": 0.1663, + "step": 11215 + }, + { + "epoch": 30.813186813186814, + "grad_norm": 7.110301494598389, + "learning_rate": 3.45934065934066e-05, + "loss": 0.2551, + "step": 11216 + }, + { + "epoch": 30.815934065934066, + "grad_norm": 13.739761352539062, + "learning_rate": 3.459203296703297e-05, + "loss": 0.3284, + "step": 11217 + }, + { + "epoch": 30.818681318681318, + "grad_norm": 9.709244728088379, + "learning_rate": 3.459065934065934e-05, + "loss": 0.3157, + "step": 11218 + }, + { + "epoch": 30.821428571428573, + "grad_norm": 14.929781913757324, + "learning_rate": 3.458928571428572e-05, + "loss": 0.28, + "step": 11219 + }, + { + "epoch": 30.824175824175825, + "grad_norm": 11.161330223083496, + "learning_rate": 3.458791208791209e-05, + "loss": 0.4981, + "step": 11220 + }, + { + "epoch": 30.826923076923077, + "grad_norm": 8.767484664916992, + "learning_rate": 3.4586538461538464e-05, + "loss": 0.1889, + "step": 11221 + }, + { + "epoch": 30.82967032967033, + "grad_norm": 14.519465446472168, + "learning_rate": 3.4585164835164834e-05, + "loss": 0.3783, + "step": 11222 + }, + { + "epoch": 30.832417582417584, + "grad_norm": 7.42315149307251, + "learning_rate": 3.458379120879121e-05, + "loss": 0.2025, + "step": 11223 + }, + { + "epoch": 30.835164835164836, + "grad_norm": 8.683991432189941, + "learning_rate": 3.458241758241759e-05, + "loss": 0.2408, + "step": 11224 + }, + { + "epoch": 30.837912087912088, + "grad_norm": 12.826242446899414, + "learning_rate": 3.458104395604396e-05, + "loss": 0.3166, + "step": 11225 + }, + { + "epoch": 30.84065934065934, + "grad_norm": 12.246847152709961, + "learning_rate": 3.4579670329670334e-05, + "loss": 0.3672, + "step": 11226 + }, + { + "epoch": 30.843406593406595, + "grad_norm": 18.34653663635254, + "learning_rate": 3.4578296703296704e-05, + "loss": 0.5547, + "step": 11227 + }, + { + "epoch": 30.846153846153847, + "grad_norm": 9.71881103515625, + "learning_rate": 3.457692307692308e-05, + "loss": 0.2377, + "step": 11228 + }, + { + "epoch": 30.8489010989011, + "grad_norm": 4.956374168395996, + "learning_rate": 3.457554945054946e-05, + "loss": 0.1092, + "step": 11229 + }, + { + "epoch": 30.85164835164835, + "grad_norm": 10.878987312316895, + "learning_rate": 3.457417582417583e-05, + "loss": 0.3737, + "step": 11230 + }, + { + "epoch": 30.854395604395606, + "grad_norm": 9.790780067443848, + "learning_rate": 3.45728021978022e-05, + "loss": 0.3929, + "step": 11231 + }, + { + "epoch": 30.857142857142858, + "grad_norm": 13.776988983154297, + "learning_rate": 3.4571428571428574e-05, + "loss": 0.4642, + "step": 11232 + }, + { + "epoch": 30.85989010989011, + "grad_norm": 13.41267204284668, + "learning_rate": 3.4570054945054944e-05, + "loss": 0.4491, + "step": 11233 + }, + { + "epoch": 30.86263736263736, + "grad_norm": 8.586769104003906, + "learning_rate": 3.456868131868132e-05, + "loss": 0.2334, + "step": 11234 + }, + { + "epoch": 30.865384615384617, + "grad_norm": 6.834378719329834, + "learning_rate": 3.456730769230769e-05, + "loss": 0.0828, + "step": 11235 + }, + { + "epoch": 30.86813186813187, + "grad_norm": 13.878449440002441, + "learning_rate": 3.456593406593407e-05, + "loss": 0.5951, + "step": 11236 + }, + { + "epoch": 30.87087912087912, + "grad_norm": 9.420184135437012, + "learning_rate": 3.456456043956044e-05, + "loss": 0.3715, + "step": 11237 + }, + { + "epoch": 30.873626373626372, + "grad_norm": 14.642662048339844, + "learning_rate": 3.4563186813186815e-05, + "loss": 0.4598, + "step": 11238 + }, + { + "epoch": 30.876373626373628, + "grad_norm": 7.988326072692871, + "learning_rate": 3.456181318681319e-05, + "loss": 0.1468, + "step": 11239 + }, + { + "epoch": 30.87912087912088, + "grad_norm": 13.82530689239502, + "learning_rate": 3.456043956043956e-05, + "loss": 0.5532, + "step": 11240 + }, + { + "epoch": 30.88186813186813, + "grad_norm": 11.618350982666016, + "learning_rate": 3.455906593406594e-05, + "loss": 0.3717, + "step": 11241 + }, + { + "epoch": 30.884615384615383, + "grad_norm": 19.597126007080078, + "learning_rate": 3.455769230769231e-05, + "loss": 0.4283, + "step": 11242 + }, + { + "epoch": 30.88736263736264, + "grad_norm": 14.447650909423828, + "learning_rate": 3.4556318681318685e-05, + "loss": 0.6285, + "step": 11243 + }, + { + "epoch": 30.89010989010989, + "grad_norm": 6.1293559074401855, + "learning_rate": 3.455494505494506e-05, + "loss": 0.1714, + "step": 11244 + }, + { + "epoch": 30.892857142857142, + "grad_norm": 10.589522361755371, + "learning_rate": 3.455357142857143e-05, + "loss": 0.3098, + "step": 11245 + }, + { + "epoch": 30.895604395604394, + "grad_norm": 13.050308227539062, + "learning_rate": 3.45521978021978e-05, + "loss": 0.3651, + "step": 11246 + }, + { + "epoch": 30.89835164835165, + "grad_norm": 8.770873069763184, + "learning_rate": 3.455082417582418e-05, + "loss": 0.2496, + "step": 11247 + }, + { + "epoch": 30.9010989010989, + "grad_norm": 6.454803466796875, + "learning_rate": 3.454945054945055e-05, + "loss": 0.1301, + "step": 11248 + }, + { + "epoch": 30.903846153846153, + "grad_norm": 9.206098556518555, + "learning_rate": 3.4548076923076926e-05, + "loss": 0.174, + "step": 11249 + }, + { + "epoch": 30.906593406593405, + "grad_norm": 5.048745632171631, + "learning_rate": 3.4546703296703296e-05, + "loss": 0.1124, + "step": 11250 + }, + { + "epoch": 30.90934065934066, + "grad_norm": 6.502087116241455, + "learning_rate": 3.454532967032967e-05, + "loss": 0.1307, + "step": 11251 + }, + { + "epoch": 30.912087912087912, + "grad_norm": 11.268438339233398, + "learning_rate": 3.454395604395604e-05, + "loss": 0.3512, + "step": 11252 + }, + { + "epoch": 30.914835164835164, + "grad_norm": 2.388782024383545, + "learning_rate": 3.454258241758242e-05, + "loss": 0.039, + "step": 11253 + }, + { + "epoch": 30.917582417582416, + "grad_norm": 20.00078010559082, + "learning_rate": 3.4541208791208796e-05, + "loss": 0.9982, + "step": 11254 + }, + { + "epoch": 30.92032967032967, + "grad_norm": 11.946761131286621, + "learning_rate": 3.4539835164835166e-05, + "loss": 0.473, + "step": 11255 + }, + { + "epoch": 30.923076923076923, + "grad_norm": 11.17398738861084, + "learning_rate": 3.453846153846154e-05, + "loss": 0.3036, + "step": 11256 + }, + { + "epoch": 30.925824175824175, + "grad_norm": 18.087940216064453, + "learning_rate": 3.453708791208791e-05, + "loss": 0.4275, + "step": 11257 + }, + { + "epoch": 30.928571428571427, + "grad_norm": 16.60083770751953, + "learning_rate": 3.453571428571429e-05, + "loss": 0.4985, + "step": 11258 + }, + { + "epoch": 30.931318681318682, + "grad_norm": 23.22402000427246, + "learning_rate": 3.4534340659340667e-05, + "loss": 0.7088, + "step": 11259 + }, + { + "epoch": 30.934065934065934, + "grad_norm": 11.650160789489746, + "learning_rate": 3.4532967032967037e-05, + "loss": 0.1782, + "step": 11260 + }, + { + "epoch": 30.936813186813186, + "grad_norm": 13.937990188598633, + "learning_rate": 3.4531593406593407e-05, + "loss": 0.4167, + "step": 11261 + }, + { + "epoch": 30.939560439560438, + "grad_norm": 18.391904830932617, + "learning_rate": 3.453021978021978e-05, + "loss": 0.5871, + "step": 11262 + }, + { + "epoch": 30.942307692307693, + "grad_norm": 4.988274097442627, + "learning_rate": 3.4528846153846153e-05, + "loss": 0.1099, + "step": 11263 + }, + { + "epoch": 30.945054945054945, + "grad_norm": 9.26976490020752, + "learning_rate": 3.4527472527472523e-05, + "loss": 0.1186, + "step": 11264 + }, + { + "epoch": 30.947802197802197, + "grad_norm": 11.275146484375, + "learning_rate": 3.45260989010989e-05, + "loss": 0.3428, + "step": 11265 + }, + { + "epoch": 30.95054945054945, + "grad_norm": 8.064982414245605, + "learning_rate": 3.452472527472528e-05, + "loss": 0.1296, + "step": 11266 + }, + { + "epoch": 30.953296703296704, + "grad_norm": 9.933938980102539, + "learning_rate": 3.452335164835165e-05, + "loss": 0.2947, + "step": 11267 + }, + { + "epoch": 30.956043956043956, + "grad_norm": 8.50320053100586, + "learning_rate": 3.4521978021978024e-05, + "loss": 0.1888, + "step": 11268 + }, + { + "epoch": 30.958791208791208, + "grad_norm": 5.177613258361816, + "learning_rate": 3.4520604395604394e-05, + "loss": 0.0819, + "step": 11269 + }, + { + "epoch": 30.96153846153846, + "grad_norm": 10.451138496398926, + "learning_rate": 3.451923076923077e-05, + "loss": 0.2019, + "step": 11270 + }, + { + "epoch": 30.964285714285715, + "grad_norm": 8.609790802001953, + "learning_rate": 3.451785714285715e-05, + "loss": 0.1871, + "step": 11271 + }, + { + "epoch": 30.967032967032967, + "grad_norm": 18.874910354614258, + "learning_rate": 3.451648351648352e-05, + "loss": 0.6939, + "step": 11272 + }, + { + "epoch": 30.96978021978022, + "grad_norm": 9.484192848205566, + "learning_rate": 3.4515109890109894e-05, + "loss": 0.1252, + "step": 11273 + }, + { + "epoch": 30.97252747252747, + "grad_norm": 6.373208045959473, + "learning_rate": 3.4513736263736264e-05, + "loss": 0.1501, + "step": 11274 + }, + { + "epoch": 30.975274725274726, + "grad_norm": 12.177854537963867, + "learning_rate": 3.451236263736264e-05, + "loss": 0.3562, + "step": 11275 + }, + { + "epoch": 30.978021978021978, + "grad_norm": 19.387041091918945, + "learning_rate": 3.451098901098901e-05, + "loss": 0.7761, + "step": 11276 + }, + { + "epoch": 30.98076923076923, + "grad_norm": 5.427517414093018, + "learning_rate": 3.450961538461539e-05, + "loss": 0.167, + "step": 11277 + }, + { + "epoch": 30.983516483516482, + "grad_norm": 10.584782600402832, + "learning_rate": 3.450824175824176e-05, + "loss": 0.1421, + "step": 11278 + }, + { + "epoch": 30.986263736263737, + "grad_norm": 23.129669189453125, + "learning_rate": 3.450686813186813e-05, + "loss": 0.911, + "step": 11279 + }, + { + "epoch": 30.98901098901099, + "grad_norm": 11.23182487487793, + "learning_rate": 3.4505494505494505e-05, + "loss": 0.2425, + "step": 11280 + }, + { + "epoch": 30.99175824175824, + "grad_norm": 12.470874786376953, + "learning_rate": 3.450412087912088e-05, + "loss": 0.3123, + "step": 11281 + }, + { + "epoch": 30.994505494505496, + "grad_norm": 17.680246353149414, + "learning_rate": 3.450274725274725e-05, + "loss": 0.8594, + "step": 11282 + }, + { + "epoch": 30.997252747252748, + "grad_norm": 7.34419584274292, + "learning_rate": 3.450137362637363e-05, + "loss": 0.1666, + "step": 11283 + }, + { + "epoch": 31.0, + "grad_norm": 42.06395721435547, + "learning_rate": 3.45e-05, + "loss": 0.6757, + "step": 11284 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.7203856749311295, + "eval_f1": 0.7020193311793899, + "eval_f1_DuraRiadoRio_64x64": 0.6964856230031949, + "eval_f1_Mole_64x64": 0.45989304812834225, + "eval_f1_Quebrado_64x64": 0.8788927335640139, + "eval_f1_RiadoRio_64x64": 0.6369047619047619, + "eval_f1_RioFechado_64x64": 0.8379204892966361, + "eval_loss": 1.1711584329605103, + "eval_precision": 0.7685789525675352, + "eval_precision_DuraRiadoRio_64x64": 0.6449704142011834, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.8758620689655172, + "eval_precision_RiadoRio_64x64": 0.5815217391304348, + "eval_precision_RioFechado_64x64": 0.7405405405405405, + "eval_recall": 0.7212472201630838, + "eval_recall_DuraRiadoRio_64x64": 0.7569444444444444, + "eval_recall_Mole_64x64": 0.2986111111111111, + "eval_recall_Quebrado_64x64": 0.8819444444444444, + "eval_recall_RiadoRio_64x64": 0.7039473684210527, + "eval_recall_RioFechado_64x64": 0.9647887323943662, + "eval_runtime": 1.724, + "eval_samples_per_second": 421.12, + "eval_steps_per_second": 26.683, + "step": 11284 + }, + { + "epoch": 31.002747252747252, + "grad_norm": 9.639942169189453, + "learning_rate": 3.4498626373626375e-05, + "loss": 0.2059, + "step": 11285 + }, + { + "epoch": 31.005494505494507, + "grad_norm": 7.3749494552612305, + "learning_rate": 3.449725274725275e-05, + "loss": 0.1052, + "step": 11286 + }, + { + "epoch": 31.00824175824176, + "grad_norm": 12.668073654174805, + "learning_rate": 3.449587912087912e-05, + "loss": 0.5694, + "step": 11287 + }, + { + "epoch": 31.01098901098901, + "grad_norm": 19.944746017456055, + "learning_rate": 3.44945054945055e-05, + "loss": 0.6661, + "step": 11288 + }, + { + "epoch": 31.013736263736263, + "grad_norm": 22.950685501098633, + "learning_rate": 3.449313186813187e-05, + "loss": 0.633, + "step": 11289 + }, + { + "epoch": 31.016483516483518, + "grad_norm": 17.132890701293945, + "learning_rate": 3.4491758241758246e-05, + "loss": 0.6401, + "step": 11290 + }, + { + "epoch": 31.01923076923077, + "grad_norm": 4.912703037261963, + "learning_rate": 3.4490384615384616e-05, + "loss": 0.0629, + "step": 11291 + }, + { + "epoch": 31.021978021978022, + "grad_norm": 11.492205619812012, + "learning_rate": 3.448901098901099e-05, + "loss": 0.4146, + "step": 11292 + }, + { + "epoch": 31.024725274725274, + "grad_norm": 11.371360778808594, + "learning_rate": 3.448763736263736e-05, + "loss": 0.2427, + "step": 11293 + }, + { + "epoch": 31.02747252747253, + "grad_norm": 12.91529369354248, + "learning_rate": 3.448626373626373e-05, + "loss": 0.2226, + "step": 11294 + }, + { + "epoch": 31.03021978021978, + "grad_norm": 9.385753631591797, + "learning_rate": 3.448489010989011e-05, + "loss": 0.3219, + "step": 11295 + }, + { + "epoch": 31.032967032967033, + "grad_norm": 17.625944137573242, + "learning_rate": 3.4483516483516486e-05, + "loss": 0.7133, + "step": 11296 + }, + { + "epoch": 31.035714285714285, + "grad_norm": 13.87799072265625, + "learning_rate": 3.4482142857142856e-05, + "loss": 0.2221, + "step": 11297 + }, + { + "epoch": 31.03846153846154, + "grad_norm": 18.009374618530273, + "learning_rate": 3.448076923076923e-05, + "loss": 0.4899, + "step": 11298 + }, + { + "epoch": 31.041208791208792, + "grad_norm": 8.872480392456055, + "learning_rate": 3.44793956043956e-05, + "loss": 0.2045, + "step": 11299 + }, + { + "epoch": 31.043956043956044, + "grad_norm": 13.007619857788086, + "learning_rate": 3.447802197802198e-05, + "loss": 0.4762, + "step": 11300 + }, + { + "epoch": 31.046703296703296, + "grad_norm": 6.1420416831970215, + "learning_rate": 3.4476648351648356e-05, + "loss": 0.1478, + "step": 11301 + }, + { + "epoch": 31.04945054945055, + "grad_norm": 8.310705184936523, + "learning_rate": 3.4475274725274726e-05, + "loss": 0.2329, + "step": 11302 + }, + { + "epoch": 31.052197802197803, + "grad_norm": 18.198583602905273, + "learning_rate": 3.44739010989011e-05, + "loss": 0.5405, + "step": 11303 + }, + { + "epoch": 31.054945054945055, + "grad_norm": 15.440549850463867, + "learning_rate": 3.447252747252747e-05, + "loss": 0.2633, + "step": 11304 + }, + { + "epoch": 31.057692307692307, + "grad_norm": 10.093064308166504, + "learning_rate": 3.447115384615385e-05, + "loss": 0.1702, + "step": 11305 + }, + { + "epoch": 31.060439560439562, + "grad_norm": 12.639836311340332, + "learning_rate": 3.446978021978022e-05, + "loss": 0.3576, + "step": 11306 + }, + { + "epoch": 31.063186813186814, + "grad_norm": 14.087471961975098, + "learning_rate": 3.44684065934066e-05, + "loss": 0.4112, + "step": 11307 + }, + { + "epoch": 31.065934065934066, + "grad_norm": 6.849520683288574, + "learning_rate": 3.446703296703297e-05, + "loss": 0.1654, + "step": 11308 + }, + { + "epoch": 31.068681318681318, + "grad_norm": 19.598560333251953, + "learning_rate": 3.446565934065934e-05, + "loss": 0.5961, + "step": 11309 + }, + { + "epoch": 31.071428571428573, + "grad_norm": 21.311813354492188, + "learning_rate": 3.4464285714285714e-05, + "loss": 0.6168, + "step": 11310 + }, + { + "epoch": 31.074175824175825, + "grad_norm": 11.287140846252441, + "learning_rate": 3.446291208791209e-05, + "loss": 0.404, + "step": 11311 + }, + { + "epoch": 31.076923076923077, + "grad_norm": 12.07386589050293, + "learning_rate": 3.446153846153846e-05, + "loss": 0.3414, + "step": 11312 + }, + { + "epoch": 31.07967032967033, + "grad_norm": 7.481587886810303, + "learning_rate": 3.446016483516484e-05, + "loss": 0.1072, + "step": 11313 + }, + { + "epoch": 31.082417582417584, + "grad_norm": 3.3076555728912354, + "learning_rate": 3.445879120879121e-05, + "loss": 0.0647, + "step": 11314 + }, + { + "epoch": 31.085164835164836, + "grad_norm": 13.308913230895996, + "learning_rate": 3.4457417582417584e-05, + "loss": 0.3456, + "step": 11315 + }, + { + "epoch": 31.087912087912088, + "grad_norm": 18.899755477905273, + "learning_rate": 3.445604395604396e-05, + "loss": 0.6918, + "step": 11316 + }, + { + "epoch": 31.09065934065934, + "grad_norm": 11.23511791229248, + "learning_rate": 3.445467032967033e-05, + "loss": 0.3335, + "step": 11317 + }, + { + "epoch": 31.093406593406595, + "grad_norm": 8.162856101989746, + "learning_rate": 3.445329670329671e-05, + "loss": 0.2393, + "step": 11318 + }, + { + "epoch": 31.096153846153847, + "grad_norm": 9.65374755859375, + "learning_rate": 3.445192307692308e-05, + "loss": 0.2202, + "step": 11319 + }, + { + "epoch": 31.0989010989011, + "grad_norm": 10.187662124633789, + "learning_rate": 3.4450549450549455e-05, + "loss": 0.1689, + "step": 11320 + }, + { + "epoch": 31.10164835164835, + "grad_norm": 14.660717964172363, + "learning_rate": 3.4449175824175825e-05, + "loss": 0.3689, + "step": 11321 + }, + { + "epoch": 31.104395604395606, + "grad_norm": 12.237083435058594, + "learning_rate": 3.44478021978022e-05, + "loss": 0.226, + "step": 11322 + }, + { + "epoch": 31.107142857142858, + "grad_norm": 12.981023788452148, + "learning_rate": 3.444642857142857e-05, + "loss": 0.2285, + "step": 11323 + }, + { + "epoch": 31.10989010989011, + "grad_norm": 10.803659439086914, + "learning_rate": 3.444505494505494e-05, + "loss": 0.2887, + "step": 11324 + }, + { + "epoch": 31.11263736263736, + "grad_norm": 6.254340648651123, + "learning_rate": 3.444368131868132e-05, + "loss": 0.1483, + "step": 11325 + }, + { + "epoch": 31.115384615384617, + "grad_norm": 12.431537628173828, + "learning_rate": 3.4442307692307695e-05, + "loss": 0.3659, + "step": 11326 + }, + { + "epoch": 31.11813186813187, + "grad_norm": 5.369506359100342, + "learning_rate": 3.4440934065934065e-05, + "loss": 0.1184, + "step": 11327 + }, + { + "epoch": 31.12087912087912, + "grad_norm": 10.170557022094727, + "learning_rate": 3.443956043956044e-05, + "loss": 0.2342, + "step": 11328 + }, + { + "epoch": 31.123626373626372, + "grad_norm": 8.720440864562988, + "learning_rate": 3.443818681318681e-05, + "loss": 0.1099, + "step": 11329 + }, + { + "epoch": 31.126373626373628, + "grad_norm": 14.17673397064209, + "learning_rate": 3.443681318681319e-05, + "loss": 0.2738, + "step": 11330 + }, + { + "epoch": 31.12912087912088, + "grad_norm": 16.64532470703125, + "learning_rate": 3.4435439560439565e-05, + "loss": 0.2897, + "step": 11331 + }, + { + "epoch": 31.13186813186813, + "grad_norm": 12.30539321899414, + "learning_rate": 3.4434065934065935e-05, + "loss": 0.2404, + "step": 11332 + }, + { + "epoch": 31.134615384615383, + "grad_norm": 9.61271858215332, + "learning_rate": 3.443269230769231e-05, + "loss": 0.2759, + "step": 11333 + }, + { + "epoch": 31.13736263736264, + "grad_norm": 16.555192947387695, + "learning_rate": 3.443131868131868e-05, + "loss": 0.7018, + "step": 11334 + }, + { + "epoch": 31.14010989010989, + "grad_norm": 13.906408309936523, + "learning_rate": 3.442994505494506e-05, + "loss": 0.5578, + "step": 11335 + }, + { + "epoch": 31.142857142857142, + "grad_norm": 11.893857955932617, + "learning_rate": 3.442857142857143e-05, + "loss": 0.1989, + "step": 11336 + }, + { + "epoch": 31.145604395604394, + "grad_norm": 4.231657028198242, + "learning_rate": 3.4427197802197806e-05, + "loss": 0.0873, + "step": 11337 + }, + { + "epoch": 31.14835164835165, + "grad_norm": 9.087430953979492, + "learning_rate": 3.4425824175824176e-05, + "loss": 0.3547, + "step": 11338 + }, + { + "epoch": 31.1510989010989, + "grad_norm": 15.210783958435059, + "learning_rate": 3.4424450549450546e-05, + "loss": 0.4214, + "step": 11339 + }, + { + "epoch": 31.153846153846153, + "grad_norm": 10.563150405883789, + "learning_rate": 3.442307692307692e-05, + "loss": 0.2759, + "step": 11340 + }, + { + "epoch": 31.156593406593405, + "grad_norm": 14.476051330566406, + "learning_rate": 3.44217032967033e-05, + "loss": 0.3828, + "step": 11341 + }, + { + "epoch": 31.15934065934066, + "grad_norm": 11.470160484313965, + "learning_rate": 3.442032967032967e-05, + "loss": 0.1558, + "step": 11342 + }, + { + "epoch": 31.162087912087912, + "grad_norm": 13.039376258850098, + "learning_rate": 3.4418956043956046e-05, + "loss": 0.3856, + "step": 11343 + }, + { + "epoch": 31.164835164835164, + "grad_norm": 4.122829437255859, + "learning_rate": 3.4417582417582416e-05, + "loss": 0.0785, + "step": 11344 + }, + { + "epoch": 31.167582417582416, + "grad_norm": 13.538900375366211, + "learning_rate": 3.441620879120879e-05, + "loss": 0.3459, + "step": 11345 + }, + { + "epoch": 31.17032967032967, + "grad_norm": 15.446187973022461, + "learning_rate": 3.441483516483517e-05, + "loss": 0.6007, + "step": 11346 + }, + { + "epoch": 31.173076923076923, + "grad_norm": 20.42251968383789, + "learning_rate": 3.441346153846154e-05, + "loss": 0.6905, + "step": 11347 + }, + { + "epoch": 31.175824175824175, + "grad_norm": 12.814397811889648, + "learning_rate": 3.441208791208792e-05, + "loss": 0.3212, + "step": 11348 + }, + { + "epoch": 31.178571428571427, + "grad_norm": 2.189605951309204, + "learning_rate": 3.441071428571429e-05, + "loss": 0.0382, + "step": 11349 + }, + { + "epoch": 31.181318681318682, + "grad_norm": 7.908196926116943, + "learning_rate": 3.4409340659340664e-05, + "loss": 0.1299, + "step": 11350 + }, + { + "epoch": 31.184065934065934, + "grad_norm": 7.963016033172607, + "learning_rate": 3.4407967032967034e-05, + "loss": 0.1729, + "step": 11351 + }, + { + "epoch": 31.186813186813186, + "grad_norm": 14.152840614318848, + "learning_rate": 3.440659340659341e-05, + "loss": 0.2168, + "step": 11352 + }, + { + "epoch": 31.189560439560438, + "grad_norm": 8.203348159790039, + "learning_rate": 3.440521978021978e-05, + "loss": 0.0949, + "step": 11353 + }, + { + "epoch": 31.192307692307693, + "grad_norm": 13.681533813476562, + "learning_rate": 3.440384615384615e-05, + "loss": 0.4932, + "step": 11354 + }, + { + "epoch": 31.195054945054945, + "grad_norm": 12.08614444732666, + "learning_rate": 3.440247252747253e-05, + "loss": 0.321, + "step": 11355 + }, + { + "epoch": 31.197802197802197, + "grad_norm": 13.858453750610352, + "learning_rate": 3.4401098901098904e-05, + "loss": 0.4591, + "step": 11356 + }, + { + "epoch": 31.20054945054945, + "grad_norm": 10.472375869750977, + "learning_rate": 3.4399725274725274e-05, + "loss": 0.2815, + "step": 11357 + }, + { + "epoch": 31.203296703296704, + "grad_norm": 6.244185447692871, + "learning_rate": 3.439835164835165e-05, + "loss": 0.1109, + "step": 11358 + }, + { + "epoch": 31.206043956043956, + "grad_norm": 14.943513870239258, + "learning_rate": 3.439697802197802e-05, + "loss": 0.3143, + "step": 11359 + }, + { + "epoch": 31.208791208791208, + "grad_norm": 11.781387329101562, + "learning_rate": 3.43956043956044e-05, + "loss": 0.3519, + "step": 11360 + }, + { + "epoch": 31.21153846153846, + "grad_norm": 11.524626731872559, + "learning_rate": 3.4394230769230774e-05, + "loss": 0.3182, + "step": 11361 + }, + { + "epoch": 31.214285714285715, + "grad_norm": 15.517912864685059, + "learning_rate": 3.4392857142857144e-05, + "loss": 0.271, + "step": 11362 + }, + { + "epoch": 31.217032967032967, + "grad_norm": 14.935572624206543, + "learning_rate": 3.439148351648352e-05, + "loss": 0.4196, + "step": 11363 + }, + { + "epoch": 31.21978021978022, + "grad_norm": 14.013029098510742, + "learning_rate": 3.439010989010989e-05, + "loss": 0.2543, + "step": 11364 + }, + { + "epoch": 31.22252747252747, + "grad_norm": 10.68713665008545, + "learning_rate": 3.438873626373627e-05, + "loss": 0.5335, + "step": 11365 + }, + { + "epoch": 31.225274725274726, + "grad_norm": 9.405867576599121, + "learning_rate": 3.438736263736264e-05, + "loss": 0.1833, + "step": 11366 + }, + { + "epoch": 31.228021978021978, + "grad_norm": 10.239502906799316, + "learning_rate": 3.4385989010989015e-05, + "loss": 0.3514, + "step": 11367 + }, + { + "epoch": 31.23076923076923, + "grad_norm": 12.300007820129395, + "learning_rate": 3.4384615384615385e-05, + "loss": 0.1945, + "step": 11368 + }, + { + "epoch": 31.233516483516482, + "grad_norm": 14.482823371887207, + "learning_rate": 3.4383241758241755e-05, + "loss": 0.2757, + "step": 11369 + }, + { + "epoch": 31.236263736263737, + "grad_norm": 13.673611640930176, + "learning_rate": 3.438186813186813e-05, + "loss": 0.4356, + "step": 11370 + }, + { + "epoch": 31.23901098901099, + "grad_norm": 7.864358425140381, + "learning_rate": 3.438049450549451e-05, + "loss": 0.1971, + "step": 11371 + }, + { + "epoch": 31.24175824175824, + "grad_norm": 11.450698852539062, + "learning_rate": 3.437912087912088e-05, + "loss": 0.3964, + "step": 11372 + }, + { + "epoch": 31.244505494505493, + "grad_norm": 19.123886108398438, + "learning_rate": 3.4377747252747255e-05, + "loss": 0.489, + "step": 11373 + }, + { + "epoch": 31.247252747252748, + "grad_norm": 11.053190231323242, + "learning_rate": 3.4376373626373625e-05, + "loss": 0.2867, + "step": 11374 + }, + { + "epoch": 31.25, + "grad_norm": 18.282596588134766, + "learning_rate": 3.4375e-05, + "loss": 0.5655, + "step": 11375 + }, + { + "epoch": 31.252747252747252, + "grad_norm": 9.780244827270508, + "learning_rate": 3.437362637362638e-05, + "loss": 0.1031, + "step": 11376 + }, + { + "epoch": 31.255494505494504, + "grad_norm": 11.683072090148926, + "learning_rate": 3.437225274725275e-05, + "loss": 0.5562, + "step": 11377 + }, + { + "epoch": 31.25824175824176, + "grad_norm": 12.180481910705566, + "learning_rate": 3.4370879120879126e-05, + "loss": 0.4422, + "step": 11378 + }, + { + "epoch": 31.26098901098901, + "grad_norm": 9.34951400756836, + "learning_rate": 3.4369505494505496e-05, + "loss": 0.248, + "step": 11379 + }, + { + "epoch": 31.263736263736263, + "grad_norm": 15.902926445007324, + "learning_rate": 3.436813186813187e-05, + "loss": 0.3581, + "step": 11380 + }, + { + "epoch": 31.266483516483518, + "grad_norm": 15.145082473754883, + "learning_rate": 3.436675824175824e-05, + "loss": 0.4739, + "step": 11381 + }, + { + "epoch": 31.26923076923077, + "grad_norm": 20.508068084716797, + "learning_rate": 3.436538461538462e-05, + "loss": 0.7685, + "step": 11382 + }, + { + "epoch": 31.271978021978022, + "grad_norm": 21.47980499267578, + "learning_rate": 3.436401098901099e-05, + "loss": 0.5548, + "step": 11383 + }, + { + "epoch": 31.274725274725274, + "grad_norm": 5.130927562713623, + "learning_rate": 3.436263736263736e-05, + "loss": 0.1206, + "step": 11384 + }, + { + "epoch": 31.27747252747253, + "grad_norm": 7.890373706817627, + "learning_rate": 3.4361263736263736e-05, + "loss": 0.1944, + "step": 11385 + }, + { + "epoch": 31.28021978021978, + "grad_norm": 10.360780715942383, + "learning_rate": 3.435989010989011e-05, + "loss": 0.1683, + "step": 11386 + }, + { + "epoch": 31.282967032967033, + "grad_norm": 19.64596939086914, + "learning_rate": 3.435851648351648e-05, + "loss": 0.7696, + "step": 11387 + }, + { + "epoch": 31.285714285714285, + "grad_norm": 2.6317434310913086, + "learning_rate": 3.435714285714286e-05, + "loss": 0.0574, + "step": 11388 + }, + { + "epoch": 31.28846153846154, + "grad_norm": 14.145800590515137, + "learning_rate": 3.435576923076923e-05, + "loss": 0.46, + "step": 11389 + }, + { + "epoch": 31.291208791208792, + "grad_norm": 15.20681095123291, + "learning_rate": 3.4354395604395607e-05, + "loss": 0.3631, + "step": 11390 + }, + { + "epoch": 31.293956043956044, + "grad_norm": 6.179259777069092, + "learning_rate": 3.435302197802198e-05, + "loss": 0.2024, + "step": 11391 + }, + { + "epoch": 31.296703296703296, + "grad_norm": 13.66905403137207, + "learning_rate": 3.435164835164835e-05, + "loss": 0.4972, + "step": 11392 + }, + { + "epoch": 31.29945054945055, + "grad_norm": 8.420011520385742, + "learning_rate": 3.435027472527473e-05, + "loss": 0.2205, + "step": 11393 + }, + { + "epoch": 31.302197802197803, + "grad_norm": 15.91655445098877, + "learning_rate": 3.43489010989011e-05, + "loss": 0.4983, + "step": 11394 + }, + { + "epoch": 31.304945054945055, + "grad_norm": 9.004419326782227, + "learning_rate": 3.434752747252748e-05, + "loss": 0.1828, + "step": 11395 + }, + { + "epoch": 31.307692307692307, + "grad_norm": 14.432744026184082, + "learning_rate": 3.434615384615385e-05, + "loss": 0.367, + "step": 11396 + }, + { + "epoch": 31.310439560439562, + "grad_norm": 3.0113182067871094, + "learning_rate": 3.4344780219780224e-05, + "loss": 0.0329, + "step": 11397 + }, + { + "epoch": 31.313186813186814, + "grad_norm": 8.20083999633789, + "learning_rate": 3.4343406593406594e-05, + "loss": 0.2473, + "step": 11398 + }, + { + "epoch": 31.315934065934066, + "grad_norm": 14.11028003692627, + "learning_rate": 3.4342032967032964e-05, + "loss": 0.3621, + "step": 11399 + }, + { + "epoch": 31.318681318681318, + "grad_norm": 15.336943626403809, + "learning_rate": 3.434065934065934e-05, + "loss": 0.5318, + "step": 11400 + }, + { + "epoch": 31.321428571428573, + "grad_norm": 8.41103458404541, + "learning_rate": 3.433928571428572e-05, + "loss": 0.2392, + "step": 11401 + }, + { + "epoch": 31.324175824175825, + "grad_norm": 10.320788383483887, + "learning_rate": 3.433791208791209e-05, + "loss": 0.1867, + "step": 11402 + }, + { + "epoch": 31.326923076923077, + "grad_norm": 8.182534217834473, + "learning_rate": 3.4336538461538464e-05, + "loss": 0.2312, + "step": 11403 + }, + { + "epoch": 31.32967032967033, + "grad_norm": 10.387707710266113, + "learning_rate": 3.4335164835164834e-05, + "loss": 0.2399, + "step": 11404 + }, + { + "epoch": 31.332417582417584, + "grad_norm": 2.536081314086914, + "learning_rate": 3.433379120879121e-05, + "loss": 0.0525, + "step": 11405 + }, + { + "epoch": 31.335164835164836, + "grad_norm": 20.080392837524414, + "learning_rate": 3.433241758241759e-05, + "loss": 0.6279, + "step": 11406 + }, + { + "epoch": 31.337912087912088, + "grad_norm": 13.174217224121094, + "learning_rate": 3.433104395604396e-05, + "loss": 0.2545, + "step": 11407 + }, + { + "epoch": 31.34065934065934, + "grad_norm": 19.185583114624023, + "learning_rate": 3.4329670329670335e-05, + "loss": 0.5369, + "step": 11408 + }, + { + "epoch": 31.343406593406595, + "grad_norm": 12.429426193237305, + "learning_rate": 3.4328296703296705e-05, + "loss": 0.1761, + "step": 11409 + }, + { + "epoch": 31.346153846153847, + "grad_norm": 7.017393589019775, + "learning_rate": 3.432692307692308e-05, + "loss": 0.1806, + "step": 11410 + }, + { + "epoch": 31.3489010989011, + "grad_norm": 15.629223823547363, + "learning_rate": 3.432554945054945e-05, + "loss": 0.8066, + "step": 11411 + }, + { + "epoch": 31.35164835164835, + "grad_norm": 11.706995010375977, + "learning_rate": 3.432417582417582e-05, + "loss": 0.2601, + "step": 11412 + }, + { + "epoch": 31.354395604395606, + "grad_norm": 15.935478210449219, + "learning_rate": 3.43228021978022e-05, + "loss": 0.5381, + "step": 11413 + }, + { + "epoch": 31.357142857142858, + "grad_norm": 8.269990921020508, + "learning_rate": 3.432142857142857e-05, + "loss": 0.1425, + "step": 11414 + }, + { + "epoch": 31.35989010989011, + "grad_norm": 16.562313079833984, + "learning_rate": 3.4320054945054945e-05, + "loss": 0.4421, + "step": 11415 + }, + { + "epoch": 31.36263736263736, + "grad_norm": 19.171184539794922, + "learning_rate": 3.431868131868132e-05, + "loss": 0.3841, + "step": 11416 + }, + { + "epoch": 31.365384615384617, + "grad_norm": 10.762350082397461, + "learning_rate": 3.431730769230769e-05, + "loss": 0.2794, + "step": 11417 + }, + { + "epoch": 31.36813186813187, + "grad_norm": 15.067887306213379, + "learning_rate": 3.431593406593407e-05, + "loss": 0.5729, + "step": 11418 + }, + { + "epoch": 31.37087912087912, + "grad_norm": 20.03518295288086, + "learning_rate": 3.431456043956044e-05, + "loss": 0.5642, + "step": 11419 + }, + { + "epoch": 31.373626373626372, + "grad_norm": 9.278876304626465, + "learning_rate": 3.4313186813186816e-05, + "loss": 0.2232, + "step": 11420 + }, + { + "epoch": 31.376373626373628, + "grad_norm": 14.535792350769043, + "learning_rate": 3.431181318681319e-05, + "loss": 0.5213, + "step": 11421 + }, + { + "epoch": 31.37912087912088, + "grad_norm": 12.649919509887695, + "learning_rate": 3.431043956043956e-05, + "loss": 0.1616, + "step": 11422 + }, + { + "epoch": 31.38186813186813, + "grad_norm": 13.014307022094727, + "learning_rate": 3.430906593406594e-05, + "loss": 0.4405, + "step": 11423 + }, + { + "epoch": 31.384615384615383, + "grad_norm": 15.404214859008789, + "learning_rate": 3.430769230769231e-05, + "loss": 0.6016, + "step": 11424 + }, + { + "epoch": 31.38736263736264, + "grad_norm": 13.152881622314453, + "learning_rate": 3.4306318681318686e-05, + "loss": 0.4663, + "step": 11425 + }, + { + "epoch": 31.39010989010989, + "grad_norm": 11.966032028198242, + "learning_rate": 3.4304945054945056e-05, + "loss": 0.25, + "step": 11426 + }, + { + "epoch": 31.392857142857142, + "grad_norm": 8.368460655212402, + "learning_rate": 3.4303571428571426e-05, + "loss": 0.1483, + "step": 11427 + }, + { + "epoch": 31.395604395604394, + "grad_norm": 12.042743682861328, + "learning_rate": 3.43021978021978e-05, + "loss": 0.2065, + "step": 11428 + }, + { + "epoch": 31.39835164835165, + "grad_norm": 22.399965286254883, + "learning_rate": 3.430082417582417e-05, + "loss": 1.0241, + "step": 11429 + }, + { + "epoch": 31.4010989010989, + "grad_norm": 25.59978675842285, + "learning_rate": 3.429945054945055e-05, + "loss": 0.8415, + "step": 11430 + }, + { + "epoch": 31.403846153846153, + "grad_norm": 7.637521266937256, + "learning_rate": 3.4298076923076926e-05, + "loss": 0.1774, + "step": 11431 + }, + { + "epoch": 31.406593406593405, + "grad_norm": 9.63849925994873, + "learning_rate": 3.4296703296703296e-05, + "loss": 0.2374, + "step": 11432 + }, + { + "epoch": 31.40934065934066, + "grad_norm": 11.597052574157715, + "learning_rate": 3.429532967032967e-05, + "loss": 0.3183, + "step": 11433 + }, + { + "epoch": 31.412087912087912, + "grad_norm": 14.069856643676758, + "learning_rate": 3.429395604395604e-05, + "loss": 0.2963, + "step": 11434 + }, + { + "epoch": 31.414835164835164, + "grad_norm": 6.814109802246094, + "learning_rate": 3.429258241758242e-05, + "loss": 0.2368, + "step": 11435 + }, + { + "epoch": 31.417582417582416, + "grad_norm": 17.7474422454834, + "learning_rate": 3.42912087912088e-05, + "loss": 0.3892, + "step": 11436 + }, + { + "epoch": 31.42032967032967, + "grad_norm": 17.07376480102539, + "learning_rate": 3.428983516483517e-05, + "loss": 0.4387, + "step": 11437 + }, + { + "epoch": 31.423076923076923, + "grad_norm": 8.470580101013184, + "learning_rate": 3.4288461538461544e-05, + "loss": 0.1135, + "step": 11438 + }, + { + "epoch": 31.425824175824175, + "grad_norm": 9.633801460266113, + "learning_rate": 3.4287087912087914e-05, + "loss": 0.6366, + "step": 11439 + }, + { + "epoch": 31.428571428571427, + "grad_norm": 7.705033302307129, + "learning_rate": 3.428571428571429e-05, + "loss": 0.2107, + "step": 11440 + }, + { + "epoch": 31.431318681318682, + "grad_norm": 6.66647481918335, + "learning_rate": 3.428434065934066e-05, + "loss": 0.1733, + "step": 11441 + }, + { + "epoch": 31.434065934065934, + "grad_norm": 18.920751571655273, + "learning_rate": 3.428296703296703e-05, + "loss": 0.5484, + "step": 11442 + }, + { + "epoch": 31.436813186813186, + "grad_norm": 4.972447395324707, + "learning_rate": 3.428159340659341e-05, + "loss": 0.1014, + "step": 11443 + }, + { + "epoch": 31.439560439560438, + "grad_norm": 17.377172470092773, + "learning_rate": 3.428021978021978e-05, + "loss": 0.5171, + "step": 11444 + }, + { + "epoch": 31.442307692307693, + "grad_norm": 8.426936149597168, + "learning_rate": 3.4278846153846154e-05, + "loss": 0.1446, + "step": 11445 + }, + { + "epoch": 31.445054945054945, + "grad_norm": 13.519185066223145, + "learning_rate": 3.427747252747253e-05, + "loss": 0.3457, + "step": 11446 + }, + { + "epoch": 31.447802197802197, + "grad_norm": 22.313493728637695, + "learning_rate": 3.42760989010989e-05, + "loss": 0.9454, + "step": 11447 + }, + { + "epoch": 31.45054945054945, + "grad_norm": 14.810522079467773, + "learning_rate": 3.427472527472528e-05, + "loss": 0.429, + "step": 11448 + }, + { + "epoch": 31.453296703296704, + "grad_norm": 8.050881385803223, + "learning_rate": 3.427335164835165e-05, + "loss": 0.2252, + "step": 11449 + }, + { + "epoch": 31.456043956043956, + "grad_norm": 5.437961578369141, + "learning_rate": 3.4271978021978025e-05, + "loss": 0.111, + "step": 11450 + }, + { + "epoch": 31.458791208791208, + "grad_norm": 13.957118034362793, + "learning_rate": 3.42706043956044e-05, + "loss": 0.2789, + "step": 11451 + }, + { + "epoch": 31.46153846153846, + "grad_norm": 12.213430404663086, + "learning_rate": 3.426923076923077e-05, + "loss": 0.2991, + "step": 11452 + }, + { + "epoch": 31.464285714285715, + "grad_norm": 16.01556396484375, + "learning_rate": 3.426785714285715e-05, + "loss": 0.2607, + "step": 11453 + }, + { + "epoch": 31.467032967032967, + "grad_norm": 11.981515884399414, + "learning_rate": 3.426648351648352e-05, + "loss": 0.2243, + "step": 11454 + }, + { + "epoch": 31.46978021978022, + "grad_norm": 16.53976058959961, + "learning_rate": 3.4265109890109895e-05, + "loss": 0.3689, + "step": 11455 + }, + { + "epoch": 31.47252747252747, + "grad_norm": 16.05974578857422, + "learning_rate": 3.4263736263736265e-05, + "loss": 0.5935, + "step": 11456 + }, + { + "epoch": 31.475274725274726, + "grad_norm": 11.138819694519043, + "learning_rate": 3.4262362637362635e-05, + "loss": 0.4393, + "step": 11457 + }, + { + "epoch": 31.478021978021978, + "grad_norm": 11.220361709594727, + "learning_rate": 3.426098901098901e-05, + "loss": 0.3282, + "step": 11458 + }, + { + "epoch": 31.48076923076923, + "grad_norm": 8.060672760009766, + "learning_rate": 3.425961538461538e-05, + "loss": 0.2461, + "step": 11459 + }, + { + "epoch": 31.483516483516482, + "grad_norm": 12.031412124633789, + "learning_rate": 3.425824175824176e-05, + "loss": 0.4127, + "step": 11460 + }, + { + "epoch": 31.486263736263737, + "grad_norm": 16.033872604370117, + "learning_rate": 3.4256868131868135e-05, + "loss": 0.4396, + "step": 11461 + }, + { + "epoch": 31.48901098901099, + "grad_norm": 10.13058853149414, + "learning_rate": 3.4255494505494505e-05, + "loss": 0.2355, + "step": 11462 + }, + { + "epoch": 31.49175824175824, + "grad_norm": 5.351162910461426, + "learning_rate": 3.425412087912088e-05, + "loss": 0.193, + "step": 11463 + }, + { + "epoch": 31.494505494505496, + "grad_norm": 16.1820011138916, + "learning_rate": 3.425274725274725e-05, + "loss": 0.3859, + "step": 11464 + }, + { + "epoch": 31.497252747252748, + "grad_norm": 12.146990776062012, + "learning_rate": 3.425137362637363e-05, + "loss": 0.3011, + "step": 11465 + }, + { + "epoch": 31.5, + "grad_norm": 15.16129207611084, + "learning_rate": 3.4250000000000006e-05, + "loss": 0.3399, + "step": 11466 + }, + { + "epoch": 31.502747252747252, + "grad_norm": 16.044063568115234, + "learning_rate": 3.4248626373626376e-05, + "loss": 0.3731, + "step": 11467 + }, + { + "epoch": 31.505494505494504, + "grad_norm": 8.802510261535645, + "learning_rate": 3.424725274725275e-05, + "loss": 0.2211, + "step": 11468 + }, + { + "epoch": 31.50824175824176, + "grad_norm": 13.63352108001709, + "learning_rate": 3.424587912087912e-05, + "loss": 0.3879, + "step": 11469 + }, + { + "epoch": 31.51098901098901, + "grad_norm": 15.846550941467285, + "learning_rate": 3.42445054945055e-05, + "loss": 0.6393, + "step": 11470 + }, + { + "epoch": 31.513736263736263, + "grad_norm": 13.997795104980469, + "learning_rate": 3.424313186813187e-05, + "loss": 0.4333, + "step": 11471 + }, + { + "epoch": 31.516483516483518, + "grad_norm": 4.144077777862549, + "learning_rate": 3.424175824175824e-05, + "loss": 0.0933, + "step": 11472 + }, + { + "epoch": 31.51923076923077, + "grad_norm": 12.489044189453125, + "learning_rate": 3.4240384615384616e-05, + "loss": 0.2496, + "step": 11473 + }, + { + "epoch": 31.521978021978022, + "grad_norm": 16.257457733154297, + "learning_rate": 3.4239010989010986e-05, + "loss": 0.3694, + "step": 11474 + }, + { + "epoch": 31.524725274725274, + "grad_norm": 13.865934371948242, + "learning_rate": 3.423763736263736e-05, + "loss": 0.5269, + "step": 11475 + }, + { + "epoch": 31.52747252747253, + "grad_norm": 15.417884826660156, + "learning_rate": 3.423626373626374e-05, + "loss": 0.5283, + "step": 11476 + }, + { + "epoch": 31.53021978021978, + "grad_norm": 9.537575721740723, + "learning_rate": 3.423489010989011e-05, + "loss": 0.2837, + "step": 11477 + }, + { + "epoch": 31.532967032967033, + "grad_norm": 16.986263275146484, + "learning_rate": 3.423351648351649e-05, + "loss": 0.4652, + "step": 11478 + }, + { + "epoch": 31.535714285714285, + "grad_norm": 4.702460289001465, + "learning_rate": 3.423214285714286e-05, + "loss": 0.1253, + "step": 11479 + }, + { + "epoch": 31.53846153846154, + "grad_norm": 2.9166200160980225, + "learning_rate": 3.4230769230769234e-05, + "loss": 0.0616, + "step": 11480 + }, + { + "epoch": 31.541208791208792, + "grad_norm": 12.720767974853516, + "learning_rate": 3.422939560439561e-05, + "loss": 0.1961, + "step": 11481 + }, + { + "epoch": 31.543956043956044, + "grad_norm": 14.229303359985352, + "learning_rate": 3.422802197802198e-05, + "loss": 0.3254, + "step": 11482 + }, + { + "epoch": 31.546703296703296, + "grad_norm": 10.578537940979004, + "learning_rate": 3.422664835164836e-05, + "loss": 0.1548, + "step": 11483 + }, + { + "epoch": 31.54945054945055, + "grad_norm": 9.353058815002441, + "learning_rate": 3.422527472527473e-05, + "loss": 0.1599, + "step": 11484 + }, + { + "epoch": 31.552197802197803, + "grad_norm": 14.022705078125, + "learning_rate": 3.4223901098901104e-05, + "loss": 0.3244, + "step": 11485 + }, + { + "epoch": 31.554945054945055, + "grad_norm": 11.130842208862305, + "learning_rate": 3.4222527472527474e-05, + "loss": 0.262, + "step": 11486 + }, + { + "epoch": 31.557692307692307, + "grad_norm": 12.723307609558105, + "learning_rate": 3.4221153846153844e-05, + "loss": 0.5394, + "step": 11487 + }, + { + "epoch": 31.560439560439562, + "grad_norm": 5.084625720977783, + "learning_rate": 3.421978021978022e-05, + "loss": 0.0834, + "step": 11488 + }, + { + "epoch": 31.563186813186814, + "grad_norm": 12.202849388122559, + "learning_rate": 3.421840659340659e-05, + "loss": 0.1799, + "step": 11489 + }, + { + "epoch": 31.565934065934066, + "grad_norm": 4.598004341125488, + "learning_rate": 3.421703296703297e-05, + "loss": 0.1045, + "step": 11490 + }, + { + "epoch": 31.568681318681318, + "grad_norm": 7.238299369812012, + "learning_rate": 3.421565934065934e-05, + "loss": 0.168, + "step": 11491 + }, + { + "epoch": 31.571428571428573, + "grad_norm": 13.737138748168945, + "learning_rate": 3.4214285714285714e-05, + "loss": 0.3752, + "step": 11492 + }, + { + "epoch": 31.574175824175825, + "grad_norm": 20.960163116455078, + "learning_rate": 3.421291208791209e-05, + "loss": 0.8158, + "step": 11493 + }, + { + "epoch": 31.576923076923077, + "grad_norm": 12.073076248168945, + "learning_rate": 3.421153846153846e-05, + "loss": 0.2205, + "step": 11494 + }, + { + "epoch": 31.57967032967033, + "grad_norm": 11.06617546081543, + "learning_rate": 3.421016483516484e-05, + "loss": 0.43, + "step": 11495 + }, + { + "epoch": 31.582417582417584, + "grad_norm": 10.596636772155762, + "learning_rate": 3.420879120879121e-05, + "loss": 0.3023, + "step": 11496 + }, + { + "epoch": 31.585164835164836, + "grad_norm": 16.26047134399414, + "learning_rate": 3.4207417582417585e-05, + "loss": 0.4318, + "step": 11497 + }, + { + "epoch": 31.587912087912088, + "grad_norm": 11.672547340393066, + "learning_rate": 3.420604395604396e-05, + "loss": 0.2177, + "step": 11498 + }, + { + "epoch": 31.59065934065934, + "grad_norm": 11.34201431274414, + "learning_rate": 3.420467032967033e-05, + "loss": 0.2953, + "step": 11499 + }, + { + "epoch": 31.593406593406595, + "grad_norm": 10.1414794921875, + "learning_rate": 3.420329670329671e-05, + "loss": 0.2812, + "step": 11500 + }, + { + "epoch": 31.596153846153847, + "grad_norm": 5.696324825286865, + "learning_rate": 3.420192307692308e-05, + "loss": 0.1046, + "step": 11501 + }, + { + "epoch": 31.5989010989011, + "grad_norm": 9.293338775634766, + "learning_rate": 3.420054945054945e-05, + "loss": 0.2947, + "step": 11502 + }, + { + "epoch": 31.60164835164835, + "grad_norm": 15.445649147033691, + "learning_rate": 3.4199175824175825e-05, + "loss": 0.3553, + "step": 11503 + }, + { + "epoch": 31.604395604395606, + "grad_norm": 11.873520851135254, + "learning_rate": 3.4197802197802195e-05, + "loss": 0.3626, + "step": 11504 + }, + { + "epoch": 31.607142857142858, + "grad_norm": 12.310046195983887, + "learning_rate": 3.419642857142857e-05, + "loss": 0.3168, + "step": 11505 + }, + { + "epoch": 31.60989010989011, + "grad_norm": 6.938121795654297, + "learning_rate": 3.419505494505494e-05, + "loss": 0.1504, + "step": 11506 + }, + { + "epoch": 31.61263736263736, + "grad_norm": 9.477832794189453, + "learning_rate": 3.419368131868132e-05, + "loss": 0.161, + "step": 11507 + }, + { + "epoch": 31.615384615384617, + "grad_norm": 12.383581161499023, + "learning_rate": 3.4192307692307696e-05, + "loss": 0.2524, + "step": 11508 + }, + { + "epoch": 31.61813186813187, + "grad_norm": 10.462565422058105, + "learning_rate": 3.4190934065934066e-05, + "loss": 0.4331, + "step": 11509 + }, + { + "epoch": 31.62087912087912, + "grad_norm": 19.91593360900879, + "learning_rate": 3.418956043956044e-05, + "loss": 0.7081, + "step": 11510 + }, + { + "epoch": 31.623626373626372, + "grad_norm": 13.024456977844238, + "learning_rate": 3.418818681318681e-05, + "loss": 0.2808, + "step": 11511 + }, + { + "epoch": 31.626373626373628, + "grad_norm": 8.015083312988281, + "learning_rate": 3.418681318681319e-05, + "loss": 0.2196, + "step": 11512 + }, + { + "epoch": 31.62912087912088, + "grad_norm": 9.980012893676758, + "learning_rate": 3.4185439560439566e-05, + "loss": 0.245, + "step": 11513 + }, + { + "epoch": 31.63186813186813, + "grad_norm": 22.85481071472168, + "learning_rate": 3.4184065934065936e-05, + "loss": 0.6412, + "step": 11514 + }, + { + "epoch": 31.634615384615383, + "grad_norm": 9.359310150146484, + "learning_rate": 3.418269230769231e-05, + "loss": 0.2754, + "step": 11515 + }, + { + "epoch": 31.63736263736264, + "grad_norm": 17.66193389892578, + "learning_rate": 3.418131868131868e-05, + "loss": 0.7725, + "step": 11516 + }, + { + "epoch": 31.64010989010989, + "grad_norm": 4.239078521728516, + "learning_rate": 3.417994505494505e-05, + "loss": 0.0947, + "step": 11517 + }, + { + "epoch": 31.642857142857142, + "grad_norm": 16.207134246826172, + "learning_rate": 3.417857142857143e-05, + "loss": 0.4685, + "step": 11518 + }, + { + "epoch": 31.645604395604394, + "grad_norm": 16.050128936767578, + "learning_rate": 3.41771978021978e-05, + "loss": 0.4706, + "step": 11519 + }, + { + "epoch": 31.64835164835165, + "grad_norm": 17.910139083862305, + "learning_rate": 3.4175824175824177e-05, + "loss": 0.5062, + "step": 11520 + }, + { + "epoch": 31.6510989010989, + "grad_norm": 11.470035552978516, + "learning_rate": 3.4174450549450547e-05, + "loss": 0.3338, + "step": 11521 + }, + { + "epoch": 31.653846153846153, + "grad_norm": 10.78697681427002, + "learning_rate": 3.417307692307692e-05, + "loss": 0.173, + "step": 11522 + }, + { + "epoch": 31.656593406593405, + "grad_norm": 5.757343292236328, + "learning_rate": 3.41717032967033e-05, + "loss": 0.1011, + "step": 11523 + }, + { + "epoch": 31.65934065934066, + "grad_norm": 12.565339088439941, + "learning_rate": 3.417032967032967e-05, + "loss": 0.2072, + "step": 11524 + }, + { + "epoch": 31.662087912087912, + "grad_norm": 24.038951873779297, + "learning_rate": 3.416895604395605e-05, + "loss": 1.1894, + "step": 11525 + }, + { + "epoch": 31.664835164835164, + "grad_norm": 9.307660102844238, + "learning_rate": 3.416758241758242e-05, + "loss": 0.1124, + "step": 11526 + }, + { + "epoch": 31.667582417582416, + "grad_norm": 7.353759765625, + "learning_rate": 3.4166208791208794e-05, + "loss": 0.123, + "step": 11527 + }, + { + "epoch": 31.67032967032967, + "grad_norm": 14.549600601196289, + "learning_rate": 3.416483516483517e-05, + "loss": 0.4972, + "step": 11528 + }, + { + "epoch": 31.673076923076923, + "grad_norm": 16.043376922607422, + "learning_rate": 3.416346153846154e-05, + "loss": 0.3575, + "step": 11529 + }, + { + "epoch": 31.675824175824175, + "grad_norm": 9.925301551818848, + "learning_rate": 3.416208791208792e-05, + "loss": 0.1955, + "step": 11530 + }, + { + "epoch": 31.678571428571427, + "grad_norm": 16.492000579833984, + "learning_rate": 3.416071428571429e-05, + "loss": 0.4082, + "step": 11531 + }, + { + "epoch": 31.681318681318682, + "grad_norm": 6.0491766929626465, + "learning_rate": 3.415934065934066e-05, + "loss": 0.1132, + "step": 11532 + }, + { + "epoch": 31.684065934065934, + "grad_norm": 2.317201614379883, + "learning_rate": 3.4157967032967034e-05, + "loss": 0.0519, + "step": 11533 + }, + { + "epoch": 31.686813186813186, + "grad_norm": 14.686662673950195, + "learning_rate": 3.4156593406593404e-05, + "loss": 0.4745, + "step": 11534 + }, + { + "epoch": 31.689560439560438, + "grad_norm": 9.882084846496582, + "learning_rate": 3.415521978021978e-05, + "loss": 0.2201, + "step": 11535 + }, + { + "epoch": 31.692307692307693, + "grad_norm": 9.177168846130371, + "learning_rate": 3.415384615384615e-05, + "loss": 0.1927, + "step": 11536 + }, + { + "epoch": 31.695054945054945, + "grad_norm": 14.569296836853027, + "learning_rate": 3.415247252747253e-05, + "loss": 0.2825, + "step": 11537 + }, + { + "epoch": 31.697802197802197, + "grad_norm": 20.7005672454834, + "learning_rate": 3.4151098901098905e-05, + "loss": 0.7149, + "step": 11538 + }, + { + "epoch": 31.70054945054945, + "grad_norm": 14.019039154052734, + "learning_rate": 3.4149725274725275e-05, + "loss": 0.3917, + "step": 11539 + }, + { + "epoch": 31.703296703296704, + "grad_norm": 1.8909977674484253, + "learning_rate": 3.414835164835165e-05, + "loss": 0.0337, + "step": 11540 + }, + { + "epoch": 31.706043956043956, + "grad_norm": 16.268922805786133, + "learning_rate": 3.414697802197802e-05, + "loss": 0.4371, + "step": 11541 + }, + { + "epoch": 31.708791208791208, + "grad_norm": 9.326269149780273, + "learning_rate": 3.41456043956044e-05, + "loss": 0.2411, + "step": 11542 + }, + { + "epoch": 31.71153846153846, + "grad_norm": 6.810212135314941, + "learning_rate": 3.4144230769230775e-05, + "loss": 0.1981, + "step": 11543 + }, + { + "epoch": 31.714285714285715, + "grad_norm": 5.524570465087891, + "learning_rate": 3.4142857142857145e-05, + "loss": 0.1991, + "step": 11544 + }, + { + "epoch": 31.717032967032967, + "grad_norm": 12.179993629455566, + "learning_rate": 3.414148351648352e-05, + "loss": 0.3031, + "step": 11545 + }, + { + "epoch": 31.71978021978022, + "grad_norm": 11.9094877243042, + "learning_rate": 3.414010989010989e-05, + "loss": 0.308, + "step": 11546 + }, + { + "epoch": 31.72252747252747, + "grad_norm": 13.948431968688965, + "learning_rate": 3.413873626373626e-05, + "loss": 0.5079, + "step": 11547 + }, + { + "epoch": 31.725274725274726, + "grad_norm": 13.637896537780762, + "learning_rate": 3.413736263736264e-05, + "loss": 0.3365, + "step": 11548 + }, + { + "epoch": 31.728021978021978, + "grad_norm": 14.613581657409668, + "learning_rate": 3.413598901098901e-05, + "loss": 0.4196, + "step": 11549 + }, + { + "epoch": 31.73076923076923, + "grad_norm": 9.4566068649292, + "learning_rate": 3.4134615384615386e-05, + "loss": 0.1482, + "step": 11550 + }, + { + "epoch": 31.733516483516482, + "grad_norm": 15.817919731140137, + "learning_rate": 3.4133241758241756e-05, + "loss": 0.5284, + "step": 11551 + }, + { + "epoch": 31.736263736263737, + "grad_norm": 2.765171527862549, + "learning_rate": 3.413186813186813e-05, + "loss": 0.0511, + "step": 11552 + }, + { + "epoch": 31.73901098901099, + "grad_norm": 12.735160827636719, + "learning_rate": 3.413049450549451e-05, + "loss": 0.2747, + "step": 11553 + }, + { + "epoch": 31.74175824175824, + "grad_norm": 5.5627264976501465, + "learning_rate": 3.412912087912088e-05, + "loss": 0.0889, + "step": 11554 + }, + { + "epoch": 31.744505494505496, + "grad_norm": 13.62112045288086, + "learning_rate": 3.4127747252747256e-05, + "loss": 0.3165, + "step": 11555 + }, + { + "epoch": 31.747252747252748, + "grad_norm": 12.722915649414062, + "learning_rate": 3.4126373626373626e-05, + "loss": 0.1487, + "step": 11556 + }, + { + "epoch": 31.75, + "grad_norm": 14.109804153442383, + "learning_rate": 3.4125e-05, + "loss": 0.5454, + "step": 11557 + }, + { + "epoch": 31.752747252747252, + "grad_norm": 16.08270835876465, + "learning_rate": 3.412362637362638e-05, + "loss": 0.5123, + "step": 11558 + }, + { + "epoch": 31.755494505494504, + "grad_norm": 14.227706909179688, + "learning_rate": 3.412225274725275e-05, + "loss": 0.6648, + "step": 11559 + }, + { + "epoch": 31.75824175824176, + "grad_norm": 8.062321662902832, + "learning_rate": 3.4120879120879126e-05, + "loss": 0.1692, + "step": 11560 + }, + { + "epoch": 31.76098901098901, + "grad_norm": 13.436688423156738, + "learning_rate": 3.4119505494505496e-05, + "loss": 0.3604, + "step": 11561 + }, + { + "epoch": 31.763736263736263, + "grad_norm": 12.764425277709961, + "learning_rate": 3.4118131868131866e-05, + "loss": 0.1753, + "step": 11562 + }, + { + "epoch": 31.766483516483518, + "grad_norm": 14.648717880249023, + "learning_rate": 3.411675824175824e-05, + "loss": 0.3322, + "step": 11563 + }, + { + "epoch": 31.76923076923077, + "grad_norm": 12.728522300720215, + "learning_rate": 3.411538461538461e-05, + "loss": 0.2709, + "step": 11564 + }, + { + "epoch": 31.771978021978022, + "grad_norm": 13.585689544677734, + "learning_rate": 3.411401098901099e-05, + "loss": 0.2768, + "step": 11565 + }, + { + "epoch": 31.774725274725274, + "grad_norm": 27.404390335083008, + "learning_rate": 3.411263736263736e-05, + "loss": 0.4014, + "step": 11566 + }, + { + "epoch": 31.77747252747253, + "grad_norm": 12.70964527130127, + "learning_rate": 3.411126373626374e-05, + "loss": 0.3678, + "step": 11567 + }, + { + "epoch": 31.78021978021978, + "grad_norm": 15.067524909973145, + "learning_rate": 3.4109890109890114e-05, + "loss": 0.6274, + "step": 11568 + }, + { + "epoch": 31.782967032967033, + "grad_norm": 14.306371688842773, + "learning_rate": 3.4108516483516484e-05, + "loss": 0.4859, + "step": 11569 + }, + { + "epoch": 31.785714285714285, + "grad_norm": 10.752131462097168, + "learning_rate": 3.410714285714286e-05, + "loss": 0.1718, + "step": 11570 + }, + { + "epoch": 31.78846153846154, + "grad_norm": 13.355299949645996, + "learning_rate": 3.410576923076923e-05, + "loss": 0.336, + "step": 11571 + }, + { + "epoch": 31.791208791208792, + "grad_norm": 8.702277183532715, + "learning_rate": 3.410439560439561e-05, + "loss": 0.1433, + "step": 11572 + }, + { + "epoch": 31.793956043956044, + "grad_norm": 13.348331451416016, + "learning_rate": 3.4103021978021984e-05, + "loss": 0.3913, + "step": 11573 + }, + { + "epoch": 31.796703296703296, + "grad_norm": 18.030725479125977, + "learning_rate": 3.4101648351648354e-05, + "loss": 0.5013, + "step": 11574 + }, + { + "epoch": 31.79945054945055, + "grad_norm": 5.639461517333984, + "learning_rate": 3.410027472527473e-05, + "loss": 0.1744, + "step": 11575 + }, + { + "epoch": 31.802197802197803, + "grad_norm": 17.247106552124023, + "learning_rate": 3.40989010989011e-05, + "loss": 0.501, + "step": 11576 + }, + { + "epoch": 31.804945054945055, + "grad_norm": 16.46615982055664, + "learning_rate": 3.409752747252747e-05, + "loss": 0.338, + "step": 11577 + }, + { + "epoch": 31.807692307692307, + "grad_norm": 16.35342788696289, + "learning_rate": 3.409615384615385e-05, + "loss": 0.4052, + "step": 11578 + }, + { + "epoch": 31.810439560439562, + "grad_norm": 16.061914443969727, + "learning_rate": 3.409478021978022e-05, + "loss": 0.2797, + "step": 11579 + }, + { + "epoch": 31.813186813186814, + "grad_norm": 15.6201753616333, + "learning_rate": 3.4093406593406595e-05, + "loss": 0.3999, + "step": 11580 + }, + { + "epoch": 31.815934065934066, + "grad_norm": 15.2581787109375, + "learning_rate": 3.4092032967032965e-05, + "loss": 0.375, + "step": 11581 + }, + { + "epoch": 31.818681318681318, + "grad_norm": 14.240129470825195, + "learning_rate": 3.409065934065934e-05, + "loss": 0.613, + "step": 11582 + }, + { + "epoch": 31.821428571428573, + "grad_norm": 17.750425338745117, + "learning_rate": 3.408928571428572e-05, + "loss": 0.4366, + "step": 11583 + }, + { + "epoch": 31.824175824175825, + "grad_norm": 11.168583869934082, + "learning_rate": 3.408791208791209e-05, + "loss": 0.4197, + "step": 11584 + }, + { + "epoch": 31.826923076923077, + "grad_norm": 7.008947372436523, + "learning_rate": 3.4086538461538465e-05, + "loss": 0.153, + "step": 11585 + }, + { + "epoch": 31.82967032967033, + "grad_norm": 21.49199104309082, + "learning_rate": 3.4085164835164835e-05, + "loss": 0.7296, + "step": 11586 + }, + { + "epoch": 31.832417582417584, + "grad_norm": 10.066779136657715, + "learning_rate": 3.408379120879121e-05, + "loss": 0.2569, + "step": 11587 + }, + { + "epoch": 31.835164835164836, + "grad_norm": 20.077592849731445, + "learning_rate": 3.408241758241759e-05, + "loss": 0.6735, + "step": 11588 + }, + { + "epoch": 31.837912087912088, + "grad_norm": 10.063694953918457, + "learning_rate": 3.408104395604396e-05, + "loss": 0.1609, + "step": 11589 + }, + { + "epoch": 31.84065934065934, + "grad_norm": 8.703990936279297, + "learning_rate": 3.4079670329670335e-05, + "loss": 0.3177, + "step": 11590 + }, + { + "epoch": 31.843406593406595, + "grad_norm": 14.658808708190918, + "learning_rate": 3.4078296703296705e-05, + "loss": 0.4285, + "step": 11591 + }, + { + "epoch": 31.846153846153847, + "grad_norm": 7.653726100921631, + "learning_rate": 3.4076923076923075e-05, + "loss": 0.1667, + "step": 11592 + }, + { + "epoch": 31.8489010989011, + "grad_norm": 5.3751139640808105, + "learning_rate": 3.407554945054945e-05, + "loss": 0.0771, + "step": 11593 + }, + { + "epoch": 31.85164835164835, + "grad_norm": 10.806557655334473, + "learning_rate": 3.407417582417582e-05, + "loss": 0.3053, + "step": 11594 + }, + { + "epoch": 31.854395604395606, + "grad_norm": 8.024375915527344, + "learning_rate": 3.40728021978022e-05, + "loss": 0.2136, + "step": 11595 + }, + { + "epoch": 31.857142857142858, + "grad_norm": 12.194658279418945, + "learning_rate": 3.407142857142857e-05, + "loss": 0.3636, + "step": 11596 + }, + { + "epoch": 31.85989010989011, + "grad_norm": 19.109630584716797, + "learning_rate": 3.4070054945054946e-05, + "loss": 0.5072, + "step": 11597 + }, + { + "epoch": 31.86263736263736, + "grad_norm": 16.63551139831543, + "learning_rate": 3.406868131868132e-05, + "loss": 0.4362, + "step": 11598 + }, + { + "epoch": 31.865384615384617, + "grad_norm": 12.018051147460938, + "learning_rate": 3.406730769230769e-05, + "loss": 0.4833, + "step": 11599 + }, + { + "epoch": 31.86813186813187, + "grad_norm": 6.571959018707275, + "learning_rate": 3.406593406593407e-05, + "loss": 0.0809, + "step": 11600 + }, + { + "epoch": 31.87087912087912, + "grad_norm": 12.119404792785645, + "learning_rate": 3.406456043956044e-05, + "loss": 0.2478, + "step": 11601 + }, + { + "epoch": 31.873626373626372, + "grad_norm": 20.32581901550293, + "learning_rate": 3.4063186813186816e-05, + "loss": 0.499, + "step": 11602 + }, + { + "epoch": 31.876373626373628, + "grad_norm": 2.2034425735473633, + "learning_rate": 3.406181318681319e-05, + "loss": 0.0512, + "step": 11603 + }, + { + "epoch": 31.87912087912088, + "grad_norm": 10.329421997070312, + "learning_rate": 3.406043956043956e-05, + "loss": 0.3223, + "step": 11604 + }, + { + "epoch": 31.88186813186813, + "grad_norm": 7.75238561630249, + "learning_rate": 3.405906593406594e-05, + "loss": 0.1488, + "step": 11605 + }, + { + "epoch": 31.884615384615383, + "grad_norm": 16.413066864013672, + "learning_rate": 3.405769230769231e-05, + "loss": 0.3815, + "step": 11606 + }, + { + "epoch": 31.88736263736264, + "grad_norm": 19.311269760131836, + "learning_rate": 3.405631868131868e-05, + "loss": 0.3978, + "step": 11607 + }, + { + "epoch": 31.89010989010989, + "grad_norm": 11.272407531738281, + "learning_rate": 3.405494505494506e-05, + "loss": 0.2791, + "step": 11608 + }, + { + "epoch": 31.892857142857142, + "grad_norm": 8.42999267578125, + "learning_rate": 3.405357142857143e-05, + "loss": 0.3699, + "step": 11609 + }, + { + "epoch": 31.895604395604394, + "grad_norm": 5.228204250335693, + "learning_rate": 3.4052197802197803e-05, + "loss": 0.0918, + "step": 11610 + }, + { + "epoch": 31.89835164835165, + "grad_norm": 11.327863693237305, + "learning_rate": 3.4050824175824174e-05, + "loss": 0.4002, + "step": 11611 + }, + { + "epoch": 31.9010989010989, + "grad_norm": 20.899921417236328, + "learning_rate": 3.404945054945055e-05, + "loss": 0.8053, + "step": 11612 + }, + { + "epoch": 31.903846153846153, + "grad_norm": 14.095197677612305, + "learning_rate": 3.404807692307693e-05, + "loss": 0.2896, + "step": 11613 + }, + { + "epoch": 31.906593406593405, + "grad_norm": 13.297390937805176, + "learning_rate": 3.40467032967033e-05, + "loss": 0.2817, + "step": 11614 + }, + { + "epoch": 31.90934065934066, + "grad_norm": 15.851435661315918, + "learning_rate": 3.4045329670329674e-05, + "loss": 0.2969, + "step": 11615 + }, + { + "epoch": 31.912087912087912, + "grad_norm": 5.852993011474609, + "learning_rate": 3.4043956043956044e-05, + "loss": 0.1533, + "step": 11616 + }, + { + "epoch": 31.914835164835164, + "grad_norm": 24.312665939331055, + "learning_rate": 3.404258241758242e-05, + "loss": 1.1076, + "step": 11617 + }, + { + "epoch": 31.917582417582416, + "grad_norm": 16.206850051879883, + "learning_rate": 3.40412087912088e-05, + "loss": 0.3533, + "step": 11618 + }, + { + "epoch": 31.92032967032967, + "grad_norm": 7.796273231506348, + "learning_rate": 3.403983516483517e-05, + "loss": 0.2071, + "step": 11619 + }, + { + "epoch": 31.923076923076923, + "grad_norm": 7.230066776275635, + "learning_rate": 3.4038461538461544e-05, + "loss": 0.1899, + "step": 11620 + }, + { + "epoch": 31.925824175824175, + "grad_norm": 12.423891067504883, + "learning_rate": 3.4037087912087914e-05, + "loss": 0.3681, + "step": 11621 + }, + { + "epoch": 31.928571428571427, + "grad_norm": 14.26854133605957, + "learning_rate": 3.4035714285714284e-05, + "loss": 0.5416, + "step": 11622 + }, + { + "epoch": 31.931318681318682, + "grad_norm": 27.007057189941406, + "learning_rate": 3.403434065934066e-05, + "loss": 1.0241, + "step": 11623 + }, + { + "epoch": 31.934065934065934, + "grad_norm": 10.286778450012207, + "learning_rate": 3.403296703296703e-05, + "loss": 0.3794, + "step": 11624 + }, + { + "epoch": 31.936813186813186, + "grad_norm": 8.477381706237793, + "learning_rate": 3.403159340659341e-05, + "loss": 0.2358, + "step": 11625 + }, + { + "epoch": 31.939560439560438, + "grad_norm": 18.488018035888672, + "learning_rate": 3.403021978021978e-05, + "loss": 0.6843, + "step": 11626 + }, + { + "epoch": 31.942307692307693, + "grad_norm": 12.558327674865723, + "learning_rate": 3.4028846153846155e-05, + "loss": 0.3837, + "step": 11627 + }, + { + "epoch": 31.945054945054945, + "grad_norm": 19.650949478149414, + "learning_rate": 3.402747252747253e-05, + "loss": 0.7969, + "step": 11628 + }, + { + "epoch": 31.947802197802197, + "grad_norm": 7.560098171234131, + "learning_rate": 3.40260989010989e-05, + "loss": 0.1487, + "step": 11629 + }, + { + "epoch": 31.95054945054945, + "grad_norm": 5.546338081359863, + "learning_rate": 3.402472527472528e-05, + "loss": 0.143, + "step": 11630 + }, + { + "epoch": 31.953296703296704, + "grad_norm": 11.860321044921875, + "learning_rate": 3.402335164835165e-05, + "loss": 0.2489, + "step": 11631 + }, + { + "epoch": 31.956043956043956, + "grad_norm": 15.186137199401855, + "learning_rate": 3.4021978021978025e-05, + "loss": 0.7267, + "step": 11632 + }, + { + "epoch": 31.958791208791208, + "grad_norm": 14.955244064331055, + "learning_rate": 3.40206043956044e-05, + "loss": 0.4735, + "step": 11633 + }, + { + "epoch": 31.96153846153846, + "grad_norm": 12.5487060546875, + "learning_rate": 3.401923076923077e-05, + "loss": 0.35, + "step": 11634 + }, + { + "epoch": 31.964285714285715, + "grad_norm": 8.539748191833496, + "learning_rate": 3.401785714285715e-05, + "loss": 0.2876, + "step": 11635 + }, + { + "epoch": 31.967032967032967, + "grad_norm": 15.722604751586914, + "learning_rate": 3.401648351648352e-05, + "loss": 0.6755, + "step": 11636 + }, + { + "epoch": 31.96978021978022, + "grad_norm": 9.931117057800293, + "learning_rate": 3.401510989010989e-05, + "loss": 0.2366, + "step": 11637 + }, + { + "epoch": 31.97252747252747, + "grad_norm": 8.522010803222656, + "learning_rate": 3.4013736263736266e-05, + "loss": 0.1542, + "step": 11638 + }, + { + "epoch": 31.975274725274726, + "grad_norm": 15.30691146850586, + "learning_rate": 3.4012362637362636e-05, + "loss": 0.3491, + "step": 11639 + }, + { + "epoch": 31.978021978021978, + "grad_norm": 10.408563613891602, + "learning_rate": 3.401098901098901e-05, + "loss": 0.303, + "step": 11640 + }, + { + "epoch": 31.98076923076923, + "grad_norm": 7.260004043579102, + "learning_rate": 3.400961538461538e-05, + "loss": 0.2359, + "step": 11641 + }, + { + "epoch": 31.983516483516482, + "grad_norm": 12.148653030395508, + "learning_rate": 3.400824175824176e-05, + "loss": 0.3756, + "step": 11642 + }, + { + "epoch": 31.986263736263737, + "grad_norm": 17.073585510253906, + "learning_rate": 3.4006868131868136e-05, + "loss": 0.6597, + "step": 11643 + }, + { + "epoch": 31.98901098901099, + "grad_norm": 14.581130027770996, + "learning_rate": 3.4005494505494506e-05, + "loss": 0.3264, + "step": 11644 + }, + { + "epoch": 31.99175824175824, + "grad_norm": 14.771028518676758, + "learning_rate": 3.400412087912088e-05, + "loss": 0.4221, + "step": 11645 + }, + { + "epoch": 31.994505494505496, + "grad_norm": 14.46051025390625, + "learning_rate": 3.400274725274725e-05, + "loss": 0.4521, + "step": 11646 + }, + { + "epoch": 31.997252747252748, + "grad_norm": 4.486486434936523, + "learning_rate": 3.400137362637363e-05, + "loss": 0.0713, + "step": 11647 + }, + { + "epoch": 32.0, + "grad_norm": 10.547618865966797, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.1863, + "step": 11648 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.7823691460055097, + "eval_f1": 0.7839178127799078, + "eval_f1_DuraRiadoRio_64x64": 0.7625418060200669, + "eval_f1_Mole_64x64": 0.8592057761732852, + "eval_f1_Quebrado_64x64": 0.868421052631579, + "eval_f1_RiadoRio_64x64": 0.69164265129683, + "eval_f1_RioFechado_64x64": 0.7377777777777778, + "eval_loss": 0.7388832569122314, + "eval_precision": 0.8141210656915241, + "eval_precision_DuraRiadoRio_64x64": 0.7354838709677419, + "eval_precision_Mole_64x64": 0.8947368421052632, + "eval_precision_Quebrado_64x64": 0.825, + "eval_precision_RiadoRio_64x64": 0.6153846153846154, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.7817405897372538, + "eval_recall_DuraRiadoRio_64x64": 0.7916666666666666, + "eval_recall_Mole_64x64": 0.8263888888888888, + "eval_recall_Quebrado_64x64": 0.9166666666666666, + "eval_recall_RiadoRio_64x64": 0.7894736842105263, + "eval_recall_RioFechado_64x64": 0.5845070422535211, + "eval_runtime": 1.7353, + "eval_samples_per_second": 418.38, + "eval_steps_per_second": 26.509, + "step": 11648 + }, + { + "epoch": 32.002747252747255, + "grad_norm": 4.444972038269043, + "learning_rate": 3.3998626373626377e-05, + "loss": 0.099, + "step": 11649 + }, + { + "epoch": 32.005494505494504, + "grad_norm": 9.078287124633789, + "learning_rate": 3.399725274725275e-05, + "loss": 0.2199, + "step": 11650 + }, + { + "epoch": 32.00824175824176, + "grad_norm": 6.059119701385498, + "learning_rate": 3.399587912087912e-05, + "loss": 0.2249, + "step": 11651 + }, + { + "epoch": 32.010989010989015, + "grad_norm": 8.780998229980469, + "learning_rate": 3.399450549450549e-05, + "loss": 0.2214, + "step": 11652 + }, + { + "epoch": 32.01373626373626, + "grad_norm": 5.295434951782227, + "learning_rate": 3.399313186813187e-05, + "loss": 0.0661, + "step": 11653 + }, + { + "epoch": 32.01648351648352, + "grad_norm": 5.020112991333008, + "learning_rate": 3.399175824175824e-05, + "loss": 0.1217, + "step": 11654 + }, + { + "epoch": 32.01923076923077, + "grad_norm": 14.930313110351562, + "learning_rate": 3.399038461538462e-05, + "loss": 0.3301, + "step": 11655 + }, + { + "epoch": 32.02197802197802, + "grad_norm": 8.372200965881348, + "learning_rate": 3.398901098901099e-05, + "loss": 0.2185, + "step": 11656 + }, + { + "epoch": 32.02472527472528, + "grad_norm": 8.411548614501953, + "learning_rate": 3.3987637362637364e-05, + "loss": 0.2217, + "step": 11657 + }, + { + "epoch": 32.027472527472526, + "grad_norm": 7.879373073577881, + "learning_rate": 3.398626373626374e-05, + "loss": 0.2128, + "step": 11658 + }, + { + "epoch": 32.03021978021978, + "grad_norm": 12.593921661376953, + "learning_rate": 3.398489010989011e-05, + "loss": 0.2544, + "step": 11659 + }, + { + "epoch": 32.032967032967036, + "grad_norm": 8.791584968566895, + "learning_rate": 3.398351648351649e-05, + "loss": 0.1523, + "step": 11660 + }, + { + "epoch": 32.035714285714285, + "grad_norm": 11.571809768676758, + "learning_rate": 3.398214285714286e-05, + "loss": 0.269, + "step": 11661 + }, + { + "epoch": 32.03846153846154, + "grad_norm": 8.236594200134277, + "learning_rate": 3.3980769230769234e-05, + "loss": 0.1466, + "step": 11662 + }, + { + "epoch": 32.04120879120879, + "grad_norm": 5.052328586578369, + "learning_rate": 3.397939560439561e-05, + "loss": 0.1684, + "step": 11663 + }, + { + "epoch": 32.043956043956044, + "grad_norm": 8.614338874816895, + "learning_rate": 3.397802197802198e-05, + "loss": 0.2355, + "step": 11664 + }, + { + "epoch": 32.0467032967033, + "grad_norm": 11.422277450561523, + "learning_rate": 3.397664835164836e-05, + "loss": 0.3073, + "step": 11665 + }, + { + "epoch": 32.04945054945055, + "grad_norm": 15.665410995483398, + "learning_rate": 3.397527472527473e-05, + "loss": 0.4064, + "step": 11666 + }, + { + "epoch": 32.0521978021978, + "grad_norm": 14.02721118927002, + "learning_rate": 3.39739010989011e-05, + "loss": 0.3781, + "step": 11667 + }, + { + "epoch": 32.05494505494506, + "grad_norm": 10.430723190307617, + "learning_rate": 3.3972527472527475e-05, + "loss": 0.2398, + "step": 11668 + }, + { + "epoch": 32.05769230769231, + "grad_norm": 11.644402503967285, + "learning_rate": 3.3971153846153845e-05, + "loss": 0.2018, + "step": 11669 + }, + { + "epoch": 32.06043956043956, + "grad_norm": 9.615334510803223, + "learning_rate": 3.396978021978022e-05, + "loss": 0.3215, + "step": 11670 + }, + { + "epoch": 32.06318681318681, + "grad_norm": 11.490103721618652, + "learning_rate": 3.396840659340659e-05, + "loss": 0.2099, + "step": 11671 + }, + { + "epoch": 32.065934065934066, + "grad_norm": 14.771344184875488, + "learning_rate": 3.396703296703297e-05, + "loss": 0.3156, + "step": 11672 + }, + { + "epoch": 32.06868131868132, + "grad_norm": 16.932641983032227, + "learning_rate": 3.3965659340659345e-05, + "loss": 0.5999, + "step": 11673 + }, + { + "epoch": 32.07142857142857, + "grad_norm": 16.321626663208008, + "learning_rate": 3.3964285714285715e-05, + "loss": 0.5125, + "step": 11674 + }, + { + "epoch": 32.074175824175825, + "grad_norm": 7.835797309875488, + "learning_rate": 3.396291208791209e-05, + "loss": 0.2621, + "step": 11675 + }, + { + "epoch": 32.07692307692308, + "grad_norm": 8.093161582946777, + "learning_rate": 3.396153846153846e-05, + "loss": 0.2498, + "step": 11676 + }, + { + "epoch": 32.07967032967033, + "grad_norm": 5.0387959480285645, + "learning_rate": 3.396016483516484e-05, + "loss": 0.1516, + "step": 11677 + }, + { + "epoch": 32.082417582417584, + "grad_norm": 15.630890846252441, + "learning_rate": 3.3958791208791215e-05, + "loss": 0.3694, + "step": 11678 + }, + { + "epoch": 32.08516483516483, + "grad_norm": 15.34787368774414, + "learning_rate": 3.3957417582417586e-05, + "loss": 0.4829, + "step": 11679 + }, + { + "epoch": 32.08791208791209, + "grad_norm": 11.297431945800781, + "learning_rate": 3.395604395604396e-05, + "loss": 0.3173, + "step": 11680 + }, + { + "epoch": 32.09065934065934, + "grad_norm": 13.533683776855469, + "learning_rate": 3.395467032967033e-05, + "loss": 0.435, + "step": 11681 + }, + { + "epoch": 32.09340659340659, + "grad_norm": 9.887435913085938, + "learning_rate": 3.39532967032967e-05, + "loss": 0.1668, + "step": 11682 + }, + { + "epoch": 32.09615384615385, + "grad_norm": 14.19163703918457, + "learning_rate": 3.395192307692308e-05, + "loss": 0.2191, + "step": 11683 + }, + { + "epoch": 32.0989010989011, + "grad_norm": 13.247074127197266, + "learning_rate": 3.395054945054945e-05, + "loss": 0.3418, + "step": 11684 + }, + { + "epoch": 32.10164835164835, + "grad_norm": 19.34033966064453, + "learning_rate": 3.3949175824175826e-05, + "loss": 0.7604, + "step": 11685 + }, + { + "epoch": 32.104395604395606, + "grad_norm": 9.625755310058594, + "learning_rate": 3.3947802197802196e-05, + "loss": 0.3209, + "step": 11686 + }, + { + "epoch": 32.107142857142854, + "grad_norm": 4.524463176727295, + "learning_rate": 3.394642857142857e-05, + "loss": 0.089, + "step": 11687 + }, + { + "epoch": 32.10989010989011, + "grad_norm": 10.8057861328125, + "learning_rate": 3.394505494505495e-05, + "loss": 0.2425, + "step": 11688 + }, + { + "epoch": 32.112637362637365, + "grad_norm": 6.613767623901367, + "learning_rate": 3.394368131868132e-05, + "loss": 0.1828, + "step": 11689 + }, + { + "epoch": 32.11538461538461, + "grad_norm": 10.77412223815918, + "learning_rate": 3.3942307692307696e-05, + "loss": 0.2391, + "step": 11690 + }, + { + "epoch": 32.11813186813187, + "grad_norm": 8.549304962158203, + "learning_rate": 3.3940934065934066e-05, + "loss": 0.224, + "step": 11691 + }, + { + "epoch": 32.120879120879124, + "grad_norm": 4.810882568359375, + "learning_rate": 3.393956043956044e-05, + "loss": 0.0799, + "step": 11692 + }, + { + "epoch": 32.12362637362637, + "grad_norm": 10.966548919677734, + "learning_rate": 3.393818681318682e-05, + "loss": 0.1946, + "step": 11693 + }, + { + "epoch": 32.12637362637363, + "grad_norm": 11.39384651184082, + "learning_rate": 3.393681318681319e-05, + "loss": 0.3328, + "step": 11694 + }, + { + "epoch": 32.129120879120876, + "grad_norm": 15.93648910522461, + "learning_rate": 3.393543956043957e-05, + "loss": 0.5093, + "step": 11695 + }, + { + "epoch": 32.13186813186813, + "grad_norm": 9.417436599731445, + "learning_rate": 3.393406593406594e-05, + "loss": 0.1998, + "step": 11696 + }, + { + "epoch": 32.13461538461539, + "grad_norm": 11.090947151184082, + "learning_rate": 3.393269230769231e-05, + "loss": 0.3143, + "step": 11697 + }, + { + "epoch": 32.137362637362635, + "grad_norm": 13.40975570678711, + "learning_rate": 3.3931318681318684e-05, + "loss": 0.3388, + "step": 11698 + }, + { + "epoch": 32.14010989010989, + "grad_norm": 9.961382865905762, + "learning_rate": 3.3929945054945054e-05, + "loss": 0.3822, + "step": 11699 + }, + { + "epoch": 32.142857142857146, + "grad_norm": 11.776018142700195, + "learning_rate": 3.392857142857143e-05, + "loss": 0.2197, + "step": 11700 + }, + { + "epoch": 32.145604395604394, + "grad_norm": 10.437322616577148, + "learning_rate": 3.39271978021978e-05, + "loss": 0.3326, + "step": 11701 + }, + { + "epoch": 32.14835164835165, + "grad_norm": 16.909740447998047, + "learning_rate": 3.392582417582418e-05, + "loss": 0.51, + "step": 11702 + }, + { + "epoch": 32.1510989010989, + "grad_norm": 7.323986053466797, + "learning_rate": 3.3924450549450554e-05, + "loss": 0.1661, + "step": 11703 + }, + { + "epoch": 32.15384615384615, + "grad_norm": 16.899765014648438, + "learning_rate": 3.3923076923076924e-05, + "loss": 0.5488, + "step": 11704 + }, + { + "epoch": 32.15659340659341, + "grad_norm": 14.546769142150879, + "learning_rate": 3.39217032967033e-05, + "loss": 0.4157, + "step": 11705 + }, + { + "epoch": 32.15934065934066, + "grad_norm": 19.325525283813477, + "learning_rate": 3.392032967032967e-05, + "loss": 0.7804, + "step": 11706 + }, + { + "epoch": 32.16208791208791, + "grad_norm": 5.790271759033203, + "learning_rate": 3.391895604395605e-05, + "loss": 0.1633, + "step": 11707 + }, + { + "epoch": 32.16483516483517, + "grad_norm": 9.912497520446777, + "learning_rate": 3.3917582417582424e-05, + "loss": 0.1652, + "step": 11708 + }, + { + "epoch": 32.167582417582416, + "grad_norm": 12.370477676391602, + "learning_rate": 3.3916208791208794e-05, + "loss": 0.2494, + "step": 11709 + }, + { + "epoch": 32.17032967032967, + "grad_norm": 7.84788703918457, + "learning_rate": 3.391483516483517e-05, + "loss": 0.1598, + "step": 11710 + }, + { + "epoch": 32.17307692307692, + "grad_norm": 10.541396141052246, + "learning_rate": 3.391346153846154e-05, + "loss": 0.1679, + "step": 11711 + }, + { + "epoch": 32.175824175824175, + "grad_norm": 10.752657890319824, + "learning_rate": 3.391208791208791e-05, + "loss": 0.2638, + "step": 11712 + }, + { + "epoch": 32.17857142857143, + "grad_norm": 9.05533218383789, + "learning_rate": 3.391071428571429e-05, + "loss": 0.1569, + "step": 11713 + }, + { + "epoch": 32.18131868131868, + "grad_norm": 10.722163200378418, + "learning_rate": 3.390934065934066e-05, + "loss": 0.2584, + "step": 11714 + }, + { + "epoch": 32.184065934065934, + "grad_norm": 14.390654563903809, + "learning_rate": 3.3907967032967035e-05, + "loss": 0.2424, + "step": 11715 + }, + { + "epoch": 32.18681318681319, + "grad_norm": 12.272316932678223, + "learning_rate": 3.3906593406593405e-05, + "loss": 0.397, + "step": 11716 + }, + { + "epoch": 32.18956043956044, + "grad_norm": 4.974562168121338, + "learning_rate": 3.390521978021978e-05, + "loss": 0.0946, + "step": 11717 + }, + { + "epoch": 32.19230769230769, + "grad_norm": 15.499504089355469, + "learning_rate": 3.390384615384615e-05, + "loss": 0.4683, + "step": 11718 + }, + { + "epoch": 32.19505494505494, + "grad_norm": 18.956926345825195, + "learning_rate": 3.390247252747253e-05, + "loss": 0.6651, + "step": 11719 + }, + { + "epoch": 32.1978021978022, + "grad_norm": 15.490217208862305, + "learning_rate": 3.3901098901098905e-05, + "loss": 0.4808, + "step": 11720 + }, + { + "epoch": 32.20054945054945, + "grad_norm": 10.362274169921875, + "learning_rate": 3.3899725274725275e-05, + "loss": 0.2493, + "step": 11721 + }, + { + "epoch": 32.2032967032967, + "grad_norm": 8.056788444519043, + "learning_rate": 3.389835164835165e-05, + "loss": 0.1674, + "step": 11722 + }, + { + "epoch": 32.206043956043956, + "grad_norm": 10.839876174926758, + "learning_rate": 3.389697802197802e-05, + "loss": 0.1467, + "step": 11723 + }, + { + "epoch": 32.20879120879121, + "grad_norm": 9.273287773132324, + "learning_rate": 3.38956043956044e-05, + "loss": 0.2514, + "step": 11724 + }, + { + "epoch": 32.21153846153846, + "grad_norm": 23.3754940032959, + "learning_rate": 3.3894230769230776e-05, + "loss": 0.7692, + "step": 11725 + }, + { + "epoch": 32.214285714285715, + "grad_norm": 18.039318084716797, + "learning_rate": 3.3892857142857146e-05, + "loss": 0.2868, + "step": 11726 + }, + { + "epoch": 32.217032967032964, + "grad_norm": 12.505605697631836, + "learning_rate": 3.3891483516483516e-05, + "loss": 0.303, + "step": 11727 + }, + { + "epoch": 32.21978021978022, + "grad_norm": 4.562585830688477, + "learning_rate": 3.3890109890109886e-05, + "loss": 0.1297, + "step": 11728 + }, + { + "epoch": 32.222527472527474, + "grad_norm": 4.503873825073242, + "learning_rate": 3.388873626373626e-05, + "loss": 0.1302, + "step": 11729 + }, + { + "epoch": 32.22527472527472, + "grad_norm": 12.712225914001465, + "learning_rate": 3.388736263736264e-05, + "loss": 0.2014, + "step": 11730 + }, + { + "epoch": 32.22802197802198, + "grad_norm": 19.693157196044922, + "learning_rate": 3.388598901098901e-05, + "loss": 0.597, + "step": 11731 + }, + { + "epoch": 32.23076923076923, + "grad_norm": 12.7091646194458, + "learning_rate": 3.3884615384615386e-05, + "loss": 0.4479, + "step": 11732 + }, + { + "epoch": 32.23351648351648, + "grad_norm": 12.87939453125, + "learning_rate": 3.3883241758241756e-05, + "loss": 0.3799, + "step": 11733 + }, + { + "epoch": 32.23626373626374, + "grad_norm": 5.113419532775879, + "learning_rate": 3.388186813186813e-05, + "loss": 0.1078, + "step": 11734 + }, + { + "epoch": 32.239010989010985, + "grad_norm": 12.844023704528809, + "learning_rate": 3.388049450549451e-05, + "loss": 0.5595, + "step": 11735 + }, + { + "epoch": 32.24175824175824, + "grad_norm": 19.871103286743164, + "learning_rate": 3.387912087912088e-05, + "loss": 0.7036, + "step": 11736 + }, + { + "epoch": 32.244505494505496, + "grad_norm": 18.246320724487305, + "learning_rate": 3.387774725274726e-05, + "loss": 0.3894, + "step": 11737 + }, + { + "epoch": 32.247252747252745, + "grad_norm": 18.2091007232666, + "learning_rate": 3.387637362637363e-05, + "loss": 0.7765, + "step": 11738 + }, + { + "epoch": 32.25, + "grad_norm": 18.40692901611328, + "learning_rate": 3.3875000000000003e-05, + "loss": 0.7352, + "step": 11739 + }, + { + "epoch": 32.252747252747255, + "grad_norm": 7.6311516761779785, + "learning_rate": 3.387362637362638e-05, + "loss": 0.172, + "step": 11740 + }, + { + "epoch": 32.255494505494504, + "grad_norm": 5.219918251037598, + "learning_rate": 3.387225274725275e-05, + "loss": 0.115, + "step": 11741 + }, + { + "epoch": 32.25824175824176, + "grad_norm": 14.133520126342773, + "learning_rate": 3.387087912087912e-05, + "loss": 0.4743, + "step": 11742 + }, + { + "epoch": 32.260989010989015, + "grad_norm": 18.333757400512695, + "learning_rate": 3.386950549450549e-05, + "loss": 0.4498, + "step": 11743 + }, + { + "epoch": 32.26373626373626, + "grad_norm": 7.0228962898254395, + "learning_rate": 3.386813186813187e-05, + "loss": 0.1405, + "step": 11744 + }, + { + "epoch": 32.26648351648352, + "grad_norm": 3.035032272338867, + "learning_rate": 3.3866758241758244e-05, + "loss": 0.0686, + "step": 11745 + }, + { + "epoch": 32.26923076923077, + "grad_norm": 10.241406440734863, + "learning_rate": 3.3865384615384614e-05, + "loss": 0.3182, + "step": 11746 + }, + { + "epoch": 32.27197802197802, + "grad_norm": 12.830877304077148, + "learning_rate": 3.386401098901099e-05, + "loss": 0.2908, + "step": 11747 + }, + { + "epoch": 32.27472527472528, + "grad_norm": 11.126498222351074, + "learning_rate": 3.386263736263736e-05, + "loss": 0.2552, + "step": 11748 + }, + { + "epoch": 32.277472527472526, + "grad_norm": 4.8305253982543945, + "learning_rate": 3.386126373626374e-05, + "loss": 0.0818, + "step": 11749 + }, + { + "epoch": 32.28021978021978, + "grad_norm": 13.699553489685059, + "learning_rate": 3.3859890109890114e-05, + "loss": 0.3629, + "step": 11750 + }, + { + "epoch": 32.282967032967036, + "grad_norm": 10.952591896057129, + "learning_rate": 3.3858516483516484e-05, + "loss": 0.2539, + "step": 11751 + }, + { + "epoch": 32.285714285714285, + "grad_norm": 17.968353271484375, + "learning_rate": 3.385714285714286e-05, + "loss": 0.4558, + "step": 11752 + }, + { + "epoch": 32.28846153846154, + "grad_norm": 9.499959945678711, + "learning_rate": 3.385576923076923e-05, + "loss": 0.1564, + "step": 11753 + }, + { + "epoch": 32.29120879120879, + "grad_norm": 16.518930435180664, + "learning_rate": 3.385439560439561e-05, + "loss": 0.3113, + "step": 11754 + }, + { + "epoch": 32.293956043956044, + "grad_norm": 3.6549346446990967, + "learning_rate": 3.3853021978021985e-05, + "loss": 0.0811, + "step": 11755 + }, + { + "epoch": 32.2967032967033, + "grad_norm": 4.153615474700928, + "learning_rate": 3.3851648351648355e-05, + "loss": 0.0661, + "step": 11756 + }, + { + "epoch": 32.29945054945055, + "grad_norm": 13.265335083007812, + "learning_rate": 3.3850274725274725e-05, + "loss": 0.3127, + "step": 11757 + }, + { + "epoch": 32.3021978021978, + "grad_norm": 1.7100001573562622, + "learning_rate": 3.3848901098901095e-05, + "loss": 0.0311, + "step": 11758 + }, + { + "epoch": 32.30494505494506, + "grad_norm": 10.640944480895996, + "learning_rate": 3.384752747252747e-05, + "loss": 0.3755, + "step": 11759 + }, + { + "epoch": 32.30769230769231, + "grad_norm": 11.594400405883789, + "learning_rate": 3.384615384615385e-05, + "loss": 0.3486, + "step": 11760 + }, + { + "epoch": 32.31043956043956, + "grad_norm": 6.078170299530029, + "learning_rate": 3.384478021978022e-05, + "loss": 0.1413, + "step": 11761 + }, + { + "epoch": 32.31318681318681, + "grad_norm": 14.447408676147461, + "learning_rate": 3.3843406593406595e-05, + "loss": 0.4216, + "step": 11762 + }, + { + "epoch": 32.315934065934066, + "grad_norm": 13.738760948181152, + "learning_rate": 3.3842032967032965e-05, + "loss": 0.5396, + "step": 11763 + }, + { + "epoch": 32.31868131868132, + "grad_norm": 8.950515747070312, + "learning_rate": 3.384065934065934e-05, + "loss": 0.1925, + "step": 11764 + }, + { + "epoch": 32.32142857142857, + "grad_norm": 9.597661972045898, + "learning_rate": 3.383928571428572e-05, + "loss": 0.3606, + "step": 11765 + }, + { + "epoch": 32.324175824175825, + "grad_norm": 11.198771476745605, + "learning_rate": 3.383791208791209e-05, + "loss": 0.4485, + "step": 11766 + }, + { + "epoch": 32.32692307692308, + "grad_norm": 11.088659286499023, + "learning_rate": 3.3836538461538466e-05, + "loss": 0.3409, + "step": 11767 + }, + { + "epoch": 32.32967032967033, + "grad_norm": 12.168121337890625, + "learning_rate": 3.3835164835164836e-05, + "loss": 0.3488, + "step": 11768 + }, + { + "epoch": 32.332417582417584, + "grad_norm": 20.27135467529297, + "learning_rate": 3.383379120879121e-05, + "loss": 0.6025, + "step": 11769 + }, + { + "epoch": 32.33516483516483, + "grad_norm": 17.134355545043945, + "learning_rate": 3.383241758241759e-05, + "loss": 0.445, + "step": 11770 + }, + { + "epoch": 32.33791208791209, + "grad_norm": 14.994071006774902, + "learning_rate": 3.383104395604396e-05, + "loss": 0.3582, + "step": 11771 + }, + { + "epoch": 32.34065934065934, + "grad_norm": 9.586689949035645, + "learning_rate": 3.382967032967033e-05, + "loss": 0.1505, + "step": 11772 + }, + { + "epoch": 32.34340659340659, + "grad_norm": 8.23708438873291, + "learning_rate": 3.38282967032967e-05, + "loss": 0.172, + "step": 11773 + }, + { + "epoch": 32.34615384615385, + "grad_norm": 12.197839736938477, + "learning_rate": 3.3826923076923076e-05, + "loss": 0.3559, + "step": 11774 + }, + { + "epoch": 32.3489010989011, + "grad_norm": 4.465618133544922, + "learning_rate": 3.382554945054945e-05, + "loss": 0.0872, + "step": 11775 + }, + { + "epoch": 32.35164835164835, + "grad_norm": 15.331037521362305, + "learning_rate": 3.382417582417582e-05, + "loss": 0.4127, + "step": 11776 + }, + { + "epoch": 32.354395604395606, + "grad_norm": 18.55794906616211, + "learning_rate": 3.38228021978022e-05, + "loss": 0.5775, + "step": 11777 + }, + { + "epoch": 32.357142857142854, + "grad_norm": 16.933197021484375, + "learning_rate": 3.382142857142857e-05, + "loss": 0.7214, + "step": 11778 + }, + { + "epoch": 32.35989010989011, + "grad_norm": 11.579185485839844, + "learning_rate": 3.3820054945054947e-05, + "loss": 0.2898, + "step": 11779 + }, + { + "epoch": 32.362637362637365, + "grad_norm": 8.05398178100586, + "learning_rate": 3.381868131868132e-05, + "loss": 0.1735, + "step": 11780 + }, + { + "epoch": 32.36538461538461, + "grad_norm": 12.921987533569336, + "learning_rate": 3.381730769230769e-05, + "loss": 0.3385, + "step": 11781 + }, + { + "epoch": 32.36813186813187, + "grad_norm": 13.82427978515625, + "learning_rate": 3.381593406593407e-05, + "loss": 0.54, + "step": 11782 + }, + { + "epoch": 32.370879120879124, + "grad_norm": 18.906063079833984, + "learning_rate": 3.381456043956044e-05, + "loss": 0.5077, + "step": 11783 + }, + { + "epoch": 32.37362637362637, + "grad_norm": 11.722208976745605, + "learning_rate": 3.381318681318682e-05, + "loss": 0.4112, + "step": 11784 + }, + { + "epoch": 32.37637362637363, + "grad_norm": 23.576927185058594, + "learning_rate": 3.3811813186813194e-05, + "loss": 1.1579, + "step": 11785 + }, + { + "epoch": 32.379120879120876, + "grad_norm": 15.299986839294434, + "learning_rate": 3.3810439560439564e-05, + "loss": 0.2295, + "step": 11786 + }, + { + "epoch": 32.38186813186813, + "grad_norm": 10.7233304977417, + "learning_rate": 3.3809065934065934e-05, + "loss": 0.1813, + "step": 11787 + }, + { + "epoch": 32.38461538461539, + "grad_norm": 20.464073181152344, + "learning_rate": 3.3807692307692304e-05, + "loss": 0.68, + "step": 11788 + }, + { + "epoch": 32.387362637362635, + "grad_norm": 20.965967178344727, + "learning_rate": 3.380631868131868e-05, + "loss": 0.4802, + "step": 11789 + }, + { + "epoch": 32.39010989010989, + "grad_norm": 7.0208821296691895, + "learning_rate": 3.380494505494506e-05, + "loss": 0.1875, + "step": 11790 + }, + { + "epoch": 32.392857142857146, + "grad_norm": 16.36972999572754, + "learning_rate": 3.380357142857143e-05, + "loss": 0.6918, + "step": 11791 + }, + { + "epoch": 32.395604395604394, + "grad_norm": 14.37281608581543, + "learning_rate": 3.3802197802197804e-05, + "loss": 0.3828, + "step": 11792 + }, + { + "epoch": 32.39835164835165, + "grad_norm": 8.058236122131348, + "learning_rate": 3.3800824175824174e-05, + "loss": 0.2243, + "step": 11793 + }, + { + "epoch": 32.4010989010989, + "grad_norm": 17.247549057006836, + "learning_rate": 3.379945054945055e-05, + "loss": 0.5821, + "step": 11794 + }, + { + "epoch": 32.40384615384615, + "grad_norm": 14.867069244384766, + "learning_rate": 3.379807692307693e-05, + "loss": 0.5289, + "step": 11795 + }, + { + "epoch": 32.40659340659341, + "grad_norm": 12.449785232543945, + "learning_rate": 3.37967032967033e-05, + "loss": 0.2631, + "step": 11796 + }, + { + "epoch": 32.40934065934066, + "grad_norm": 14.886201858520508, + "learning_rate": 3.3795329670329675e-05, + "loss": 0.3015, + "step": 11797 + }, + { + "epoch": 32.41208791208791, + "grad_norm": 14.321105003356934, + "learning_rate": 3.3793956043956045e-05, + "loss": 0.425, + "step": 11798 + }, + { + "epoch": 32.41483516483517, + "grad_norm": 11.005253791809082, + "learning_rate": 3.379258241758242e-05, + "loss": 0.1655, + "step": 11799 + }, + { + "epoch": 32.417582417582416, + "grad_norm": 2.840489387512207, + "learning_rate": 3.37912087912088e-05, + "loss": 0.044, + "step": 11800 + }, + { + "epoch": 32.42032967032967, + "grad_norm": 6.83792781829834, + "learning_rate": 3.378983516483517e-05, + "loss": 0.2034, + "step": 11801 + }, + { + "epoch": 32.42307692307692, + "grad_norm": 11.25059700012207, + "learning_rate": 3.378846153846154e-05, + "loss": 0.2447, + "step": 11802 + }, + { + "epoch": 32.425824175824175, + "grad_norm": 10.436570167541504, + "learning_rate": 3.378708791208791e-05, + "loss": 0.3553, + "step": 11803 + }, + { + "epoch": 32.42857142857143, + "grad_norm": 18.293500900268555, + "learning_rate": 3.3785714285714285e-05, + "loss": 0.6264, + "step": 11804 + }, + { + "epoch": 32.43131868131868, + "grad_norm": 6.805543422698975, + "learning_rate": 3.378434065934066e-05, + "loss": 0.1345, + "step": 11805 + }, + { + "epoch": 32.434065934065934, + "grad_norm": 8.623397827148438, + "learning_rate": 3.378296703296703e-05, + "loss": 0.195, + "step": 11806 + }, + { + "epoch": 32.43681318681319, + "grad_norm": 5.508235454559326, + "learning_rate": 3.378159340659341e-05, + "loss": 0.0848, + "step": 11807 + }, + { + "epoch": 32.43956043956044, + "grad_norm": 12.384750366210938, + "learning_rate": 3.378021978021978e-05, + "loss": 0.3862, + "step": 11808 + }, + { + "epoch": 32.44230769230769, + "grad_norm": 15.87213134765625, + "learning_rate": 3.3778846153846156e-05, + "loss": 0.4931, + "step": 11809 + }, + { + "epoch": 32.44505494505494, + "grad_norm": 13.220559120178223, + "learning_rate": 3.377747252747253e-05, + "loss": 0.2811, + "step": 11810 + }, + { + "epoch": 32.4478021978022, + "grad_norm": 10.875171661376953, + "learning_rate": 3.37760989010989e-05, + "loss": 0.3563, + "step": 11811 + }, + { + "epoch": 32.45054945054945, + "grad_norm": 13.133853912353516, + "learning_rate": 3.377472527472528e-05, + "loss": 0.367, + "step": 11812 + }, + { + "epoch": 32.4532967032967, + "grad_norm": 25.4570255279541, + "learning_rate": 3.377335164835165e-05, + "loss": 1.1208, + "step": 11813 + }, + { + "epoch": 32.456043956043956, + "grad_norm": 17.993576049804688, + "learning_rate": 3.3771978021978026e-05, + "loss": 0.5513, + "step": 11814 + }, + { + "epoch": 32.45879120879121, + "grad_norm": 16.511640548706055, + "learning_rate": 3.37706043956044e-05, + "loss": 0.531, + "step": 11815 + }, + { + "epoch": 32.46153846153846, + "grad_norm": 12.244776725769043, + "learning_rate": 3.376923076923077e-05, + "loss": 0.1786, + "step": 11816 + }, + { + "epoch": 32.464285714285715, + "grad_norm": 25.52971076965332, + "learning_rate": 3.376785714285714e-05, + "loss": 0.6931, + "step": 11817 + }, + { + "epoch": 32.467032967032964, + "grad_norm": 16.332061767578125, + "learning_rate": 3.376648351648351e-05, + "loss": 0.4874, + "step": 11818 + }, + { + "epoch": 32.46978021978022, + "grad_norm": 14.271418571472168, + "learning_rate": 3.376510989010989e-05, + "loss": 0.2978, + "step": 11819 + }, + { + "epoch": 32.472527472527474, + "grad_norm": 19.784175872802734, + "learning_rate": 3.3763736263736266e-05, + "loss": 0.4933, + "step": 11820 + }, + { + "epoch": 32.47527472527472, + "grad_norm": 8.889081954956055, + "learning_rate": 3.3762362637362636e-05, + "loss": 0.2443, + "step": 11821 + }, + { + "epoch": 32.47802197802198, + "grad_norm": 10.45425796508789, + "learning_rate": 3.376098901098901e-05, + "loss": 0.2167, + "step": 11822 + }, + { + "epoch": 32.48076923076923, + "grad_norm": 4.2238383293151855, + "learning_rate": 3.375961538461538e-05, + "loss": 0.0673, + "step": 11823 + }, + { + "epoch": 32.48351648351648, + "grad_norm": 9.080538749694824, + "learning_rate": 3.375824175824176e-05, + "loss": 0.2872, + "step": 11824 + }, + { + "epoch": 32.48626373626374, + "grad_norm": 16.860902786254883, + "learning_rate": 3.375686813186814e-05, + "loss": 0.482, + "step": 11825 + }, + { + "epoch": 32.489010989010985, + "grad_norm": 17.002269744873047, + "learning_rate": 3.375549450549451e-05, + "loss": 0.6848, + "step": 11826 + }, + { + "epoch": 32.49175824175824, + "grad_norm": 7.908890247344971, + "learning_rate": 3.3754120879120884e-05, + "loss": 0.2265, + "step": 11827 + }, + { + "epoch": 32.494505494505496, + "grad_norm": 12.816673278808594, + "learning_rate": 3.3752747252747254e-05, + "loss": 0.4589, + "step": 11828 + }, + { + "epoch": 32.497252747252745, + "grad_norm": 17.676485061645508, + "learning_rate": 3.375137362637363e-05, + "loss": 0.4145, + "step": 11829 + }, + { + "epoch": 32.5, + "grad_norm": 10.399980545043945, + "learning_rate": 3.375000000000001e-05, + "loss": 0.2025, + "step": 11830 + }, + { + "epoch": 32.502747252747255, + "grad_norm": 14.012990951538086, + "learning_rate": 3.374862637362638e-05, + "loss": 0.3691, + "step": 11831 + }, + { + "epoch": 32.505494505494504, + "grad_norm": 10.61867618560791, + "learning_rate": 3.374725274725275e-05, + "loss": 0.2323, + "step": 11832 + }, + { + "epoch": 32.50824175824176, + "grad_norm": 12.884747505187988, + "learning_rate": 3.374587912087912e-05, + "loss": 0.5126, + "step": 11833 + }, + { + "epoch": 32.51098901098901, + "grad_norm": 10.753789901733398, + "learning_rate": 3.3744505494505494e-05, + "loss": 0.2567, + "step": 11834 + }, + { + "epoch": 32.51373626373626, + "grad_norm": 13.93863582611084, + "learning_rate": 3.374313186813187e-05, + "loss": 0.3357, + "step": 11835 + }, + { + "epoch": 32.51648351648352, + "grad_norm": 13.593213081359863, + "learning_rate": 3.374175824175824e-05, + "loss": 0.2993, + "step": 11836 + }, + { + "epoch": 32.51923076923077, + "grad_norm": 15.054007530212402, + "learning_rate": 3.374038461538462e-05, + "loss": 0.3823, + "step": 11837 + }, + { + "epoch": 32.52197802197802, + "grad_norm": 9.978379249572754, + "learning_rate": 3.373901098901099e-05, + "loss": 0.1315, + "step": 11838 + }, + { + "epoch": 32.52472527472528, + "grad_norm": 7.594841003417969, + "learning_rate": 3.3737637362637364e-05, + "loss": 0.2379, + "step": 11839 + }, + { + "epoch": 32.527472527472526, + "grad_norm": 14.987737655639648, + "learning_rate": 3.373626373626374e-05, + "loss": 0.7813, + "step": 11840 + }, + { + "epoch": 32.53021978021978, + "grad_norm": 11.073062896728516, + "learning_rate": 3.373489010989011e-05, + "loss": 0.2206, + "step": 11841 + }, + { + "epoch": 32.532967032967036, + "grad_norm": 8.126826286315918, + "learning_rate": 3.373351648351649e-05, + "loss": 0.2378, + "step": 11842 + }, + { + "epoch": 32.535714285714285, + "grad_norm": 7.78312349319458, + "learning_rate": 3.373214285714286e-05, + "loss": 0.1533, + "step": 11843 + }, + { + "epoch": 32.53846153846154, + "grad_norm": 11.018513679504395, + "learning_rate": 3.3730769230769235e-05, + "loss": 0.4026, + "step": 11844 + }, + { + "epoch": 32.54120879120879, + "grad_norm": 14.087285041809082, + "learning_rate": 3.3729395604395605e-05, + "loss": 0.236, + "step": 11845 + }, + { + "epoch": 32.543956043956044, + "grad_norm": 9.563217163085938, + "learning_rate": 3.372802197802198e-05, + "loss": 0.2345, + "step": 11846 + }, + { + "epoch": 32.5467032967033, + "grad_norm": 17.930221557617188, + "learning_rate": 3.372664835164835e-05, + "loss": 0.6066, + "step": 11847 + }, + { + "epoch": 32.54945054945055, + "grad_norm": 14.188431739807129, + "learning_rate": 3.372527472527472e-05, + "loss": 0.408, + "step": 11848 + }, + { + "epoch": 32.5521978021978, + "grad_norm": 5.90981912612915, + "learning_rate": 3.37239010989011e-05, + "loss": 0.1369, + "step": 11849 + }, + { + "epoch": 32.55494505494506, + "grad_norm": 13.845192909240723, + "learning_rate": 3.3722527472527475e-05, + "loss": 0.6347, + "step": 11850 + }, + { + "epoch": 32.55769230769231, + "grad_norm": 6.35294246673584, + "learning_rate": 3.3721153846153845e-05, + "loss": 0.1804, + "step": 11851 + }, + { + "epoch": 32.56043956043956, + "grad_norm": 10.154902458190918, + "learning_rate": 3.371978021978022e-05, + "loss": 0.2351, + "step": 11852 + }, + { + "epoch": 32.56318681318681, + "grad_norm": 12.072409629821777, + "learning_rate": 3.371840659340659e-05, + "loss": 0.4771, + "step": 11853 + }, + { + "epoch": 32.565934065934066, + "grad_norm": 16.054954528808594, + "learning_rate": 3.371703296703297e-05, + "loss": 0.3301, + "step": 11854 + }, + { + "epoch": 32.56868131868132, + "grad_norm": 12.520185470581055, + "learning_rate": 3.3715659340659346e-05, + "loss": 0.3204, + "step": 11855 + }, + { + "epoch": 32.57142857142857, + "grad_norm": 6.42012882232666, + "learning_rate": 3.3714285714285716e-05, + "loss": 0.1439, + "step": 11856 + }, + { + "epoch": 32.574175824175825, + "grad_norm": 5.1094651222229, + "learning_rate": 3.371291208791209e-05, + "loss": 0.1192, + "step": 11857 + }, + { + "epoch": 32.57692307692308, + "grad_norm": 11.931164741516113, + "learning_rate": 3.371153846153846e-05, + "loss": 0.2259, + "step": 11858 + }, + { + "epoch": 32.57967032967033, + "grad_norm": 10.010028839111328, + "learning_rate": 3.371016483516484e-05, + "loss": 0.221, + "step": 11859 + }, + { + "epoch": 32.582417582417584, + "grad_norm": 5.687525272369385, + "learning_rate": 3.370879120879121e-05, + "loss": 0.2612, + "step": 11860 + }, + { + "epoch": 32.58516483516483, + "grad_norm": 12.820998191833496, + "learning_rate": 3.3707417582417586e-05, + "loss": 0.2701, + "step": 11861 + }, + { + "epoch": 32.58791208791209, + "grad_norm": 16.330791473388672, + "learning_rate": 3.3706043956043956e-05, + "loss": 0.5918, + "step": 11862 + }, + { + "epoch": 32.59065934065934, + "grad_norm": 16.48569679260254, + "learning_rate": 3.3704670329670326e-05, + "loss": 0.5887, + "step": 11863 + }, + { + "epoch": 32.59340659340659, + "grad_norm": 4.72622013092041, + "learning_rate": 3.37032967032967e-05, + "loss": 0.0839, + "step": 11864 + }, + { + "epoch": 32.59615384615385, + "grad_norm": 12.19791316986084, + "learning_rate": 3.370192307692308e-05, + "loss": 0.3548, + "step": 11865 + }, + { + "epoch": 32.5989010989011, + "grad_norm": 14.020892143249512, + "learning_rate": 3.370054945054945e-05, + "loss": 0.5793, + "step": 11866 + }, + { + "epoch": 32.60164835164835, + "grad_norm": 13.351133346557617, + "learning_rate": 3.369917582417583e-05, + "loss": 0.3466, + "step": 11867 + }, + { + "epoch": 32.604395604395606, + "grad_norm": 8.33206558227539, + "learning_rate": 3.36978021978022e-05, + "loss": 0.2458, + "step": 11868 + }, + { + "epoch": 32.607142857142854, + "grad_norm": 16.331335067749023, + "learning_rate": 3.3696428571428573e-05, + "loss": 0.319, + "step": 11869 + }, + { + "epoch": 32.60989010989011, + "grad_norm": 7.162799835205078, + "learning_rate": 3.369505494505495e-05, + "loss": 0.1215, + "step": 11870 + }, + { + "epoch": 32.612637362637365, + "grad_norm": 9.96145248413086, + "learning_rate": 3.369368131868132e-05, + "loss": 0.2812, + "step": 11871 + }, + { + "epoch": 32.61538461538461, + "grad_norm": 11.672651290893555, + "learning_rate": 3.36923076923077e-05, + "loss": 0.3582, + "step": 11872 + }, + { + "epoch": 32.61813186813187, + "grad_norm": 15.158419609069824, + "learning_rate": 3.369093406593407e-05, + "loss": 0.5932, + "step": 11873 + }, + { + "epoch": 32.620879120879124, + "grad_norm": 15.447418212890625, + "learning_rate": 3.3689560439560444e-05, + "loss": 0.4754, + "step": 11874 + }, + { + "epoch": 32.62362637362637, + "grad_norm": 11.799728393554688, + "learning_rate": 3.3688186813186814e-05, + "loss": 0.5021, + "step": 11875 + }, + { + "epoch": 32.62637362637363, + "grad_norm": 9.155815124511719, + "learning_rate": 3.368681318681319e-05, + "loss": 0.1675, + "step": 11876 + }, + { + "epoch": 32.629120879120876, + "grad_norm": 14.011263847351074, + "learning_rate": 3.368543956043956e-05, + "loss": 0.5241, + "step": 11877 + }, + { + "epoch": 32.63186813186813, + "grad_norm": 9.956656455993652, + "learning_rate": 3.368406593406593e-05, + "loss": 0.4277, + "step": 11878 + }, + { + "epoch": 32.63461538461539, + "grad_norm": 11.2218599319458, + "learning_rate": 3.368269230769231e-05, + "loss": 0.3144, + "step": 11879 + }, + { + "epoch": 32.637362637362635, + "grad_norm": 15.40207576751709, + "learning_rate": 3.3681318681318684e-05, + "loss": 0.3583, + "step": 11880 + }, + { + "epoch": 32.64010989010989, + "grad_norm": 23.456815719604492, + "learning_rate": 3.3679945054945054e-05, + "loss": 0.8368, + "step": 11881 + }, + { + "epoch": 32.642857142857146, + "grad_norm": 3.834991693496704, + "learning_rate": 3.367857142857143e-05, + "loss": 0.1032, + "step": 11882 + }, + { + "epoch": 32.645604395604394, + "grad_norm": 17.245952606201172, + "learning_rate": 3.36771978021978e-05, + "loss": 0.339, + "step": 11883 + }, + { + "epoch": 32.64835164835165, + "grad_norm": 19.99498748779297, + "learning_rate": 3.367582417582418e-05, + "loss": 0.4914, + "step": 11884 + }, + { + "epoch": 32.6510989010989, + "grad_norm": 9.359456062316895, + "learning_rate": 3.3674450549450555e-05, + "loss": 0.2931, + "step": 11885 + }, + { + "epoch": 32.65384615384615, + "grad_norm": 5.425229549407959, + "learning_rate": 3.3673076923076925e-05, + "loss": 0.0938, + "step": 11886 + }, + { + "epoch": 32.65659340659341, + "grad_norm": 16.919343948364258, + "learning_rate": 3.36717032967033e-05, + "loss": 0.4661, + "step": 11887 + }, + { + "epoch": 32.65934065934066, + "grad_norm": 11.553263664245605, + "learning_rate": 3.367032967032967e-05, + "loss": 0.3012, + "step": 11888 + }, + { + "epoch": 32.66208791208791, + "grad_norm": 16.021770477294922, + "learning_rate": 3.366895604395605e-05, + "loss": 0.4832, + "step": 11889 + }, + { + "epoch": 32.66483516483517, + "grad_norm": 4.722503662109375, + "learning_rate": 3.366758241758242e-05, + "loss": 0.0954, + "step": 11890 + }, + { + "epoch": 32.667582417582416, + "grad_norm": 4.739372253417969, + "learning_rate": 3.3666208791208795e-05, + "loss": 0.0854, + "step": 11891 + }, + { + "epoch": 32.67032967032967, + "grad_norm": 11.902888298034668, + "learning_rate": 3.3664835164835165e-05, + "loss": 0.2702, + "step": 11892 + }, + { + "epoch": 32.67307692307692, + "grad_norm": 6.927493095397949, + "learning_rate": 3.3663461538461535e-05, + "loss": 0.0636, + "step": 11893 + }, + { + "epoch": 32.675824175824175, + "grad_norm": 8.810709953308105, + "learning_rate": 3.366208791208791e-05, + "loss": 0.4295, + "step": 11894 + }, + { + "epoch": 32.67857142857143, + "grad_norm": 11.99980640411377, + "learning_rate": 3.366071428571429e-05, + "loss": 0.172, + "step": 11895 + }, + { + "epoch": 32.68131868131868, + "grad_norm": 10.036200523376465, + "learning_rate": 3.365934065934066e-05, + "loss": 0.4055, + "step": 11896 + }, + { + "epoch": 32.684065934065934, + "grad_norm": 13.227293014526367, + "learning_rate": 3.3657967032967036e-05, + "loss": 0.4683, + "step": 11897 + }, + { + "epoch": 32.68681318681319, + "grad_norm": 7.160518169403076, + "learning_rate": 3.3656593406593406e-05, + "loss": 0.1734, + "step": 11898 + }, + { + "epoch": 32.68956043956044, + "grad_norm": 20.17629051208496, + "learning_rate": 3.365521978021978e-05, + "loss": 0.6301, + "step": 11899 + }, + { + "epoch": 32.69230769230769, + "grad_norm": 9.953605651855469, + "learning_rate": 3.365384615384616e-05, + "loss": 0.1547, + "step": 11900 + }, + { + "epoch": 32.69505494505494, + "grad_norm": 5.283594131469727, + "learning_rate": 3.365247252747253e-05, + "loss": 0.1062, + "step": 11901 + }, + { + "epoch": 32.6978021978022, + "grad_norm": 4.065414905548096, + "learning_rate": 3.3651098901098906e-05, + "loss": 0.069, + "step": 11902 + }, + { + "epoch": 32.70054945054945, + "grad_norm": 15.040264129638672, + "learning_rate": 3.3649725274725276e-05, + "loss": 0.3705, + "step": 11903 + }, + { + "epoch": 32.7032967032967, + "grad_norm": 7.7372517585754395, + "learning_rate": 3.364835164835165e-05, + "loss": 0.2467, + "step": 11904 + }, + { + "epoch": 32.706043956043956, + "grad_norm": 13.185859680175781, + "learning_rate": 3.364697802197802e-05, + "loss": 0.6139, + "step": 11905 + }, + { + "epoch": 32.70879120879121, + "grad_norm": 7.576460361480713, + "learning_rate": 3.36456043956044e-05, + "loss": 0.2013, + "step": 11906 + }, + { + "epoch": 32.71153846153846, + "grad_norm": 5.0324015617370605, + "learning_rate": 3.364423076923077e-05, + "loss": 0.1358, + "step": 11907 + }, + { + "epoch": 32.714285714285715, + "grad_norm": 10.09211540222168, + "learning_rate": 3.364285714285714e-05, + "loss": 0.4824, + "step": 11908 + }, + { + "epoch": 32.717032967032964, + "grad_norm": 6.488926887512207, + "learning_rate": 3.3641483516483517e-05, + "loss": 0.1074, + "step": 11909 + }, + { + "epoch": 32.71978021978022, + "grad_norm": 17.74056625366211, + "learning_rate": 3.364010989010989e-05, + "loss": 0.7021, + "step": 11910 + }, + { + "epoch": 32.722527472527474, + "grad_norm": 8.804302215576172, + "learning_rate": 3.363873626373626e-05, + "loss": 0.2075, + "step": 11911 + }, + { + "epoch": 32.72527472527472, + "grad_norm": 17.403718948364258, + "learning_rate": 3.363736263736264e-05, + "loss": 0.4599, + "step": 11912 + }, + { + "epoch": 32.72802197802198, + "grad_norm": 17.28980827331543, + "learning_rate": 3.363598901098901e-05, + "loss": 0.4765, + "step": 11913 + }, + { + "epoch": 32.73076923076923, + "grad_norm": 6.273872375488281, + "learning_rate": 3.363461538461539e-05, + "loss": 0.2047, + "step": 11914 + }, + { + "epoch": 32.73351648351648, + "grad_norm": 4.743221759796143, + "learning_rate": 3.3633241758241764e-05, + "loss": 0.1274, + "step": 11915 + }, + { + "epoch": 32.73626373626374, + "grad_norm": 7.887625217437744, + "learning_rate": 3.3631868131868134e-05, + "loss": 0.2608, + "step": 11916 + }, + { + "epoch": 32.73901098901099, + "grad_norm": 7.017098903656006, + "learning_rate": 3.363049450549451e-05, + "loss": 0.1215, + "step": 11917 + }, + { + "epoch": 32.74175824175824, + "grad_norm": 11.882548332214355, + "learning_rate": 3.362912087912088e-05, + "loss": 0.3708, + "step": 11918 + }, + { + "epoch": 32.744505494505496, + "grad_norm": 23.51961326599121, + "learning_rate": 3.362774725274726e-05, + "loss": 0.6358, + "step": 11919 + }, + { + "epoch": 32.747252747252745, + "grad_norm": 8.875285148620605, + "learning_rate": 3.362637362637363e-05, + "loss": 0.2328, + "step": 11920 + }, + { + "epoch": 32.75, + "grad_norm": 7.262668132781982, + "learning_rate": 3.3625000000000004e-05, + "loss": 0.0941, + "step": 11921 + }, + { + "epoch": 32.752747252747255, + "grad_norm": 4.7417120933532715, + "learning_rate": 3.3623626373626374e-05, + "loss": 0.0687, + "step": 11922 + }, + { + "epoch": 32.755494505494504, + "grad_norm": 4.287613391876221, + "learning_rate": 3.3622252747252744e-05, + "loss": 0.0798, + "step": 11923 + }, + { + "epoch": 32.75824175824176, + "grad_norm": 13.814746856689453, + "learning_rate": 3.362087912087912e-05, + "loss": 0.2745, + "step": 11924 + }, + { + "epoch": 32.76098901098901, + "grad_norm": 16.99288558959961, + "learning_rate": 3.36195054945055e-05, + "loss": 0.5345, + "step": 11925 + }, + { + "epoch": 32.76373626373626, + "grad_norm": 29.075817108154297, + "learning_rate": 3.361813186813187e-05, + "loss": 0.7537, + "step": 11926 + }, + { + "epoch": 32.76648351648352, + "grad_norm": 5.629448890686035, + "learning_rate": 3.3616758241758245e-05, + "loss": 0.1275, + "step": 11927 + }, + { + "epoch": 32.76923076923077, + "grad_norm": 10.048530578613281, + "learning_rate": 3.3615384615384615e-05, + "loss": 0.3104, + "step": 11928 + }, + { + "epoch": 32.77197802197802, + "grad_norm": 4.032877445220947, + "learning_rate": 3.361401098901099e-05, + "loss": 0.078, + "step": 11929 + }, + { + "epoch": 32.77472527472528, + "grad_norm": 11.883721351623535, + "learning_rate": 3.361263736263737e-05, + "loss": 0.2961, + "step": 11930 + }, + { + "epoch": 32.777472527472526, + "grad_norm": 10.167755126953125, + "learning_rate": 3.361126373626374e-05, + "loss": 0.2442, + "step": 11931 + }, + { + "epoch": 32.78021978021978, + "grad_norm": 7.119908332824707, + "learning_rate": 3.3609890109890115e-05, + "loss": 0.1585, + "step": 11932 + }, + { + "epoch": 32.782967032967036, + "grad_norm": 9.965524673461914, + "learning_rate": 3.3608516483516485e-05, + "loss": 0.1764, + "step": 11933 + }, + { + "epoch": 32.785714285714285, + "grad_norm": 15.505999565124512, + "learning_rate": 3.360714285714286e-05, + "loss": 0.5045, + "step": 11934 + }, + { + "epoch": 32.78846153846154, + "grad_norm": 10.289559364318848, + "learning_rate": 3.360576923076923e-05, + "loss": 0.1872, + "step": 11935 + }, + { + "epoch": 32.79120879120879, + "grad_norm": 12.375606536865234, + "learning_rate": 3.360439560439561e-05, + "loss": 0.3067, + "step": 11936 + }, + { + "epoch": 32.793956043956044, + "grad_norm": 19.76353645324707, + "learning_rate": 3.360302197802198e-05, + "loss": 0.6845, + "step": 11937 + }, + { + "epoch": 32.7967032967033, + "grad_norm": 13.152505874633789, + "learning_rate": 3.360164835164835e-05, + "loss": 0.3328, + "step": 11938 + }, + { + "epoch": 32.79945054945055, + "grad_norm": 11.029465675354004, + "learning_rate": 3.3600274725274725e-05, + "loss": 0.4408, + "step": 11939 + }, + { + "epoch": 32.8021978021978, + "grad_norm": 3.7988951206207275, + "learning_rate": 3.35989010989011e-05, + "loss": 0.0654, + "step": 11940 + }, + { + "epoch": 32.80494505494506, + "grad_norm": 16.36254119873047, + "learning_rate": 3.359752747252747e-05, + "loss": 0.4758, + "step": 11941 + }, + { + "epoch": 32.80769230769231, + "grad_norm": 12.499127388000488, + "learning_rate": 3.359615384615385e-05, + "loss": 0.2588, + "step": 11942 + }, + { + "epoch": 32.81043956043956, + "grad_norm": 11.141412734985352, + "learning_rate": 3.359478021978022e-05, + "loss": 0.2189, + "step": 11943 + }, + { + "epoch": 32.81318681318681, + "grad_norm": 2.7875726222991943, + "learning_rate": 3.3593406593406596e-05, + "loss": 0.0419, + "step": 11944 + }, + { + "epoch": 32.815934065934066, + "grad_norm": 17.250791549682617, + "learning_rate": 3.3592032967032966e-05, + "loss": 0.5622, + "step": 11945 + }, + { + "epoch": 32.81868131868132, + "grad_norm": 2.911954402923584, + "learning_rate": 3.359065934065934e-05, + "loss": 0.0666, + "step": 11946 + }, + { + "epoch": 32.82142857142857, + "grad_norm": 5.231159210205078, + "learning_rate": 3.358928571428572e-05, + "loss": 0.1526, + "step": 11947 + }, + { + "epoch": 32.824175824175825, + "grad_norm": 14.940580368041992, + "learning_rate": 3.358791208791209e-05, + "loss": 0.445, + "step": 11948 + }, + { + "epoch": 32.82692307692308, + "grad_norm": 7.226109981536865, + "learning_rate": 3.3586538461538466e-05, + "loss": 0.2952, + "step": 11949 + }, + { + "epoch": 32.82967032967033, + "grad_norm": 11.317061424255371, + "learning_rate": 3.3585164835164836e-05, + "loss": 0.2776, + "step": 11950 + }, + { + "epoch": 32.832417582417584, + "grad_norm": 12.239286422729492, + "learning_rate": 3.358379120879121e-05, + "loss": 0.6175, + "step": 11951 + }, + { + "epoch": 32.83516483516483, + "grad_norm": 10.531944274902344, + "learning_rate": 3.358241758241758e-05, + "loss": 0.2011, + "step": 11952 + }, + { + "epoch": 32.83791208791209, + "grad_norm": 11.051834106445312, + "learning_rate": 3.358104395604395e-05, + "loss": 0.2539, + "step": 11953 + }, + { + "epoch": 32.84065934065934, + "grad_norm": 18.56761932373047, + "learning_rate": 3.357967032967033e-05, + "loss": 0.6096, + "step": 11954 + }, + { + "epoch": 32.84340659340659, + "grad_norm": 6.129467487335205, + "learning_rate": 3.35782967032967e-05, + "loss": 0.201, + "step": 11955 + }, + { + "epoch": 32.84615384615385, + "grad_norm": 17.30906105041504, + "learning_rate": 3.357692307692308e-05, + "loss": 0.2687, + "step": 11956 + }, + { + "epoch": 32.8489010989011, + "grad_norm": 12.540785789489746, + "learning_rate": 3.3575549450549454e-05, + "loss": 0.1929, + "step": 11957 + }, + { + "epoch": 32.85164835164835, + "grad_norm": 18.495576858520508, + "learning_rate": 3.3574175824175824e-05, + "loss": 0.5563, + "step": 11958 + }, + { + "epoch": 32.854395604395606, + "grad_norm": 10.914323806762695, + "learning_rate": 3.35728021978022e-05, + "loss": 0.2652, + "step": 11959 + }, + { + "epoch": 32.857142857142854, + "grad_norm": 12.120726585388184, + "learning_rate": 3.357142857142857e-05, + "loss": 0.2806, + "step": 11960 + }, + { + "epoch": 32.85989010989011, + "grad_norm": 8.90511703491211, + "learning_rate": 3.357005494505495e-05, + "loss": 0.2717, + "step": 11961 + }, + { + "epoch": 32.862637362637365, + "grad_norm": 13.965598106384277, + "learning_rate": 3.3568681318681324e-05, + "loss": 0.4382, + "step": 11962 + }, + { + "epoch": 32.86538461538461, + "grad_norm": 12.927227973937988, + "learning_rate": 3.3567307692307694e-05, + "loss": 0.2888, + "step": 11963 + }, + { + "epoch": 32.86813186813187, + "grad_norm": 20.171489715576172, + "learning_rate": 3.356593406593407e-05, + "loss": 0.3732, + "step": 11964 + }, + { + "epoch": 32.870879120879124, + "grad_norm": 12.97076416015625, + "learning_rate": 3.356456043956044e-05, + "loss": 0.4172, + "step": 11965 + }, + { + "epoch": 32.87362637362637, + "grad_norm": 21.92460060119629, + "learning_rate": 3.356318681318682e-05, + "loss": 0.6289, + "step": 11966 + }, + { + "epoch": 32.87637362637363, + "grad_norm": 9.115853309631348, + "learning_rate": 3.356181318681319e-05, + "loss": 0.2356, + "step": 11967 + }, + { + "epoch": 32.879120879120876, + "grad_norm": 16.254934310913086, + "learning_rate": 3.356043956043956e-05, + "loss": 0.4648, + "step": 11968 + }, + { + "epoch": 32.88186813186813, + "grad_norm": 7.536396503448486, + "learning_rate": 3.3559065934065934e-05, + "loss": 0.1398, + "step": 11969 + }, + { + "epoch": 32.88461538461539, + "grad_norm": 9.380188941955566, + "learning_rate": 3.3557692307692304e-05, + "loss": 0.1676, + "step": 11970 + }, + { + "epoch": 32.887362637362635, + "grad_norm": 15.571333885192871, + "learning_rate": 3.355631868131868e-05, + "loss": 0.4628, + "step": 11971 + }, + { + "epoch": 32.89010989010989, + "grad_norm": 11.792915344238281, + "learning_rate": 3.355494505494506e-05, + "loss": 0.1733, + "step": 11972 + }, + { + "epoch": 32.892857142857146, + "grad_norm": 9.335990905761719, + "learning_rate": 3.355357142857143e-05, + "loss": 0.1174, + "step": 11973 + }, + { + "epoch": 32.895604395604394, + "grad_norm": 6.993887424468994, + "learning_rate": 3.3552197802197805e-05, + "loss": 0.1526, + "step": 11974 + }, + { + "epoch": 32.89835164835165, + "grad_norm": 24.106121063232422, + "learning_rate": 3.3550824175824175e-05, + "loss": 0.9562, + "step": 11975 + }, + { + "epoch": 32.9010989010989, + "grad_norm": 9.265464782714844, + "learning_rate": 3.354945054945055e-05, + "loss": 0.3654, + "step": 11976 + }, + { + "epoch": 32.90384615384615, + "grad_norm": 11.230368614196777, + "learning_rate": 3.354807692307693e-05, + "loss": 0.21, + "step": 11977 + }, + { + "epoch": 32.90659340659341, + "grad_norm": 18.669570922851562, + "learning_rate": 3.35467032967033e-05, + "loss": 0.7742, + "step": 11978 + }, + { + "epoch": 32.90934065934066, + "grad_norm": 8.525700569152832, + "learning_rate": 3.3545329670329675e-05, + "loss": 0.1724, + "step": 11979 + }, + { + "epoch": 32.91208791208791, + "grad_norm": 5.814406394958496, + "learning_rate": 3.3543956043956045e-05, + "loss": 0.1745, + "step": 11980 + }, + { + "epoch": 32.91483516483517, + "grad_norm": 12.530903816223145, + "learning_rate": 3.354258241758242e-05, + "loss": 0.3845, + "step": 11981 + }, + { + "epoch": 32.917582417582416, + "grad_norm": 10.135343551635742, + "learning_rate": 3.354120879120879e-05, + "loss": 0.1873, + "step": 11982 + }, + { + "epoch": 32.92032967032967, + "grad_norm": 10.346076011657715, + "learning_rate": 3.353983516483516e-05, + "loss": 0.3054, + "step": 11983 + }, + { + "epoch": 32.92307692307692, + "grad_norm": 4.520149230957031, + "learning_rate": 3.353846153846154e-05, + "loss": 0.1675, + "step": 11984 + }, + { + "epoch": 32.925824175824175, + "grad_norm": 19.316665649414062, + "learning_rate": 3.353708791208791e-05, + "loss": 0.7429, + "step": 11985 + }, + { + "epoch": 32.92857142857143, + "grad_norm": 14.65985107421875, + "learning_rate": 3.3535714285714286e-05, + "loss": 0.445, + "step": 11986 + }, + { + "epoch": 32.93131868131868, + "grad_norm": 15.18020248413086, + "learning_rate": 3.353434065934066e-05, + "loss": 0.4955, + "step": 11987 + }, + { + "epoch": 32.934065934065934, + "grad_norm": 20.847387313842773, + "learning_rate": 3.353296703296703e-05, + "loss": 0.6047, + "step": 11988 + }, + { + "epoch": 32.93681318681319, + "grad_norm": 7.46976900100708, + "learning_rate": 3.353159340659341e-05, + "loss": 0.2309, + "step": 11989 + }, + { + "epoch": 32.93956043956044, + "grad_norm": 5.153509140014648, + "learning_rate": 3.353021978021978e-05, + "loss": 0.1011, + "step": 11990 + }, + { + "epoch": 32.94230769230769, + "grad_norm": 16.837169647216797, + "learning_rate": 3.3528846153846156e-05, + "loss": 0.3757, + "step": 11991 + }, + { + "epoch": 32.94505494505494, + "grad_norm": 9.028005599975586, + "learning_rate": 3.352747252747253e-05, + "loss": 0.1682, + "step": 11992 + }, + { + "epoch": 32.9478021978022, + "grad_norm": 5.22761869430542, + "learning_rate": 3.35260989010989e-05, + "loss": 0.1897, + "step": 11993 + }, + { + "epoch": 32.95054945054945, + "grad_norm": 5.944363594055176, + "learning_rate": 3.352472527472528e-05, + "loss": 0.102, + "step": 11994 + }, + { + "epoch": 32.9532967032967, + "grad_norm": 16.943382263183594, + "learning_rate": 3.352335164835165e-05, + "loss": 0.2785, + "step": 11995 + }, + { + "epoch": 32.956043956043956, + "grad_norm": 14.627184867858887, + "learning_rate": 3.352197802197803e-05, + "loss": 0.4286, + "step": 11996 + }, + { + "epoch": 32.95879120879121, + "grad_norm": 8.832971572875977, + "learning_rate": 3.35206043956044e-05, + "loss": 0.1694, + "step": 11997 + }, + { + "epoch": 32.96153846153846, + "grad_norm": 20.56293487548828, + "learning_rate": 3.351923076923077e-05, + "loss": 0.572, + "step": 11998 + }, + { + "epoch": 32.964285714285715, + "grad_norm": 12.689505577087402, + "learning_rate": 3.3517857142857143e-05, + "loss": 0.4695, + "step": 11999 + }, + { + "epoch": 32.967032967032964, + "grad_norm": 13.434703826904297, + "learning_rate": 3.3516483516483513e-05, + "loss": 0.2531, + "step": 12000 + }, + { + "epoch": 32.96978021978022, + "grad_norm": 1.6085753440856934, + "learning_rate": 3.351510989010989e-05, + "loss": 0.0318, + "step": 12001 + }, + { + "epoch": 32.972527472527474, + "grad_norm": 14.042433738708496, + "learning_rate": 3.351373626373627e-05, + "loss": 0.4001, + "step": 12002 + }, + { + "epoch": 32.97527472527472, + "grad_norm": 17.395139694213867, + "learning_rate": 3.351236263736264e-05, + "loss": 0.4394, + "step": 12003 + }, + { + "epoch": 32.97802197802198, + "grad_norm": 15.436971664428711, + "learning_rate": 3.3510989010989014e-05, + "loss": 0.5465, + "step": 12004 + }, + { + "epoch": 32.98076923076923, + "grad_norm": 3.1428005695343018, + "learning_rate": 3.3509615384615384e-05, + "loss": 0.0675, + "step": 12005 + }, + { + "epoch": 32.98351648351648, + "grad_norm": 16.051864624023438, + "learning_rate": 3.350824175824176e-05, + "loss": 0.3986, + "step": 12006 + }, + { + "epoch": 32.98626373626374, + "grad_norm": 10.015952110290527, + "learning_rate": 3.350686813186814e-05, + "loss": 0.2123, + "step": 12007 + }, + { + "epoch": 32.98901098901099, + "grad_norm": 14.958320617675781, + "learning_rate": 3.350549450549451e-05, + "loss": 0.2731, + "step": 12008 + }, + { + "epoch": 32.99175824175824, + "grad_norm": 11.332282066345215, + "learning_rate": 3.3504120879120884e-05, + "loss": 0.377, + "step": 12009 + }, + { + "epoch": 32.994505494505496, + "grad_norm": 13.097495079040527, + "learning_rate": 3.3502747252747254e-05, + "loss": 0.2969, + "step": 12010 + }, + { + "epoch": 32.997252747252745, + "grad_norm": 12.189041137695312, + "learning_rate": 3.350137362637363e-05, + "loss": 0.3452, + "step": 12011 + }, + { + "epoch": 33.0, + "grad_norm": 59.740718841552734, + "learning_rate": 3.35e-05, + "loss": 4.8154, + "step": 12012 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.6143250688705234, + "eval_f1": 0.5631385467011205, + "eval_f1_DuraRiadoRio_64x64": 0.5253456221198156, + "eval_f1_Mole_64x64": 0.2, + "eval_f1_Quebrado_64x64": 0.7932960893854749, + "eval_f1_RiadoRio_64x64": 0.5676392572944297, + "eval_f1_RioFechado_64x64": 0.7294117647058823, + "eval_loss": 2.8632888793945312, + "eval_precision": 0.7092383002991121, + "eval_precision_DuraRiadoRio_64x64": 0.7808219178082192, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.6635514018691588, + "eval_precision_RiadoRio_64x64": 0.47555555555555556, + "eval_precision_RioFechado_64x64": 0.6262626262626263, + "eval_recall": 0.6140484721192653, + "eval_recall_DuraRiadoRio_64x64": 0.3958333333333333, + "eval_recall_Mole_64x64": 0.1111111111111111, + "eval_recall_Quebrado_64x64": 0.9861111111111112, + "eval_recall_RiadoRio_64x64": 0.7039473684210527, + "eval_recall_RioFechado_64x64": 0.8732394366197183, + "eval_runtime": 1.8111, + "eval_samples_per_second": 400.866, + "eval_steps_per_second": 25.399, + "step": 12012 + }, + { + "epoch": 33.002747252747255, + "grad_norm": 14.910799980163574, + "learning_rate": 3.349862637362637e-05, + "loss": 0.5342, + "step": 12013 + }, + { + "epoch": 33.005494505494504, + "grad_norm": 13.013895988464355, + "learning_rate": 3.349725274725275e-05, + "loss": 0.2625, + "step": 12014 + }, + { + "epoch": 33.00824175824176, + "grad_norm": 8.45541000366211, + "learning_rate": 3.349587912087912e-05, + "loss": 0.1719, + "step": 12015 + }, + { + "epoch": 33.010989010989015, + "grad_norm": 13.451638221740723, + "learning_rate": 3.3494505494505495e-05, + "loss": 0.2705, + "step": 12016 + }, + { + "epoch": 33.01373626373626, + "grad_norm": 8.389418601989746, + "learning_rate": 3.349313186813187e-05, + "loss": 0.1639, + "step": 12017 + }, + { + "epoch": 33.01648351648352, + "grad_norm": 16.97516441345215, + "learning_rate": 3.349175824175824e-05, + "loss": 0.3124, + "step": 12018 + }, + { + "epoch": 33.01923076923077, + "grad_norm": 10.243829727172852, + "learning_rate": 3.349038461538462e-05, + "loss": 0.1973, + "step": 12019 + }, + { + "epoch": 33.02197802197802, + "grad_norm": 10.763847351074219, + "learning_rate": 3.348901098901099e-05, + "loss": 0.352, + "step": 12020 + }, + { + "epoch": 33.02472527472528, + "grad_norm": 11.238853454589844, + "learning_rate": 3.3487637362637365e-05, + "loss": 0.4835, + "step": 12021 + }, + { + "epoch": 33.027472527472526, + "grad_norm": 10.085678100585938, + "learning_rate": 3.348626373626374e-05, + "loss": 0.2722, + "step": 12022 + }, + { + "epoch": 33.03021978021978, + "grad_norm": 13.233135223388672, + "learning_rate": 3.348489010989011e-05, + "loss": 0.4199, + "step": 12023 + }, + { + "epoch": 33.032967032967036, + "grad_norm": 8.76645278930664, + "learning_rate": 3.348351648351649e-05, + "loss": 0.197, + "step": 12024 + }, + { + "epoch": 33.035714285714285, + "grad_norm": 6.136237144470215, + "learning_rate": 3.348214285714286e-05, + "loss": 0.0742, + "step": 12025 + }, + { + "epoch": 33.03846153846154, + "grad_norm": 12.12307071685791, + "learning_rate": 3.3480769230769236e-05, + "loss": 0.3463, + "step": 12026 + }, + { + "epoch": 33.04120879120879, + "grad_norm": 20.235137939453125, + "learning_rate": 3.3479395604395606e-05, + "loss": 0.6635, + "step": 12027 + }, + { + "epoch": 33.043956043956044, + "grad_norm": 14.536186218261719, + "learning_rate": 3.3478021978021976e-05, + "loss": 0.3577, + "step": 12028 + }, + { + "epoch": 33.0467032967033, + "grad_norm": 11.025720596313477, + "learning_rate": 3.347664835164835e-05, + "loss": 0.4188, + "step": 12029 + }, + { + "epoch": 33.04945054945055, + "grad_norm": 17.01065444946289, + "learning_rate": 3.347527472527472e-05, + "loss": 0.6069, + "step": 12030 + }, + { + "epoch": 33.0521978021978, + "grad_norm": 9.649429321289062, + "learning_rate": 3.34739010989011e-05, + "loss": 0.1928, + "step": 12031 + }, + { + "epoch": 33.05494505494506, + "grad_norm": 15.540658950805664, + "learning_rate": 3.3472527472527476e-05, + "loss": 0.5187, + "step": 12032 + }, + { + "epoch": 33.05769230769231, + "grad_norm": 19.8007869720459, + "learning_rate": 3.3471153846153846e-05, + "loss": 0.748, + "step": 12033 + }, + { + "epoch": 33.06043956043956, + "grad_norm": 19.221824645996094, + "learning_rate": 3.346978021978022e-05, + "loss": 0.7654, + "step": 12034 + }, + { + "epoch": 33.06318681318681, + "grad_norm": 13.71935749053955, + "learning_rate": 3.346840659340659e-05, + "loss": 0.2495, + "step": 12035 + }, + { + "epoch": 33.065934065934066, + "grad_norm": 5.606795310974121, + "learning_rate": 3.346703296703297e-05, + "loss": 0.152, + "step": 12036 + }, + { + "epoch": 33.06868131868132, + "grad_norm": 10.80971908569336, + "learning_rate": 3.3465659340659346e-05, + "loss": 0.3957, + "step": 12037 + }, + { + "epoch": 33.07142857142857, + "grad_norm": 7.382504463195801, + "learning_rate": 3.3464285714285716e-05, + "loss": 0.2331, + "step": 12038 + }, + { + "epoch": 33.074175824175825, + "grad_norm": 7.073176383972168, + "learning_rate": 3.346291208791209e-05, + "loss": 0.1606, + "step": 12039 + }, + { + "epoch": 33.07692307692308, + "grad_norm": 19.41634178161621, + "learning_rate": 3.346153846153846e-05, + "loss": 0.6026, + "step": 12040 + }, + { + "epoch": 33.07967032967033, + "grad_norm": 11.303211212158203, + "learning_rate": 3.346016483516483e-05, + "loss": 0.3209, + "step": 12041 + }, + { + "epoch": 33.082417582417584, + "grad_norm": 16.653030395507812, + "learning_rate": 3.345879120879121e-05, + "loss": 0.5248, + "step": 12042 + }, + { + "epoch": 33.08516483516483, + "grad_norm": 13.295103073120117, + "learning_rate": 3.345741758241758e-05, + "loss": 0.193, + "step": 12043 + }, + { + "epoch": 33.08791208791209, + "grad_norm": 18.905174255371094, + "learning_rate": 3.345604395604396e-05, + "loss": 0.3272, + "step": 12044 + }, + { + "epoch": 33.09065934065934, + "grad_norm": 6.034056186676025, + "learning_rate": 3.345467032967033e-05, + "loss": 0.0919, + "step": 12045 + }, + { + "epoch": 33.09340659340659, + "grad_norm": 14.041508674621582, + "learning_rate": 3.3453296703296704e-05, + "loss": 0.3325, + "step": 12046 + }, + { + "epoch": 33.09615384615385, + "grad_norm": 14.598870277404785, + "learning_rate": 3.345192307692308e-05, + "loss": 0.2516, + "step": 12047 + }, + { + "epoch": 33.0989010989011, + "grad_norm": 16.3912353515625, + "learning_rate": 3.345054945054945e-05, + "loss": 0.363, + "step": 12048 + }, + { + "epoch": 33.10164835164835, + "grad_norm": 17.70103645324707, + "learning_rate": 3.344917582417583e-05, + "loss": 0.4146, + "step": 12049 + }, + { + "epoch": 33.104395604395606, + "grad_norm": 2.9751548767089844, + "learning_rate": 3.34478021978022e-05, + "loss": 0.0394, + "step": 12050 + }, + { + "epoch": 33.107142857142854, + "grad_norm": 7.605090618133545, + "learning_rate": 3.3446428571428574e-05, + "loss": 0.2683, + "step": 12051 + }, + { + "epoch": 33.10989010989011, + "grad_norm": 11.10262393951416, + "learning_rate": 3.344505494505495e-05, + "loss": 0.414, + "step": 12052 + }, + { + "epoch": 33.112637362637365, + "grad_norm": 14.300532341003418, + "learning_rate": 3.344368131868132e-05, + "loss": 0.3163, + "step": 12053 + }, + { + "epoch": 33.11538461538461, + "grad_norm": 19.68230628967285, + "learning_rate": 3.34423076923077e-05, + "loss": 0.8016, + "step": 12054 + }, + { + "epoch": 33.11813186813187, + "grad_norm": 3.9630813598632812, + "learning_rate": 3.344093406593407e-05, + "loss": 0.0442, + "step": 12055 + }, + { + "epoch": 33.120879120879124, + "grad_norm": 8.40706729888916, + "learning_rate": 3.343956043956044e-05, + "loss": 0.2481, + "step": 12056 + }, + { + "epoch": 33.12362637362637, + "grad_norm": 10.176762580871582, + "learning_rate": 3.3438186813186815e-05, + "loss": 0.1456, + "step": 12057 + }, + { + "epoch": 33.12637362637363, + "grad_norm": 12.760348320007324, + "learning_rate": 3.3436813186813185e-05, + "loss": 0.3203, + "step": 12058 + }, + { + "epoch": 33.129120879120876, + "grad_norm": 10.543364524841309, + "learning_rate": 3.343543956043956e-05, + "loss": 0.2469, + "step": 12059 + }, + { + "epoch": 33.13186813186813, + "grad_norm": 10.126952171325684, + "learning_rate": 3.343406593406593e-05, + "loss": 0.1289, + "step": 12060 + }, + { + "epoch": 33.13461538461539, + "grad_norm": 8.067054748535156, + "learning_rate": 3.343269230769231e-05, + "loss": 0.1893, + "step": 12061 + }, + { + "epoch": 33.137362637362635, + "grad_norm": 24.827425003051758, + "learning_rate": 3.3431318681318685e-05, + "loss": 0.7629, + "step": 12062 + }, + { + "epoch": 33.14010989010989, + "grad_norm": 18.295698165893555, + "learning_rate": 3.3429945054945055e-05, + "loss": 0.2402, + "step": 12063 + }, + { + "epoch": 33.142857142857146, + "grad_norm": 17.111862182617188, + "learning_rate": 3.342857142857143e-05, + "loss": 0.5221, + "step": 12064 + }, + { + "epoch": 33.145604395604394, + "grad_norm": 13.900321006774902, + "learning_rate": 3.34271978021978e-05, + "loss": 0.3918, + "step": 12065 + }, + { + "epoch": 33.14835164835165, + "grad_norm": 5.335380554199219, + "learning_rate": 3.342582417582418e-05, + "loss": 0.138, + "step": 12066 + }, + { + "epoch": 33.1510989010989, + "grad_norm": 9.165617942810059, + "learning_rate": 3.3424450549450555e-05, + "loss": 0.199, + "step": 12067 + }, + { + "epoch": 33.15384615384615, + "grad_norm": 3.7629430294036865, + "learning_rate": 3.3423076923076925e-05, + "loss": 0.0678, + "step": 12068 + }, + { + "epoch": 33.15659340659341, + "grad_norm": 21.707992553710938, + "learning_rate": 3.34217032967033e-05, + "loss": 1.0623, + "step": 12069 + }, + { + "epoch": 33.15934065934066, + "grad_norm": 13.087623596191406, + "learning_rate": 3.342032967032967e-05, + "loss": 0.3124, + "step": 12070 + }, + { + "epoch": 33.16208791208791, + "grad_norm": 12.494503021240234, + "learning_rate": 3.341895604395604e-05, + "loss": 0.2636, + "step": 12071 + }, + { + "epoch": 33.16483516483517, + "grad_norm": 16.055604934692383, + "learning_rate": 3.341758241758242e-05, + "loss": 0.3795, + "step": 12072 + }, + { + "epoch": 33.167582417582416, + "grad_norm": 18.41898536682129, + "learning_rate": 3.341620879120879e-05, + "loss": 0.4876, + "step": 12073 + }, + { + "epoch": 33.17032967032967, + "grad_norm": 5.673741340637207, + "learning_rate": 3.3414835164835166e-05, + "loss": 0.1029, + "step": 12074 + }, + { + "epoch": 33.17307692307692, + "grad_norm": 10.912461280822754, + "learning_rate": 3.3413461538461536e-05, + "loss": 0.4047, + "step": 12075 + }, + { + "epoch": 33.175824175824175, + "grad_norm": 13.476873397827148, + "learning_rate": 3.341208791208791e-05, + "loss": 0.4832, + "step": 12076 + }, + { + "epoch": 33.17857142857143, + "grad_norm": 12.908912658691406, + "learning_rate": 3.341071428571429e-05, + "loss": 0.4638, + "step": 12077 + }, + { + "epoch": 33.18131868131868, + "grad_norm": 2.83616304397583, + "learning_rate": 3.340934065934066e-05, + "loss": 0.0549, + "step": 12078 + }, + { + "epoch": 33.184065934065934, + "grad_norm": 10.113563537597656, + "learning_rate": 3.3407967032967036e-05, + "loss": 0.143, + "step": 12079 + }, + { + "epoch": 33.18681318681319, + "grad_norm": 18.57842254638672, + "learning_rate": 3.3406593406593406e-05, + "loss": 0.5131, + "step": 12080 + }, + { + "epoch": 33.18956043956044, + "grad_norm": 9.608382225036621, + "learning_rate": 3.340521978021978e-05, + "loss": 0.2948, + "step": 12081 + }, + { + "epoch": 33.19230769230769, + "grad_norm": 24.008440017700195, + "learning_rate": 3.340384615384616e-05, + "loss": 0.7997, + "step": 12082 + }, + { + "epoch": 33.19505494505494, + "grad_norm": 9.233673095703125, + "learning_rate": 3.340247252747253e-05, + "loss": 0.2187, + "step": 12083 + }, + { + "epoch": 33.1978021978022, + "grad_norm": 20.310945510864258, + "learning_rate": 3.340109890109891e-05, + "loss": 0.4246, + "step": 12084 + }, + { + "epoch": 33.20054945054945, + "grad_norm": 12.433670997619629, + "learning_rate": 3.339972527472528e-05, + "loss": 0.2957, + "step": 12085 + }, + { + "epoch": 33.2032967032967, + "grad_norm": 23.80805015563965, + "learning_rate": 3.339835164835165e-05, + "loss": 0.6915, + "step": 12086 + }, + { + "epoch": 33.206043956043956, + "grad_norm": 8.193922996520996, + "learning_rate": 3.3396978021978024e-05, + "loss": 0.1201, + "step": 12087 + }, + { + "epoch": 33.20879120879121, + "grad_norm": 10.215627670288086, + "learning_rate": 3.3395604395604394e-05, + "loss": 0.1432, + "step": 12088 + }, + { + "epoch": 33.21153846153846, + "grad_norm": 8.426412582397461, + "learning_rate": 3.339423076923077e-05, + "loss": 0.1367, + "step": 12089 + }, + { + "epoch": 33.214285714285715, + "grad_norm": 12.475301742553711, + "learning_rate": 3.339285714285714e-05, + "loss": 0.235, + "step": 12090 + }, + { + "epoch": 33.217032967032964, + "grad_norm": 7.305792331695557, + "learning_rate": 3.339148351648352e-05, + "loss": 0.1295, + "step": 12091 + }, + { + "epoch": 33.21978021978022, + "grad_norm": 5.830615043640137, + "learning_rate": 3.3390109890109894e-05, + "loss": 0.169, + "step": 12092 + }, + { + "epoch": 33.222527472527474, + "grad_norm": 7.497882843017578, + "learning_rate": 3.3388736263736264e-05, + "loss": 0.1631, + "step": 12093 + }, + { + "epoch": 33.22527472527472, + "grad_norm": 18.0025691986084, + "learning_rate": 3.338736263736264e-05, + "loss": 0.4454, + "step": 12094 + }, + { + "epoch": 33.22802197802198, + "grad_norm": 9.821093559265137, + "learning_rate": 3.338598901098901e-05, + "loss": 0.2081, + "step": 12095 + }, + { + "epoch": 33.23076923076923, + "grad_norm": 10.872527122497559, + "learning_rate": 3.338461538461539e-05, + "loss": 0.2319, + "step": 12096 + }, + { + "epoch": 33.23351648351648, + "grad_norm": 10.809864044189453, + "learning_rate": 3.3383241758241764e-05, + "loss": 0.1951, + "step": 12097 + }, + { + "epoch": 33.23626373626374, + "grad_norm": 8.69285774230957, + "learning_rate": 3.3381868131868134e-05, + "loss": 0.1932, + "step": 12098 + }, + { + "epoch": 33.239010989010985, + "grad_norm": 10.678572654724121, + "learning_rate": 3.338049450549451e-05, + "loss": 0.4068, + "step": 12099 + }, + { + "epoch": 33.24175824175824, + "grad_norm": 7.617954730987549, + "learning_rate": 3.337912087912088e-05, + "loss": 0.358, + "step": 12100 + }, + { + "epoch": 33.244505494505496, + "grad_norm": 19.448322296142578, + "learning_rate": 3.337774725274725e-05, + "loss": 0.6075, + "step": 12101 + }, + { + "epoch": 33.247252747252745, + "grad_norm": 14.446205139160156, + "learning_rate": 3.337637362637363e-05, + "loss": 0.6745, + "step": 12102 + }, + { + "epoch": 33.25, + "grad_norm": 8.457039833068848, + "learning_rate": 3.3375e-05, + "loss": 0.1456, + "step": 12103 + }, + { + "epoch": 33.252747252747255, + "grad_norm": 9.889833450317383, + "learning_rate": 3.3373626373626375e-05, + "loss": 0.1743, + "step": 12104 + }, + { + "epoch": 33.255494505494504, + "grad_norm": 4.186470985412598, + "learning_rate": 3.3372252747252745e-05, + "loss": 0.0704, + "step": 12105 + }, + { + "epoch": 33.25824175824176, + "grad_norm": 20.018733978271484, + "learning_rate": 3.337087912087912e-05, + "loss": 1.0391, + "step": 12106 + }, + { + "epoch": 33.260989010989015, + "grad_norm": 4.69384241104126, + "learning_rate": 3.33695054945055e-05, + "loss": 0.0736, + "step": 12107 + }, + { + "epoch": 33.26373626373626, + "grad_norm": 7.980048179626465, + "learning_rate": 3.336813186813187e-05, + "loss": 0.1757, + "step": 12108 + }, + { + "epoch": 33.26648351648352, + "grad_norm": 12.323321342468262, + "learning_rate": 3.3366758241758245e-05, + "loss": 0.2462, + "step": 12109 + }, + { + "epoch": 33.26923076923077, + "grad_norm": 10.273707389831543, + "learning_rate": 3.3365384615384615e-05, + "loss": 0.2132, + "step": 12110 + }, + { + "epoch": 33.27197802197802, + "grad_norm": 9.03862190246582, + "learning_rate": 3.336401098901099e-05, + "loss": 0.2141, + "step": 12111 + }, + { + "epoch": 33.27472527472528, + "grad_norm": 14.302067756652832, + "learning_rate": 3.336263736263737e-05, + "loss": 0.5108, + "step": 12112 + }, + { + "epoch": 33.277472527472526, + "grad_norm": 8.095398902893066, + "learning_rate": 3.336126373626374e-05, + "loss": 0.1216, + "step": 12113 + }, + { + "epoch": 33.28021978021978, + "grad_norm": 6.294269561767578, + "learning_rate": 3.3359890109890116e-05, + "loss": 0.1887, + "step": 12114 + }, + { + "epoch": 33.282967032967036, + "grad_norm": 10.051158905029297, + "learning_rate": 3.3358516483516486e-05, + "loss": 0.2982, + "step": 12115 + }, + { + "epoch": 33.285714285714285, + "grad_norm": 11.0711030960083, + "learning_rate": 3.3357142857142856e-05, + "loss": 0.3671, + "step": 12116 + }, + { + "epoch": 33.28846153846154, + "grad_norm": 13.213430404663086, + "learning_rate": 3.335576923076923e-05, + "loss": 0.303, + "step": 12117 + }, + { + "epoch": 33.29120879120879, + "grad_norm": 5.08664083480835, + "learning_rate": 3.33543956043956e-05, + "loss": 0.1538, + "step": 12118 + }, + { + "epoch": 33.293956043956044, + "grad_norm": 15.042014122009277, + "learning_rate": 3.335302197802198e-05, + "loss": 0.3621, + "step": 12119 + }, + { + "epoch": 33.2967032967033, + "grad_norm": 17.409334182739258, + "learning_rate": 3.335164835164835e-05, + "loss": 0.4043, + "step": 12120 + }, + { + "epoch": 33.29945054945055, + "grad_norm": 16.79621124267578, + "learning_rate": 3.3350274725274726e-05, + "loss": 0.3711, + "step": 12121 + }, + { + "epoch": 33.3021978021978, + "grad_norm": 16.171062469482422, + "learning_rate": 3.33489010989011e-05, + "loss": 0.5583, + "step": 12122 + }, + { + "epoch": 33.30494505494506, + "grad_norm": 3.408846616744995, + "learning_rate": 3.334752747252747e-05, + "loss": 0.0614, + "step": 12123 + }, + { + "epoch": 33.30769230769231, + "grad_norm": 18.425418853759766, + "learning_rate": 3.334615384615385e-05, + "loss": 0.6105, + "step": 12124 + }, + { + "epoch": 33.31043956043956, + "grad_norm": 11.687736511230469, + "learning_rate": 3.334478021978022e-05, + "loss": 0.374, + "step": 12125 + }, + { + "epoch": 33.31318681318681, + "grad_norm": 6.191712856292725, + "learning_rate": 3.33434065934066e-05, + "loss": 0.1778, + "step": 12126 + }, + { + "epoch": 33.315934065934066, + "grad_norm": 11.080901145935059, + "learning_rate": 3.3342032967032973e-05, + "loss": 0.3525, + "step": 12127 + }, + { + "epoch": 33.31868131868132, + "grad_norm": 6.500951766967773, + "learning_rate": 3.3340659340659343e-05, + "loss": 0.1143, + "step": 12128 + }, + { + "epoch": 33.32142857142857, + "grad_norm": 3.961038589477539, + "learning_rate": 3.333928571428572e-05, + "loss": 0.0617, + "step": 12129 + }, + { + "epoch": 33.324175824175825, + "grad_norm": 13.328076362609863, + "learning_rate": 3.333791208791209e-05, + "loss": 0.4389, + "step": 12130 + }, + { + "epoch": 33.32692307692308, + "grad_norm": 15.596502304077148, + "learning_rate": 3.333653846153846e-05, + "loss": 0.3502, + "step": 12131 + }, + { + "epoch": 33.32967032967033, + "grad_norm": 11.205009460449219, + "learning_rate": 3.333516483516484e-05, + "loss": 0.1447, + "step": 12132 + }, + { + "epoch": 33.332417582417584, + "grad_norm": 16.020902633666992, + "learning_rate": 3.333379120879121e-05, + "loss": 0.4436, + "step": 12133 + }, + { + "epoch": 33.33516483516483, + "grad_norm": 9.618083000183105, + "learning_rate": 3.3332417582417584e-05, + "loss": 0.2259, + "step": 12134 + }, + { + "epoch": 33.33791208791209, + "grad_norm": 8.423543930053711, + "learning_rate": 3.3331043956043954e-05, + "loss": 0.2021, + "step": 12135 + }, + { + "epoch": 33.34065934065934, + "grad_norm": 9.598841667175293, + "learning_rate": 3.332967032967033e-05, + "loss": 0.284, + "step": 12136 + }, + { + "epoch": 33.34340659340659, + "grad_norm": 18.508163452148438, + "learning_rate": 3.332829670329671e-05, + "loss": 0.7737, + "step": 12137 + }, + { + "epoch": 33.34615384615385, + "grad_norm": 7.389163494110107, + "learning_rate": 3.332692307692308e-05, + "loss": 0.1886, + "step": 12138 + }, + { + "epoch": 33.3489010989011, + "grad_norm": 9.436895370483398, + "learning_rate": 3.3325549450549454e-05, + "loss": 0.1847, + "step": 12139 + }, + { + "epoch": 33.35164835164835, + "grad_norm": 6.6627349853515625, + "learning_rate": 3.3324175824175824e-05, + "loss": 0.1416, + "step": 12140 + }, + { + "epoch": 33.354395604395606, + "grad_norm": 6.515737056732178, + "learning_rate": 3.33228021978022e-05, + "loss": 0.116, + "step": 12141 + }, + { + "epoch": 33.357142857142854, + "grad_norm": 15.727615356445312, + "learning_rate": 3.332142857142858e-05, + "loss": 0.3923, + "step": 12142 + }, + { + "epoch": 33.35989010989011, + "grad_norm": 10.001845359802246, + "learning_rate": 3.332005494505495e-05, + "loss": 0.1685, + "step": 12143 + }, + { + "epoch": 33.362637362637365, + "grad_norm": 5.664029598236084, + "learning_rate": 3.3318681318681325e-05, + "loss": 0.0993, + "step": 12144 + }, + { + "epoch": 33.36538461538461, + "grad_norm": 14.112086296081543, + "learning_rate": 3.3317307692307695e-05, + "loss": 0.2904, + "step": 12145 + }, + { + "epoch": 33.36813186813187, + "grad_norm": 3.451460361480713, + "learning_rate": 3.3315934065934065e-05, + "loss": 0.0552, + "step": 12146 + }, + { + "epoch": 33.370879120879124, + "grad_norm": 10.170637130737305, + "learning_rate": 3.331456043956044e-05, + "loss": 0.3869, + "step": 12147 + }, + { + "epoch": 33.37362637362637, + "grad_norm": 5.629842281341553, + "learning_rate": 3.331318681318681e-05, + "loss": 0.1526, + "step": 12148 + }, + { + "epoch": 33.37637362637363, + "grad_norm": 5.498367786407471, + "learning_rate": 3.331181318681319e-05, + "loss": 0.0941, + "step": 12149 + }, + { + "epoch": 33.379120879120876, + "grad_norm": 9.92575454711914, + "learning_rate": 3.331043956043956e-05, + "loss": 0.2251, + "step": 12150 + }, + { + "epoch": 33.38186813186813, + "grad_norm": 12.548978805541992, + "learning_rate": 3.3309065934065935e-05, + "loss": 0.2961, + "step": 12151 + }, + { + "epoch": 33.38461538461539, + "grad_norm": 17.91962432861328, + "learning_rate": 3.330769230769231e-05, + "loss": 1.0038, + "step": 12152 + }, + { + "epoch": 33.387362637362635, + "grad_norm": 6.84331750869751, + "learning_rate": 3.330631868131868e-05, + "loss": 0.0702, + "step": 12153 + }, + { + "epoch": 33.39010989010989, + "grad_norm": 18.361520767211914, + "learning_rate": 3.330494505494506e-05, + "loss": 0.5349, + "step": 12154 + }, + { + "epoch": 33.392857142857146, + "grad_norm": 11.064104080200195, + "learning_rate": 3.330357142857143e-05, + "loss": 0.2614, + "step": 12155 + }, + { + "epoch": 33.395604395604394, + "grad_norm": 23.91828727722168, + "learning_rate": 3.3302197802197806e-05, + "loss": 0.8194, + "step": 12156 + }, + { + "epoch": 33.39835164835165, + "grad_norm": 14.695244789123535, + "learning_rate": 3.330082417582418e-05, + "loss": 0.4919, + "step": 12157 + }, + { + "epoch": 33.4010989010989, + "grad_norm": 12.37030029296875, + "learning_rate": 3.329945054945055e-05, + "loss": 0.254, + "step": 12158 + }, + { + "epoch": 33.40384615384615, + "grad_norm": 17.00763702392578, + "learning_rate": 3.329807692307693e-05, + "loss": 0.4307, + "step": 12159 + }, + { + "epoch": 33.40659340659341, + "grad_norm": 12.504467964172363, + "learning_rate": 3.32967032967033e-05, + "loss": 0.4433, + "step": 12160 + }, + { + "epoch": 33.40934065934066, + "grad_norm": 15.528091430664062, + "learning_rate": 3.329532967032967e-05, + "loss": 0.5292, + "step": 12161 + }, + { + "epoch": 33.41208791208791, + "grad_norm": 9.324589729309082, + "learning_rate": 3.3293956043956046e-05, + "loss": 0.2051, + "step": 12162 + }, + { + "epoch": 33.41483516483517, + "grad_norm": 9.297496795654297, + "learning_rate": 3.3292582417582416e-05, + "loss": 0.2955, + "step": 12163 + }, + { + "epoch": 33.417582417582416, + "grad_norm": 26.63402557373047, + "learning_rate": 3.329120879120879e-05, + "loss": 0.8462, + "step": 12164 + }, + { + "epoch": 33.42032967032967, + "grad_norm": 8.74443244934082, + "learning_rate": 3.328983516483516e-05, + "loss": 0.301, + "step": 12165 + }, + { + "epoch": 33.42307692307692, + "grad_norm": 12.483943939208984, + "learning_rate": 3.328846153846154e-05, + "loss": 0.2467, + "step": 12166 + }, + { + "epoch": 33.425824175824175, + "grad_norm": 15.98287296295166, + "learning_rate": 3.3287087912087916e-05, + "loss": 0.5574, + "step": 12167 + }, + { + "epoch": 33.42857142857143, + "grad_norm": 10.81444263458252, + "learning_rate": 3.3285714285714286e-05, + "loss": 0.2949, + "step": 12168 + }, + { + "epoch": 33.43131868131868, + "grad_norm": 9.785033226013184, + "learning_rate": 3.328434065934066e-05, + "loss": 0.2052, + "step": 12169 + }, + { + "epoch": 33.434065934065934, + "grad_norm": 16.35486602783203, + "learning_rate": 3.328296703296703e-05, + "loss": 0.3149, + "step": 12170 + }, + { + "epoch": 33.43681318681319, + "grad_norm": 18.121477127075195, + "learning_rate": 3.328159340659341e-05, + "loss": 0.7337, + "step": 12171 + }, + { + "epoch": 33.43956043956044, + "grad_norm": 9.968935012817383, + "learning_rate": 3.328021978021978e-05, + "loss": 0.2427, + "step": 12172 + }, + { + "epoch": 33.44230769230769, + "grad_norm": 6.188803195953369, + "learning_rate": 3.327884615384616e-05, + "loss": 0.1218, + "step": 12173 + }, + { + "epoch": 33.44505494505494, + "grad_norm": 18.269691467285156, + "learning_rate": 3.3277472527472534e-05, + "loss": 0.5961, + "step": 12174 + }, + { + "epoch": 33.4478021978022, + "grad_norm": 16.048603057861328, + "learning_rate": 3.3276098901098904e-05, + "loss": 0.3733, + "step": 12175 + }, + { + "epoch": 33.45054945054945, + "grad_norm": 15.760974884033203, + "learning_rate": 3.3274725274725274e-05, + "loss": 0.3012, + "step": 12176 + }, + { + "epoch": 33.4532967032967, + "grad_norm": 8.033183097839355, + "learning_rate": 3.327335164835165e-05, + "loss": 0.2862, + "step": 12177 + }, + { + "epoch": 33.456043956043956, + "grad_norm": 18.1033935546875, + "learning_rate": 3.327197802197802e-05, + "loss": 0.6457, + "step": 12178 + }, + { + "epoch": 33.45879120879121, + "grad_norm": 12.533354759216309, + "learning_rate": 3.32706043956044e-05, + "loss": 0.319, + "step": 12179 + }, + { + "epoch": 33.46153846153846, + "grad_norm": 8.922944068908691, + "learning_rate": 3.326923076923077e-05, + "loss": 0.1221, + "step": 12180 + }, + { + "epoch": 33.464285714285715, + "grad_norm": 17.50143814086914, + "learning_rate": 3.3267857142857144e-05, + "loss": 0.2555, + "step": 12181 + }, + { + "epoch": 33.467032967032964, + "grad_norm": 14.884509086608887, + "learning_rate": 3.3266483516483514e-05, + "loss": 0.276, + "step": 12182 + }, + { + "epoch": 33.46978021978022, + "grad_norm": 11.293869972229004, + "learning_rate": 3.326510989010989e-05, + "loss": 0.1792, + "step": 12183 + }, + { + "epoch": 33.472527472527474, + "grad_norm": 9.78758716583252, + "learning_rate": 3.326373626373627e-05, + "loss": 0.2089, + "step": 12184 + }, + { + "epoch": 33.47527472527472, + "grad_norm": 16.62991714477539, + "learning_rate": 3.326236263736264e-05, + "loss": 0.4988, + "step": 12185 + }, + { + "epoch": 33.47802197802198, + "grad_norm": 8.632779121398926, + "learning_rate": 3.3260989010989015e-05, + "loss": 0.1447, + "step": 12186 + }, + { + "epoch": 33.48076923076923, + "grad_norm": 8.078503608703613, + "learning_rate": 3.3259615384615385e-05, + "loss": 0.1252, + "step": 12187 + }, + { + "epoch": 33.48351648351648, + "grad_norm": 11.756326675415039, + "learning_rate": 3.325824175824176e-05, + "loss": 0.3468, + "step": 12188 + }, + { + "epoch": 33.48626373626374, + "grad_norm": 18.270082473754883, + "learning_rate": 3.325686813186814e-05, + "loss": 0.4455, + "step": 12189 + }, + { + "epoch": 33.489010989010985, + "grad_norm": 7.0223588943481445, + "learning_rate": 3.325549450549451e-05, + "loss": 0.2848, + "step": 12190 + }, + { + "epoch": 33.49175824175824, + "grad_norm": 3.412233352661133, + "learning_rate": 3.325412087912088e-05, + "loss": 0.0741, + "step": 12191 + }, + { + "epoch": 33.494505494505496, + "grad_norm": 11.282585144042969, + "learning_rate": 3.3252747252747255e-05, + "loss": 0.2234, + "step": 12192 + }, + { + "epoch": 33.497252747252745, + "grad_norm": 9.05403995513916, + "learning_rate": 3.3251373626373625e-05, + "loss": 0.2184, + "step": 12193 + }, + { + "epoch": 33.5, + "grad_norm": 17.274959564208984, + "learning_rate": 3.325e-05, + "loss": 0.5335, + "step": 12194 + }, + { + "epoch": 33.502747252747255, + "grad_norm": 9.188383102416992, + "learning_rate": 3.324862637362637e-05, + "loss": 0.1897, + "step": 12195 + }, + { + "epoch": 33.505494505494504, + "grad_norm": 17.818603515625, + "learning_rate": 3.324725274725275e-05, + "loss": 0.4903, + "step": 12196 + }, + { + "epoch": 33.50824175824176, + "grad_norm": 5.842972278594971, + "learning_rate": 3.324587912087912e-05, + "loss": 0.1793, + "step": 12197 + }, + { + "epoch": 33.51098901098901, + "grad_norm": 3.2633297443389893, + "learning_rate": 3.3244505494505495e-05, + "loss": 0.0528, + "step": 12198 + }, + { + "epoch": 33.51373626373626, + "grad_norm": 13.88953971862793, + "learning_rate": 3.324313186813187e-05, + "loss": 0.2528, + "step": 12199 + }, + { + "epoch": 33.51648351648352, + "grad_norm": 14.07608413696289, + "learning_rate": 3.324175824175824e-05, + "loss": 0.4038, + "step": 12200 + }, + { + "epoch": 33.51923076923077, + "grad_norm": 9.86132526397705, + "learning_rate": 3.324038461538462e-05, + "loss": 0.317, + "step": 12201 + }, + { + "epoch": 33.52197802197802, + "grad_norm": 13.601149559020996, + "learning_rate": 3.323901098901099e-05, + "loss": 0.2889, + "step": 12202 + }, + { + "epoch": 33.52472527472528, + "grad_norm": 8.668973922729492, + "learning_rate": 3.3237637362637366e-05, + "loss": 0.1487, + "step": 12203 + }, + { + "epoch": 33.527472527472526, + "grad_norm": 7.655539035797119, + "learning_rate": 3.323626373626374e-05, + "loss": 0.1912, + "step": 12204 + }, + { + "epoch": 33.53021978021978, + "grad_norm": 16.56552505493164, + "learning_rate": 3.323489010989011e-05, + "loss": 0.439, + "step": 12205 + }, + { + "epoch": 33.532967032967036, + "grad_norm": 21.875917434692383, + "learning_rate": 3.323351648351648e-05, + "loss": 0.5898, + "step": 12206 + }, + { + "epoch": 33.535714285714285, + "grad_norm": 7.729613780975342, + "learning_rate": 3.323214285714286e-05, + "loss": 0.1464, + "step": 12207 + }, + { + "epoch": 33.53846153846154, + "grad_norm": 9.363996505737305, + "learning_rate": 3.323076923076923e-05, + "loss": 0.2879, + "step": 12208 + }, + { + "epoch": 33.54120879120879, + "grad_norm": 17.096158981323242, + "learning_rate": 3.3229395604395606e-05, + "loss": 0.365, + "step": 12209 + }, + { + "epoch": 33.543956043956044, + "grad_norm": 9.241042137145996, + "learning_rate": 3.3228021978021976e-05, + "loss": 0.2292, + "step": 12210 + }, + { + "epoch": 33.5467032967033, + "grad_norm": 19.9317569732666, + "learning_rate": 3.322664835164835e-05, + "loss": 0.6461, + "step": 12211 + }, + { + "epoch": 33.54945054945055, + "grad_norm": 21.9642276763916, + "learning_rate": 3.322527472527472e-05, + "loss": 1.1243, + "step": 12212 + }, + { + "epoch": 33.5521978021978, + "grad_norm": 13.214566230773926, + "learning_rate": 3.32239010989011e-05, + "loss": 0.3588, + "step": 12213 + }, + { + "epoch": 33.55494505494506, + "grad_norm": 14.455286026000977, + "learning_rate": 3.322252747252748e-05, + "loss": 0.388, + "step": 12214 + }, + { + "epoch": 33.55769230769231, + "grad_norm": 7.976823806762695, + "learning_rate": 3.322115384615385e-05, + "loss": 0.2027, + "step": 12215 + }, + { + "epoch": 33.56043956043956, + "grad_norm": 17.075395584106445, + "learning_rate": 3.3219780219780224e-05, + "loss": 0.4702, + "step": 12216 + }, + { + "epoch": 33.56318681318681, + "grad_norm": 16.192533493041992, + "learning_rate": 3.3218406593406594e-05, + "loss": 0.4292, + "step": 12217 + }, + { + "epoch": 33.565934065934066, + "grad_norm": 5.47440242767334, + "learning_rate": 3.321703296703297e-05, + "loss": 0.1231, + "step": 12218 + }, + { + "epoch": 33.56868131868132, + "grad_norm": 14.434922218322754, + "learning_rate": 3.321565934065935e-05, + "loss": 0.409, + "step": 12219 + }, + { + "epoch": 33.57142857142857, + "grad_norm": 6.910072326660156, + "learning_rate": 3.321428571428572e-05, + "loss": 0.1553, + "step": 12220 + }, + { + "epoch": 33.574175824175825, + "grad_norm": 12.20316219329834, + "learning_rate": 3.321291208791209e-05, + "loss": 0.4646, + "step": 12221 + }, + { + "epoch": 33.57692307692308, + "grad_norm": 13.834110260009766, + "learning_rate": 3.321153846153846e-05, + "loss": 0.4128, + "step": 12222 + }, + { + "epoch": 33.57967032967033, + "grad_norm": 21.7311954498291, + "learning_rate": 3.3210164835164834e-05, + "loss": 0.6587, + "step": 12223 + }, + { + "epoch": 33.582417582417584, + "grad_norm": 9.648625373840332, + "learning_rate": 3.320879120879121e-05, + "loss": 0.2361, + "step": 12224 + }, + { + "epoch": 33.58516483516483, + "grad_norm": 6.63547420501709, + "learning_rate": 3.320741758241758e-05, + "loss": 0.1414, + "step": 12225 + }, + { + "epoch": 33.58791208791209, + "grad_norm": 11.201197624206543, + "learning_rate": 3.320604395604396e-05, + "loss": 0.2824, + "step": 12226 + }, + { + "epoch": 33.59065934065934, + "grad_norm": 15.91271686553955, + "learning_rate": 3.320467032967033e-05, + "loss": 0.6168, + "step": 12227 + }, + { + "epoch": 33.59340659340659, + "grad_norm": 14.473884582519531, + "learning_rate": 3.3203296703296704e-05, + "loss": 0.6547, + "step": 12228 + }, + { + "epoch": 33.59615384615385, + "grad_norm": 16.238040924072266, + "learning_rate": 3.320192307692308e-05, + "loss": 0.5034, + "step": 12229 + }, + { + "epoch": 33.5989010989011, + "grad_norm": 16.19197654724121, + "learning_rate": 3.320054945054945e-05, + "loss": 0.3827, + "step": 12230 + }, + { + "epoch": 33.60164835164835, + "grad_norm": 12.053048133850098, + "learning_rate": 3.319917582417583e-05, + "loss": 0.2945, + "step": 12231 + }, + { + "epoch": 33.604395604395606, + "grad_norm": 11.068440437316895, + "learning_rate": 3.31978021978022e-05, + "loss": 0.2491, + "step": 12232 + }, + { + "epoch": 33.607142857142854, + "grad_norm": 12.696239471435547, + "learning_rate": 3.3196428571428575e-05, + "loss": 0.2801, + "step": 12233 + }, + { + "epoch": 33.60989010989011, + "grad_norm": 10.736574172973633, + "learning_rate": 3.319505494505495e-05, + "loss": 0.3128, + "step": 12234 + }, + { + "epoch": 33.612637362637365, + "grad_norm": 13.48443603515625, + "learning_rate": 3.319368131868132e-05, + "loss": 0.3145, + "step": 12235 + }, + { + "epoch": 33.61538461538461, + "grad_norm": 7.276828765869141, + "learning_rate": 3.319230769230769e-05, + "loss": 0.1852, + "step": 12236 + }, + { + "epoch": 33.61813186813187, + "grad_norm": 16.323911666870117, + "learning_rate": 3.319093406593406e-05, + "loss": 0.4061, + "step": 12237 + }, + { + "epoch": 33.620879120879124, + "grad_norm": 13.071167945861816, + "learning_rate": 3.318956043956044e-05, + "loss": 0.2734, + "step": 12238 + }, + { + "epoch": 33.62362637362637, + "grad_norm": 3.671147346496582, + "learning_rate": 3.3188186813186815e-05, + "loss": 0.0668, + "step": 12239 + }, + { + "epoch": 33.62637362637363, + "grad_norm": 9.12214183807373, + "learning_rate": 3.3186813186813185e-05, + "loss": 0.282, + "step": 12240 + }, + { + "epoch": 33.629120879120876, + "grad_norm": 3.8732028007507324, + "learning_rate": 3.318543956043956e-05, + "loss": 0.0994, + "step": 12241 + }, + { + "epoch": 33.63186813186813, + "grad_norm": 10.58980941772461, + "learning_rate": 3.318406593406593e-05, + "loss": 0.1658, + "step": 12242 + }, + { + "epoch": 33.63461538461539, + "grad_norm": 9.791740417480469, + "learning_rate": 3.318269230769231e-05, + "loss": 0.2528, + "step": 12243 + }, + { + "epoch": 33.637362637362635, + "grad_norm": 10.578519821166992, + "learning_rate": 3.3181318681318686e-05, + "loss": 0.3332, + "step": 12244 + }, + { + "epoch": 33.64010989010989, + "grad_norm": 14.96042537689209, + "learning_rate": 3.3179945054945056e-05, + "loss": 0.378, + "step": 12245 + }, + { + "epoch": 33.642857142857146, + "grad_norm": 15.942438125610352, + "learning_rate": 3.317857142857143e-05, + "loss": 0.5162, + "step": 12246 + }, + { + "epoch": 33.645604395604394, + "grad_norm": 8.345091819763184, + "learning_rate": 3.31771978021978e-05, + "loss": 0.2169, + "step": 12247 + }, + { + "epoch": 33.64835164835165, + "grad_norm": 17.72572135925293, + "learning_rate": 3.317582417582418e-05, + "loss": 0.4875, + "step": 12248 + }, + { + "epoch": 33.6510989010989, + "grad_norm": 9.357139587402344, + "learning_rate": 3.3174450549450556e-05, + "loss": 0.1272, + "step": 12249 + }, + { + "epoch": 33.65384615384615, + "grad_norm": 14.120915412902832, + "learning_rate": 3.3173076923076926e-05, + "loss": 0.271, + "step": 12250 + }, + { + "epoch": 33.65659340659341, + "grad_norm": 6.412327289581299, + "learning_rate": 3.3171703296703296e-05, + "loss": 0.1331, + "step": 12251 + }, + { + "epoch": 33.65934065934066, + "grad_norm": 10.19508171081543, + "learning_rate": 3.3170329670329666e-05, + "loss": 0.2232, + "step": 12252 + }, + { + "epoch": 33.66208791208791, + "grad_norm": 19.88465118408203, + "learning_rate": 3.316895604395604e-05, + "loss": 0.4856, + "step": 12253 + }, + { + "epoch": 33.66483516483517, + "grad_norm": 11.362821578979492, + "learning_rate": 3.316758241758242e-05, + "loss": 0.2903, + "step": 12254 + }, + { + "epoch": 33.667582417582416, + "grad_norm": 6.1551594734191895, + "learning_rate": 3.316620879120879e-05, + "loss": 0.125, + "step": 12255 + }, + { + "epoch": 33.67032967032967, + "grad_norm": 5.365301132202148, + "learning_rate": 3.3164835164835167e-05, + "loss": 0.0927, + "step": 12256 + }, + { + "epoch": 33.67307692307692, + "grad_norm": 11.155471801757812, + "learning_rate": 3.316346153846154e-05, + "loss": 0.2483, + "step": 12257 + }, + { + "epoch": 33.675824175824175, + "grad_norm": 7.173980236053467, + "learning_rate": 3.3162087912087913e-05, + "loss": 0.1374, + "step": 12258 + }, + { + "epoch": 33.67857142857143, + "grad_norm": 17.032855987548828, + "learning_rate": 3.316071428571429e-05, + "loss": 0.4922, + "step": 12259 + }, + { + "epoch": 33.68131868131868, + "grad_norm": 13.344060897827148, + "learning_rate": 3.315934065934066e-05, + "loss": 0.3912, + "step": 12260 + }, + { + "epoch": 33.684065934065934, + "grad_norm": 12.344768524169922, + "learning_rate": 3.315796703296704e-05, + "loss": 0.1954, + "step": 12261 + }, + { + "epoch": 33.68681318681319, + "grad_norm": 17.540611267089844, + "learning_rate": 3.315659340659341e-05, + "loss": 0.8124, + "step": 12262 + }, + { + "epoch": 33.68956043956044, + "grad_norm": 12.384281158447266, + "learning_rate": 3.3155219780219784e-05, + "loss": 0.2703, + "step": 12263 + }, + { + "epoch": 33.69230769230769, + "grad_norm": 18.820796966552734, + "learning_rate": 3.315384615384616e-05, + "loss": 0.8055, + "step": 12264 + }, + { + "epoch": 33.69505494505494, + "grad_norm": 12.819851875305176, + "learning_rate": 3.315247252747253e-05, + "loss": 0.3008, + "step": 12265 + }, + { + "epoch": 33.6978021978022, + "grad_norm": 12.80887508392334, + "learning_rate": 3.31510989010989e-05, + "loss": 0.4258, + "step": 12266 + }, + { + "epoch": 33.70054945054945, + "grad_norm": 8.356311798095703, + "learning_rate": 3.314972527472527e-05, + "loss": 0.2534, + "step": 12267 + }, + { + "epoch": 33.7032967032967, + "grad_norm": 5.9273762702941895, + "learning_rate": 3.314835164835165e-05, + "loss": 0.1317, + "step": 12268 + }, + { + "epoch": 33.706043956043956, + "grad_norm": 11.656323432922363, + "learning_rate": 3.3146978021978024e-05, + "loss": 0.2673, + "step": 12269 + }, + { + "epoch": 33.70879120879121, + "grad_norm": 2.6304173469543457, + "learning_rate": 3.3145604395604394e-05, + "loss": 0.0478, + "step": 12270 + }, + { + "epoch": 33.71153846153846, + "grad_norm": 17.78052520751953, + "learning_rate": 3.314423076923077e-05, + "loss": 0.4794, + "step": 12271 + }, + { + "epoch": 33.714285714285715, + "grad_norm": 12.040030479431152, + "learning_rate": 3.314285714285714e-05, + "loss": 0.3056, + "step": 12272 + }, + { + "epoch": 33.717032967032964, + "grad_norm": 12.581865310668945, + "learning_rate": 3.314148351648352e-05, + "loss": 0.4729, + "step": 12273 + }, + { + "epoch": 33.71978021978022, + "grad_norm": 12.941953659057617, + "learning_rate": 3.3140109890109895e-05, + "loss": 0.3012, + "step": 12274 + }, + { + "epoch": 33.722527472527474, + "grad_norm": 10.095036506652832, + "learning_rate": 3.3138736263736265e-05, + "loss": 0.3068, + "step": 12275 + }, + { + "epoch": 33.72527472527472, + "grad_norm": 18.924877166748047, + "learning_rate": 3.313736263736264e-05, + "loss": 0.5601, + "step": 12276 + }, + { + "epoch": 33.72802197802198, + "grad_norm": 10.161608695983887, + "learning_rate": 3.313598901098901e-05, + "loss": 0.2602, + "step": 12277 + }, + { + "epoch": 33.73076923076923, + "grad_norm": 13.25367259979248, + "learning_rate": 3.313461538461539e-05, + "loss": 0.2382, + "step": 12278 + }, + { + "epoch": 33.73351648351648, + "grad_norm": 7.136160850524902, + "learning_rate": 3.3133241758241765e-05, + "loss": 0.1465, + "step": 12279 + }, + { + "epoch": 33.73626373626374, + "grad_norm": 12.726484298706055, + "learning_rate": 3.3131868131868135e-05, + "loss": 0.3522, + "step": 12280 + }, + { + "epoch": 33.73901098901099, + "grad_norm": 10.432662010192871, + "learning_rate": 3.3130494505494505e-05, + "loss": 0.2248, + "step": 12281 + }, + { + "epoch": 33.74175824175824, + "grad_norm": 17.257526397705078, + "learning_rate": 3.3129120879120875e-05, + "loss": 0.4216, + "step": 12282 + }, + { + "epoch": 33.744505494505496, + "grad_norm": 16.681333541870117, + "learning_rate": 3.312774725274725e-05, + "loss": 0.4337, + "step": 12283 + }, + { + "epoch": 33.747252747252745, + "grad_norm": 13.820395469665527, + "learning_rate": 3.312637362637363e-05, + "loss": 0.5146, + "step": 12284 + }, + { + "epoch": 33.75, + "grad_norm": 8.256163597106934, + "learning_rate": 3.3125e-05, + "loss": 0.2397, + "step": 12285 + }, + { + "epoch": 33.752747252747255, + "grad_norm": 4.495039463043213, + "learning_rate": 3.3123626373626376e-05, + "loss": 0.1024, + "step": 12286 + }, + { + "epoch": 33.755494505494504, + "grad_norm": 10.053646087646484, + "learning_rate": 3.3122252747252746e-05, + "loss": 0.2401, + "step": 12287 + }, + { + "epoch": 33.75824175824176, + "grad_norm": 13.603801727294922, + "learning_rate": 3.312087912087912e-05, + "loss": 0.4543, + "step": 12288 + }, + { + "epoch": 33.76098901098901, + "grad_norm": 5.964678764343262, + "learning_rate": 3.31195054945055e-05, + "loss": 0.0935, + "step": 12289 + }, + { + "epoch": 33.76373626373626, + "grad_norm": 7.74372673034668, + "learning_rate": 3.311813186813187e-05, + "loss": 0.1217, + "step": 12290 + }, + { + "epoch": 33.76648351648352, + "grad_norm": 18.4793758392334, + "learning_rate": 3.3116758241758246e-05, + "loss": 0.5246, + "step": 12291 + }, + { + "epoch": 33.76923076923077, + "grad_norm": 8.731596946716309, + "learning_rate": 3.3115384615384616e-05, + "loss": 0.1404, + "step": 12292 + }, + { + "epoch": 33.77197802197802, + "grad_norm": 8.294927597045898, + "learning_rate": 3.311401098901099e-05, + "loss": 0.171, + "step": 12293 + }, + { + "epoch": 33.77472527472528, + "grad_norm": 16.248796463012695, + "learning_rate": 3.311263736263737e-05, + "loss": 0.5558, + "step": 12294 + }, + { + "epoch": 33.777472527472526, + "grad_norm": 9.896565437316895, + "learning_rate": 3.311126373626374e-05, + "loss": 0.1671, + "step": 12295 + }, + { + "epoch": 33.78021978021978, + "grad_norm": 6.096136569976807, + "learning_rate": 3.310989010989011e-05, + "loss": 0.081, + "step": 12296 + }, + { + "epoch": 33.782967032967036, + "grad_norm": 11.460206985473633, + "learning_rate": 3.310851648351648e-05, + "loss": 0.3888, + "step": 12297 + }, + { + "epoch": 33.785714285714285, + "grad_norm": 2.7635297775268555, + "learning_rate": 3.3107142857142856e-05, + "loss": 0.0604, + "step": 12298 + }, + { + "epoch": 33.78846153846154, + "grad_norm": 8.319716453552246, + "learning_rate": 3.310576923076923e-05, + "loss": 0.2409, + "step": 12299 + }, + { + "epoch": 33.79120879120879, + "grad_norm": 11.740483283996582, + "learning_rate": 3.31043956043956e-05, + "loss": 0.2153, + "step": 12300 + }, + { + "epoch": 33.793956043956044, + "grad_norm": 5.0472636222839355, + "learning_rate": 3.310302197802198e-05, + "loss": 0.0972, + "step": 12301 + }, + { + "epoch": 33.7967032967033, + "grad_norm": 14.605263710021973, + "learning_rate": 3.310164835164835e-05, + "loss": 0.3647, + "step": 12302 + }, + { + "epoch": 33.79945054945055, + "grad_norm": 17.979219436645508, + "learning_rate": 3.310027472527473e-05, + "loss": 0.4261, + "step": 12303 + }, + { + "epoch": 33.8021978021978, + "grad_norm": 12.221059799194336, + "learning_rate": 3.3098901098901104e-05, + "loss": 0.3388, + "step": 12304 + }, + { + "epoch": 33.80494505494506, + "grad_norm": 9.87331771850586, + "learning_rate": 3.3097527472527474e-05, + "loss": 0.3029, + "step": 12305 + }, + { + "epoch": 33.80769230769231, + "grad_norm": 11.600699424743652, + "learning_rate": 3.309615384615385e-05, + "loss": 0.1488, + "step": 12306 + }, + { + "epoch": 33.81043956043956, + "grad_norm": 11.956316947937012, + "learning_rate": 3.309478021978022e-05, + "loss": 0.425, + "step": 12307 + }, + { + "epoch": 33.81318681318681, + "grad_norm": 9.965185165405273, + "learning_rate": 3.30934065934066e-05, + "loss": 0.1831, + "step": 12308 + }, + { + "epoch": 33.815934065934066, + "grad_norm": 16.189189910888672, + "learning_rate": 3.3092032967032974e-05, + "loss": 0.317, + "step": 12309 + }, + { + "epoch": 33.81868131868132, + "grad_norm": 17.541088104248047, + "learning_rate": 3.3090659340659344e-05, + "loss": 0.5595, + "step": 12310 + }, + { + "epoch": 33.82142857142857, + "grad_norm": 7.979928970336914, + "learning_rate": 3.3089285714285714e-05, + "loss": 0.2391, + "step": 12311 + }, + { + "epoch": 33.824175824175825, + "grad_norm": 18.190099716186523, + "learning_rate": 3.3087912087912084e-05, + "loss": 0.7044, + "step": 12312 + }, + { + "epoch": 33.82692307692308, + "grad_norm": 12.042457580566406, + "learning_rate": 3.308653846153846e-05, + "loss": 0.2888, + "step": 12313 + }, + { + "epoch": 33.82967032967033, + "grad_norm": 8.881937980651855, + "learning_rate": 3.308516483516484e-05, + "loss": 0.208, + "step": 12314 + }, + { + "epoch": 33.832417582417584, + "grad_norm": 22.17510223388672, + "learning_rate": 3.308379120879121e-05, + "loss": 0.529, + "step": 12315 + }, + { + "epoch": 33.83516483516483, + "grad_norm": 11.22208309173584, + "learning_rate": 3.3082417582417585e-05, + "loss": 0.3085, + "step": 12316 + }, + { + "epoch": 33.83791208791209, + "grad_norm": 7.6195783615112305, + "learning_rate": 3.3081043956043955e-05, + "loss": 0.1387, + "step": 12317 + }, + { + "epoch": 33.84065934065934, + "grad_norm": 11.824161529541016, + "learning_rate": 3.307967032967033e-05, + "loss": 0.2348, + "step": 12318 + }, + { + "epoch": 33.84340659340659, + "grad_norm": 15.166644096374512, + "learning_rate": 3.307829670329671e-05, + "loss": 0.5965, + "step": 12319 + }, + { + "epoch": 33.84615384615385, + "grad_norm": 11.619965553283691, + "learning_rate": 3.307692307692308e-05, + "loss": 0.3489, + "step": 12320 + }, + { + "epoch": 33.8489010989011, + "grad_norm": 12.556857109069824, + "learning_rate": 3.3075549450549455e-05, + "loss": 0.2507, + "step": 12321 + }, + { + "epoch": 33.85164835164835, + "grad_norm": 18.308683395385742, + "learning_rate": 3.3074175824175825e-05, + "loss": 0.5984, + "step": 12322 + }, + { + "epoch": 33.854395604395606, + "grad_norm": 10.993738174438477, + "learning_rate": 3.30728021978022e-05, + "loss": 0.2515, + "step": 12323 + }, + { + "epoch": 33.857142857142854, + "grad_norm": 13.549039840698242, + "learning_rate": 3.307142857142858e-05, + "loss": 0.448, + "step": 12324 + }, + { + "epoch": 33.85989010989011, + "grad_norm": 11.382576942443848, + "learning_rate": 3.307005494505495e-05, + "loss": 0.3271, + "step": 12325 + }, + { + "epoch": 33.862637362637365, + "grad_norm": 4.104611396789551, + "learning_rate": 3.306868131868132e-05, + "loss": 0.1079, + "step": 12326 + }, + { + "epoch": 33.86538461538461, + "grad_norm": 10.653169631958008, + "learning_rate": 3.306730769230769e-05, + "loss": 0.244, + "step": 12327 + }, + { + "epoch": 33.86813186813187, + "grad_norm": 20.420730590820312, + "learning_rate": 3.3065934065934065e-05, + "loss": 0.5681, + "step": 12328 + }, + { + "epoch": 33.870879120879124, + "grad_norm": 6.122447967529297, + "learning_rate": 3.306456043956044e-05, + "loss": 0.11, + "step": 12329 + }, + { + "epoch": 33.87362637362637, + "grad_norm": 16.465768814086914, + "learning_rate": 3.306318681318681e-05, + "loss": 0.5, + "step": 12330 + }, + { + "epoch": 33.87637362637363, + "grad_norm": 7.505776405334473, + "learning_rate": 3.306181318681319e-05, + "loss": 0.1058, + "step": 12331 + }, + { + "epoch": 33.879120879120876, + "grad_norm": 19.264007568359375, + "learning_rate": 3.306043956043956e-05, + "loss": 0.5215, + "step": 12332 + }, + { + "epoch": 33.88186813186813, + "grad_norm": 14.478468894958496, + "learning_rate": 3.3059065934065936e-05, + "loss": 0.4471, + "step": 12333 + }, + { + "epoch": 33.88461538461539, + "grad_norm": 11.21120834350586, + "learning_rate": 3.305769230769231e-05, + "loss": 0.1942, + "step": 12334 + }, + { + "epoch": 33.887362637362635, + "grad_norm": 17.76900863647461, + "learning_rate": 3.305631868131868e-05, + "loss": 0.5771, + "step": 12335 + }, + { + "epoch": 33.89010989010989, + "grad_norm": 20.214950561523438, + "learning_rate": 3.305494505494506e-05, + "loss": 0.5046, + "step": 12336 + }, + { + "epoch": 33.892857142857146, + "grad_norm": 13.369873046875, + "learning_rate": 3.305357142857143e-05, + "loss": 0.4448, + "step": 12337 + }, + { + "epoch": 33.895604395604394, + "grad_norm": 8.54749870300293, + "learning_rate": 3.3052197802197806e-05, + "loss": 0.2112, + "step": 12338 + }, + { + "epoch": 33.89835164835165, + "grad_norm": 9.607511520385742, + "learning_rate": 3.305082417582418e-05, + "loss": 0.2498, + "step": 12339 + }, + { + "epoch": 33.9010989010989, + "grad_norm": 16.01018524169922, + "learning_rate": 3.304945054945055e-05, + "loss": 0.3707, + "step": 12340 + }, + { + "epoch": 33.90384615384615, + "grad_norm": 18.260574340820312, + "learning_rate": 3.304807692307692e-05, + "loss": 0.3878, + "step": 12341 + }, + { + "epoch": 33.90659340659341, + "grad_norm": 12.939258575439453, + "learning_rate": 3.304670329670329e-05, + "loss": 0.4866, + "step": 12342 + }, + { + "epoch": 33.90934065934066, + "grad_norm": 11.536125183105469, + "learning_rate": 3.304532967032967e-05, + "loss": 0.4682, + "step": 12343 + }, + { + "epoch": 33.91208791208791, + "grad_norm": 8.283768653869629, + "learning_rate": 3.304395604395605e-05, + "loss": 0.1863, + "step": 12344 + }, + { + "epoch": 33.91483516483517, + "grad_norm": 16.11966323852539, + "learning_rate": 3.304258241758242e-05, + "loss": 0.4586, + "step": 12345 + }, + { + "epoch": 33.917582417582416, + "grad_norm": 22.440513610839844, + "learning_rate": 3.3041208791208794e-05, + "loss": 0.5838, + "step": 12346 + }, + { + "epoch": 33.92032967032967, + "grad_norm": 7.1061601638793945, + "learning_rate": 3.3039835164835164e-05, + "loss": 0.2119, + "step": 12347 + }, + { + "epoch": 33.92307692307692, + "grad_norm": 18.527448654174805, + "learning_rate": 3.303846153846154e-05, + "loss": 0.7086, + "step": 12348 + }, + { + "epoch": 33.925824175824175, + "grad_norm": 13.293109893798828, + "learning_rate": 3.303708791208792e-05, + "loss": 0.175, + "step": 12349 + }, + { + "epoch": 33.92857142857143, + "grad_norm": 16.460378646850586, + "learning_rate": 3.303571428571429e-05, + "loss": 0.4217, + "step": 12350 + }, + { + "epoch": 33.93131868131868, + "grad_norm": 7.471543788909912, + "learning_rate": 3.3034340659340664e-05, + "loss": 0.2133, + "step": 12351 + }, + { + "epoch": 33.934065934065934, + "grad_norm": 19.260326385498047, + "learning_rate": 3.3032967032967034e-05, + "loss": 0.5789, + "step": 12352 + }, + { + "epoch": 33.93681318681319, + "grad_norm": 11.3275785446167, + "learning_rate": 3.303159340659341e-05, + "loss": 0.2356, + "step": 12353 + }, + { + "epoch": 33.93956043956044, + "grad_norm": 12.666815757751465, + "learning_rate": 3.303021978021979e-05, + "loss": 0.343, + "step": 12354 + }, + { + "epoch": 33.94230769230769, + "grad_norm": 11.191527366638184, + "learning_rate": 3.302884615384616e-05, + "loss": 0.3976, + "step": 12355 + }, + { + "epoch": 33.94505494505494, + "grad_norm": 11.715387344360352, + "learning_rate": 3.302747252747253e-05, + "loss": 0.2221, + "step": 12356 + }, + { + "epoch": 33.9478021978022, + "grad_norm": 12.380816459655762, + "learning_rate": 3.30260989010989e-05, + "loss": 0.3032, + "step": 12357 + }, + { + "epoch": 33.95054945054945, + "grad_norm": 5.988814353942871, + "learning_rate": 3.3024725274725274e-05, + "loss": 0.1055, + "step": 12358 + }, + { + "epoch": 33.9532967032967, + "grad_norm": 6.986269474029541, + "learning_rate": 3.302335164835165e-05, + "loss": 0.2568, + "step": 12359 + }, + { + "epoch": 33.956043956043956, + "grad_norm": 11.634483337402344, + "learning_rate": 3.302197802197802e-05, + "loss": 0.251, + "step": 12360 + }, + { + "epoch": 33.95879120879121, + "grad_norm": 20.085508346557617, + "learning_rate": 3.30206043956044e-05, + "loss": 0.3711, + "step": 12361 + }, + { + "epoch": 33.96153846153846, + "grad_norm": 17.307523727416992, + "learning_rate": 3.301923076923077e-05, + "loss": 0.3111, + "step": 12362 + }, + { + "epoch": 33.964285714285715, + "grad_norm": 11.72010612487793, + "learning_rate": 3.3017857142857145e-05, + "loss": 0.336, + "step": 12363 + }, + { + "epoch": 33.967032967032964, + "grad_norm": 6.757371425628662, + "learning_rate": 3.301648351648352e-05, + "loss": 0.1335, + "step": 12364 + }, + { + "epoch": 33.96978021978022, + "grad_norm": 15.190878868103027, + "learning_rate": 3.301510989010989e-05, + "loss": 0.5557, + "step": 12365 + }, + { + "epoch": 33.972527472527474, + "grad_norm": 8.165063858032227, + "learning_rate": 3.301373626373627e-05, + "loss": 0.22, + "step": 12366 + }, + { + "epoch": 33.97527472527472, + "grad_norm": 15.899893760681152, + "learning_rate": 3.301236263736264e-05, + "loss": 0.4345, + "step": 12367 + }, + { + "epoch": 33.97802197802198, + "grad_norm": 12.213248252868652, + "learning_rate": 3.3010989010989015e-05, + "loss": 0.3761, + "step": 12368 + }, + { + "epoch": 33.98076923076923, + "grad_norm": 23.346139907836914, + "learning_rate": 3.300961538461539e-05, + "loss": 0.7198, + "step": 12369 + }, + { + "epoch": 33.98351648351648, + "grad_norm": 10.722715377807617, + "learning_rate": 3.300824175824176e-05, + "loss": 0.3716, + "step": 12370 + }, + { + "epoch": 33.98626373626374, + "grad_norm": 10.489879608154297, + "learning_rate": 3.300686813186813e-05, + "loss": 0.2523, + "step": 12371 + }, + { + "epoch": 33.98901098901099, + "grad_norm": 7.7709527015686035, + "learning_rate": 3.30054945054945e-05, + "loss": 0.3016, + "step": 12372 + }, + { + "epoch": 33.99175824175824, + "grad_norm": 7.901221752166748, + "learning_rate": 3.300412087912088e-05, + "loss": 0.2289, + "step": 12373 + }, + { + "epoch": 33.994505494505496, + "grad_norm": 13.165050506591797, + "learning_rate": 3.3002747252747256e-05, + "loss": 0.3851, + "step": 12374 + }, + { + "epoch": 33.997252747252745, + "grad_norm": 8.459501266479492, + "learning_rate": 3.3001373626373626e-05, + "loss": 0.132, + "step": 12375 + }, + { + "epoch": 34.0, + "grad_norm": 27.696208953857422, + "learning_rate": 3.3e-05, + "loss": 0.3842, + "step": 12376 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.8429752066115702, + "eval_f1": 0.8396572988021751, + "eval_f1_DuraRiadoRio_64x64": 0.8654545454545455, + "eval_f1_Mole_64x64": 0.85, + "eval_f1_Quebrado_64x64": 0.8761904761904762, + "eval_f1_RiadoRio_64x64": 0.694980694980695, + "eval_f1_RioFechado_64x64": 0.911660777385159, + "eval_loss": 0.5938862562179565, + "eval_precision": 0.8488313751000373, + "eval_precision_DuraRiadoRio_64x64": 0.9083969465648855, + "eval_precision_Mole_64x64": 0.7727272727272727, + "eval_precision_Quebrado_64x64": 0.8070175438596491, + "eval_precision_RiadoRio_64x64": 0.8411214953271028, + "eval_precision_RioFechado_64x64": 0.9148936170212766, + "eval_recall": 0.8459445268099828, + "eval_recall_DuraRiadoRio_64x64": 0.8263888888888888, + "eval_recall_Mole_64x64": 0.9444444444444444, + "eval_recall_Quebrado_64x64": 0.9583333333333334, + "eval_recall_RiadoRio_64x64": 0.5921052631578947, + "eval_recall_RioFechado_64x64": 0.9084507042253521, + "eval_runtime": 1.7766, + "eval_samples_per_second": 408.655, + "eval_steps_per_second": 25.893, + "step": 12376 + }, + { + "epoch": 34.002747252747255, + "grad_norm": 10.244783401489258, + "learning_rate": 3.299862637362637e-05, + "loss": 0.1934, + "step": 12377 + }, + { + "epoch": 34.005494505494504, + "grad_norm": 21.26896858215332, + "learning_rate": 3.299725274725275e-05, + "loss": 0.5757, + "step": 12378 + }, + { + "epoch": 34.00824175824176, + "grad_norm": 11.577315330505371, + "learning_rate": 3.2995879120879126e-05, + "loss": 0.2242, + "step": 12379 + }, + { + "epoch": 34.010989010989015, + "grad_norm": 6.533620357513428, + "learning_rate": 3.2994505494505496e-05, + "loss": 0.1654, + "step": 12380 + }, + { + "epoch": 34.01373626373626, + "grad_norm": 16.762161254882812, + "learning_rate": 3.299313186813187e-05, + "loss": 0.5503, + "step": 12381 + }, + { + "epoch": 34.01648351648352, + "grad_norm": 17.38370132446289, + "learning_rate": 3.299175824175824e-05, + "loss": 0.5105, + "step": 12382 + }, + { + "epoch": 34.01923076923077, + "grad_norm": 18.92963218688965, + "learning_rate": 3.299038461538462e-05, + "loss": 0.8407, + "step": 12383 + }, + { + "epoch": 34.02197802197802, + "grad_norm": 13.56999397277832, + "learning_rate": 3.2989010989010997e-05, + "loss": 0.3243, + "step": 12384 + }, + { + "epoch": 34.02472527472528, + "grad_norm": 18.964298248291016, + "learning_rate": 3.2987637362637367e-05, + "loss": 0.4145, + "step": 12385 + }, + { + "epoch": 34.027472527472526, + "grad_norm": 5.072268962860107, + "learning_rate": 3.2986263736263737e-05, + "loss": 0.1068, + "step": 12386 + }, + { + "epoch": 34.03021978021978, + "grad_norm": 12.543063163757324, + "learning_rate": 3.298489010989011e-05, + "loss": 0.1837, + "step": 12387 + }, + { + "epoch": 34.032967032967036, + "grad_norm": 7.30390739440918, + "learning_rate": 3.2983516483516483e-05, + "loss": 0.1004, + "step": 12388 + }, + { + "epoch": 34.035714285714285, + "grad_norm": 10.468893051147461, + "learning_rate": 3.298214285714286e-05, + "loss": 0.2608, + "step": 12389 + }, + { + "epoch": 34.03846153846154, + "grad_norm": 11.530769348144531, + "learning_rate": 3.298076923076923e-05, + "loss": 0.3589, + "step": 12390 + }, + { + "epoch": 34.04120879120879, + "grad_norm": 21.10366439819336, + "learning_rate": 3.297939560439561e-05, + "loss": 0.7111, + "step": 12391 + }, + { + "epoch": 34.043956043956044, + "grad_norm": 8.926756858825684, + "learning_rate": 3.297802197802198e-05, + "loss": 0.1775, + "step": 12392 + }, + { + "epoch": 34.0467032967033, + "grad_norm": 11.897303581237793, + "learning_rate": 3.2976648351648354e-05, + "loss": 0.2282, + "step": 12393 + }, + { + "epoch": 34.04945054945055, + "grad_norm": 13.675589561462402, + "learning_rate": 3.297527472527473e-05, + "loss": 0.3076, + "step": 12394 + }, + { + "epoch": 34.0521978021978, + "grad_norm": 6.189232349395752, + "learning_rate": 3.29739010989011e-05, + "loss": 0.1333, + "step": 12395 + }, + { + "epoch": 34.05494505494506, + "grad_norm": 6.911462306976318, + "learning_rate": 3.297252747252748e-05, + "loss": 0.2003, + "step": 12396 + }, + { + "epoch": 34.05769230769231, + "grad_norm": 9.766565322875977, + "learning_rate": 3.297115384615385e-05, + "loss": 0.1311, + "step": 12397 + }, + { + "epoch": 34.06043956043956, + "grad_norm": 11.645977020263672, + "learning_rate": 3.2969780219780224e-05, + "loss": 0.297, + "step": 12398 + }, + { + "epoch": 34.06318681318681, + "grad_norm": 10.56629753112793, + "learning_rate": 3.2968406593406594e-05, + "loss": 0.197, + "step": 12399 + }, + { + "epoch": 34.065934065934066, + "grad_norm": 18.38642120361328, + "learning_rate": 3.296703296703297e-05, + "loss": 0.7349, + "step": 12400 + }, + { + "epoch": 34.06868131868132, + "grad_norm": 7.769867897033691, + "learning_rate": 3.296565934065934e-05, + "loss": 0.1746, + "step": 12401 + }, + { + "epoch": 34.07142857142857, + "grad_norm": 18.50928497314453, + "learning_rate": 3.296428571428571e-05, + "loss": 0.5286, + "step": 12402 + }, + { + "epoch": 34.074175824175825, + "grad_norm": 4.6421589851379395, + "learning_rate": 3.296291208791209e-05, + "loss": 0.0726, + "step": 12403 + }, + { + "epoch": 34.07692307692308, + "grad_norm": 8.733223915100098, + "learning_rate": 3.296153846153846e-05, + "loss": 0.1702, + "step": 12404 + }, + { + "epoch": 34.07967032967033, + "grad_norm": 10.235466003417969, + "learning_rate": 3.2960164835164835e-05, + "loss": 0.2565, + "step": 12405 + }, + { + "epoch": 34.082417582417584, + "grad_norm": 14.175930976867676, + "learning_rate": 3.295879120879121e-05, + "loss": 0.3574, + "step": 12406 + }, + { + "epoch": 34.08516483516483, + "grad_norm": 11.389106750488281, + "learning_rate": 3.295741758241758e-05, + "loss": 0.1827, + "step": 12407 + }, + { + "epoch": 34.08791208791209, + "grad_norm": 8.44059944152832, + "learning_rate": 3.295604395604396e-05, + "loss": 0.179, + "step": 12408 + }, + { + "epoch": 34.09065934065934, + "grad_norm": 22.013120651245117, + "learning_rate": 3.295467032967033e-05, + "loss": 0.6104, + "step": 12409 + }, + { + "epoch": 34.09340659340659, + "grad_norm": 13.760896682739258, + "learning_rate": 3.2953296703296705e-05, + "loss": 0.3763, + "step": 12410 + }, + { + "epoch": 34.09615384615385, + "grad_norm": 15.351537704467773, + "learning_rate": 3.295192307692308e-05, + "loss": 0.4997, + "step": 12411 + }, + { + "epoch": 34.0989010989011, + "grad_norm": 11.963019371032715, + "learning_rate": 3.295054945054945e-05, + "loss": 0.2445, + "step": 12412 + }, + { + "epoch": 34.10164835164835, + "grad_norm": 11.421365737915039, + "learning_rate": 3.294917582417583e-05, + "loss": 0.3161, + "step": 12413 + }, + { + "epoch": 34.104395604395606, + "grad_norm": 18.99520492553711, + "learning_rate": 3.29478021978022e-05, + "loss": 0.569, + "step": 12414 + }, + { + "epoch": 34.107142857142854, + "grad_norm": 18.681917190551758, + "learning_rate": 3.2946428571428576e-05, + "loss": 0.5395, + "step": 12415 + }, + { + "epoch": 34.10989010989011, + "grad_norm": 15.021700859069824, + "learning_rate": 3.2945054945054946e-05, + "loss": 0.3262, + "step": 12416 + }, + { + "epoch": 34.112637362637365, + "grad_norm": 13.292144775390625, + "learning_rate": 3.2943681318681316e-05, + "loss": 0.2252, + "step": 12417 + }, + { + "epoch": 34.11538461538461, + "grad_norm": 19.753755569458008, + "learning_rate": 3.294230769230769e-05, + "loss": 0.3874, + "step": 12418 + }, + { + "epoch": 34.11813186813187, + "grad_norm": 20.093374252319336, + "learning_rate": 3.294093406593406e-05, + "loss": 0.5218, + "step": 12419 + }, + { + "epoch": 34.120879120879124, + "grad_norm": 18.495677947998047, + "learning_rate": 3.293956043956044e-05, + "loss": 0.6007, + "step": 12420 + }, + { + "epoch": 34.12362637362637, + "grad_norm": 11.64976978302002, + "learning_rate": 3.2938186813186816e-05, + "loss": 0.2556, + "step": 12421 + }, + { + "epoch": 34.12637362637363, + "grad_norm": 11.751777648925781, + "learning_rate": 3.2936813186813186e-05, + "loss": 0.3597, + "step": 12422 + }, + { + "epoch": 34.129120879120876, + "grad_norm": 15.92788028717041, + "learning_rate": 3.293543956043956e-05, + "loss": 0.5334, + "step": 12423 + }, + { + "epoch": 34.13186813186813, + "grad_norm": 19.696447372436523, + "learning_rate": 3.293406593406593e-05, + "loss": 0.4902, + "step": 12424 + }, + { + "epoch": 34.13461538461539, + "grad_norm": 12.569463729858398, + "learning_rate": 3.293269230769231e-05, + "loss": 0.3518, + "step": 12425 + }, + { + "epoch": 34.137362637362635, + "grad_norm": 11.658623695373535, + "learning_rate": 3.2931318681318686e-05, + "loss": 0.197, + "step": 12426 + }, + { + "epoch": 34.14010989010989, + "grad_norm": 10.00070571899414, + "learning_rate": 3.2929945054945056e-05, + "loss": 0.3179, + "step": 12427 + }, + { + "epoch": 34.142857142857146, + "grad_norm": 14.835798263549805, + "learning_rate": 3.292857142857143e-05, + "loss": 0.3759, + "step": 12428 + }, + { + "epoch": 34.145604395604394, + "grad_norm": 8.544156074523926, + "learning_rate": 3.29271978021978e-05, + "loss": 0.1586, + "step": 12429 + }, + { + "epoch": 34.14835164835165, + "grad_norm": 7.832921028137207, + "learning_rate": 3.292582417582418e-05, + "loss": 0.1163, + "step": 12430 + }, + { + "epoch": 34.1510989010989, + "grad_norm": 9.413276672363281, + "learning_rate": 3.292445054945055e-05, + "loss": 0.2116, + "step": 12431 + }, + { + "epoch": 34.15384615384615, + "grad_norm": 11.674419403076172, + "learning_rate": 3.292307692307692e-05, + "loss": 0.2388, + "step": 12432 + }, + { + "epoch": 34.15659340659341, + "grad_norm": 8.465036392211914, + "learning_rate": 3.29217032967033e-05, + "loss": 0.241, + "step": 12433 + }, + { + "epoch": 34.15934065934066, + "grad_norm": 17.485986709594727, + "learning_rate": 3.292032967032967e-05, + "loss": 0.3826, + "step": 12434 + }, + { + "epoch": 34.16208791208791, + "grad_norm": 12.426090240478516, + "learning_rate": 3.2918956043956044e-05, + "loss": 0.3858, + "step": 12435 + }, + { + "epoch": 34.16483516483517, + "grad_norm": 16.17557716369629, + "learning_rate": 3.291758241758242e-05, + "loss": 0.2734, + "step": 12436 + }, + { + "epoch": 34.167582417582416, + "grad_norm": 6.757792949676514, + "learning_rate": 3.291620879120879e-05, + "loss": 0.0875, + "step": 12437 + }, + { + "epoch": 34.17032967032967, + "grad_norm": 14.71623420715332, + "learning_rate": 3.291483516483517e-05, + "loss": 0.334, + "step": 12438 + }, + { + "epoch": 34.17307692307692, + "grad_norm": 6.842291355133057, + "learning_rate": 3.291346153846154e-05, + "loss": 0.1411, + "step": 12439 + }, + { + "epoch": 34.175824175824175, + "grad_norm": 16.616701126098633, + "learning_rate": 3.2912087912087914e-05, + "loss": 0.3162, + "step": 12440 + }, + { + "epoch": 34.17857142857143, + "grad_norm": 5.513857841491699, + "learning_rate": 3.291071428571429e-05, + "loss": 0.1112, + "step": 12441 + }, + { + "epoch": 34.18131868131868, + "grad_norm": 2.7297799587249756, + "learning_rate": 3.290934065934066e-05, + "loss": 0.0586, + "step": 12442 + }, + { + "epoch": 34.184065934065934, + "grad_norm": 10.58809757232666, + "learning_rate": 3.290796703296704e-05, + "loss": 0.2325, + "step": 12443 + }, + { + "epoch": 34.18681318681319, + "grad_norm": 11.413763999938965, + "learning_rate": 3.290659340659341e-05, + "loss": 0.3056, + "step": 12444 + }, + { + "epoch": 34.18956043956044, + "grad_norm": 17.819536209106445, + "learning_rate": 3.2905219780219785e-05, + "loss": 0.5043, + "step": 12445 + }, + { + "epoch": 34.19230769230769, + "grad_norm": 13.0699462890625, + "learning_rate": 3.2903846153846155e-05, + "loss": 0.311, + "step": 12446 + }, + { + "epoch": 34.19505494505494, + "grad_norm": 6.171142101287842, + "learning_rate": 3.2902472527472525e-05, + "loss": 0.2002, + "step": 12447 + }, + { + "epoch": 34.1978021978022, + "grad_norm": 13.378907203674316, + "learning_rate": 3.29010989010989e-05, + "loss": 0.3098, + "step": 12448 + }, + { + "epoch": 34.20054945054945, + "grad_norm": 15.563434600830078, + "learning_rate": 3.289972527472527e-05, + "loss": 0.3578, + "step": 12449 + }, + { + "epoch": 34.2032967032967, + "grad_norm": 11.226622581481934, + "learning_rate": 3.289835164835165e-05, + "loss": 0.1918, + "step": 12450 + }, + { + "epoch": 34.206043956043956, + "grad_norm": 14.959722518920898, + "learning_rate": 3.2896978021978025e-05, + "loss": 0.5105, + "step": 12451 + }, + { + "epoch": 34.20879120879121, + "grad_norm": 9.905826568603516, + "learning_rate": 3.2895604395604395e-05, + "loss": 0.1864, + "step": 12452 + }, + { + "epoch": 34.21153846153846, + "grad_norm": 17.499826431274414, + "learning_rate": 3.289423076923077e-05, + "loss": 0.7274, + "step": 12453 + }, + { + "epoch": 34.214285714285715, + "grad_norm": 4.338573932647705, + "learning_rate": 3.289285714285714e-05, + "loss": 0.097, + "step": 12454 + }, + { + "epoch": 34.217032967032964, + "grad_norm": 5.498216152191162, + "learning_rate": 3.289148351648352e-05, + "loss": 0.1786, + "step": 12455 + }, + { + "epoch": 34.21978021978022, + "grad_norm": 3.620877981185913, + "learning_rate": 3.2890109890109895e-05, + "loss": 0.0616, + "step": 12456 + }, + { + "epoch": 34.222527472527474, + "grad_norm": 7.137650012969971, + "learning_rate": 3.2888736263736265e-05, + "loss": 0.1744, + "step": 12457 + }, + { + "epoch": 34.22527472527472, + "grad_norm": 8.38719367980957, + "learning_rate": 3.288736263736264e-05, + "loss": 0.1428, + "step": 12458 + }, + { + "epoch": 34.22802197802198, + "grad_norm": 6.388996124267578, + "learning_rate": 3.288598901098901e-05, + "loss": 0.1502, + "step": 12459 + }, + { + "epoch": 34.23076923076923, + "grad_norm": 5.311285972595215, + "learning_rate": 3.288461538461539e-05, + "loss": 0.1352, + "step": 12460 + }, + { + "epoch": 34.23351648351648, + "grad_norm": 15.586581230163574, + "learning_rate": 3.288324175824176e-05, + "loss": 0.4491, + "step": 12461 + }, + { + "epoch": 34.23626373626374, + "grad_norm": 9.078309059143066, + "learning_rate": 3.288186813186813e-05, + "loss": 0.2753, + "step": 12462 + }, + { + "epoch": 34.239010989010985, + "grad_norm": 10.071200370788574, + "learning_rate": 3.2880494505494506e-05, + "loss": 0.1887, + "step": 12463 + }, + { + "epoch": 34.24175824175824, + "grad_norm": 6.533703804016113, + "learning_rate": 3.2879120879120876e-05, + "loss": 0.1398, + "step": 12464 + }, + { + "epoch": 34.244505494505496, + "grad_norm": 6.536171913146973, + "learning_rate": 3.287774725274725e-05, + "loss": 0.2288, + "step": 12465 + }, + { + "epoch": 34.247252747252745, + "grad_norm": 8.561832427978516, + "learning_rate": 3.287637362637363e-05, + "loss": 0.1117, + "step": 12466 + }, + { + "epoch": 34.25, + "grad_norm": 16.944934844970703, + "learning_rate": 3.2875e-05, + "loss": 0.4168, + "step": 12467 + }, + { + "epoch": 34.252747252747255, + "grad_norm": 27.292333602905273, + "learning_rate": 3.2873626373626376e-05, + "loss": 1.2563, + "step": 12468 + }, + { + "epoch": 34.255494505494504, + "grad_norm": 10.080665588378906, + "learning_rate": 3.2872252747252746e-05, + "loss": 0.3469, + "step": 12469 + }, + { + "epoch": 34.25824175824176, + "grad_norm": 7.383419513702393, + "learning_rate": 3.287087912087912e-05, + "loss": 0.1566, + "step": 12470 + }, + { + "epoch": 34.260989010989015, + "grad_norm": 12.153979301452637, + "learning_rate": 3.28695054945055e-05, + "loss": 0.198, + "step": 12471 + }, + { + "epoch": 34.26373626373626, + "grad_norm": 6.735881805419922, + "learning_rate": 3.286813186813187e-05, + "loss": 0.1242, + "step": 12472 + }, + { + "epoch": 34.26648351648352, + "grad_norm": 2.7894034385681152, + "learning_rate": 3.286675824175825e-05, + "loss": 0.0518, + "step": 12473 + }, + { + "epoch": 34.26923076923077, + "grad_norm": 9.350672721862793, + "learning_rate": 3.286538461538462e-05, + "loss": 0.2562, + "step": 12474 + }, + { + "epoch": 34.27197802197802, + "grad_norm": 6.9584879875183105, + "learning_rate": 3.2864010989010994e-05, + "loss": 0.1165, + "step": 12475 + }, + { + "epoch": 34.27472527472528, + "grad_norm": 13.16452407836914, + "learning_rate": 3.2862637362637364e-05, + "loss": 0.3752, + "step": 12476 + }, + { + "epoch": 34.277472527472526, + "grad_norm": 7.235367298126221, + "learning_rate": 3.2861263736263734e-05, + "loss": 0.0913, + "step": 12477 + }, + { + "epoch": 34.28021978021978, + "grad_norm": 11.560012817382812, + "learning_rate": 3.285989010989011e-05, + "loss": 0.3266, + "step": 12478 + }, + { + "epoch": 34.282967032967036, + "grad_norm": 11.218254089355469, + "learning_rate": 3.285851648351648e-05, + "loss": 0.2864, + "step": 12479 + }, + { + "epoch": 34.285714285714285, + "grad_norm": 11.900146484375, + "learning_rate": 3.285714285714286e-05, + "loss": 0.1777, + "step": 12480 + }, + { + "epoch": 34.28846153846154, + "grad_norm": 19.40536117553711, + "learning_rate": 3.2855769230769234e-05, + "loss": 0.8039, + "step": 12481 + }, + { + "epoch": 34.29120879120879, + "grad_norm": 8.291423797607422, + "learning_rate": 3.2854395604395604e-05, + "loss": 0.1396, + "step": 12482 + }, + { + "epoch": 34.293956043956044, + "grad_norm": 6.312324523925781, + "learning_rate": 3.285302197802198e-05, + "loss": 0.0913, + "step": 12483 + }, + { + "epoch": 34.2967032967033, + "grad_norm": 10.472886085510254, + "learning_rate": 3.285164835164835e-05, + "loss": 0.3839, + "step": 12484 + }, + { + "epoch": 34.29945054945055, + "grad_norm": 15.740071296691895, + "learning_rate": 3.285027472527473e-05, + "loss": 0.3454, + "step": 12485 + }, + { + "epoch": 34.3021978021978, + "grad_norm": 13.001035690307617, + "learning_rate": 3.2848901098901104e-05, + "loss": 0.4388, + "step": 12486 + }, + { + "epoch": 34.30494505494506, + "grad_norm": 10.01260757446289, + "learning_rate": 3.2847527472527474e-05, + "loss": 0.3793, + "step": 12487 + }, + { + "epoch": 34.30769230769231, + "grad_norm": 9.251896858215332, + "learning_rate": 3.284615384615385e-05, + "loss": 0.1568, + "step": 12488 + }, + { + "epoch": 34.31043956043956, + "grad_norm": 6.41449499130249, + "learning_rate": 3.284478021978022e-05, + "loss": 0.2146, + "step": 12489 + }, + { + "epoch": 34.31318681318681, + "grad_norm": 17.484169006347656, + "learning_rate": 3.28434065934066e-05, + "loss": 0.267, + "step": 12490 + }, + { + "epoch": 34.315934065934066, + "grad_norm": 11.686661720275879, + "learning_rate": 3.284203296703297e-05, + "loss": 0.3661, + "step": 12491 + }, + { + "epoch": 34.31868131868132, + "grad_norm": 8.934728622436523, + "learning_rate": 3.284065934065934e-05, + "loss": 0.1854, + "step": 12492 + }, + { + "epoch": 34.32142857142857, + "grad_norm": 11.90088939666748, + "learning_rate": 3.2839285714285715e-05, + "loss": 0.2854, + "step": 12493 + }, + { + "epoch": 34.324175824175825, + "grad_norm": 12.490768432617188, + "learning_rate": 3.2837912087912085e-05, + "loss": 0.2654, + "step": 12494 + }, + { + "epoch": 34.32692307692308, + "grad_norm": 13.638689994812012, + "learning_rate": 3.283653846153846e-05, + "loss": 0.1987, + "step": 12495 + }, + { + "epoch": 34.32967032967033, + "grad_norm": 11.471529960632324, + "learning_rate": 3.283516483516484e-05, + "loss": 0.2736, + "step": 12496 + }, + { + "epoch": 34.332417582417584, + "grad_norm": 9.634547233581543, + "learning_rate": 3.283379120879121e-05, + "loss": 0.2576, + "step": 12497 + }, + { + "epoch": 34.33516483516483, + "grad_norm": 6.0089192390441895, + "learning_rate": 3.2832417582417585e-05, + "loss": 0.2156, + "step": 12498 + }, + { + "epoch": 34.33791208791209, + "grad_norm": 14.771797180175781, + "learning_rate": 3.2831043956043955e-05, + "loss": 0.4648, + "step": 12499 + }, + { + "epoch": 34.34065934065934, + "grad_norm": 3.2000131607055664, + "learning_rate": 3.282967032967033e-05, + "loss": 0.0472, + "step": 12500 + }, + { + "epoch": 34.34340659340659, + "grad_norm": 9.125516891479492, + "learning_rate": 3.282829670329671e-05, + "loss": 0.1999, + "step": 12501 + }, + { + "epoch": 34.34615384615385, + "grad_norm": 11.92603874206543, + "learning_rate": 3.282692307692308e-05, + "loss": 0.4466, + "step": 12502 + }, + { + "epoch": 34.3489010989011, + "grad_norm": 14.070782661437988, + "learning_rate": 3.2825549450549456e-05, + "loss": 0.3031, + "step": 12503 + }, + { + "epoch": 34.35164835164835, + "grad_norm": 3.849407196044922, + "learning_rate": 3.2824175824175826e-05, + "loss": 0.0612, + "step": 12504 + }, + { + "epoch": 34.354395604395606, + "grad_norm": 6.95611047744751, + "learning_rate": 3.28228021978022e-05, + "loss": 0.1618, + "step": 12505 + }, + { + "epoch": 34.357142857142854, + "grad_norm": 11.227635383605957, + "learning_rate": 3.282142857142857e-05, + "loss": 0.192, + "step": 12506 + }, + { + "epoch": 34.35989010989011, + "grad_norm": 10.058408737182617, + "learning_rate": 3.282005494505494e-05, + "loss": 0.2272, + "step": 12507 + }, + { + "epoch": 34.362637362637365, + "grad_norm": 6.771273612976074, + "learning_rate": 3.281868131868132e-05, + "loss": 0.1084, + "step": 12508 + }, + { + "epoch": 34.36538461538461, + "grad_norm": 14.46347427368164, + "learning_rate": 3.281730769230769e-05, + "loss": 0.6129, + "step": 12509 + }, + { + "epoch": 34.36813186813187, + "grad_norm": 18.987838745117188, + "learning_rate": 3.2815934065934066e-05, + "loss": 0.6102, + "step": 12510 + }, + { + "epoch": 34.370879120879124, + "grad_norm": 16.34148597717285, + "learning_rate": 3.281456043956044e-05, + "loss": 0.3759, + "step": 12511 + }, + { + "epoch": 34.37362637362637, + "grad_norm": 13.791003227233887, + "learning_rate": 3.281318681318681e-05, + "loss": 0.4212, + "step": 12512 + }, + { + "epoch": 34.37637362637363, + "grad_norm": 9.685257911682129, + "learning_rate": 3.281181318681319e-05, + "loss": 0.1527, + "step": 12513 + }, + { + "epoch": 34.379120879120876, + "grad_norm": 19.775123596191406, + "learning_rate": 3.281043956043956e-05, + "loss": 0.5993, + "step": 12514 + }, + { + "epoch": 34.38186813186813, + "grad_norm": 16.629037857055664, + "learning_rate": 3.2809065934065937e-05, + "loss": 0.2483, + "step": 12515 + }, + { + "epoch": 34.38461538461539, + "grad_norm": 12.362595558166504, + "learning_rate": 3.280769230769231e-05, + "loss": 0.2074, + "step": 12516 + }, + { + "epoch": 34.387362637362635, + "grad_norm": 18.26288604736328, + "learning_rate": 3.280631868131868e-05, + "loss": 0.4438, + "step": 12517 + }, + { + "epoch": 34.39010989010989, + "grad_norm": 19.04238510131836, + "learning_rate": 3.280494505494506e-05, + "loss": 0.6653, + "step": 12518 + }, + { + "epoch": 34.392857142857146, + "grad_norm": 19.193002700805664, + "learning_rate": 3.280357142857143e-05, + "loss": 0.7566, + "step": 12519 + }, + { + "epoch": 34.395604395604394, + "grad_norm": 10.740224838256836, + "learning_rate": 3.280219780219781e-05, + "loss": 0.2742, + "step": 12520 + }, + { + "epoch": 34.39835164835165, + "grad_norm": 20.3518009185791, + "learning_rate": 3.280082417582418e-05, + "loss": 0.506, + "step": 12521 + }, + { + "epoch": 34.4010989010989, + "grad_norm": 5.329682350158691, + "learning_rate": 3.279945054945055e-05, + "loss": 0.1173, + "step": 12522 + }, + { + "epoch": 34.40384615384615, + "grad_norm": 15.080463409423828, + "learning_rate": 3.2798076923076924e-05, + "loss": 0.5739, + "step": 12523 + }, + { + "epoch": 34.40659340659341, + "grad_norm": 6.653066158294678, + "learning_rate": 3.2796703296703294e-05, + "loss": 0.108, + "step": 12524 + }, + { + "epoch": 34.40934065934066, + "grad_norm": 5.3466668128967285, + "learning_rate": 3.279532967032967e-05, + "loss": 0.1051, + "step": 12525 + }, + { + "epoch": 34.41208791208791, + "grad_norm": 12.65473461151123, + "learning_rate": 3.279395604395605e-05, + "loss": 0.2817, + "step": 12526 + }, + { + "epoch": 34.41483516483517, + "grad_norm": 11.849400520324707, + "learning_rate": 3.279258241758242e-05, + "loss": 0.3371, + "step": 12527 + }, + { + "epoch": 34.417582417582416, + "grad_norm": 6.489381313323975, + "learning_rate": 3.2791208791208794e-05, + "loss": 0.1729, + "step": 12528 + }, + { + "epoch": 34.42032967032967, + "grad_norm": 14.816705703735352, + "learning_rate": 3.2789835164835164e-05, + "loss": 0.4569, + "step": 12529 + }, + { + "epoch": 34.42307692307692, + "grad_norm": 13.176441192626953, + "learning_rate": 3.278846153846154e-05, + "loss": 0.235, + "step": 12530 + }, + { + "epoch": 34.425824175824175, + "grad_norm": 3.1699416637420654, + "learning_rate": 3.278708791208792e-05, + "loss": 0.036, + "step": 12531 + }, + { + "epoch": 34.42857142857143, + "grad_norm": 8.917859077453613, + "learning_rate": 3.278571428571429e-05, + "loss": 0.229, + "step": 12532 + }, + { + "epoch": 34.43131868131868, + "grad_norm": 9.674813270568848, + "learning_rate": 3.2784340659340665e-05, + "loss": 0.2562, + "step": 12533 + }, + { + "epoch": 34.434065934065934, + "grad_norm": 11.530549049377441, + "learning_rate": 3.2782967032967035e-05, + "loss": 0.1856, + "step": 12534 + }, + { + "epoch": 34.43681318681319, + "grad_norm": 15.820806503295898, + "learning_rate": 3.278159340659341e-05, + "loss": 0.3957, + "step": 12535 + }, + { + "epoch": 34.43956043956044, + "grad_norm": 14.136774063110352, + "learning_rate": 3.278021978021978e-05, + "loss": 0.2723, + "step": 12536 + }, + { + "epoch": 34.44230769230769, + "grad_norm": 11.057345390319824, + "learning_rate": 3.277884615384615e-05, + "loss": 0.2402, + "step": 12537 + }, + { + "epoch": 34.44505494505494, + "grad_norm": 8.3103609085083, + "learning_rate": 3.277747252747253e-05, + "loss": 0.1264, + "step": 12538 + }, + { + "epoch": 34.4478021978022, + "grad_norm": 6.275415420532227, + "learning_rate": 3.27760989010989e-05, + "loss": 0.1767, + "step": 12539 + }, + { + "epoch": 34.45054945054945, + "grad_norm": 9.168905258178711, + "learning_rate": 3.2774725274725275e-05, + "loss": 0.2527, + "step": 12540 + }, + { + "epoch": 34.4532967032967, + "grad_norm": 8.933794975280762, + "learning_rate": 3.277335164835165e-05, + "loss": 0.245, + "step": 12541 + }, + { + "epoch": 34.456043956043956, + "grad_norm": 10.675021171569824, + "learning_rate": 3.277197802197802e-05, + "loss": 0.3917, + "step": 12542 + }, + { + "epoch": 34.45879120879121, + "grad_norm": 17.427350997924805, + "learning_rate": 3.27706043956044e-05, + "loss": 0.4562, + "step": 12543 + }, + { + "epoch": 34.46153846153846, + "grad_norm": 13.536020278930664, + "learning_rate": 3.276923076923077e-05, + "loss": 0.3847, + "step": 12544 + }, + { + "epoch": 34.464285714285715, + "grad_norm": 20.816709518432617, + "learning_rate": 3.2767857142857146e-05, + "loss": 0.3093, + "step": 12545 + }, + { + "epoch": 34.467032967032964, + "grad_norm": 22.01938819885254, + "learning_rate": 3.276648351648352e-05, + "loss": 0.4407, + "step": 12546 + }, + { + "epoch": 34.46978021978022, + "grad_norm": 5.283707141876221, + "learning_rate": 3.276510989010989e-05, + "loss": 0.0878, + "step": 12547 + }, + { + "epoch": 34.472527472527474, + "grad_norm": 14.331253051757812, + "learning_rate": 3.276373626373627e-05, + "loss": 0.4358, + "step": 12548 + }, + { + "epoch": 34.47527472527472, + "grad_norm": 14.344727516174316, + "learning_rate": 3.276236263736264e-05, + "loss": 0.3518, + "step": 12549 + }, + { + "epoch": 34.47802197802198, + "grad_norm": 11.226815223693848, + "learning_rate": 3.2760989010989016e-05, + "loss": 0.3718, + "step": 12550 + }, + { + "epoch": 34.48076923076923, + "grad_norm": 8.31573486328125, + "learning_rate": 3.2759615384615386e-05, + "loss": 0.1958, + "step": 12551 + }, + { + "epoch": 34.48351648351648, + "grad_norm": 11.083487510681152, + "learning_rate": 3.2758241758241756e-05, + "loss": 0.2205, + "step": 12552 + }, + { + "epoch": 34.48626373626374, + "grad_norm": 16.900297164916992, + "learning_rate": 3.275686813186813e-05, + "loss": 0.3919, + "step": 12553 + }, + { + "epoch": 34.489010989010985, + "grad_norm": 9.348381042480469, + "learning_rate": 3.27554945054945e-05, + "loss": 0.1485, + "step": 12554 + }, + { + "epoch": 34.49175824175824, + "grad_norm": 14.323558807373047, + "learning_rate": 3.275412087912088e-05, + "loss": 0.3448, + "step": 12555 + }, + { + "epoch": 34.494505494505496, + "grad_norm": 28.426410675048828, + "learning_rate": 3.2752747252747256e-05, + "loss": 0.5678, + "step": 12556 + }, + { + "epoch": 34.497252747252745, + "grad_norm": 3.941702127456665, + "learning_rate": 3.2751373626373626e-05, + "loss": 0.0755, + "step": 12557 + }, + { + "epoch": 34.5, + "grad_norm": 8.430215835571289, + "learning_rate": 3.275e-05, + "loss": 0.1622, + "step": 12558 + }, + { + "epoch": 34.502747252747255, + "grad_norm": 9.319442749023438, + "learning_rate": 3.274862637362637e-05, + "loss": 0.3186, + "step": 12559 + }, + { + "epoch": 34.505494505494504, + "grad_norm": 23.929805755615234, + "learning_rate": 3.274725274725275e-05, + "loss": 0.4472, + "step": 12560 + }, + { + "epoch": 34.50824175824176, + "grad_norm": 18.26799774169922, + "learning_rate": 3.274587912087913e-05, + "loss": 0.4898, + "step": 12561 + }, + { + "epoch": 34.51098901098901, + "grad_norm": 14.617281913757324, + "learning_rate": 3.27445054945055e-05, + "loss": 0.5962, + "step": 12562 + }, + { + "epoch": 34.51373626373626, + "grad_norm": 18.751192092895508, + "learning_rate": 3.2743131868131874e-05, + "loss": 0.5281, + "step": 12563 + }, + { + "epoch": 34.51648351648352, + "grad_norm": 10.85023307800293, + "learning_rate": 3.2741758241758244e-05, + "loss": 0.2798, + "step": 12564 + }, + { + "epoch": 34.51923076923077, + "grad_norm": 15.201956748962402, + "learning_rate": 3.274038461538462e-05, + "loss": 0.3719, + "step": 12565 + }, + { + "epoch": 34.52197802197802, + "grad_norm": 13.530393600463867, + "learning_rate": 3.273901098901099e-05, + "loss": 0.2267, + "step": 12566 + }, + { + "epoch": 34.52472527472528, + "grad_norm": 3.4591312408447266, + "learning_rate": 3.273763736263736e-05, + "loss": 0.0576, + "step": 12567 + }, + { + "epoch": 34.527472527472526, + "grad_norm": 6.965660572052002, + "learning_rate": 3.273626373626374e-05, + "loss": 0.1509, + "step": 12568 + }, + { + "epoch": 34.53021978021978, + "grad_norm": 17.32884407043457, + "learning_rate": 3.273489010989011e-05, + "loss": 0.3758, + "step": 12569 + }, + { + "epoch": 34.532967032967036, + "grad_norm": 14.897051811218262, + "learning_rate": 3.2733516483516484e-05, + "loss": 0.4913, + "step": 12570 + }, + { + "epoch": 34.535714285714285, + "grad_norm": 10.6370267868042, + "learning_rate": 3.273214285714286e-05, + "loss": 0.391, + "step": 12571 + }, + { + "epoch": 34.53846153846154, + "grad_norm": 9.991246223449707, + "learning_rate": 3.273076923076923e-05, + "loss": 0.2062, + "step": 12572 + }, + { + "epoch": 34.54120879120879, + "grad_norm": 6.083110809326172, + "learning_rate": 3.272939560439561e-05, + "loss": 0.1429, + "step": 12573 + }, + { + "epoch": 34.543956043956044, + "grad_norm": 3.694887638092041, + "learning_rate": 3.272802197802198e-05, + "loss": 0.0854, + "step": 12574 + }, + { + "epoch": 34.5467032967033, + "grad_norm": 10.43628978729248, + "learning_rate": 3.2726648351648355e-05, + "loss": 0.2791, + "step": 12575 + }, + { + "epoch": 34.54945054945055, + "grad_norm": 12.363859176635742, + "learning_rate": 3.272527472527473e-05, + "loss": 0.4583, + "step": 12576 + }, + { + "epoch": 34.5521978021978, + "grad_norm": 16.62812042236328, + "learning_rate": 3.27239010989011e-05, + "loss": 0.2902, + "step": 12577 + }, + { + "epoch": 34.55494505494506, + "grad_norm": 8.159710884094238, + "learning_rate": 3.272252747252748e-05, + "loss": 0.1864, + "step": 12578 + }, + { + "epoch": 34.55769230769231, + "grad_norm": 12.618170738220215, + "learning_rate": 3.272115384615385e-05, + "loss": 0.2987, + "step": 12579 + }, + { + "epoch": 34.56043956043956, + "grad_norm": 10.4751558303833, + "learning_rate": 3.2719780219780225e-05, + "loss": 0.2315, + "step": 12580 + }, + { + "epoch": 34.56318681318681, + "grad_norm": 13.27663803100586, + "learning_rate": 3.2718406593406595e-05, + "loss": 0.2953, + "step": 12581 + }, + { + "epoch": 34.565934065934066, + "grad_norm": 20.27959632873535, + "learning_rate": 3.2717032967032965e-05, + "loss": 0.7815, + "step": 12582 + }, + { + "epoch": 34.56868131868132, + "grad_norm": 8.266077995300293, + "learning_rate": 3.271565934065934e-05, + "loss": 0.1801, + "step": 12583 + }, + { + "epoch": 34.57142857142857, + "grad_norm": 12.573859214782715, + "learning_rate": 3.271428571428571e-05, + "loss": 0.2168, + "step": 12584 + }, + { + "epoch": 34.574175824175825, + "grad_norm": 16.652652740478516, + "learning_rate": 3.271291208791209e-05, + "loss": 0.4374, + "step": 12585 + }, + { + "epoch": 34.57692307692308, + "grad_norm": 13.986705780029297, + "learning_rate": 3.2711538461538465e-05, + "loss": 0.2533, + "step": 12586 + }, + { + "epoch": 34.57967032967033, + "grad_norm": 12.593778610229492, + "learning_rate": 3.2710164835164835e-05, + "loss": 0.2726, + "step": 12587 + }, + { + "epoch": 34.582417582417584, + "grad_norm": 13.992915153503418, + "learning_rate": 3.270879120879121e-05, + "loss": 0.771, + "step": 12588 + }, + { + "epoch": 34.58516483516483, + "grad_norm": 10.965091705322266, + "learning_rate": 3.270741758241758e-05, + "loss": 0.147, + "step": 12589 + }, + { + "epoch": 34.58791208791209, + "grad_norm": 12.289957046508789, + "learning_rate": 3.270604395604396e-05, + "loss": 0.3493, + "step": 12590 + }, + { + "epoch": 34.59065934065934, + "grad_norm": 9.541821479797363, + "learning_rate": 3.2704670329670336e-05, + "loss": 0.2363, + "step": 12591 + }, + { + "epoch": 34.59340659340659, + "grad_norm": 16.879446029663086, + "learning_rate": 3.2703296703296706e-05, + "loss": 0.4441, + "step": 12592 + }, + { + "epoch": 34.59615384615385, + "grad_norm": 9.690661430358887, + "learning_rate": 3.270192307692308e-05, + "loss": 0.3725, + "step": 12593 + }, + { + "epoch": 34.5989010989011, + "grad_norm": 10.407564163208008, + "learning_rate": 3.270054945054945e-05, + "loss": 0.2577, + "step": 12594 + }, + { + "epoch": 34.60164835164835, + "grad_norm": 16.351669311523438, + "learning_rate": 3.269917582417583e-05, + "loss": 0.5997, + "step": 12595 + }, + { + "epoch": 34.604395604395606, + "grad_norm": 15.123249053955078, + "learning_rate": 3.26978021978022e-05, + "loss": 0.383, + "step": 12596 + }, + { + "epoch": 34.607142857142854, + "grad_norm": 15.281329154968262, + "learning_rate": 3.269642857142857e-05, + "loss": 0.3175, + "step": 12597 + }, + { + "epoch": 34.60989010989011, + "grad_norm": 10.44604206085205, + "learning_rate": 3.2695054945054946e-05, + "loss": 0.3477, + "step": 12598 + }, + { + "epoch": 34.612637362637365, + "grad_norm": 11.902412414550781, + "learning_rate": 3.2693681318681316e-05, + "loss": 0.2402, + "step": 12599 + }, + { + "epoch": 34.61538461538461, + "grad_norm": 16.02181053161621, + "learning_rate": 3.269230769230769e-05, + "loss": 0.4892, + "step": 12600 + }, + { + "epoch": 34.61813186813187, + "grad_norm": 9.531821250915527, + "learning_rate": 3.269093406593407e-05, + "loss": 0.1291, + "step": 12601 + }, + { + "epoch": 34.620879120879124, + "grad_norm": 13.229519844055176, + "learning_rate": 3.268956043956044e-05, + "loss": 0.3215, + "step": 12602 + }, + { + "epoch": 34.62362637362637, + "grad_norm": 19.786970138549805, + "learning_rate": 3.268818681318682e-05, + "loss": 0.6317, + "step": 12603 + }, + { + "epoch": 34.62637362637363, + "grad_norm": 16.003103256225586, + "learning_rate": 3.268681318681319e-05, + "loss": 0.6623, + "step": 12604 + }, + { + "epoch": 34.629120879120876, + "grad_norm": 23.096412658691406, + "learning_rate": 3.2685439560439564e-05, + "loss": 0.897, + "step": 12605 + }, + { + "epoch": 34.63186813186813, + "grad_norm": 9.580869674682617, + "learning_rate": 3.268406593406594e-05, + "loss": 0.1558, + "step": 12606 + }, + { + "epoch": 34.63461538461539, + "grad_norm": 16.531757354736328, + "learning_rate": 3.268269230769231e-05, + "loss": 0.6763, + "step": 12607 + }, + { + "epoch": 34.637362637362635, + "grad_norm": 9.017356872558594, + "learning_rate": 3.268131868131869e-05, + "loss": 0.1481, + "step": 12608 + }, + { + "epoch": 34.64010989010989, + "grad_norm": 7.635753631591797, + "learning_rate": 3.267994505494506e-05, + "loss": 0.216, + "step": 12609 + }, + { + "epoch": 34.642857142857146, + "grad_norm": 17.40386199951172, + "learning_rate": 3.2678571428571434e-05, + "loss": 0.7216, + "step": 12610 + }, + { + "epoch": 34.645604395604394, + "grad_norm": 10.130624771118164, + "learning_rate": 3.2677197802197804e-05, + "loss": 0.4206, + "step": 12611 + }, + { + "epoch": 34.64835164835165, + "grad_norm": 28.390748977661133, + "learning_rate": 3.2675824175824174e-05, + "loss": 0.8217, + "step": 12612 + }, + { + "epoch": 34.6510989010989, + "grad_norm": 4.6632795333862305, + "learning_rate": 3.267445054945055e-05, + "loss": 0.1175, + "step": 12613 + }, + { + "epoch": 34.65384615384615, + "grad_norm": 14.842565536499023, + "learning_rate": 3.267307692307692e-05, + "loss": 0.4244, + "step": 12614 + }, + { + "epoch": 34.65659340659341, + "grad_norm": 19.21156883239746, + "learning_rate": 3.26717032967033e-05, + "loss": 0.4393, + "step": 12615 + }, + { + "epoch": 34.65934065934066, + "grad_norm": 17.713539123535156, + "learning_rate": 3.2670329670329674e-05, + "loss": 0.6509, + "step": 12616 + }, + { + "epoch": 34.66208791208791, + "grad_norm": 8.365655899047852, + "learning_rate": 3.2668956043956044e-05, + "loss": 0.2935, + "step": 12617 + }, + { + "epoch": 34.66483516483517, + "grad_norm": 9.326982498168945, + "learning_rate": 3.266758241758242e-05, + "loss": 0.1982, + "step": 12618 + }, + { + "epoch": 34.667582417582416, + "grad_norm": 14.121830940246582, + "learning_rate": 3.266620879120879e-05, + "loss": 0.3568, + "step": 12619 + }, + { + "epoch": 34.67032967032967, + "grad_norm": 8.74819564819336, + "learning_rate": 3.266483516483517e-05, + "loss": 0.181, + "step": 12620 + }, + { + "epoch": 34.67307692307692, + "grad_norm": 8.004547119140625, + "learning_rate": 3.2663461538461545e-05, + "loss": 0.1978, + "step": 12621 + }, + { + "epoch": 34.675824175824175, + "grad_norm": 13.313194274902344, + "learning_rate": 3.2662087912087915e-05, + "loss": 0.3487, + "step": 12622 + }, + { + "epoch": 34.67857142857143, + "grad_norm": 17.4610538482666, + "learning_rate": 3.266071428571429e-05, + "loss": 0.5605, + "step": 12623 + }, + { + "epoch": 34.68131868131868, + "grad_norm": 13.118425369262695, + "learning_rate": 3.265934065934066e-05, + "loss": 0.312, + "step": 12624 + }, + { + "epoch": 34.684065934065934, + "grad_norm": 20.51620864868164, + "learning_rate": 3.265796703296704e-05, + "loss": 0.4499, + "step": 12625 + }, + { + "epoch": 34.68681318681319, + "grad_norm": 10.926060676574707, + "learning_rate": 3.265659340659341e-05, + "loss": 0.2034, + "step": 12626 + }, + { + "epoch": 34.68956043956044, + "grad_norm": 6.451573371887207, + "learning_rate": 3.265521978021978e-05, + "loss": 0.1285, + "step": 12627 + }, + { + "epoch": 34.69230769230769, + "grad_norm": 13.158548355102539, + "learning_rate": 3.2653846153846155e-05, + "loss": 0.3846, + "step": 12628 + }, + { + "epoch": 34.69505494505494, + "grad_norm": 20.62721061706543, + "learning_rate": 3.2652472527472525e-05, + "loss": 0.7277, + "step": 12629 + }, + { + "epoch": 34.6978021978022, + "grad_norm": 16.282947540283203, + "learning_rate": 3.26510989010989e-05, + "loss": 0.3867, + "step": 12630 + }, + { + "epoch": 34.70054945054945, + "grad_norm": 12.08804988861084, + "learning_rate": 3.264972527472527e-05, + "loss": 0.3024, + "step": 12631 + }, + { + "epoch": 34.7032967032967, + "grad_norm": 7.374798774719238, + "learning_rate": 3.264835164835165e-05, + "loss": 0.2019, + "step": 12632 + }, + { + "epoch": 34.706043956043956, + "grad_norm": 16.99942398071289, + "learning_rate": 3.2646978021978026e-05, + "loss": 0.5881, + "step": 12633 + }, + { + "epoch": 34.70879120879121, + "grad_norm": 12.035393714904785, + "learning_rate": 3.2645604395604396e-05, + "loss": 0.1641, + "step": 12634 + }, + { + "epoch": 34.71153846153846, + "grad_norm": 17.551898956298828, + "learning_rate": 3.264423076923077e-05, + "loss": 0.6485, + "step": 12635 + }, + { + "epoch": 34.714285714285715, + "grad_norm": 4.341081619262695, + "learning_rate": 3.264285714285714e-05, + "loss": 0.0557, + "step": 12636 + }, + { + "epoch": 34.717032967032964, + "grad_norm": 20.997610092163086, + "learning_rate": 3.264148351648352e-05, + "loss": 0.4545, + "step": 12637 + }, + { + "epoch": 34.71978021978022, + "grad_norm": 16.460737228393555, + "learning_rate": 3.2640109890109896e-05, + "loss": 0.6308, + "step": 12638 + }, + { + "epoch": 34.722527472527474, + "grad_norm": 8.50013256072998, + "learning_rate": 3.2638736263736266e-05, + "loss": 0.2191, + "step": 12639 + }, + { + "epoch": 34.72527472527472, + "grad_norm": 13.259483337402344, + "learning_rate": 3.263736263736264e-05, + "loss": 0.4008, + "step": 12640 + }, + { + "epoch": 34.72802197802198, + "grad_norm": 12.779818534851074, + "learning_rate": 3.263598901098901e-05, + "loss": 0.542, + "step": 12641 + }, + { + "epoch": 34.73076923076923, + "grad_norm": 12.088774681091309, + "learning_rate": 3.263461538461538e-05, + "loss": 0.3507, + "step": 12642 + }, + { + "epoch": 34.73351648351648, + "grad_norm": 14.022979736328125, + "learning_rate": 3.263324175824176e-05, + "loss": 0.4555, + "step": 12643 + }, + { + "epoch": 34.73626373626374, + "grad_norm": 11.166045188903809, + "learning_rate": 3.263186813186813e-05, + "loss": 0.3805, + "step": 12644 + }, + { + "epoch": 34.73901098901099, + "grad_norm": 9.348093032836914, + "learning_rate": 3.2630494505494507e-05, + "loss": 0.1885, + "step": 12645 + }, + { + "epoch": 34.74175824175824, + "grad_norm": 10.44465446472168, + "learning_rate": 3.2629120879120877e-05, + "loss": 0.3269, + "step": 12646 + }, + { + "epoch": 34.744505494505496, + "grad_norm": 15.855527877807617, + "learning_rate": 3.262774725274725e-05, + "loss": 0.371, + "step": 12647 + }, + { + "epoch": 34.747252747252745, + "grad_norm": 14.838949203491211, + "learning_rate": 3.262637362637363e-05, + "loss": 0.4744, + "step": 12648 + }, + { + "epoch": 34.75, + "grad_norm": 9.84881591796875, + "learning_rate": 3.2625e-05, + "loss": 0.1534, + "step": 12649 + }, + { + "epoch": 34.752747252747255, + "grad_norm": 18.37013053894043, + "learning_rate": 3.262362637362638e-05, + "loss": 0.5226, + "step": 12650 + }, + { + "epoch": 34.755494505494504, + "grad_norm": 9.601852416992188, + "learning_rate": 3.262225274725275e-05, + "loss": 0.1851, + "step": 12651 + }, + { + "epoch": 34.75824175824176, + "grad_norm": 19.688861846923828, + "learning_rate": 3.2620879120879124e-05, + "loss": 0.4347, + "step": 12652 + }, + { + "epoch": 34.76098901098901, + "grad_norm": 6.49160623550415, + "learning_rate": 3.26195054945055e-05, + "loss": 0.1662, + "step": 12653 + }, + { + "epoch": 34.76373626373626, + "grad_norm": 10.153070449829102, + "learning_rate": 3.261813186813187e-05, + "loss": 0.4029, + "step": 12654 + }, + { + "epoch": 34.76648351648352, + "grad_norm": 5.346350193023682, + "learning_rate": 3.261675824175824e-05, + "loss": 0.0638, + "step": 12655 + }, + { + "epoch": 34.76923076923077, + "grad_norm": 8.947046279907227, + "learning_rate": 3.261538461538462e-05, + "loss": 0.1561, + "step": 12656 + }, + { + "epoch": 34.77197802197802, + "grad_norm": 13.365396499633789, + "learning_rate": 3.261401098901099e-05, + "loss": 0.2778, + "step": 12657 + }, + { + "epoch": 34.77472527472528, + "grad_norm": 11.95436954498291, + "learning_rate": 3.2612637362637364e-05, + "loss": 0.3065, + "step": 12658 + }, + { + "epoch": 34.777472527472526, + "grad_norm": 15.889585494995117, + "learning_rate": 3.2611263736263734e-05, + "loss": 0.5193, + "step": 12659 + }, + { + "epoch": 34.78021978021978, + "grad_norm": 18.995065689086914, + "learning_rate": 3.260989010989011e-05, + "loss": 0.5492, + "step": 12660 + }, + { + "epoch": 34.782967032967036, + "grad_norm": 20.32193374633789, + "learning_rate": 3.260851648351648e-05, + "loss": 0.6816, + "step": 12661 + }, + { + "epoch": 34.785714285714285, + "grad_norm": 8.861870765686035, + "learning_rate": 3.260714285714286e-05, + "loss": 0.1779, + "step": 12662 + }, + { + "epoch": 34.78846153846154, + "grad_norm": 3.743623733520508, + "learning_rate": 3.2605769230769235e-05, + "loss": 0.1456, + "step": 12663 + }, + { + "epoch": 34.79120879120879, + "grad_norm": 10.668373107910156, + "learning_rate": 3.2604395604395605e-05, + "loss": 0.3511, + "step": 12664 + }, + { + "epoch": 34.793956043956044, + "grad_norm": 3.2757694721221924, + "learning_rate": 3.260302197802198e-05, + "loss": 0.0787, + "step": 12665 + }, + { + "epoch": 34.7967032967033, + "grad_norm": 3.402817964553833, + "learning_rate": 3.260164835164835e-05, + "loss": 0.0728, + "step": 12666 + }, + { + "epoch": 34.79945054945055, + "grad_norm": 18.222543716430664, + "learning_rate": 3.260027472527473e-05, + "loss": 0.5087, + "step": 12667 + }, + { + "epoch": 34.8021978021978, + "grad_norm": 8.123104095458984, + "learning_rate": 3.2598901098901105e-05, + "loss": 0.2421, + "step": 12668 + }, + { + "epoch": 34.80494505494506, + "grad_norm": 24.639467239379883, + "learning_rate": 3.2597527472527475e-05, + "loss": 0.7509, + "step": 12669 + }, + { + "epoch": 34.80769230769231, + "grad_norm": 7.888538837432861, + "learning_rate": 3.2596153846153845e-05, + "loss": 0.2468, + "step": 12670 + }, + { + "epoch": 34.81043956043956, + "grad_norm": 12.012076377868652, + "learning_rate": 3.259478021978022e-05, + "loss": 0.3286, + "step": 12671 + }, + { + "epoch": 34.81318681318681, + "grad_norm": 5.141489505767822, + "learning_rate": 3.259340659340659e-05, + "loss": 0.0789, + "step": 12672 + }, + { + "epoch": 34.815934065934066, + "grad_norm": 13.371540069580078, + "learning_rate": 3.259203296703297e-05, + "loss": 0.2966, + "step": 12673 + }, + { + "epoch": 34.81868131868132, + "grad_norm": 6.4891462326049805, + "learning_rate": 3.259065934065934e-05, + "loss": 0.129, + "step": 12674 + }, + { + "epoch": 34.82142857142857, + "grad_norm": 14.468785285949707, + "learning_rate": 3.2589285714285716e-05, + "loss": 0.5466, + "step": 12675 + }, + { + "epoch": 34.824175824175825, + "grad_norm": 4.831415176391602, + "learning_rate": 3.2587912087912086e-05, + "loss": 0.0739, + "step": 12676 + }, + { + "epoch": 34.82692307692308, + "grad_norm": 12.12656307220459, + "learning_rate": 3.258653846153846e-05, + "loss": 0.2687, + "step": 12677 + }, + { + "epoch": 34.82967032967033, + "grad_norm": 9.646245956420898, + "learning_rate": 3.258516483516484e-05, + "loss": 0.2289, + "step": 12678 + }, + { + "epoch": 34.832417582417584, + "grad_norm": 18.930814743041992, + "learning_rate": 3.258379120879121e-05, + "loss": 0.5007, + "step": 12679 + }, + { + "epoch": 34.83516483516483, + "grad_norm": 18.19925880432129, + "learning_rate": 3.2582417582417586e-05, + "loss": 0.6041, + "step": 12680 + }, + { + "epoch": 34.83791208791209, + "grad_norm": 19.681886672973633, + "learning_rate": 3.2581043956043956e-05, + "loss": 0.5652, + "step": 12681 + }, + { + "epoch": 34.84065934065934, + "grad_norm": 13.346115112304688, + "learning_rate": 3.257967032967033e-05, + "loss": 0.3675, + "step": 12682 + }, + { + "epoch": 34.84340659340659, + "grad_norm": 12.601401329040527, + "learning_rate": 3.257829670329671e-05, + "loss": 0.4242, + "step": 12683 + }, + { + "epoch": 34.84615384615385, + "grad_norm": 22.134737014770508, + "learning_rate": 3.257692307692308e-05, + "loss": 0.9342, + "step": 12684 + }, + { + "epoch": 34.8489010989011, + "grad_norm": 21.338197708129883, + "learning_rate": 3.257554945054945e-05, + "loss": 0.383, + "step": 12685 + }, + { + "epoch": 34.85164835164835, + "grad_norm": 14.816136360168457, + "learning_rate": 3.2574175824175826e-05, + "loss": 0.3903, + "step": 12686 + }, + { + "epoch": 34.854395604395606, + "grad_norm": 10.599478721618652, + "learning_rate": 3.2572802197802196e-05, + "loss": 0.3479, + "step": 12687 + }, + { + "epoch": 34.857142857142854, + "grad_norm": 11.232577323913574, + "learning_rate": 3.257142857142857e-05, + "loss": 0.3068, + "step": 12688 + }, + { + "epoch": 34.85989010989011, + "grad_norm": 16.933847427368164, + "learning_rate": 3.257005494505494e-05, + "loss": 0.3341, + "step": 12689 + }, + { + "epoch": 34.862637362637365, + "grad_norm": 19.535037994384766, + "learning_rate": 3.256868131868132e-05, + "loss": 0.8067, + "step": 12690 + }, + { + "epoch": 34.86538461538461, + "grad_norm": 11.385320663452148, + "learning_rate": 3.256730769230769e-05, + "loss": 0.1835, + "step": 12691 + }, + { + "epoch": 34.86813186813187, + "grad_norm": 8.186949729919434, + "learning_rate": 3.256593406593407e-05, + "loss": 0.1579, + "step": 12692 + }, + { + "epoch": 34.870879120879124, + "grad_norm": 12.468692779541016, + "learning_rate": 3.2564560439560444e-05, + "loss": 0.2992, + "step": 12693 + }, + { + "epoch": 34.87362637362637, + "grad_norm": 6.987845420837402, + "learning_rate": 3.2563186813186814e-05, + "loss": 0.0984, + "step": 12694 + }, + { + "epoch": 34.87637362637363, + "grad_norm": 11.694125175476074, + "learning_rate": 3.256181318681319e-05, + "loss": 0.3622, + "step": 12695 + }, + { + "epoch": 34.879120879120876, + "grad_norm": 8.282219886779785, + "learning_rate": 3.256043956043956e-05, + "loss": 0.2256, + "step": 12696 + }, + { + "epoch": 34.88186813186813, + "grad_norm": 12.190515518188477, + "learning_rate": 3.255906593406594e-05, + "loss": 0.2612, + "step": 12697 + }, + { + "epoch": 34.88461538461539, + "grad_norm": 9.226447105407715, + "learning_rate": 3.2557692307692314e-05, + "loss": 0.1719, + "step": 12698 + }, + { + "epoch": 34.887362637362635, + "grad_norm": 14.600519180297852, + "learning_rate": 3.2556318681318684e-05, + "loss": 0.292, + "step": 12699 + }, + { + "epoch": 34.89010989010989, + "grad_norm": 12.979714393615723, + "learning_rate": 3.2554945054945054e-05, + "loss": 0.2491, + "step": 12700 + }, + { + "epoch": 34.892857142857146, + "grad_norm": 7.463362216949463, + "learning_rate": 3.255357142857143e-05, + "loss": 0.1933, + "step": 12701 + }, + { + "epoch": 34.895604395604394, + "grad_norm": 20.929698944091797, + "learning_rate": 3.25521978021978e-05, + "loss": 0.6165, + "step": 12702 + }, + { + "epoch": 34.89835164835165, + "grad_norm": 11.334724426269531, + "learning_rate": 3.255082417582418e-05, + "loss": 0.3103, + "step": 12703 + }, + { + "epoch": 34.9010989010989, + "grad_norm": 8.840843200683594, + "learning_rate": 3.254945054945055e-05, + "loss": 0.2691, + "step": 12704 + }, + { + "epoch": 34.90384615384615, + "grad_norm": 7.7527923583984375, + "learning_rate": 3.2548076923076925e-05, + "loss": 0.1308, + "step": 12705 + }, + { + "epoch": 34.90659340659341, + "grad_norm": 20.505441665649414, + "learning_rate": 3.2546703296703295e-05, + "loss": 0.7162, + "step": 12706 + }, + { + "epoch": 34.90934065934066, + "grad_norm": 12.336650848388672, + "learning_rate": 3.254532967032967e-05, + "loss": 0.5092, + "step": 12707 + }, + { + "epoch": 34.91208791208791, + "grad_norm": 15.320679664611816, + "learning_rate": 3.254395604395605e-05, + "loss": 0.2864, + "step": 12708 + }, + { + "epoch": 34.91483516483517, + "grad_norm": 7.791702747344971, + "learning_rate": 3.254258241758242e-05, + "loss": 0.1913, + "step": 12709 + }, + { + "epoch": 34.917582417582416, + "grad_norm": 10.989909172058105, + "learning_rate": 3.2541208791208795e-05, + "loss": 0.2441, + "step": 12710 + }, + { + "epoch": 34.92032967032967, + "grad_norm": 11.961153030395508, + "learning_rate": 3.2539835164835165e-05, + "loss": 0.2872, + "step": 12711 + }, + { + "epoch": 34.92307692307692, + "grad_norm": 8.167884826660156, + "learning_rate": 3.253846153846154e-05, + "loss": 0.1437, + "step": 12712 + }, + { + "epoch": 34.925824175824175, + "grad_norm": 8.323400497436523, + "learning_rate": 3.253708791208792e-05, + "loss": 0.2841, + "step": 12713 + }, + { + "epoch": 34.92857142857143, + "grad_norm": 4.859616756439209, + "learning_rate": 3.253571428571429e-05, + "loss": 0.1077, + "step": 12714 + }, + { + "epoch": 34.93131868131868, + "grad_norm": 12.904799461364746, + "learning_rate": 3.253434065934066e-05, + "loss": 0.259, + "step": 12715 + }, + { + "epoch": 34.934065934065934, + "grad_norm": 12.84079647064209, + "learning_rate": 3.2532967032967035e-05, + "loss": 0.4586, + "step": 12716 + }, + { + "epoch": 34.93681318681319, + "grad_norm": 9.465787887573242, + "learning_rate": 3.2531593406593405e-05, + "loss": 0.3145, + "step": 12717 + }, + { + "epoch": 34.93956043956044, + "grad_norm": 16.058992385864258, + "learning_rate": 3.253021978021978e-05, + "loss": 0.3989, + "step": 12718 + }, + { + "epoch": 34.94230769230769, + "grad_norm": 7.179629325866699, + "learning_rate": 3.252884615384615e-05, + "loss": 0.2039, + "step": 12719 + }, + { + "epoch": 34.94505494505494, + "grad_norm": 11.228252410888672, + "learning_rate": 3.252747252747253e-05, + "loss": 0.3134, + "step": 12720 + }, + { + "epoch": 34.9478021978022, + "grad_norm": 9.672874450683594, + "learning_rate": 3.25260989010989e-05, + "loss": 0.3213, + "step": 12721 + }, + { + "epoch": 34.95054945054945, + "grad_norm": 9.443523406982422, + "learning_rate": 3.2524725274725276e-05, + "loss": 0.157, + "step": 12722 + }, + { + "epoch": 34.9532967032967, + "grad_norm": 7.495344638824463, + "learning_rate": 3.252335164835165e-05, + "loss": 0.1489, + "step": 12723 + }, + { + "epoch": 34.956043956043956, + "grad_norm": 9.73542594909668, + "learning_rate": 3.252197802197802e-05, + "loss": 0.2344, + "step": 12724 + }, + { + "epoch": 34.95879120879121, + "grad_norm": 4.24397611618042, + "learning_rate": 3.25206043956044e-05, + "loss": 0.0755, + "step": 12725 + }, + { + "epoch": 34.96153846153846, + "grad_norm": 12.173704147338867, + "learning_rate": 3.251923076923077e-05, + "loss": 0.1937, + "step": 12726 + }, + { + "epoch": 34.964285714285715, + "grad_norm": 13.605133056640625, + "learning_rate": 3.2517857142857146e-05, + "loss": 0.3367, + "step": 12727 + }, + { + "epoch": 34.967032967032964, + "grad_norm": 15.221898078918457, + "learning_rate": 3.251648351648352e-05, + "loss": 0.6094, + "step": 12728 + }, + { + "epoch": 34.96978021978022, + "grad_norm": 15.84148120880127, + "learning_rate": 3.251510989010989e-05, + "loss": 0.6437, + "step": 12729 + }, + { + "epoch": 34.972527472527474, + "grad_norm": 5.840164661407471, + "learning_rate": 3.251373626373626e-05, + "loss": 0.1461, + "step": 12730 + }, + { + "epoch": 34.97527472527472, + "grad_norm": 15.501910209655762, + "learning_rate": 3.251236263736264e-05, + "loss": 0.4247, + "step": 12731 + }, + { + "epoch": 34.97802197802198, + "grad_norm": 16.224695205688477, + "learning_rate": 3.251098901098901e-05, + "loss": 0.5277, + "step": 12732 + }, + { + "epoch": 34.98076923076923, + "grad_norm": 2.726663827896118, + "learning_rate": 3.250961538461539e-05, + "loss": 0.0422, + "step": 12733 + }, + { + "epoch": 34.98351648351648, + "grad_norm": 9.015019416809082, + "learning_rate": 3.250824175824176e-05, + "loss": 0.2139, + "step": 12734 + }, + { + "epoch": 34.98626373626374, + "grad_norm": 17.161508560180664, + "learning_rate": 3.2506868131868134e-05, + "loss": 0.4416, + "step": 12735 + }, + { + "epoch": 34.98901098901099, + "grad_norm": 9.72176456451416, + "learning_rate": 3.2505494505494504e-05, + "loss": 0.2209, + "step": 12736 + }, + { + "epoch": 34.99175824175824, + "grad_norm": 10.232660293579102, + "learning_rate": 3.250412087912088e-05, + "loss": 0.344, + "step": 12737 + }, + { + "epoch": 34.994505494505496, + "grad_norm": 4.960814476013184, + "learning_rate": 3.250274725274726e-05, + "loss": 0.0737, + "step": 12738 + }, + { + "epoch": 34.997252747252745, + "grad_norm": 11.303421020507812, + "learning_rate": 3.250137362637363e-05, + "loss": 0.4018, + "step": 12739 + }, + { + "epoch": 35.0, + "grad_norm": 41.299278259277344, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.9703, + "step": 12740 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.5316804407713499, + "eval_f1": 0.5165329945412139, + "eval_f1_DuraRiadoRio_64x64": 0.49910873440285203, + "eval_f1_Mole_64x64": 0.7808219178082192, + "eval_f1_Quebrado_64x64": 0.35359116022099446, + "eval_f1_RiadoRio_64x64": 0.5198237885462555, + "eval_f1_RioFechado_64x64": 0.4293193717277487, + "eval_loss": 2.1654908657073975, + "eval_precision": 0.7188535821094917, + "eval_precision_DuraRiadoRio_64x64": 0.33573141486810554, + "eval_precision_Mole_64x64": 0.7702702702702703, + "eval_precision_Quebrado_64x64": 0.8648648648648649, + "eval_precision_RiadoRio_64x64": 0.7866666666666666, + "eval_precision_RioFechado_64x64": 0.8367346938775511, + "eval_recall": 0.53260028004283, + "eval_recall_DuraRiadoRio_64x64": 0.9722222222222222, + "eval_recall_Mole_64x64": 0.7916666666666666, + "eval_recall_Quebrado_64x64": 0.2222222222222222, + "eval_recall_RiadoRio_64x64": 0.3881578947368421, + "eval_recall_RioFechado_64x64": 0.2887323943661972, + "eval_runtime": 1.7496, + "eval_samples_per_second": 414.944, + "eval_steps_per_second": 26.291, + "step": 12740 + }, + { + "epoch": 35.002747252747255, + "grad_norm": 6.227866172790527, + "learning_rate": 3.2498626373626374e-05, + "loss": 0.1457, + "step": 12741 + }, + { + "epoch": 35.005494505494504, + "grad_norm": 23.671525955200195, + "learning_rate": 3.249725274725275e-05, + "loss": 0.9607, + "step": 12742 + }, + { + "epoch": 35.00824175824176, + "grad_norm": 8.684972763061523, + "learning_rate": 3.249587912087913e-05, + "loss": 0.1799, + "step": 12743 + }, + { + "epoch": 35.010989010989015, + "grad_norm": 7.401329040527344, + "learning_rate": 3.24945054945055e-05, + "loss": 0.2645, + "step": 12744 + }, + { + "epoch": 35.01373626373626, + "grad_norm": 9.493660926818848, + "learning_rate": 3.249313186813187e-05, + "loss": 0.2421, + "step": 12745 + }, + { + "epoch": 35.01648351648352, + "grad_norm": 17.939970016479492, + "learning_rate": 3.2491758241758244e-05, + "loss": 0.4588, + "step": 12746 + }, + { + "epoch": 35.01923076923077, + "grad_norm": 4.147129058837891, + "learning_rate": 3.2490384615384614e-05, + "loss": 0.085, + "step": 12747 + }, + { + "epoch": 35.02197802197802, + "grad_norm": 13.8062744140625, + "learning_rate": 3.248901098901099e-05, + "loss": 0.3801, + "step": 12748 + }, + { + "epoch": 35.02472527472528, + "grad_norm": 2.200453281402588, + "learning_rate": 3.248763736263736e-05, + "loss": 0.0504, + "step": 12749 + }, + { + "epoch": 35.027472527472526, + "grad_norm": 7.936999320983887, + "learning_rate": 3.248626373626374e-05, + "loss": 0.1509, + "step": 12750 + }, + { + "epoch": 35.03021978021978, + "grad_norm": 20.83416748046875, + "learning_rate": 3.248489010989011e-05, + "loss": 0.6429, + "step": 12751 + }, + { + "epoch": 35.032967032967036, + "grad_norm": 12.829238891601562, + "learning_rate": 3.2483516483516485e-05, + "loss": 0.2576, + "step": 12752 + }, + { + "epoch": 35.035714285714285, + "grad_norm": 14.459808349609375, + "learning_rate": 3.248214285714286e-05, + "loss": 0.3042, + "step": 12753 + }, + { + "epoch": 35.03846153846154, + "grad_norm": 18.538307189941406, + "learning_rate": 3.248076923076923e-05, + "loss": 0.3129, + "step": 12754 + }, + { + "epoch": 35.04120879120879, + "grad_norm": 8.775846481323242, + "learning_rate": 3.247939560439561e-05, + "loss": 0.2802, + "step": 12755 + }, + { + "epoch": 35.043956043956044, + "grad_norm": 11.090437889099121, + "learning_rate": 3.247802197802198e-05, + "loss": 0.3412, + "step": 12756 + }, + { + "epoch": 35.0467032967033, + "grad_norm": 13.395352363586426, + "learning_rate": 3.2476648351648355e-05, + "loss": 0.3621, + "step": 12757 + }, + { + "epoch": 35.04945054945055, + "grad_norm": 5.781243324279785, + "learning_rate": 3.247527472527473e-05, + "loss": 0.1089, + "step": 12758 + }, + { + "epoch": 35.0521978021978, + "grad_norm": 8.01569938659668, + "learning_rate": 3.24739010989011e-05, + "loss": 0.1155, + "step": 12759 + }, + { + "epoch": 35.05494505494506, + "grad_norm": 13.141151428222656, + "learning_rate": 3.247252747252747e-05, + "loss": 0.3702, + "step": 12760 + }, + { + "epoch": 35.05769230769231, + "grad_norm": 21.1961727142334, + "learning_rate": 3.247115384615385e-05, + "loss": 0.7565, + "step": 12761 + }, + { + "epoch": 35.06043956043956, + "grad_norm": 15.16064167022705, + "learning_rate": 3.246978021978022e-05, + "loss": 0.3352, + "step": 12762 + }, + { + "epoch": 35.06318681318681, + "grad_norm": 10.569680213928223, + "learning_rate": 3.2468406593406596e-05, + "loss": 0.1846, + "step": 12763 + }, + { + "epoch": 35.065934065934066, + "grad_norm": 20.772634506225586, + "learning_rate": 3.2467032967032966e-05, + "loss": 0.9308, + "step": 12764 + }, + { + "epoch": 35.06868131868132, + "grad_norm": 12.015832901000977, + "learning_rate": 3.246565934065934e-05, + "loss": 0.2517, + "step": 12765 + }, + { + "epoch": 35.07142857142857, + "grad_norm": 19.220117568969727, + "learning_rate": 3.246428571428571e-05, + "loss": 0.5957, + "step": 12766 + }, + { + "epoch": 35.074175824175825, + "grad_norm": 13.058520317077637, + "learning_rate": 3.246291208791209e-05, + "loss": 0.3709, + "step": 12767 + }, + { + "epoch": 35.07692307692308, + "grad_norm": 11.168485641479492, + "learning_rate": 3.2461538461538466e-05, + "loss": 0.1652, + "step": 12768 + }, + { + "epoch": 35.07967032967033, + "grad_norm": 15.371356964111328, + "learning_rate": 3.2460164835164836e-05, + "loss": 0.2985, + "step": 12769 + }, + { + "epoch": 35.082417582417584, + "grad_norm": 12.27424430847168, + "learning_rate": 3.245879120879121e-05, + "loss": 0.2937, + "step": 12770 + }, + { + "epoch": 35.08516483516483, + "grad_norm": 5.27914571762085, + "learning_rate": 3.245741758241758e-05, + "loss": 0.1236, + "step": 12771 + }, + { + "epoch": 35.08791208791209, + "grad_norm": 17.777482986450195, + "learning_rate": 3.245604395604396e-05, + "loss": 0.3644, + "step": 12772 + }, + { + "epoch": 35.09065934065934, + "grad_norm": 8.885295867919922, + "learning_rate": 3.2454670329670337e-05, + "loss": 0.1437, + "step": 12773 + }, + { + "epoch": 35.09340659340659, + "grad_norm": 6.79856014251709, + "learning_rate": 3.2453296703296707e-05, + "loss": 0.1909, + "step": 12774 + }, + { + "epoch": 35.09615384615385, + "grad_norm": 23.166683197021484, + "learning_rate": 3.2451923076923077e-05, + "loss": 0.9377, + "step": 12775 + }, + { + "epoch": 35.0989010989011, + "grad_norm": 7.070591926574707, + "learning_rate": 3.245054945054945e-05, + "loss": 0.1493, + "step": 12776 + }, + { + "epoch": 35.10164835164835, + "grad_norm": 10.81069564819336, + "learning_rate": 3.244917582417582e-05, + "loss": 0.2058, + "step": 12777 + }, + { + "epoch": 35.104395604395606, + "grad_norm": 9.49688720703125, + "learning_rate": 3.24478021978022e-05, + "loss": 0.1826, + "step": 12778 + }, + { + "epoch": 35.107142857142854, + "grad_norm": 7.134002685546875, + "learning_rate": 3.244642857142857e-05, + "loss": 0.1475, + "step": 12779 + }, + { + "epoch": 35.10989010989011, + "grad_norm": 16.683542251586914, + "learning_rate": 3.244505494505495e-05, + "loss": 0.4835, + "step": 12780 + }, + { + "epoch": 35.112637362637365, + "grad_norm": 7.1432671546936035, + "learning_rate": 3.244368131868132e-05, + "loss": 0.1137, + "step": 12781 + }, + { + "epoch": 35.11538461538461, + "grad_norm": 23.015592575073242, + "learning_rate": 3.2442307692307694e-05, + "loss": 1.3025, + "step": 12782 + }, + { + "epoch": 35.11813186813187, + "grad_norm": 12.057241439819336, + "learning_rate": 3.244093406593407e-05, + "loss": 0.2218, + "step": 12783 + }, + { + "epoch": 35.120879120879124, + "grad_norm": 6.016184329986572, + "learning_rate": 3.243956043956044e-05, + "loss": 0.112, + "step": 12784 + }, + { + "epoch": 35.12362637362637, + "grad_norm": 15.27901840209961, + "learning_rate": 3.243818681318682e-05, + "loss": 0.3902, + "step": 12785 + }, + { + "epoch": 35.12637362637363, + "grad_norm": 15.264242172241211, + "learning_rate": 3.243681318681319e-05, + "loss": 0.4261, + "step": 12786 + }, + { + "epoch": 35.129120879120876, + "grad_norm": 9.94363021850586, + "learning_rate": 3.2435439560439564e-05, + "loss": 0.3922, + "step": 12787 + }, + { + "epoch": 35.13186813186813, + "grad_norm": 16.30440330505371, + "learning_rate": 3.243406593406594e-05, + "loss": 0.4897, + "step": 12788 + }, + { + "epoch": 35.13461538461539, + "grad_norm": 7.6352081298828125, + "learning_rate": 3.243269230769231e-05, + "loss": 0.2378, + "step": 12789 + }, + { + "epoch": 35.137362637362635, + "grad_norm": 7.389117240905762, + "learning_rate": 3.243131868131868e-05, + "loss": 0.1699, + "step": 12790 + }, + { + "epoch": 35.14010989010989, + "grad_norm": 10.940448760986328, + "learning_rate": 3.242994505494506e-05, + "loss": 0.2637, + "step": 12791 + }, + { + "epoch": 35.142857142857146, + "grad_norm": 21.880155563354492, + "learning_rate": 3.242857142857143e-05, + "loss": 0.4918, + "step": 12792 + }, + { + "epoch": 35.145604395604394, + "grad_norm": 8.4154691696167, + "learning_rate": 3.2427197802197805e-05, + "loss": 0.3406, + "step": 12793 + }, + { + "epoch": 35.14835164835165, + "grad_norm": 3.3459908962249756, + "learning_rate": 3.2425824175824175e-05, + "loss": 0.0819, + "step": 12794 + }, + { + "epoch": 35.1510989010989, + "grad_norm": 22.668046951293945, + "learning_rate": 3.242445054945055e-05, + "loss": 0.6977, + "step": 12795 + }, + { + "epoch": 35.15384615384615, + "grad_norm": 9.657471656799316, + "learning_rate": 3.242307692307692e-05, + "loss": 0.2307, + "step": 12796 + }, + { + "epoch": 35.15659340659341, + "grad_norm": 8.782477378845215, + "learning_rate": 3.24217032967033e-05, + "loss": 0.1838, + "step": 12797 + }, + { + "epoch": 35.15934065934066, + "grad_norm": 13.597193717956543, + "learning_rate": 3.2420329670329675e-05, + "loss": 0.3032, + "step": 12798 + }, + { + "epoch": 35.16208791208791, + "grad_norm": 8.247511863708496, + "learning_rate": 3.2418956043956045e-05, + "loss": 0.2052, + "step": 12799 + }, + { + "epoch": 35.16483516483517, + "grad_norm": 16.028892517089844, + "learning_rate": 3.241758241758242e-05, + "loss": 0.2447, + "step": 12800 + }, + { + "epoch": 35.167582417582416, + "grad_norm": 17.409143447875977, + "learning_rate": 3.241620879120879e-05, + "loss": 0.4356, + "step": 12801 + }, + { + "epoch": 35.17032967032967, + "grad_norm": 10.380992889404297, + "learning_rate": 3.241483516483517e-05, + "loss": 0.2525, + "step": 12802 + }, + { + "epoch": 35.17307692307692, + "grad_norm": 21.099763870239258, + "learning_rate": 3.2413461538461545e-05, + "loss": 0.6841, + "step": 12803 + }, + { + "epoch": 35.175824175824175, + "grad_norm": 7.806293487548828, + "learning_rate": 3.2412087912087916e-05, + "loss": 0.1911, + "step": 12804 + }, + { + "epoch": 35.17857142857143, + "grad_norm": 16.72880744934082, + "learning_rate": 3.2410714285714286e-05, + "loss": 0.2156, + "step": 12805 + }, + { + "epoch": 35.18131868131868, + "grad_norm": 12.078003883361816, + "learning_rate": 3.240934065934066e-05, + "loss": 0.2093, + "step": 12806 + }, + { + "epoch": 35.184065934065934, + "grad_norm": 14.814946174621582, + "learning_rate": 3.240796703296703e-05, + "loss": 0.341, + "step": 12807 + }, + { + "epoch": 35.18681318681319, + "grad_norm": 10.026688575744629, + "learning_rate": 3.240659340659341e-05, + "loss": 0.2711, + "step": 12808 + }, + { + "epoch": 35.18956043956044, + "grad_norm": 22.985883712768555, + "learning_rate": 3.240521978021978e-05, + "loss": 0.461, + "step": 12809 + }, + { + "epoch": 35.19230769230769, + "grad_norm": 23.90281105041504, + "learning_rate": 3.2403846153846156e-05, + "loss": 0.6863, + "step": 12810 + }, + { + "epoch": 35.19505494505494, + "grad_norm": 3.515282154083252, + "learning_rate": 3.2402472527472526e-05, + "loss": 0.0479, + "step": 12811 + }, + { + "epoch": 35.1978021978022, + "grad_norm": 14.566967964172363, + "learning_rate": 3.24010989010989e-05, + "loss": 0.3513, + "step": 12812 + }, + { + "epoch": 35.20054945054945, + "grad_norm": 10.177132606506348, + "learning_rate": 3.239972527472528e-05, + "loss": 0.3345, + "step": 12813 + }, + { + "epoch": 35.2032967032967, + "grad_norm": 31.916793823242188, + "learning_rate": 3.239835164835165e-05, + "loss": 1.1137, + "step": 12814 + }, + { + "epoch": 35.206043956043956, + "grad_norm": 3.9803857803344727, + "learning_rate": 3.2396978021978026e-05, + "loss": 0.0698, + "step": 12815 + }, + { + "epoch": 35.20879120879121, + "grad_norm": 17.262147903442383, + "learning_rate": 3.2395604395604396e-05, + "loss": 0.7284, + "step": 12816 + }, + { + "epoch": 35.21153846153846, + "grad_norm": 20.93223762512207, + "learning_rate": 3.239423076923077e-05, + "loss": 0.4814, + "step": 12817 + }, + { + "epoch": 35.214285714285715, + "grad_norm": 8.43383502960205, + "learning_rate": 3.239285714285715e-05, + "loss": 0.188, + "step": 12818 + }, + { + "epoch": 35.217032967032964, + "grad_norm": 9.582491874694824, + "learning_rate": 3.239148351648352e-05, + "loss": 0.1897, + "step": 12819 + }, + { + "epoch": 35.21978021978022, + "grad_norm": 3.9540860652923584, + "learning_rate": 3.239010989010989e-05, + "loss": 0.0844, + "step": 12820 + }, + { + "epoch": 35.222527472527474, + "grad_norm": 3.570286512374878, + "learning_rate": 3.238873626373627e-05, + "loss": 0.0547, + "step": 12821 + }, + { + "epoch": 35.22527472527472, + "grad_norm": 8.69586181640625, + "learning_rate": 3.238736263736264e-05, + "loss": 0.1116, + "step": 12822 + }, + { + "epoch": 35.22802197802198, + "grad_norm": 20.453411102294922, + "learning_rate": 3.2385989010989014e-05, + "loss": 0.4482, + "step": 12823 + }, + { + "epoch": 35.23076923076923, + "grad_norm": 16.247570037841797, + "learning_rate": 3.2384615384615384e-05, + "loss": 0.3894, + "step": 12824 + }, + { + "epoch": 35.23351648351648, + "grad_norm": 13.445033073425293, + "learning_rate": 3.238324175824176e-05, + "loss": 0.5697, + "step": 12825 + }, + { + "epoch": 35.23626373626374, + "grad_norm": 6.523670196533203, + "learning_rate": 3.238186813186813e-05, + "loss": 0.2184, + "step": 12826 + }, + { + "epoch": 35.239010989010985, + "grad_norm": 7.494203567504883, + "learning_rate": 3.238049450549451e-05, + "loss": 0.1396, + "step": 12827 + }, + { + "epoch": 35.24175824175824, + "grad_norm": 11.392990112304688, + "learning_rate": 3.2379120879120884e-05, + "loss": 0.2994, + "step": 12828 + }, + { + "epoch": 35.244505494505496, + "grad_norm": 22.97459602355957, + "learning_rate": 3.2377747252747254e-05, + "loss": 0.8094, + "step": 12829 + }, + { + "epoch": 35.247252747252745, + "grad_norm": 9.753106117248535, + "learning_rate": 3.237637362637363e-05, + "loss": 0.3074, + "step": 12830 + }, + { + "epoch": 35.25, + "grad_norm": 8.606565475463867, + "learning_rate": 3.2375e-05, + "loss": 0.1861, + "step": 12831 + }, + { + "epoch": 35.252747252747255, + "grad_norm": 13.3860502243042, + "learning_rate": 3.237362637362638e-05, + "loss": 0.281, + "step": 12832 + }, + { + "epoch": 35.255494505494504, + "grad_norm": 21.09925651550293, + "learning_rate": 3.2372252747252754e-05, + "loss": 0.8397, + "step": 12833 + }, + { + "epoch": 35.25824175824176, + "grad_norm": 11.937594413757324, + "learning_rate": 3.2370879120879124e-05, + "loss": 0.3737, + "step": 12834 + }, + { + "epoch": 35.260989010989015, + "grad_norm": 16.221399307250977, + "learning_rate": 3.2369505494505495e-05, + "loss": 0.3311, + "step": 12835 + }, + { + "epoch": 35.26373626373626, + "grad_norm": 17.342538833618164, + "learning_rate": 3.236813186813187e-05, + "loss": 0.351, + "step": 12836 + }, + { + "epoch": 35.26648351648352, + "grad_norm": 12.9553861618042, + "learning_rate": 3.236675824175824e-05, + "loss": 0.2462, + "step": 12837 + }, + { + "epoch": 35.26923076923077, + "grad_norm": 12.737171173095703, + "learning_rate": 3.236538461538462e-05, + "loss": 0.2579, + "step": 12838 + }, + { + "epoch": 35.27197802197802, + "grad_norm": 14.826363563537598, + "learning_rate": 3.236401098901099e-05, + "loss": 0.441, + "step": 12839 + }, + { + "epoch": 35.27472527472528, + "grad_norm": 8.98413372039795, + "learning_rate": 3.2362637362637365e-05, + "loss": 0.1732, + "step": 12840 + }, + { + "epoch": 35.277472527472526, + "grad_norm": 11.394280433654785, + "learning_rate": 3.2361263736263735e-05, + "loss": 0.3634, + "step": 12841 + }, + { + "epoch": 35.28021978021978, + "grad_norm": 8.323573112487793, + "learning_rate": 3.235989010989011e-05, + "loss": 0.1353, + "step": 12842 + }, + { + "epoch": 35.282967032967036, + "grad_norm": 15.497907638549805, + "learning_rate": 3.235851648351649e-05, + "loss": 0.5117, + "step": 12843 + }, + { + "epoch": 35.285714285714285, + "grad_norm": 26.300086975097656, + "learning_rate": 3.235714285714286e-05, + "loss": 0.5867, + "step": 12844 + }, + { + "epoch": 35.28846153846154, + "grad_norm": 13.783570289611816, + "learning_rate": 3.2355769230769235e-05, + "loss": 0.2791, + "step": 12845 + }, + { + "epoch": 35.29120879120879, + "grad_norm": 19.426210403442383, + "learning_rate": 3.2354395604395605e-05, + "loss": 0.4329, + "step": 12846 + }, + { + "epoch": 35.293956043956044, + "grad_norm": 10.21513843536377, + "learning_rate": 3.235302197802198e-05, + "loss": 0.252, + "step": 12847 + }, + { + "epoch": 35.2967032967033, + "grad_norm": 15.088969230651855, + "learning_rate": 3.235164835164836e-05, + "loss": 0.3263, + "step": 12848 + }, + { + "epoch": 35.29945054945055, + "grad_norm": 5.880643844604492, + "learning_rate": 3.235027472527473e-05, + "loss": 0.1055, + "step": 12849 + }, + { + "epoch": 35.3021978021978, + "grad_norm": 13.411250114440918, + "learning_rate": 3.23489010989011e-05, + "loss": 0.3257, + "step": 12850 + }, + { + "epoch": 35.30494505494506, + "grad_norm": 5.44692325592041, + "learning_rate": 3.234752747252747e-05, + "loss": 0.1612, + "step": 12851 + }, + { + "epoch": 35.30769230769231, + "grad_norm": 9.604582786560059, + "learning_rate": 3.2346153846153846e-05, + "loss": 0.2369, + "step": 12852 + }, + { + "epoch": 35.31043956043956, + "grad_norm": 5.893666744232178, + "learning_rate": 3.234478021978022e-05, + "loss": 0.0902, + "step": 12853 + }, + { + "epoch": 35.31318681318681, + "grad_norm": 9.979937553405762, + "learning_rate": 3.234340659340659e-05, + "loss": 0.3366, + "step": 12854 + }, + { + "epoch": 35.315934065934066, + "grad_norm": 13.562263488769531, + "learning_rate": 3.234203296703297e-05, + "loss": 0.2912, + "step": 12855 + }, + { + "epoch": 35.31868131868132, + "grad_norm": 9.670639991760254, + "learning_rate": 3.234065934065934e-05, + "loss": 0.1443, + "step": 12856 + }, + { + "epoch": 35.32142857142857, + "grad_norm": 10.478339195251465, + "learning_rate": 3.2339285714285716e-05, + "loss": 0.1433, + "step": 12857 + }, + { + "epoch": 35.324175824175825, + "grad_norm": 19.886531829833984, + "learning_rate": 3.2337912087912086e-05, + "loss": 0.4988, + "step": 12858 + }, + { + "epoch": 35.32692307692308, + "grad_norm": 6.299717903137207, + "learning_rate": 3.233653846153846e-05, + "loss": 0.1267, + "step": 12859 + }, + { + "epoch": 35.32967032967033, + "grad_norm": 13.72131061553955, + "learning_rate": 3.233516483516484e-05, + "loss": 0.2685, + "step": 12860 + }, + { + "epoch": 35.332417582417584, + "grad_norm": 2.857940673828125, + "learning_rate": 3.233379120879121e-05, + "loss": 0.1013, + "step": 12861 + }, + { + "epoch": 35.33516483516483, + "grad_norm": 4.9627366065979, + "learning_rate": 3.233241758241759e-05, + "loss": 0.0457, + "step": 12862 + }, + { + "epoch": 35.33791208791209, + "grad_norm": 15.833224296569824, + "learning_rate": 3.233104395604396e-05, + "loss": 0.4628, + "step": 12863 + }, + { + "epoch": 35.34065934065934, + "grad_norm": 16.735530853271484, + "learning_rate": 3.2329670329670333e-05, + "loss": 0.4054, + "step": 12864 + }, + { + "epoch": 35.34340659340659, + "grad_norm": 10.055816650390625, + "learning_rate": 3.2328296703296703e-05, + "loss": 0.343, + "step": 12865 + }, + { + "epoch": 35.34615384615385, + "grad_norm": 13.76023006439209, + "learning_rate": 3.2326923076923074e-05, + "loss": 0.249, + "step": 12866 + }, + { + "epoch": 35.3489010989011, + "grad_norm": 11.823132514953613, + "learning_rate": 3.232554945054945e-05, + "loss": 0.2654, + "step": 12867 + }, + { + "epoch": 35.35164835164835, + "grad_norm": 8.288030624389648, + "learning_rate": 3.232417582417582e-05, + "loss": 0.1796, + "step": 12868 + }, + { + "epoch": 35.354395604395606, + "grad_norm": 17.36296844482422, + "learning_rate": 3.23228021978022e-05, + "loss": 0.411, + "step": 12869 + }, + { + "epoch": 35.357142857142854, + "grad_norm": 18.80947494506836, + "learning_rate": 3.2321428571428574e-05, + "loss": 0.6508, + "step": 12870 + }, + { + "epoch": 35.35989010989011, + "grad_norm": 18.439241409301758, + "learning_rate": 3.2320054945054944e-05, + "loss": 0.5082, + "step": 12871 + }, + { + "epoch": 35.362637362637365, + "grad_norm": 16.714330673217773, + "learning_rate": 3.231868131868132e-05, + "loss": 0.5748, + "step": 12872 + }, + { + "epoch": 35.36538461538461, + "grad_norm": 9.341724395751953, + "learning_rate": 3.231730769230769e-05, + "loss": 0.1727, + "step": 12873 + }, + { + "epoch": 35.36813186813187, + "grad_norm": 6.805890083312988, + "learning_rate": 3.231593406593407e-05, + "loss": 0.1077, + "step": 12874 + }, + { + "epoch": 35.370879120879124, + "grad_norm": 15.665677070617676, + "learning_rate": 3.2314560439560444e-05, + "loss": 0.4408, + "step": 12875 + }, + { + "epoch": 35.37362637362637, + "grad_norm": 9.097147941589355, + "learning_rate": 3.2313186813186814e-05, + "loss": 0.213, + "step": 12876 + }, + { + "epoch": 35.37637362637363, + "grad_norm": 17.670799255371094, + "learning_rate": 3.231181318681319e-05, + "loss": 0.4255, + "step": 12877 + }, + { + "epoch": 35.379120879120876, + "grad_norm": 14.6049222946167, + "learning_rate": 3.231043956043956e-05, + "loss": 0.4699, + "step": 12878 + }, + { + "epoch": 35.38186813186813, + "grad_norm": 13.83862590789795, + "learning_rate": 3.230906593406594e-05, + "loss": 0.4429, + "step": 12879 + }, + { + "epoch": 35.38461538461539, + "grad_norm": 13.177950859069824, + "learning_rate": 3.230769230769231e-05, + "loss": 0.2862, + "step": 12880 + }, + { + "epoch": 35.387362637362635, + "grad_norm": 6.200407028198242, + "learning_rate": 3.230631868131868e-05, + "loss": 0.132, + "step": 12881 + }, + { + "epoch": 35.39010989010989, + "grad_norm": 9.774129867553711, + "learning_rate": 3.2304945054945055e-05, + "loss": 0.1838, + "step": 12882 + }, + { + "epoch": 35.392857142857146, + "grad_norm": 12.698637962341309, + "learning_rate": 3.2303571428571425e-05, + "loss": 0.3658, + "step": 12883 + }, + { + "epoch": 35.395604395604394, + "grad_norm": 20.218605041503906, + "learning_rate": 3.23021978021978e-05, + "loss": 0.5481, + "step": 12884 + }, + { + "epoch": 35.39835164835165, + "grad_norm": 11.738119125366211, + "learning_rate": 3.230082417582418e-05, + "loss": 0.2435, + "step": 12885 + }, + { + "epoch": 35.4010989010989, + "grad_norm": 15.888827323913574, + "learning_rate": 3.229945054945055e-05, + "loss": 0.5068, + "step": 12886 + }, + { + "epoch": 35.40384615384615, + "grad_norm": 7.162083148956299, + "learning_rate": 3.2298076923076925e-05, + "loss": 0.0895, + "step": 12887 + }, + { + "epoch": 35.40659340659341, + "grad_norm": 7.457101345062256, + "learning_rate": 3.2296703296703295e-05, + "loss": 0.1369, + "step": 12888 + }, + { + "epoch": 35.40934065934066, + "grad_norm": 9.28969955444336, + "learning_rate": 3.229532967032967e-05, + "loss": 0.2997, + "step": 12889 + }, + { + "epoch": 35.41208791208791, + "grad_norm": 9.082874298095703, + "learning_rate": 3.229395604395605e-05, + "loss": 0.2406, + "step": 12890 + }, + { + "epoch": 35.41483516483517, + "grad_norm": 10.40585708618164, + "learning_rate": 3.229258241758242e-05, + "loss": 0.1399, + "step": 12891 + }, + { + "epoch": 35.417582417582416, + "grad_norm": 15.340827941894531, + "learning_rate": 3.2291208791208796e-05, + "loss": 0.4731, + "step": 12892 + }, + { + "epoch": 35.42032967032967, + "grad_norm": 15.490094184875488, + "learning_rate": 3.2289835164835166e-05, + "loss": 0.3253, + "step": 12893 + }, + { + "epoch": 35.42307692307692, + "grad_norm": 11.752588272094727, + "learning_rate": 3.228846153846154e-05, + "loss": 0.2908, + "step": 12894 + }, + { + "epoch": 35.425824175824175, + "grad_norm": 10.863763809204102, + "learning_rate": 3.228708791208791e-05, + "loss": 0.1539, + "step": 12895 + }, + { + "epoch": 35.42857142857143, + "grad_norm": 7.351193428039551, + "learning_rate": 3.228571428571428e-05, + "loss": 0.1564, + "step": 12896 + }, + { + "epoch": 35.43131868131868, + "grad_norm": 14.20709228515625, + "learning_rate": 3.228434065934066e-05, + "loss": 0.2498, + "step": 12897 + }, + { + "epoch": 35.434065934065934, + "grad_norm": 10.777547836303711, + "learning_rate": 3.228296703296703e-05, + "loss": 0.2439, + "step": 12898 + }, + { + "epoch": 35.43681318681319, + "grad_norm": 6.848024845123291, + "learning_rate": 3.2281593406593406e-05, + "loss": 0.1476, + "step": 12899 + }, + { + "epoch": 35.43956043956044, + "grad_norm": 8.829085350036621, + "learning_rate": 3.228021978021978e-05, + "loss": 0.1417, + "step": 12900 + }, + { + "epoch": 35.44230769230769, + "grad_norm": 13.404438018798828, + "learning_rate": 3.227884615384615e-05, + "loss": 0.3614, + "step": 12901 + }, + { + "epoch": 35.44505494505494, + "grad_norm": 11.551517486572266, + "learning_rate": 3.227747252747253e-05, + "loss": 0.2447, + "step": 12902 + }, + { + "epoch": 35.4478021978022, + "grad_norm": 7.485843658447266, + "learning_rate": 3.22760989010989e-05, + "loss": 0.1646, + "step": 12903 + }, + { + "epoch": 35.45054945054945, + "grad_norm": 12.116799354553223, + "learning_rate": 3.2274725274725277e-05, + "loss": 0.2981, + "step": 12904 + }, + { + "epoch": 35.4532967032967, + "grad_norm": 7.733409881591797, + "learning_rate": 3.227335164835165e-05, + "loss": 0.1353, + "step": 12905 + }, + { + "epoch": 35.456043956043956, + "grad_norm": 17.605289459228516, + "learning_rate": 3.227197802197802e-05, + "loss": 0.4784, + "step": 12906 + }, + { + "epoch": 35.45879120879121, + "grad_norm": 14.289511680603027, + "learning_rate": 3.22706043956044e-05, + "loss": 0.4092, + "step": 12907 + }, + { + "epoch": 35.46153846153846, + "grad_norm": 9.201033592224121, + "learning_rate": 3.226923076923077e-05, + "loss": 0.2154, + "step": 12908 + }, + { + "epoch": 35.464285714285715, + "grad_norm": 8.71553897857666, + "learning_rate": 3.226785714285715e-05, + "loss": 0.1667, + "step": 12909 + }, + { + "epoch": 35.467032967032964, + "grad_norm": 10.690357208251953, + "learning_rate": 3.226648351648352e-05, + "loss": 0.1926, + "step": 12910 + }, + { + "epoch": 35.46978021978022, + "grad_norm": 9.333638191223145, + "learning_rate": 3.226510989010989e-05, + "loss": 0.2125, + "step": 12911 + }, + { + "epoch": 35.472527472527474, + "grad_norm": 7.807379722595215, + "learning_rate": 3.2263736263736264e-05, + "loss": 0.1229, + "step": 12912 + }, + { + "epoch": 35.47527472527472, + "grad_norm": 17.062572479248047, + "learning_rate": 3.2262362637362634e-05, + "loss": 0.4234, + "step": 12913 + }, + { + "epoch": 35.47802197802198, + "grad_norm": 20.780168533325195, + "learning_rate": 3.226098901098901e-05, + "loss": 0.7954, + "step": 12914 + }, + { + "epoch": 35.48076923076923, + "grad_norm": 10.110066413879395, + "learning_rate": 3.225961538461539e-05, + "loss": 0.1965, + "step": 12915 + }, + { + "epoch": 35.48351648351648, + "grad_norm": 12.34604263305664, + "learning_rate": 3.225824175824176e-05, + "loss": 0.272, + "step": 12916 + }, + { + "epoch": 35.48626373626374, + "grad_norm": 20.53064727783203, + "learning_rate": 3.2256868131868134e-05, + "loss": 0.6314, + "step": 12917 + }, + { + "epoch": 35.489010989010985, + "grad_norm": 8.148435592651367, + "learning_rate": 3.2255494505494504e-05, + "loss": 0.2142, + "step": 12918 + }, + { + "epoch": 35.49175824175824, + "grad_norm": 7.586731910705566, + "learning_rate": 3.225412087912088e-05, + "loss": 0.1034, + "step": 12919 + }, + { + "epoch": 35.494505494505496, + "grad_norm": 8.896012306213379, + "learning_rate": 3.225274725274726e-05, + "loss": 0.1322, + "step": 12920 + }, + { + "epoch": 35.497252747252745, + "grad_norm": 10.533515930175781, + "learning_rate": 3.225137362637363e-05, + "loss": 0.3341, + "step": 12921 + }, + { + "epoch": 35.5, + "grad_norm": 21.1617431640625, + "learning_rate": 3.2250000000000005e-05, + "loss": 0.6493, + "step": 12922 + }, + { + "epoch": 35.502747252747255, + "grad_norm": 15.904041290283203, + "learning_rate": 3.2248626373626375e-05, + "loss": 0.2444, + "step": 12923 + }, + { + "epoch": 35.505494505494504, + "grad_norm": 12.734625816345215, + "learning_rate": 3.224725274725275e-05, + "loss": 0.268, + "step": 12924 + }, + { + "epoch": 35.50824175824176, + "grad_norm": 10.124889373779297, + "learning_rate": 3.224587912087912e-05, + "loss": 0.3761, + "step": 12925 + }, + { + "epoch": 35.51098901098901, + "grad_norm": 9.469136238098145, + "learning_rate": 3.224450549450549e-05, + "loss": 0.2432, + "step": 12926 + }, + { + "epoch": 35.51373626373626, + "grad_norm": 15.596468925476074, + "learning_rate": 3.224313186813187e-05, + "loss": 0.2474, + "step": 12927 + }, + { + "epoch": 35.51648351648352, + "grad_norm": 7.853600025177002, + "learning_rate": 3.224175824175824e-05, + "loss": 0.1788, + "step": 12928 + }, + { + "epoch": 35.51923076923077, + "grad_norm": 18.924091339111328, + "learning_rate": 3.2240384615384615e-05, + "loss": 0.4631, + "step": 12929 + }, + { + "epoch": 35.52197802197802, + "grad_norm": 10.98973274230957, + "learning_rate": 3.223901098901099e-05, + "loss": 0.3138, + "step": 12930 + }, + { + "epoch": 35.52472527472528, + "grad_norm": 23.797714233398438, + "learning_rate": 3.223763736263736e-05, + "loss": 0.8283, + "step": 12931 + }, + { + "epoch": 35.527472527472526, + "grad_norm": 8.957468032836914, + "learning_rate": 3.223626373626374e-05, + "loss": 0.2616, + "step": 12932 + }, + { + "epoch": 35.53021978021978, + "grad_norm": 14.746482849121094, + "learning_rate": 3.223489010989011e-05, + "loss": 0.4524, + "step": 12933 + }, + { + "epoch": 35.532967032967036, + "grad_norm": 9.089290618896484, + "learning_rate": 3.2233516483516486e-05, + "loss": 0.2274, + "step": 12934 + }, + { + "epoch": 35.535714285714285, + "grad_norm": 16.11282730102539, + "learning_rate": 3.223214285714286e-05, + "loss": 0.4519, + "step": 12935 + }, + { + "epoch": 35.53846153846154, + "grad_norm": 13.793639183044434, + "learning_rate": 3.223076923076923e-05, + "loss": 0.2326, + "step": 12936 + }, + { + "epoch": 35.54120879120879, + "grad_norm": 15.06219482421875, + "learning_rate": 3.222939560439561e-05, + "loss": 0.3784, + "step": 12937 + }, + { + "epoch": 35.543956043956044, + "grad_norm": 11.312755584716797, + "learning_rate": 3.222802197802198e-05, + "loss": 0.2399, + "step": 12938 + }, + { + "epoch": 35.5467032967033, + "grad_norm": 15.549696922302246, + "learning_rate": 3.2226648351648356e-05, + "loss": 0.4151, + "step": 12939 + }, + { + "epoch": 35.54945054945055, + "grad_norm": 19.513221740722656, + "learning_rate": 3.2225274725274726e-05, + "loss": 0.5339, + "step": 12940 + }, + { + "epoch": 35.5521978021978, + "grad_norm": 5.330185413360596, + "learning_rate": 3.2223901098901096e-05, + "loss": 0.1008, + "step": 12941 + }, + { + "epoch": 35.55494505494506, + "grad_norm": 8.746147155761719, + "learning_rate": 3.222252747252747e-05, + "loss": 0.347, + "step": 12942 + }, + { + "epoch": 35.55769230769231, + "grad_norm": 10.589864730834961, + "learning_rate": 3.222115384615384e-05, + "loss": 0.1966, + "step": 12943 + }, + { + "epoch": 35.56043956043956, + "grad_norm": 8.938276290893555, + "learning_rate": 3.221978021978022e-05, + "loss": 0.213, + "step": 12944 + }, + { + "epoch": 35.56318681318681, + "grad_norm": 7.883904457092285, + "learning_rate": 3.2218406593406596e-05, + "loss": 0.1493, + "step": 12945 + }, + { + "epoch": 35.565934065934066, + "grad_norm": 6.987889766693115, + "learning_rate": 3.2217032967032966e-05, + "loss": 0.0919, + "step": 12946 + }, + { + "epoch": 35.56868131868132, + "grad_norm": 15.596853256225586, + "learning_rate": 3.221565934065934e-05, + "loss": 0.3926, + "step": 12947 + }, + { + "epoch": 35.57142857142857, + "grad_norm": 9.907328605651855, + "learning_rate": 3.221428571428571e-05, + "loss": 0.1487, + "step": 12948 + }, + { + "epoch": 35.574175824175825, + "grad_norm": 12.420767784118652, + "learning_rate": 3.221291208791209e-05, + "loss": 0.2404, + "step": 12949 + }, + { + "epoch": 35.57692307692308, + "grad_norm": 10.224778175354004, + "learning_rate": 3.221153846153847e-05, + "loss": 0.2591, + "step": 12950 + }, + { + "epoch": 35.57967032967033, + "grad_norm": 13.744011878967285, + "learning_rate": 3.221016483516484e-05, + "loss": 0.3968, + "step": 12951 + }, + { + "epoch": 35.582417582417584, + "grad_norm": 5.974354267120361, + "learning_rate": 3.2208791208791214e-05, + "loss": 0.0975, + "step": 12952 + }, + { + "epoch": 35.58516483516483, + "grad_norm": 5.782031059265137, + "learning_rate": 3.2207417582417584e-05, + "loss": 0.1557, + "step": 12953 + }, + { + "epoch": 35.58791208791209, + "grad_norm": 1.250280499458313, + "learning_rate": 3.220604395604396e-05, + "loss": 0.0251, + "step": 12954 + }, + { + "epoch": 35.59065934065934, + "grad_norm": 5.1284260749816895, + "learning_rate": 3.220467032967033e-05, + "loss": 0.0925, + "step": 12955 + }, + { + "epoch": 35.59340659340659, + "grad_norm": 18.31217384338379, + "learning_rate": 3.22032967032967e-05, + "loss": 0.3667, + "step": 12956 + }, + { + "epoch": 35.59615384615385, + "grad_norm": 9.10904598236084, + "learning_rate": 3.220192307692308e-05, + "loss": 0.1843, + "step": 12957 + }, + { + "epoch": 35.5989010989011, + "grad_norm": 21.022348403930664, + "learning_rate": 3.220054945054945e-05, + "loss": 0.565, + "step": 12958 + }, + { + "epoch": 35.60164835164835, + "grad_norm": 15.277626037597656, + "learning_rate": 3.2199175824175824e-05, + "loss": 0.3686, + "step": 12959 + }, + { + "epoch": 35.604395604395606, + "grad_norm": 16.465133666992188, + "learning_rate": 3.21978021978022e-05, + "loss": 0.3268, + "step": 12960 + }, + { + "epoch": 35.607142857142854, + "grad_norm": 8.846321105957031, + "learning_rate": 3.219642857142857e-05, + "loss": 0.1607, + "step": 12961 + }, + { + "epoch": 35.60989010989011, + "grad_norm": 10.737138748168945, + "learning_rate": 3.219505494505495e-05, + "loss": 0.1929, + "step": 12962 + }, + { + "epoch": 35.612637362637365, + "grad_norm": 7.66893196105957, + "learning_rate": 3.219368131868132e-05, + "loss": 0.1355, + "step": 12963 + }, + { + "epoch": 35.61538461538461, + "grad_norm": 16.29692268371582, + "learning_rate": 3.2192307692307694e-05, + "loss": 0.4077, + "step": 12964 + }, + { + "epoch": 35.61813186813187, + "grad_norm": 14.993453025817871, + "learning_rate": 3.219093406593407e-05, + "loss": 0.606, + "step": 12965 + }, + { + "epoch": 35.620879120879124, + "grad_norm": 19.00874900817871, + "learning_rate": 3.218956043956044e-05, + "loss": 0.7825, + "step": 12966 + }, + { + "epoch": 35.62362637362637, + "grad_norm": 5.7938337326049805, + "learning_rate": 3.218818681318682e-05, + "loss": 0.1742, + "step": 12967 + }, + { + "epoch": 35.62637362637363, + "grad_norm": 17.546342849731445, + "learning_rate": 3.218681318681319e-05, + "loss": 0.631, + "step": 12968 + }, + { + "epoch": 35.629120879120876, + "grad_norm": 9.270575523376465, + "learning_rate": 3.2185439560439565e-05, + "loss": 0.1741, + "step": 12969 + }, + { + "epoch": 35.63186813186813, + "grad_norm": 10.390091896057129, + "learning_rate": 3.2184065934065935e-05, + "loss": 0.2265, + "step": 12970 + }, + { + "epoch": 35.63461538461539, + "grad_norm": 25.313678741455078, + "learning_rate": 3.2182692307692305e-05, + "loss": 1.0179, + "step": 12971 + }, + { + "epoch": 35.637362637362635, + "grad_norm": 18.692354202270508, + "learning_rate": 3.218131868131868e-05, + "loss": 0.6537, + "step": 12972 + }, + { + "epoch": 35.64010989010989, + "grad_norm": 16.571319580078125, + "learning_rate": 3.217994505494505e-05, + "loss": 0.6582, + "step": 12973 + }, + { + "epoch": 35.642857142857146, + "grad_norm": 20.42622947692871, + "learning_rate": 3.217857142857143e-05, + "loss": 0.7036, + "step": 12974 + }, + { + "epoch": 35.645604395604394, + "grad_norm": 17.871572494506836, + "learning_rate": 3.2177197802197805e-05, + "loss": 0.913, + "step": 12975 + }, + { + "epoch": 35.64835164835165, + "grad_norm": 11.919916152954102, + "learning_rate": 3.2175824175824175e-05, + "loss": 0.2645, + "step": 12976 + }, + { + "epoch": 35.6510989010989, + "grad_norm": 15.344280242919922, + "learning_rate": 3.217445054945055e-05, + "loss": 0.3775, + "step": 12977 + }, + { + "epoch": 35.65384615384615, + "grad_norm": 16.255413055419922, + "learning_rate": 3.217307692307692e-05, + "loss": 0.534, + "step": 12978 + }, + { + "epoch": 35.65659340659341, + "grad_norm": 12.556218147277832, + "learning_rate": 3.21717032967033e-05, + "loss": 0.4172, + "step": 12979 + }, + { + "epoch": 35.65934065934066, + "grad_norm": 9.524975776672363, + "learning_rate": 3.2170329670329676e-05, + "loss": 0.2197, + "step": 12980 + }, + { + "epoch": 35.66208791208791, + "grad_norm": 16.541419982910156, + "learning_rate": 3.2168956043956046e-05, + "loss": 0.6753, + "step": 12981 + }, + { + "epoch": 35.66483516483517, + "grad_norm": 6.67352294921875, + "learning_rate": 3.216758241758242e-05, + "loss": 0.1224, + "step": 12982 + }, + { + "epoch": 35.667582417582416, + "grad_norm": 4.794471263885498, + "learning_rate": 3.216620879120879e-05, + "loss": 0.1324, + "step": 12983 + }, + { + "epoch": 35.67032967032967, + "grad_norm": 9.553632736206055, + "learning_rate": 3.216483516483517e-05, + "loss": 0.2412, + "step": 12984 + }, + { + "epoch": 35.67307692307692, + "grad_norm": 11.00250244140625, + "learning_rate": 3.216346153846154e-05, + "loss": 0.3514, + "step": 12985 + }, + { + "epoch": 35.675824175824175, + "grad_norm": 14.891139030456543, + "learning_rate": 3.216208791208791e-05, + "loss": 0.355, + "step": 12986 + }, + { + "epoch": 35.67857142857143, + "grad_norm": 14.529645919799805, + "learning_rate": 3.2160714285714286e-05, + "loss": 0.329, + "step": 12987 + }, + { + "epoch": 35.68131868131868, + "grad_norm": 9.090361595153809, + "learning_rate": 3.2159340659340656e-05, + "loss": 0.2734, + "step": 12988 + }, + { + "epoch": 35.684065934065934, + "grad_norm": 16.881546020507812, + "learning_rate": 3.215796703296703e-05, + "loss": 0.4989, + "step": 12989 + }, + { + "epoch": 35.68681318681319, + "grad_norm": 9.129030227661133, + "learning_rate": 3.215659340659341e-05, + "loss": 0.2646, + "step": 12990 + }, + { + "epoch": 35.68956043956044, + "grad_norm": 7.263206481933594, + "learning_rate": 3.215521978021978e-05, + "loss": 0.0868, + "step": 12991 + }, + { + "epoch": 35.69230769230769, + "grad_norm": 8.890893936157227, + "learning_rate": 3.215384615384616e-05, + "loss": 0.1251, + "step": 12992 + }, + { + "epoch": 35.69505494505494, + "grad_norm": 11.309318542480469, + "learning_rate": 3.215247252747253e-05, + "loss": 0.3268, + "step": 12993 + }, + { + "epoch": 35.6978021978022, + "grad_norm": 17.221569061279297, + "learning_rate": 3.2151098901098903e-05, + "loss": 0.343, + "step": 12994 + }, + { + "epoch": 35.70054945054945, + "grad_norm": 15.934408187866211, + "learning_rate": 3.214972527472528e-05, + "loss": 0.5527, + "step": 12995 + }, + { + "epoch": 35.7032967032967, + "grad_norm": 16.08162498474121, + "learning_rate": 3.214835164835165e-05, + "loss": 0.5496, + "step": 12996 + }, + { + "epoch": 35.706043956043956, + "grad_norm": 2.8060925006866455, + "learning_rate": 3.214697802197803e-05, + "loss": 0.0662, + "step": 12997 + }, + { + "epoch": 35.70879120879121, + "grad_norm": 11.055123329162598, + "learning_rate": 3.21456043956044e-05, + "loss": 0.3542, + "step": 12998 + }, + { + "epoch": 35.71153846153846, + "grad_norm": 22.183809280395508, + "learning_rate": 3.2144230769230774e-05, + "loss": 0.5989, + "step": 12999 + }, + { + "epoch": 35.714285714285715, + "grad_norm": 11.907756805419922, + "learning_rate": 3.2142857142857144e-05, + "loss": 0.1876, + "step": 13000 + }, + { + "epoch": 35.717032967032964, + "grad_norm": 17.777664184570312, + "learning_rate": 3.2141483516483514e-05, + "loss": 0.5643, + "step": 13001 + }, + { + "epoch": 35.71978021978022, + "grad_norm": 10.873185157775879, + "learning_rate": 3.214010989010989e-05, + "loss": 0.2236, + "step": 13002 + }, + { + "epoch": 35.722527472527474, + "grad_norm": 22.7656192779541, + "learning_rate": 3.213873626373626e-05, + "loss": 0.502, + "step": 13003 + }, + { + "epoch": 35.72527472527472, + "grad_norm": 16.353837966918945, + "learning_rate": 3.213736263736264e-05, + "loss": 0.3922, + "step": 13004 + }, + { + "epoch": 35.72802197802198, + "grad_norm": 9.700897216796875, + "learning_rate": 3.2135989010989014e-05, + "loss": 0.1647, + "step": 13005 + }, + { + "epoch": 35.73076923076923, + "grad_norm": 16.97423553466797, + "learning_rate": 3.2134615384615384e-05, + "loss": 0.3534, + "step": 13006 + }, + { + "epoch": 35.73351648351648, + "grad_norm": 7.085005760192871, + "learning_rate": 3.213324175824176e-05, + "loss": 0.0587, + "step": 13007 + }, + { + "epoch": 35.73626373626374, + "grad_norm": 19.53965187072754, + "learning_rate": 3.213186813186813e-05, + "loss": 0.7604, + "step": 13008 + }, + { + "epoch": 35.73901098901099, + "grad_norm": 18.544315338134766, + "learning_rate": 3.213049450549451e-05, + "loss": 0.5596, + "step": 13009 + }, + { + "epoch": 35.74175824175824, + "grad_norm": 11.786535263061523, + "learning_rate": 3.2129120879120885e-05, + "loss": 0.3214, + "step": 13010 + }, + { + "epoch": 35.744505494505496, + "grad_norm": 18.356473922729492, + "learning_rate": 3.2127747252747255e-05, + "loss": 0.5595, + "step": 13011 + }, + { + "epoch": 35.747252747252745, + "grad_norm": 12.070859909057617, + "learning_rate": 3.212637362637363e-05, + "loss": 0.2717, + "step": 13012 + }, + { + "epoch": 35.75, + "grad_norm": 12.297151565551758, + "learning_rate": 3.2125e-05, + "loss": 0.4023, + "step": 13013 + }, + { + "epoch": 35.752747252747255, + "grad_norm": 8.379849433898926, + "learning_rate": 3.212362637362638e-05, + "loss": 0.186, + "step": 13014 + }, + { + "epoch": 35.755494505494504, + "grad_norm": 13.906859397888184, + "learning_rate": 3.212225274725275e-05, + "loss": 0.2362, + "step": 13015 + }, + { + "epoch": 35.75824175824176, + "grad_norm": 8.355998039245605, + "learning_rate": 3.212087912087912e-05, + "loss": 0.2263, + "step": 13016 + }, + { + "epoch": 35.76098901098901, + "grad_norm": 23.041894912719727, + "learning_rate": 3.2119505494505495e-05, + "loss": 0.6151, + "step": 13017 + }, + { + "epoch": 35.76373626373626, + "grad_norm": 6.869787693023682, + "learning_rate": 3.2118131868131865e-05, + "loss": 0.1715, + "step": 13018 + }, + { + "epoch": 35.76648351648352, + "grad_norm": 10.535178184509277, + "learning_rate": 3.211675824175824e-05, + "loss": 0.274, + "step": 13019 + }, + { + "epoch": 35.76923076923077, + "grad_norm": 10.574857711791992, + "learning_rate": 3.211538461538462e-05, + "loss": 0.3269, + "step": 13020 + }, + { + "epoch": 35.77197802197802, + "grad_norm": 14.350825309753418, + "learning_rate": 3.211401098901099e-05, + "loss": 0.5046, + "step": 13021 + }, + { + "epoch": 35.77472527472528, + "grad_norm": 11.176346778869629, + "learning_rate": 3.2112637362637366e-05, + "loss": 0.2045, + "step": 13022 + }, + { + "epoch": 35.777472527472526, + "grad_norm": 6.598123550415039, + "learning_rate": 3.2111263736263736e-05, + "loss": 0.256, + "step": 13023 + }, + { + "epoch": 35.78021978021978, + "grad_norm": 10.715229988098145, + "learning_rate": 3.210989010989011e-05, + "loss": 0.298, + "step": 13024 + }, + { + "epoch": 35.782967032967036, + "grad_norm": 10.373245239257812, + "learning_rate": 3.210851648351649e-05, + "loss": 0.2409, + "step": 13025 + }, + { + "epoch": 35.785714285714285, + "grad_norm": 9.276640892028809, + "learning_rate": 3.210714285714286e-05, + "loss": 0.2182, + "step": 13026 + }, + { + "epoch": 35.78846153846154, + "grad_norm": 12.510113716125488, + "learning_rate": 3.2105769230769236e-05, + "loss": 0.2581, + "step": 13027 + }, + { + "epoch": 35.79120879120879, + "grad_norm": 22.53425407409668, + "learning_rate": 3.2104395604395606e-05, + "loss": 0.4129, + "step": 13028 + }, + { + "epoch": 35.793956043956044, + "grad_norm": 4.074004650115967, + "learning_rate": 3.210302197802198e-05, + "loss": 0.0835, + "step": 13029 + }, + { + "epoch": 35.7967032967033, + "grad_norm": 16.17964744567871, + "learning_rate": 3.210164835164835e-05, + "loss": 0.306, + "step": 13030 + }, + { + "epoch": 35.79945054945055, + "grad_norm": 8.2380952835083, + "learning_rate": 3.210027472527472e-05, + "loss": 0.1962, + "step": 13031 + }, + { + "epoch": 35.8021978021978, + "grad_norm": 18.506643295288086, + "learning_rate": 3.20989010989011e-05, + "loss": 0.9498, + "step": 13032 + }, + { + "epoch": 35.80494505494506, + "grad_norm": 6.437776565551758, + "learning_rate": 3.209752747252747e-05, + "loss": 0.1352, + "step": 13033 + }, + { + "epoch": 35.80769230769231, + "grad_norm": 17.594343185424805, + "learning_rate": 3.2096153846153847e-05, + "loss": 0.4548, + "step": 13034 + }, + { + "epoch": 35.81043956043956, + "grad_norm": 14.813669204711914, + "learning_rate": 3.209478021978022e-05, + "loss": 0.3484, + "step": 13035 + }, + { + "epoch": 35.81318681318681, + "grad_norm": 9.378499984741211, + "learning_rate": 3.209340659340659e-05, + "loss": 0.2201, + "step": 13036 + }, + { + "epoch": 35.815934065934066, + "grad_norm": 8.25863265991211, + "learning_rate": 3.209203296703297e-05, + "loss": 0.1601, + "step": 13037 + }, + { + "epoch": 35.81868131868132, + "grad_norm": 15.953432083129883, + "learning_rate": 3.209065934065934e-05, + "loss": 0.4454, + "step": 13038 + }, + { + "epoch": 35.82142857142857, + "grad_norm": 15.453121185302734, + "learning_rate": 3.208928571428572e-05, + "loss": 0.3918, + "step": 13039 + }, + { + "epoch": 35.824175824175825, + "grad_norm": 13.753304481506348, + "learning_rate": 3.2087912087912094e-05, + "loss": 0.296, + "step": 13040 + }, + { + "epoch": 35.82692307692308, + "grad_norm": 6.135345935821533, + "learning_rate": 3.2086538461538464e-05, + "loss": 0.1817, + "step": 13041 + }, + { + "epoch": 35.82967032967033, + "grad_norm": 12.705872535705566, + "learning_rate": 3.208516483516484e-05, + "loss": 0.4603, + "step": 13042 + }, + { + "epoch": 35.832417582417584, + "grad_norm": 12.134273529052734, + "learning_rate": 3.208379120879121e-05, + "loss": 0.2824, + "step": 13043 + }, + { + "epoch": 35.83516483516483, + "grad_norm": 12.930255889892578, + "learning_rate": 3.208241758241759e-05, + "loss": 0.3333, + "step": 13044 + }, + { + "epoch": 35.83791208791209, + "grad_norm": 14.80800724029541, + "learning_rate": 3.208104395604396e-05, + "loss": 0.7966, + "step": 13045 + }, + { + "epoch": 35.84065934065934, + "grad_norm": 13.184611320495605, + "learning_rate": 3.207967032967033e-05, + "loss": 0.3174, + "step": 13046 + }, + { + "epoch": 35.84340659340659, + "grad_norm": 15.890300750732422, + "learning_rate": 3.2078296703296704e-05, + "loss": 0.4775, + "step": 13047 + }, + { + "epoch": 35.84615384615385, + "grad_norm": 9.90931510925293, + "learning_rate": 3.2076923076923074e-05, + "loss": 0.442, + "step": 13048 + }, + { + "epoch": 35.8489010989011, + "grad_norm": 14.629691123962402, + "learning_rate": 3.207554945054945e-05, + "loss": 0.4913, + "step": 13049 + }, + { + "epoch": 35.85164835164835, + "grad_norm": 17.1556339263916, + "learning_rate": 3.207417582417583e-05, + "loss": 0.4012, + "step": 13050 + }, + { + "epoch": 35.854395604395606, + "grad_norm": 12.753549575805664, + "learning_rate": 3.20728021978022e-05, + "loss": 0.3489, + "step": 13051 + }, + { + "epoch": 35.857142857142854, + "grad_norm": 17.118228912353516, + "learning_rate": 3.2071428571428575e-05, + "loss": 0.5224, + "step": 13052 + }, + { + "epoch": 35.85989010989011, + "grad_norm": 9.29394245147705, + "learning_rate": 3.2070054945054945e-05, + "loss": 0.1608, + "step": 13053 + }, + { + "epoch": 35.862637362637365, + "grad_norm": 8.191936492919922, + "learning_rate": 3.206868131868132e-05, + "loss": 0.2047, + "step": 13054 + }, + { + "epoch": 35.86538461538461, + "grad_norm": 15.101533889770508, + "learning_rate": 3.20673076923077e-05, + "loss": 0.3327, + "step": 13055 + }, + { + "epoch": 35.86813186813187, + "grad_norm": 14.124176979064941, + "learning_rate": 3.206593406593407e-05, + "loss": 0.2456, + "step": 13056 + }, + { + "epoch": 35.870879120879124, + "grad_norm": 11.136978149414062, + "learning_rate": 3.2064560439560445e-05, + "loss": 0.2193, + "step": 13057 + }, + { + "epoch": 35.87362637362637, + "grad_norm": 18.881254196166992, + "learning_rate": 3.2063186813186815e-05, + "loss": 0.7159, + "step": 13058 + }, + { + "epoch": 35.87637362637363, + "grad_norm": 4.578984260559082, + "learning_rate": 3.206181318681319e-05, + "loss": 0.0792, + "step": 13059 + }, + { + "epoch": 35.879120879120876, + "grad_norm": 8.247998237609863, + "learning_rate": 3.206043956043956e-05, + "loss": 0.1462, + "step": 13060 + }, + { + "epoch": 35.88186813186813, + "grad_norm": 12.904096603393555, + "learning_rate": 3.205906593406593e-05, + "loss": 0.347, + "step": 13061 + }, + { + "epoch": 35.88461538461539, + "grad_norm": 7.33525276184082, + "learning_rate": 3.205769230769231e-05, + "loss": 0.1313, + "step": 13062 + }, + { + "epoch": 35.887362637362635, + "grad_norm": 3.5883238315582275, + "learning_rate": 3.205631868131868e-05, + "loss": 0.0818, + "step": 13063 + }, + { + "epoch": 35.89010989010989, + "grad_norm": 12.87623119354248, + "learning_rate": 3.2054945054945056e-05, + "loss": 0.5089, + "step": 13064 + }, + { + "epoch": 35.892857142857146, + "grad_norm": 8.698254585266113, + "learning_rate": 3.205357142857143e-05, + "loss": 0.2588, + "step": 13065 + }, + { + "epoch": 35.895604395604394, + "grad_norm": 13.820755958557129, + "learning_rate": 3.20521978021978e-05, + "loss": 0.2742, + "step": 13066 + }, + { + "epoch": 35.89835164835165, + "grad_norm": 5.567465782165527, + "learning_rate": 3.205082417582418e-05, + "loss": 0.1263, + "step": 13067 + }, + { + "epoch": 35.9010989010989, + "grad_norm": 11.319364547729492, + "learning_rate": 3.204945054945055e-05, + "loss": 0.1317, + "step": 13068 + }, + { + "epoch": 35.90384615384615, + "grad_norm": 13.281635284423828, + "learning_rate": 3.2048076923076926e-05, + "loss": 0.3104, + "step": 13069 + }, + { + "epoch": 35.90659340659341, + "grad_norm": 8.042401313781738, + "learning_rate": 3.20467032967033e-05, + "loss": 0.1367, + "step": 13070 + }, + { + "epoch": 35.90934065934066, + "grad_norm": 5.026780128479004, + "learning_rate": 3.204532967032967e-05, + "loss": 0.0876, + "step": 13071 + }, + { + "epoch": 35.91208791208791, + "grad_norm": 4.215890407562256, + "learning_rate": 3.204395604395605e-05, + "loss": 0.0956, + "step": 13072 + }, + { + "epoch": 35.91483516483517, + "grad_norm": 14.242514610290527, + "learning_rate": 3.204258241758242e-05, + "loss": 0.4249, + "step": 13073 + }, + { + "epoch": 35.917582417582416, + "grad_norm": 22.650157928466797, + "learning_rate": 3.2041208791208796e-05, + "loss": 0.623, + "step": 13074 + }, + { + "epoch": 35.92032967032967, + "grad_norm": 15.535724639892578, + "learning_rate": 3.2039835164835166e-05, + "loss": 0.4145, + "step": 13075 + }, + { + "epoch": 35.92307692307692, + "grad_norm": 11.62155532836914, + "learning_rate": 3.2038461538461536e-05, + "loss": 0.2009, + "step": 13076 + }, + { + "epoch": 35.925824175824175, + "grad_norm": 12.710423469543457, + "learning_rate": 3.203708791208791e-05, + "loss": 0.2847, + "step": 13077 + }, + { + "epoch": 35.92857142857143, + "grad_norm": 7.8793253898620605, + "learning_rate": 3.203571428571428e-05, + "loss": 0.0782, + "step": 13078 + }, + { + "epoch": 35.93131868131868, + "grad_norm": 7.507936000823975, + "learning_rate": 3.203434065934066e-05, + "loss": 0.1113, + "step": 13079 + }, + { + "epoch": 35.934065934065934, + "grad_norm": 15.152633666992188, + "learning_rate": 3.203296703296704e-05, + "loss": 0.4227, + "step": 13080 + }, + { + "epoch": 35.93681318681319, + "grad_norm": 23.438379287719727, + "learning_rate": 3.203159340659341e-05, + "loss": 0.689, + "step": 13081 + }, + { + "epoch": 35.93956043956044, + "grad_norm": 8.891800880432129, + "learning_rate": 3.2030219780219784e-05, + "loss": 0.2262, + "step": 13082 + }, + { + "epoch": 35.94230769230769, + "grad_norm": 7.118220329284668, + "learning_rate": 3.2028846153846154e-05, + "loss": 0.2086, + "step": 13083 + }, + { + "epoch": 35.94505494505494, + "grad_norm": 20.5974063873291, + "learning_rate": 3.202747252747253e-05, + "loss": 0.853, + "step": 13084 + }, + { + "epoch": 35.9478021978022, + "grad_norm": 14.149443626403809, + "learning_rate": 3.20260989010989e-05, + "loss": 0.3952, + "step": 13085 + }, + { + "epoch": 35.95054945054945, + "grad_norm": 8.845590591430664, + "learning_rate": 3.202472527472528e-05, + "loss": 0.304, + "step": 13086 + }, + { + "epoch": 35.9532967032967, + "grad_norm": 3.780184268951416, + "learning_rate": 3.2023351648351654e-05, + "loss": 0.0631, + "step": 13087 + }, + { + "epoch": 35.956043956043956, + "grad_norm": 11.055135726928711, + "learning_rate": 3.2021978021978024e-05, + "loss": 0.3568, + "step": 13088 + }, + { + "epoch": 35.95879120879121, + "grad_norm": 12.584773063659668, + "learning_rate": 3.20206043956044e-05, + "loss": 0.2834, + "step": 13089 + }, + { + "epoch": 35.96153846153846, + "grad_norm": 17.24500274658203, + "learning_rate": 3.201923076923077e-05, + "loss": 0.3286, + "step": 13090 + }, + { + "epoch": 35.964285714285715, + "grad_norm": 7.947257995605469, + "learning_rate": 3.201785714285714e-05, + "loss": 0.1707, + "step": 13091 + }, + { + "epoch": 35.967032967032964, + "grad_norm": 10.13821029663086, + "learning_rate": 3.201648351648352e-05, + "loss": 0.1793, + "step": 13092 + }, + { + "epoch": 35.96978021978022, + "grad_norm": 13.677995681762695, + "learning_rate": 3.201510989010989e-05, + "loss": 0.2389, + "step": 13093 + }, + { + "epoch": 35.972527472527474, + "grad_norm": 14.559040069580078, + "learning_rate": 3.2013736263736264e-05, + "loss": 0.497, + "step": 13094 + }, + { + "epoch": 35.97527472527472, + "grad_norm": 3.9349284172058105, + "learning_rate": 3.2012362637362635e-05, + "loss": 0.0536, + "step": 13095 + }, + { + "epoch": 35.97802197802198, + "grad_norm": 9.698318481445312, + "learning_rate": 3.201098901098901e-05, + "loss": 0.2115, + "step": 13096 + }, + { + "epoch": 35.98076923076923, + "grad_norm": 5.594090938568115, + "learning_rate": 3.200961538461539e-05, + "loss": 0.0785, + "step": 13097 + }, + { + "epoch": 35.98351648351648, + "grad_norm": 17.41702651977539, + "learning_rate": 3.200824175824176e-05, + "loss": 0.2803, + "step": 13098 + }, + { + "epoch": 35.98626373626374, + "grad_norm": 15.988236427307129, + "learning_rate": 3.2006868131868135e-05, + "loss": 0.3116, + "step": 13099 + }, + { + "epoch": 35.98901098901099, + "grad_norm": 12.237424850463867, + "learning_rate": 3.2005494505494505e-05, + "loss": 0.4299, + "step": 13100 + }, + { + "epoch": 35.99175824175824, + "grad_norm": 12.03571605682373, + "learning_rate": 3.200412087912088e-05, + "loss": 0.2191, + "step": 13101 + }, + { + "epoch": 35.994505494505496, + "grad_norm": 11.959112167358398, + "learning_rate": 3.200274725274726e-05, + "loss": 0.239, + "step": 13102 + }, + { + "epoch": 35.997252747252745, + "grad_norm": 14.06458568572998, + "learning_rate": 3.200137362637363e-05, + "loss": 0.4843, + "step": 13103 + }, + { + "epoch": 36.0, + "grad_norm": 47.81563949584961, + "learning_rate": 3.2000000000000005e-05, + "loss": 4.9934, + "step": 13104 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.6859504132231405, + "eval_f1": 0.7066692461775025, + "eval_f1_DuraRiadoRio_64x64": 0.7557251908396947, + "eval_f1_Mole_64x64": 0.8, + "eval_f1_Quebrado_64x64": 0.6861924686192469, + "eval_f1_RiadoRio_64x64": 0.5714285714285714, + "eval_f1_RioFechado_64x64": 0.72, + "eval_loss": 1.3484052419662476, + "eval_precision": 0.8082387745753957, + "eval_precision_DuraRiadoRio_64x64": 0.8389830508474576, + "eval_precision_Mole_64x64": 0.9433962264150944, + "eval_precision_Quebrado_64x64": 0.8631578947368421, + "eval_precision_RiadoRio_64x64": 0.41975308641975306, + "eval_precision_RioFechado_64x64": 0.9759036144578314, + "eval_recall": 0.683309653241084, + "eval_recall_DuraRiadoRio_64x64": 0.6875, + "eval_recall_Mole_64x64": 0.6944444444444444, + "eval_recall_Quebrado_64x64": 0.5694444444444444, + "eval_recall_RiadoRio_64x64": 0.8947368421052632, + "eval_recall_RioFechado_64x64": 0.5704225352112676, + "eval_runtime": 1.7509, + "eval_samples_per_second": 414.651, + "eval_steps_per_second": 26.273, + "step": 13104 + }, + { + "epoch": 36.002747252747255, + "grad_norm": 10.250346183776855, + "learning_rate": 3.1998626373626375e-05, + "loss": 0.2978, + "step": 13105 + }, + { + "epoch": 36.005494505494504, + "grad_norm": 6.062742233276367, + "learning_rate": 3.1997252747252745e-05, + "loss": 0.0728, + "step": 13106 + }, + { + "epoch": 36.00824175824176, + "grad_norm": 13.843907356262207, + "learning_rate": 3.199587912087912e-05, + "loss": 0.406, + "step": 13107 + }, + { + "epoch": 36.010989010989015, + "grad_norm": 11.805547714233398, + "learning_rate": 3.199450549450549e-05, + "loss": 0.3988, + "step": 13108 + }, + { + "epoch": 36.01373626373626, + "grad_norm": 7.087670803070068, + "learning_rate": 3.199313186813187e-05, + "loss": 0.1487, + "step": 13109 + }, + { + "epoch": 36.01648351648352, + "grad_norm": 17.31258201599121, + "learning_rate": 3.199175824175824e-05, + "loss": 0.5033, + "step": 13110 + }, + { + "epoch": 36.01923076923077, + "grad_norm": 11.40987491607666, + "learning_rate": 3.1990384615384616e-05, + "loss": 0.2646, + "step": 13111 + }, + { + "epoch": 36.02197802197802, + "grad_norm": 21.312044143676758, + "learning_rate": 3.198901098901099e-05, + "loss": 0.5008, + "step": 13112 + }, + { + "epoch": 36.02472527472528, + "grad_norm": 19.970199584960938, + "learning_rate": 3.198763736263736e-05, + "loss": 0.5885, + "step": 13113 + }, + { + "epoch": 36.027472527472526, + "grad_norm": 7.125096321105957, + "learning_rate": 3.198626373626374e-05, + "loss": 0.1015, + "step": 13114 + }, + { + "epoch": 36.03021978021978, + "grad_norm": 8.948369979858398, + "learning_rate": 3.198489010989011e-05, + "loss": 0.1492, + "step": 13115 + }, + { + "epoch": 36.032967032967036, + "grad_norm": 14.385478973388672, + "learning_rate": 3.1983516483516486e-05, + "loss": 0.3947, + "step": 13116 + }, + { + "epoch": 36.035714285714285, + "grad_norm": 14.151357650756836, + "learning_rate": 3.198214285714286e-05, + "loss": 0.4254, + "step": 13117 + }, + { + "epoch": 36.03846153846154, + "grad_norm": 4.446493148803711, + "learning_rate": 3.198076923076923e-05, + "loss": 0.106, + "step": 13118 + }, + { + "epoch": 36.04120879120879, + "grad_norm": 14.137346267700195, + "learning_rate": 3.197939560439561e-05, + "loss": 0.3253, + "step": 13119 + }, + { + "epoch": 36.043956043956044, + "grad_norm": 8.827303886413574, + "learning_rate": 3.197802197802198e-05, + "loss": 0.1159, + "step": 13120 + }, + { + "epoch": 36.0467032967033, + "grad_norm": 6.952269554138184, + "learning_rate": 3.197664835164835e-05, + "loss": 0.1005, + "step": 13121 + }, + { + "epoch": 36.04945054945055, + "grad_norm": 10.161571502685547, + "learning_rate": 3.197527472527473e-05, + "loss": 0.3134, + "step": 13122 + }, + { + "epoch": 36.0521978021978, + "grad_norm": 23.996828079223633, + "learning_rate": 3.19739010989011e-05, + "loss": 0.849, + "step": 13123 + }, + { + "epoch": 36.05494505494506, + "grad_norm": 7.303157806396484, + "learning_rate": 3.1972527472527473e-05, + "loss": 0.1846, + "step": 13124 + }, + { + "epoch": 36.05769230769231, + "grad_norm": 11.19271469116211, + "learning_rate": 3.1971153846153843e-05, + "loss": 0.3622, + "step": 13125 + }, + { + "epoch": 36.06043956043956, + "grad_norm": 15.341538429260254, + "learning_rate": 3.196978021978022e-05, + "loss": 0.3977, + "step": 13126 + }, + { + "epoch": 36.06318681318681, + "grad_norm": 4.709677696228027, + "learning_rate": 3.19684065934066e-05, + "loss": 0.0694, + "step": 13127 + }, + { + "epoch": 36.065934065934066, + "grad_norm": 5.702726364135742, + "learning_rate": 3.196703296703297e-05, + "loss": 0.0768, + "step": 13128 + }, + { + "epoch": 36.06868131868132, + "grad_norm": 12.709535598754883, + "learning_rate": 3.1965659340659344e-05, + "loss": 0.2443, + "step": 13129 + }, + { + "epoch": 36.07142857142857, + "grad_norm": 7.410712242126465, + "learning_rate": 3.1964285714285714e-05, + "loss": 0.2067, + "step": 13130 + }, + { + "epoch": 36.074175824175825, + "grad_norm": 6.177925109863281, + "learning_rate": 3.196291208791209e-05, + "loss": 0.1277, + "step": 13131 + }, + { + "epoch": 36.07692307692308, + "grad_norm": 10.891258239746094, + "learning_rate": 3.196153846153847e-05, + "loss": 0.3346, + "step": 13132 + }, + { + "epoch": 36.07967032967033, + "grad_norm": 14.36788272857666, + "learning_rate": 3.196016483516484e-05, + "loss": 0.3189, + "step": 13133 + }, + { + "epoch": 36.082417582417584, + "grad_norm": 15.929619789123535, + "learning_rate": 3.1958791208791214e-05, + "loss": 0.303, + "step": 13134 + }, + { + "epoch": 36.08516483516483, + "grad_norm": 9.74291706085205, + "learning_rate": 3.1957417582417584e-05, + "loss": 0.2259, + "step": 13135 + }, + { + "epoch": 36.08791208791209, + "grad_norm": 11.326034545898438, + "learning_rate": 3.1956043956043954e-05, + "loss": 0.2392, + "step": 13136 + }, + { + "epoch": 36.09065934065934, + "grad_norm": 14.388202667236328, + "learning_rate": 3.195467032967033e-05, + "loss": 0.5211, + "step": 13137 + }, + { + "epoch": 36.09340659340659, + "grad_norm": 20.584829330444336, + "learning_rate": 3.19532967032967e-05, + "loss": 0.4575, + "step": 13138 + }, + { + "epoch": 36.09615384615385, + "grad_norm": 16.584075927734375, + "learning_rate": 3.195192307692308e-05, + "loss": 0.3086, + "step": 13139 + }, + { + "epoch": 36.0989010989011, + "grad_norm": 9.095355033874512, + "learning_rate": 3.195054945054945e-05, + "loss": 0.179, + "step": 13140 + }, + { + "epoch": 36.10164835164835, + "grad_norm": 19.337385177612305, + "learning_rate": 3.1949175824175825e-05, + "loss": 0.5927, + "step": 13141 + }, + { + "epoch": 36.104395604395606, + "grad_norm": 4.832618713378906, + "learning_rate": 3.19478021978022e-05, + "loss": 0.0912, + "step": 13142 + }, + { + "epoch": 36.107142857142854, + "grad_norm": 16.747488021850586, + "learning_rate": 3.194642857142857e-05, + "loss": 0.2551, + "step": 13143 + }, + { + "epoch": 36.10989010989011, + "grad_norm": 9.20439624786377, + "learning_rate": 3.194505494505495e-05, + "loss": 0.2342, + "step": 13144 + }, + { + "epoch": 36.112637362637365, + "grad_norm": 5.637324333190918, + "learning_rate": 3.194368131868132e-05, + "loss": 0.1407, + "step": 13145 + }, + { + "epoch": 36.11538461538461, + "grad_norm": 8.708840370178223, + "learning_rate": 3.1942307692307695e-05, + "loss": 0.1809, + "step": 13146 + }, + { + "epoch": 36.11813186813187, + "grad_norm": 8.210907936096191, + "learning_rate": 3.194093406593407e-05, + "loss": 0.2362, + "step": 13147 + }, + { + "epoch": 36.120879120879124, + "grad_norm": 6.6651129722595215, + "learning_rate": 3.193956043956044e-05, + "loss": 0.2204, + "step": 13148 + }, + { + "epoch": 36.12362637362637, + "grad_norm": 11.84201717376709, + "learning_rate": 3.193818681318682e-05, + "loss": 0.2051, + "step": 13149 + }, + { + "epoch": 36.12637362637363, + "grad_norm": 15.204483032226562, + "learning_rate": 3.193681318681319e-05, + "loss": 0.4404, + "step": 13150 + }, + { + "epoch": 36.129120879120876, + "grad_norm": 14.326906204223633, + "learning_rate": 3.193543956043956e-05, + "loss": 0.2667, + "step": 13151 + }, + { + "epoch": 36.13186813186813, + "grad_norm": 10.288580894470215, + "learning_rate": 3.1934065934065936e-05, + "loss": 0.2984, + "step": 13152 + }, + { + "epoch": 36.13461538461539, + "grad_norm": 8.811985969543457, + "learning_rate": 3.1932692307692306e-05, + "loss": 0.1957, + "step": 13153 + }, + { + "epoch": 36.137362637362635, + "grad_norm": 11.376487731933594, + "learning_rate": 3.193131868131868e-05, + "loss": 0.3987, + "step": 13154 + }, + { + "epoch": 36.14010989010989, + "grad_norm": 9.794197082519531, + "learning_rate": 3.192994505494505e-05, + "loss": 0.2506, + "step": 13155 + }, + { + "epoch": 36.142857142857146, + "grad_norm": 15.373394966125488, + "learning_rate": 3.192857142857143e-05, + "loss": 0.6119, + "step": 13156 + }, + { + "epoch": 36.145604395604394, + "grad_norm": 15.146381378173828, + "learning_rate": 3.1927197802197806e-05, + "loss": 0.3437, + "step": 13157 + }, + { + "epoch": 36.14835164835165, + "grad_norm": 17.94722557067871, + "learning_rate": 3.1925824175824176e-05, + "loss": 0.6335, + "step": 13158 + }, + { + "epoch": 36.1510989010989, + "grad_norm": 10.964126586914062, + "learning_rate": 3.192445054945055e-05, + "loss": 0.1215, + "step": 13159 + }, + { + "epoch": 36.15384615384615, + "grad_norm": 22.32484245300293, + "learning_rate": 3.192307692307692e-05, + "loss": 0.9571, + "step": 13160 + }, + { + "epoch": 36.15659340659341, + "grad_norm": 7.155581474304199, + "learning_rate": 3.19217032967033e-05, + "loss": 0.1037, + "step": 13161 + }, + { + "epoch": 36.15934065934066, + "grad_norm": 13.297706604003906, + "learning_rate": 3.1920329670329676e-05, + "loss": 0.1945, + "step": 13162 + }, + { + "epoch": 36.16208791208791, + "grad_norm": 4.484586715698242, + "learning_rate": 3.1918956043956046e-05, + "loss": 0.0777, + "step": 13163 + }, + { + "epoch": 36.16483516483517, + "grad_norm": 14.90703010559082, + "learning_rate": 3.191758241758242e-05, + "loss": 0.2569, + "step": 13164 + }, + { + "epoch": 36.167582417582416, + "grad_norm": 7.172234535217285, + "learning_rate": 3.191620879120879e-05, + "loss": 0.1285, + "step": 13165 + }, + { + "epoch": 36.17032967032967, + "grad_norm": 4.783214092254639, + "learning_rate": 3.191483516483516e-05, + "loss": 0.0867, + "step": 13166 + }, + { + "epoch": 36.17307692307692, + "grad_norm": 6.5295867919921875, + "learning_rate": 3.191346153846154e-05, + "loss": 0.147, + "step": 13167 + }, + { + "epoch": 36.175824175824175, + "grad_norm": 13.459905624389648, + "learning_rate": 3.191208791208791e-05, + "loss": 0.3207, + "step": 13168 + }, + { + "epoch": 36.17857142857143, + "grad_norm": 8.435563087463379, + "learning_rate": 3.191071428571429e-05, + "loss": 0.1771, + "step": 13169 + }, + { + "epoch": 36.18131868131868, + "grad_norm": 21.53818702697754, + "learning_rate": 3.190934065934066e-05, + "loss": 0.914, + "step": 13170 + }, + { + "epoch": 36.184065934065934, + "grad_norm": 9.938060760498047, + "learning_rate": 3.1907967032967034e-05, + "loss": 0.3609, + "step": 13171 + }, + { + "epoch": 36.18681318681319, + "grad_norm": 10.039877891540527, + "learning_rate": 3.190659340659341e-05, + "loss": 0.2081, + "step": 13172 + }, + { + "epoch": 36.18956043956044, + "grad_norm": 16.222042083740234, + "learning_rate": 3.190521978021978e-05, + "loss": 0.5446, + "step": 13173 + }, + { + "epoch": 36.19230769230769, + "grad_norm": 15.120919227600098, + "learning_rate": 3.190384615384616e-05, + "loss": 0.3747, + "step": 13174 + }, + { + "epoch": 36.19505494505494, + "grad_norm": 10.911725044250488, + "learning_rate": 3.190247252747253e-05, + "loss": 0.2258, + "step": 13175 + }, + { + "epoch": 36.1978021978022, + "grad_norm": 12.68932819366455, + "learning_rate": 3.1901098901098904e-05, + "loss": 0.2142, + "step": 13176 + }, + { + "epoch": 36.20054945054945, + "grad_norm": 18.24224281311035, + "learning_rate": 3.189972527472528e-05, + "loss": 0.6224, + "step": 13177 + }, + { + "epoch": 36.2032967032967, + "grad_norm": 4.842036724090576, + "learning_rate": 3.189835164835165e-05, + "loss": 0.0963, + "step": 13178 + }, + { + "epoch": 36.206043956043956, + "grad_norm": 5.977288246154785, + "learning_rate": 3.189697802197803e-05, + "loss": 0.1001, + "step": 13179 + }, + { + "epoch": 36.20879120879121, + "grad_norm": 10.33862590789795, + "learning_rate": 3.18956043956044e-05, + "loss": 0.1902, + "step": 13180 + }, + { + "epoch": 36.21153846153846, + "grad_norm": 7.786355495452881, + "learning_rate": 3.189423076923077e-05, + "loss": 0.1617, + "step": 13181 + }, + { + "epoch": 36.214285714285715, + "grad_norm": 16.207809448242188, + "learning_rate": 3.1892857142857145e-05, + "loss": 0.4662, + "step": 13182 + }, + { + "epoch": 36.217032967032964, + "grad_norm": 7.486752510070801, + "learning_rate": 3.1891483516483515e-05, + "loss": 0.223, + "step": 13183 + }, + { + "epoch": 36.21978021978022, + "grad_norm": 10.101612091064453, + "learning_rate": 3.189010989010989e-05, + "loss": 0.223, + "step": 13184 + }, + { + "epoch": 36.222527472527474, + "grad_norm": 7.727370738983154, + "learning_rate": 3.188873626373626e-05, + "loss": 0.2473, + "step": 13185 + }, + { + "epoch": 36.22527472527472, + "grad_norm": 7.910109043121338, + "learning_rate": 3.188736263736264e-05, + "loss": 0.1431, + "step": 13186 + }, + { + "epoch": 36.22802197802198, + "grad_norm": 12.94028091430664, + "learning_rate": 3.1885989010989015e-05, + "loss": 0.2299, + "step": 13187 + }, + { + "epoch": 36.23076923076923, + "grad_norm": 9.553524017333984, + "learning_rate": 3.1884615384615385e-05, + "loss": 0.309, + "step": 13188 + }, + { + "epoch": 36.23351648351648, + "grad_norm": 11.60566520690918, + "learning_rate": 3.188324175824176e-05, + "loss": 0.2651, + "step": 13189 + }, + { + "epoch": 36.23626373626374, + "grad_norm": 12.83707332611084, + "learning_rate": 3.188186813186813e-05, + "loss": 0.2191, + "step": 13190 + }, + { + "epoch": 36.239010989010985, + "grad_norm": 19.484725952148438, + "learning_rate": 3.188049450549451e-05, + "loss": 0.4978, + "step": 13191 + }, + { + "epoch": 36.24175824175824, + "grad_norm": 10.01255989074707, + "learning_rate": 3.1879120879120885e-05, + "loss": 0.2873, + "step": 13192 + }, + { + "epoch": 36.244505494505496, + "grad_norm": 15.762384414672852, + "learning_rate": 3.1877747252747255e-05, + "loss": 0.4895, + "step": 13193 + }, + { + "epoch": 36.247252747252745, + "grad_norm": 10.48074722290039, + "learning_rate": 3.187637362637363e-05, + "loss": 0.316, + "step": 13194 + }, + { + "epoch": 36.25, + "grad_norm": 14.175760269165039, + "learning_rate": 3.1875e-05, + "loss": 0.2042, + "step": 13195 + }, + { + "epoch": 36.252747252747255, + "grad_norm": 13.286670684814453, + "learning_rate": 3.187362637362637e-05, + "loss": 0.266, + "step": 13196 + }, + { + "epoch": 36.255494505494504, + "grad_norm": 19.239974975585938, + "learning_rate": 3.187225274725275e-05, + "loss": 0.528, + "step": 13197 + }, + { + "epoch": 36.25824175824176, + "grad_norm": 18.230344772338867, + "learning_rate": 3.187087912087912e-05, + "loss": 0.4732, + "step": 13198 + }, + { + "epoch": 36.260989010989015, + "grad_norm": 0.8194206357002258, + "learning_rate": 3.1869505494505496e-05, + "loss": 0.0155, + "step": 13199 + }, + { + "epoch": 36.26373626373626, + "grad_norm": 24.47222328186035, + "learning_rate": 3.1868131868131866e-05, + "loss": 0.9098, + "step": 13200 + }, + { + "epoch": 36.26648351648352, + "grad_norm": 10.232236862182617, + "learning_rate": 3.186675824175824e-05, + "loss": 0.1991, + "step": 13201 + }, + { + "epoch": 36.26923076923077, + "grad_norm": 16.111637115478516, + "learning_rate": 3.186538461538462e-05, + "loss": 0.5096, + "step": 13202 + }, + { + "epoch": 36.27197802197802, + "grad_norm": 18.0321102142334, + "learning_rate": 3.186401098901099e-05, + "loss": 0.5097, + "step": 13203 + }, + { + "epoch": 36.27472527472528, + "grad_norm": 17.09800148010254, + "learning_rate": 3.1862637362637366e-05, + "loss": 0.4197, + "step": 13204 + }, + { + "epoch": 36.277472527472526, + "grad_norm": 11.759610176086426, + "learning_rate": 3.1861263736263736e-05, + "loss": 0.2526, + "step": 13205 + }, + { + "epoch": 36.28021978021978, + "grad_norm": 9.287408828735352, + "learning_rate": 3.185989010989011e-05, + "loss": 0.1732, + "step": 13206 + }, + { + "epoch": 36.282967032967036, + "grad_norm": 8.494132041931152, + "learning_rate": 3.185851648351649e-05, + "loss": 0.1303, + "step": 13207 + }, + { + "epoch": 36.285714285714285, + "grad_norm": 11.484225273132324, + "learning_rate": 3.185714285714286e-05, + "loss": 0.2153, + "step": 13208 + }, + { + "epoch": 36.28846153846154, + "grad_norm": 23.195552825927734, + "learning_rate": 3.185576923076924e-05, + "loss": 0.5082, + "step": 13209 + }, + { + "epoch": 36.29120879120879, + "grad_norm": 9.534470558166504, + "learning_rate": 3.185439560439561e-05, + "loss": 0.1784, + "step": 13210 + }, + { + "epoch": 36.293956043956044, + "grad_norm": 13.615010261535645, + "learning_rate": 3.185302197802198e-05, + "loss": 0.3843, + "step": 13211 + }, + { + "epoch": 36.2967032967033, + "grad_norm": 6.016850471496582, + "learning_rate": 3.1851648351648354e-05, + "loss": 0.1674, + "step": 13212 + }, + { + "epoch": 36.29945054945055, + "grad_norm": 7.774551868438721, + "learning_rate": 3.1850274725274724e-05, + "loss": 0.128, + "step": 13213 + }, + { + "epoch": 36.3021978021978, + "grad_norm": 12.730001449584961, + "learning_rate": 3.18489010989011e-05, + "loss": 0.259, + "step": 13214 + }, + { + "epoch": 36.30494505494506, + "grad_norm": 14.958062171936035, + "learning_rate": 3.184752747252747e-05, + "loss": 0.461, + "step": 13215 + }, + { + "epoch": 36.30769230769231, + "grad_norm": 9.680266380310059, + "learning_rate": 3.184615384615385e-05, + "loss": 0.1602, + "step": 13216 + }, + { + "epoch": 36.31043956043956, + "grad_norm": 25.152111053466797, + "learning_rate": 3.1844780219780224e-05, + "loss": 0.613, + "step": 13217 + }, + { + "epoch": 36.31318681318681, + "grad_norm": 13.741728782653809, + "learning_rate": 3.1843406593406594e-05, + "loss": 0.2125, + "step": 13218 + }, + { + "epoch": 36.315934065934066, + "grad_norm": 19.460147857666016, + "learning_rate": 3.184203296703297e-05, + "loss": 0.3875, + "step": 13219 + }, + { + "epoch": 36.31868131868132, + "grad_norm": 14.282727241516113, + "learning_rate": 3.184065934065934e-05, + "loss": 0.4855, + "step": 13220 + }, + { + "epoch": 36.32142857142857, + "grad_norm": 2.6515796184539795, + "learning_rate": 3.183928571428572e-05, + "loss": 0.056, + "step": 13221 + }, + { + "epoch": 36.324175824175825, + "grad_norm": 5.1949782371521, + "learning_rate": 3.1837912087912094e-05, + "loss": 0.1036, + "step": 13222 + }, + { + "epoch": 36.32692307692308, + "grad_norm": 9.967811584472656, + "learning_rate": 3.1836538461538464e-05, + "loss": 0.2802, + "step": 13223 + }, + { + "epoch": 36.32967032967033, + "grad_norm": 16.44148826599121, + "learning_rate": 3.183516483516484e-05, + "loss": 0.4411, + "step": 13224 + }, + { + "epoch": 36.332417582417584, + "grad_norm": 21.310855865478516, + "learning_rate": 3.183379120879121e-05, + "loss": 0.8195, + "step": 13225 + }, + { + "epoch": 36.33516483516483, + "grad_norm": 17.333011627197266, + "learning_rate": 3.183241758241758e-05, + "loss": 0.4461, + "step": 13226 + }, + { + "epoch": 36.33791208791209, + "grad_norm": 13.972418785095215, + "learning_rate": 3.183104395604396e-05, + "loss": 0.3133, + "step": 13227 + }, + { + "epoch": 36.34065934065934, + "grad_norm": 15.42204475402832, + "learning_rate": 3.182967032967033e-05, + "loss": 0.4819, + "step": 13228 + }, + { + "epoch": 36.34340659340659, + "grad_norm": 11.664719581604004, + "learning_rate": 3.1828296703296705e-05, + "loss": 0.2464, + "step": 13229 + }, + { + "epoch": 36.34615384615385, + "grad_norm": 19.678773880004883, + "learning_rate": 3.1826923076923075e-05, + "loss": 0.6465, + "step": 13230 + }, + { + "epoch": 36.3489010989011, + "grad_norm": 9.74010944366455, + "learning_rate": 3.182554945054945e-05, + "loss": 0.1969, + "step": 13231 + }, + { + "epoch": 36.35164835164835, + "grad_norm": 7.133544445037842, + "learning_rate": 3.182417582417583e-05, + "loss": 0.1306, + "step": 13232 + }, + { + "epoch": 36.354395604395606, + "grad_norm": 15.526323318481445, + "learning_rate": 3.18228021978022e-05, + "loss": 0.2692, + "step": 13233 + }, + { + "epoch": 36.357142857142854, + "grad_norm": 12.700214385986328, + "learning_rate": 3.1821428571428575e-05, + "loss": 0.2679, + "step": 13234 + }, + { + "epoch": 36.35989010989011, + "grad_norm": 11.490562438964844, + "learning_rate": 3.1820054945054945e-05, + "loss": 0.2044, + "step": 13235 + }, + { + "epoch": 36.362637362637365, + "grad_norm": 20.071794509887695, + "learning_rate": 3.181868131868132e-05, + "loss": 0.5269, + "step": 13236 + }, + { + "epoch": 36.36538461538461, + "grad_norm": 12.263632774353027, + "learning_rate": 3.18173076923077e-05, + "loss": 0.5238, + "step": 13237 + }, + { + "epoch": 36.36813186813187, + "grad_norm": 11.005292892456055, + "learning_rate": 3.181593406593407e-05, + "loss": 0.2265, + "step": 13238 + }, + { + "epoch": 36.370879120879124, + "grad_norm": 3.580880880355835, + "learning_rate": 3.1814560439560446e-05, + "loss": 0.0815, + "step": 13239 + }, + { + "epoch": 36.37362637362637, + "grad_norm": 14.659863471984863, + "learning_rate": 3.1813186813186816e-05, + "loss": 0.2856, + "step": 13240 + }, + { + "epoch": 36.37637362637363, + "grad_norm": 13.208198547363281, + "learning_rate": 3.1811813186813186e-05, + "loss": 0.2612, + "step": 13241 + }, + { + "epoch": 36.379120879120876, + "grad_norm": 10.373685836791992, + "learning_rate": 3.181043956043956e-05, + "loss": 0.1679, + "step": 13242 + }, + { + "epoch": 36.38186813186813, + "grad_norm": 10.205318450927734, + "learning_rate": 3.180906593406593e-05, + "loss": 0.3745, + "step": 13243 + }, + { + "epoch": 36.38461538461539, + "grad_norm": 14.478459358215332, + "learning_rate": 3.180769230769231e-05, + "loss": 0.4441, + "step": 13244 + }, + { + "epoch": 36.387362637362635, + "grad_norm": 8.270474433898926, + "learning_rate": 3.180631868131868e-05, + "loss": 0.2413, + "step": 13245 + }, + { + "epoch": 36.39010989010989, + "grad_norm": 12.651248931884766, + "learning_rate": 3.1804945054945056e-05, + "loss": 0.4459, + "step": 13246 + }, + { + "epoch": 36.392857142857146, + "grad_norm": 15.66655445098877, + "learning_rate": 3.180357142857143e-05, + "loss": 0.2596, + "step": 13247 + }, + { + "epoch": 36.395604395604394, + "grad_norm": 4.594401836395264, + "learning_rate": 3.18021978021978e-05, + "loss": 0.0734, + "step": 13248 + }, + { + "epoch": 36.39835164835165, + "grad_norm": 8.67956256866455, + "learning_rate": 3.180082417582418e-05, + "loss": 0.1447, + "step": 13249 + }, + { + "epoch": 36.4010989010989, + "grad_norm": 13.300740242004395, + "learning_rate": 3.179945054945055e-05, + "loss": 0.4032, + "step": 13250 + }, + { + "epoch": 36.40384615384615, + "grad_norm": 6.928972244262695, + "learning_rate": 3.179807692307693e-05, + "loss": 0.1322, + "step": 13251 + }, + { + "epoch": 36.40659340659341, + "grad_norm": 14.807917594909668, + "learning_rate": 3.1796703296703303e-05, + "loss": 0.1654, + "step": 13252 + }, + { + "epoch": 36.40934065934066, + "grad_norm": 14.728715896606445, + "learning_rate": 3.1795329670329673e-05, + "loss": 0.3501, + "step": 13253 + }, + { + "epoch": 36.41208791208791, + "grad_norm": 11.253479957580566, + "learning_rate": 3.179395604395605e-05, + "loss": 0.2484, + "step": 13254 + }, + { + "epoch": 36.41483516483517, + "grad_norm": 6.197448253631592, + "learning_rate": 3.179258241758242e-05, + "loss": 0.0862, + "step": 13255 + }, + { + "epoch": 36.417582417582416, + "grad_norm": 6.6799211502075195, + "learning_rate": 3.179120879120879e-05, + "loss": 0.1258, + "step": 13256 + }, + { + "epoch": 36.42032967032967, + "grad_norm": 7.784002780914307, + "learning_rate": 3.178983516483517e-05, + "loss": 0.206, + "step": 13257 + }, + { + "epoch": 36.42307692307692, + "grad_norm": 6.2985124588012695, + "learning_rate": 3.178846153846154e-05, + "loss": 0.1121, + "step": 13258 + }, + { + "epoch": 36.425824175824175, + "grad_norm": 14.74122428894043, + "learning_rate": 3.1787087912087914e-05, + "loss": 0.4923, + "step": 13259 + }, + { + "epoch": 36.42857142857143, + "grad_norm": 14.334455490112305, + "learning_rate": 3.1785714285714284e-05, + "loss": 0.3659, + "step": 13260 + }, + { + "epoch": 36.43131868131868, + "grad_norm": 10.825055122375488, + "learning_rate": 3.178434065934066e-05, + "loss": 0.2422, + "step": 13261 + }, + { + "epoch": 36.434065934065934, + "grad_norm": 12.699545860290527, + "learning_rate": 3.178296703296704e-05, + "loss": 0.4801, + "step": 13262 + }, + { + "epoch": 36.43681318681319, + "grad_norm": 20.129531860351562, + "learning_rate": 3.178159340659341e-05, + "loss": 0.9377, + "step": 13263 + }, + { + "epoch": 36.43956043956044, + "grad_norm": 2.3895890712738037, + "learning_rate": 3.1780219780219784e-05, + "loss": 0.0401, + "step": 13264 + }, + { + "epoch": 36.44230769230769, + "grad_norm": 14.551238059997559, + "learning_rate": 3.1778846153846154e-05, + "loss": 0.3417, + "step": 13265 + }, + { + "epoch": 36.44505494505494, + "grad_norm": 7.255878925323486, + "learning_rate": 3.177747252747253e-05, + "loss": 0.1647, + "step": 13266 + }, + { + "epoch": 36.4478021978022, + "grad_norm": 11.435589790344238, + "learning_rate": 3.177609890109891e-05, + "loss": 0.3298, + "step": 13267 + }, + { + "epoch": 36.45054945054945, + "grad_norm": 13.619932174682617, + "learning_rate": 3.177472527472528e-05, + "loss": 0.4178, + "step": 13268 + }, + { + "epoch": 36.4532967032967, + "grad_norm": 12.481657981872559, + "learning_rate": 3.1773351648351655e-05, + "loss": 0.3192, + "step": 13269 + }, + { + "epoch": 36.456043956043956, + "grad_norm": 13.743009567260742, + "learning_rate": 3.1771978021978025e-05, + "loss": 0.2609, + "step": 13270 + }, + { + "epoch": 36.45879120879121, + "grad_norm": 17.246015548706055, + "learning_rate": 3.1770604395604395e-05, + "loss": 0.6756, + "step": 13271 + }, + { + "epoch": 36.46153846153846, + "grad_norm": 17.375200271606445, + "learning_rate": 3.176923076923077e-05, + "loss": 0.7304, + "step": 13272 + }, + { + "epoch": 36.464285714285715, + "grad_norm": 10.037286758422852, + "learning_rate": 3.176785714285714e-05, + "loss": 0.2102, + "step": 13273 + }, + { + "epoch": 36.467032967032964, + "grad_norm": 2.6571249961853027, + "learning_rate": 3.176648351648352e-05, + "loss": 0.0374, + "step": 13274 + }, + { + "epoch": 36.46978021978022, + "grad_norm": 17.7924861907959, + "learning_rate": 3.176510989010989e-05, + "loss": 0.4462, + "step": 13275 + }, + { + "epoch": 36.472527472527474, + "grad_norm": 10.603202819824219, + "learning_rate": 3.1763736263736265e-05, + "loss": 0.2191, + "step": 13276 + }, + { + "epoch": 36.47527472527472, + "grad_norm": 14.670764923095703, + "learning_rate": 3.176236263736264e-05, + "loss": 0.4411, + "step": 13277 + }, + { + "epoch": 36.47802197802198, + "grad_norm": 16.089031219482422, + "learning_rate": 3.176098901098901e-05, + "loss": 0.3697, + "step": 13278 + }, + { + "epoch": 36.48076923076923, + "grad_norm": 16.428421020507812, + "learning_rate": 3.175961538461539e-05, + "loss": 0.3555, + "step": 13279 + }, + { + "epoch": 36.48351648351648, + "grad_norm": 17.07158851623535, + "learning_rate": 3.175824175824176e-05, + "loss": 0.4884, + "step": 13280 + }, + { + "epoch": 36.48626373626374, + "grad_norm": 19.899066925048828, + "learning_rate": 3.1756868131868136e-05, + "loss": 0.7842, + "step": 13281 + }, + { + "epoch": 36.489010989010985, + "grad_norm": 6.106873989105225, + "learning_rate": 3.175549450549451e-05, + "loss": 0.1508, + "step": 13282 + }, + { + "epoch": 36.49175824175824, + "grad_norm": 11.883536338806152, + "learning_rate": 3.175412087912088e-05, + "loss": 0.3117, + "step": 13283 + }, + { + "epoch": 36.494505494505496, + "grad_norm": 19.777284622192383, + "learning_rate": 3.175274725274725e-05, + "loss": 0.6111, + "step": 13284 + }, + { + "epoch": 36.497252747252745, + "grad_norm": 9.375242233276367, + "learning_rate": 3.175137362637363e-05, + "loss": 0.1455, + "step": 13285 + }, + { + "epoch": 36.5, + "grad_norm": 6.775545120239258, + "learning_rate": 3.175e-05, + "loss": 0.1801, + "step": 13286 + }, + { + "epoch": 36.502747252747255, + "grad_norm": 9.133089065551758, + "learning_rate": 3.1748626373626376e-05, + "loss": 0.1352, + "step": 13287 + }, + { + "epoch": 36.505494505494504, + "grad_norm": 6.342039585113525, + "learning_rate": 3.1747252747252746e-05, + "loss": 0.1766, + "step": 13288 + }, + { + "epoch": 36.50824175824176, + "grad_norm": 18.27202033996582, + "learning_rate": 3.174587912087912e-05, + "loss": 0.407, + "step": 13289 + }, + { + "epoch": 36.51098901098901, + "grad_norm": 14.290863990783691, + "learning_rate": 3.174450549450549e-05, + "loss": 0.2896, + "step": 13290 + }, + { + "epoch": 36.51373626373626, + "grad_norm": 13.217206954956055, + "learning_rate": 3.174313186813187e-05, + "loss": 0.2227, + "step": 13291 + }, + { + "epoch": 36.51648351648352, + "grad_norm": 16.517169952392578, + "learning_rate": 3.1741758241758246e-05, + "loss": 0.376, + "step": 13292 + }, + { + "epoch": 36.51923076923077, + "grad_norm": 17.78251838684082, + "learning_rate": 3.1740384615384616e-05, + "loss": 0.3886, + "step": 13293 + }, + { + "epoch": 36.52197802197802, + "grad_norm": 10.806853294372559, + "learning_rate": 3.173901098901099e-05, + "loss": 0.2334, + "step": 13294 + }, + { + "epoch": 36.52472527472528, + "grad_norm": 13.223201751708984, + "learning_rate": 3.173763736263736e-05, + "loss": 0.3448, + "step": 13295 + }, + { + "epoch": 36.527472527472526, + "grad_norm": 12.751945495605469, + "learning_rate": 3.173626373626374e-05, + "loss": 0.3262, + "step": 13296 + }, + { + "epoch": 36.53021978021978, + "grad_norm": 13.05609130859375, + "learning_rate": 3.173489010989012e-05, + "loss": 0.3873, + "step": 13297 + }, + { + "epoch": 36.532967032967036, + "grad_norm": 7.92680025100708, + "learning_rate": 3.173351648351649e-05, + "loss": 0.2637, + "step": 13298 + }, + { + "epoch": 36.535714285714285, + "grad_norm": 4.960567951202393, + "learning_rate": 3.173214285714286e-05, + "loss": 0.0655, + "step": 13299 + }, + { + "epoch": 36.53846153846154, + "grad_norm": 7.272856712341309, + "learning_rate": 3.1730769230769234e-05, + "loss": 0.0916, + "step": 13300 + }, + { + "epoch": 36.54120879120879, + "grad_norm": 10.49950885772705, + "learning_rate": 3.1729395604395604e-05, + "loss": 0.2564, + "step": 13301 + }, + { + "epoch": 36.543956043956044, + "grad_norm": 17.59453582763672, + "learning_rate": 3.172802197802198e-05, + "loss": 0.6323, + "step": 13302 + }, + { + "epoch": 36.5467032967033, + "grad_norm": 14.054664611816406, + "learning_rate": 3.172664835164835e-05, + "loss": 0.3059, + "step": 13303 + }, + { + "epoch": 36.54945054945055, + "grad_norm": 14.724828720092773, + "learning_rate": 3.172527472527473e-05, + "loss": 0.3916, + "step": 13304 + }, + { + "epoch": 36.5521978021978, + "grad_norm": 7.291111946105957, + "learning_rate": 3.17239010989011e-05, + "loss": 0.14, + "step": 13305 + }, + { + "epoch": 36.55494505494506, + "grad_norm": 8.272171020507812, + "learning_rate": 3.1722527472527474e-05, + "loss": 0.1883, + "step": 13306 + }, + { + "epoch": 36.55769230769231, + "grad_norm": 15.849929809570312, + "learning_rate": 3.172115384615385e-05, + "loss": 0.6947, + "step": 13307 + }, + { + "epoch": 36.56043956043956, + "grad_norm": 12.522477149963379, + "learning_rate": 3.171978021978022e-05, + "loss": 0.3828, + "step": 13308 + }, + { + "epoch": 36.56318681318681, + "grad_norm": 6.157026767730713, + "learning_rate": 3.17184065934066e-05, + "loss": 0.1184, + "step": 13309 + }, + { + "epoch": 36.565934065934066, + "grad_norm": 13.189427375793457, + "learning_rate": 3.171703296703297e-05, + "loss": 0.3962, + "step": 13310 + }, + { + "epoch": 36.56868131868132, + "grad_norm": 4.858325958251953, + "learning_rate": 3.1715659340659345e-05, + "loss": 0.1232, + "step": 13311 + }, + { + "epoch": 36.57142857142857, + "grad_norm": 7.851920127868652, + "learning_rate": 3.1714285714285715e-05, + "loss": 0.1212, + "step": 13312 + }, + { + "epoch": 36.574175824175825, + "grad_norm": 27.192012786865234, + "learning_rate": 3.171291208791209e-05, + "loss": 0.9044, + "step": 13313 + }, + { + "epoch": 36.57692307692308, + "grad_norm": 9.548079490661621, + "learning_rate": 3.171153846153846e-05, + "loss": 0.204, + "step": 13314 + }, + { + "epoch": 36.57967032967033, + "grad_norm": 10.865400314331055, + "learning_rate": 3.171016483516484e-05, + "loss": 0.2009, + "step": 13315 + }, + { + "epoch": 36.582417582417584, + "grad_norm": 13.579535484313965, + "learning_rate": 3.170879120879121e-05, + "loss": 0.3556, + "step": 13316 + }, + { + "epoch": 36.58516483516483, + "grad_norm": 9.466012954711914, + "learning_rate": 3.170741758241758e-05, + "loss": 0.277, + "step": 13317 + }, + { + "epoch": 36.58791208791209, + "grad_norm": 23.08202362060547, + "learning_rate": 3.1706043956043955e-05, + "loss": 0.9668, + "step": 13318 + }, + { + "epoch": 36.59065934065934, + "grad_norm": 18.279706954956055, + "learning_rate": 3.170467032967033e-05, + "loss": 0.5511, + "step": 13319 + }, + { + "epoch": 36.59340659340659, + "grad_norm": 7.80149507522583, + "learning_rate": 3.17032967032967e-05, + "loss": 0.2452, + "step": 13320 + }, + { + "epoch": 36.59615384615385, + "grad_norm": 18.36941146850586, + "learning_rate": 3.170192307692308e-05, + "loss": 0.5167, + "step": 13321 + }, + { + "epoch": 36.5989010989011, + "grad_norm": 2.3978049755096436, + "learning_rate": 3.170054945054945e-05, + "loss": 0.0528, + "step": 13322 + }, + { + "epoch": 36.60164835164835, + "grad_norm": 14.408312797546387, + "learning_rate": 3.1699175824175825e-05, + "loss": 0.2158, + "step": 13323 + }, + { + "epoch": 36.604395604395606, + "grad_norm": 3.7930521965026855, + "learning_rate": 3.16978021978022e-05, + "loss": 0.0888, + "step": 13324 + }, + { + "epoch": 36.607142857142854, + "grad_norm": 10.641400337219238, + "learning_rate": 3.169642857142857e-05, + "loss": 0.4105, + "step": 13325 + }, + { + "epoch": 36.60989010989011, + "grad_norm": 30.09726905822754, + "learning_rate": 3.169505494505495e-05, + "loss": 0.3376, + "step": 13326 + }, + { + "epoch": 36.612637362637365, + "grad_norm": 9.525155067443848, + "learning_rate": 3.169368131868132e-05, + "loss": 0.242, + "step": 13327 + }, + { + "epoch": 36.61538461538461, + "grad_norm": 20.161128997802734, + "learning_rate": 3.1692307692307696e-05, + "loss": 0.6291, + "step": 13328 + }, + { + "epoch": 36.61813186813187, + "grad_norm": 15.80294418334961, + "learning_rate": 3.1690934065934066e-05, + "loss": 0.2614, + "step": 13329 + }, + { + "epoch": 36.620879120879124, + "grad_norm": 10.62929630279541, + "learning_rate": 3.168956043956044e-05, + "loss": 0.2198, + "step": 13330 + }, + { + "epoch": 36.62362637362637, + "grad_norm": 8.91541862487793, + "learning_rate": 3.168818681318681e-05, + "loss": 0.3132, + "step": 13331 + }, + { + "epoch": 36.62637362637363, + "grad_norm": 16.94226837158203, + "learning_rate": 3.168681318681318e-05, + "loss": 0.4061, + "step": 13332 + }, + { + "epoch": 36.629120879120876, + "grad_norm": 10.372905731201172, + "learning_rate": 3.168543956043956e-05, + "loss": 0.3252, + "step": 13333 + }, + { + "epoch": 36.63186813186813, + "grad_norm": 13.669034004211426, + "learning_rate": 3.1684065934065936e-05, + "loss": 0.2837, + "step": 13334 + }, + { + "epoch": 36.63461538461539, + "grad_norm": 14.280369758605957, + "learning_rate": 3.1682692307692306e-05, + "loss": 0.4381, + "step": 13335 + }, + { + "epoch": 36.637362637362635, + "grad_norm": 7.055479526519775, + "learning_rate": 3.168131868131868e-05, + "loss": 0.1677, + "step": 13336 + }, + { + "epoch": 36.64010989010989, + "grad_norm": 15.462015151977539, + "learning_rate": 3.167994505494505e-05, + "loss": 0.4534, + "step": 13337 + }, + { + "epoch": 36.642857142857146, + "grad_norm": 16.076061248779297, + "learning_rate": 3.167857142857143e-05, + "loss": 0.3348, + "step": 13338 + }, + { + "epoch": 36.645604395604394, + "grad_norm": 15.96219253540039, + "learning_rate": 3.167719780219781e-05, + "loss": 0.4363, + "step": 13339 + }, + { + "epoch": 36.64835164835165, + "grad_norm": 13.920736312866211, + "learning_rate": 3.167582417582418e-05, + "loss": 0.4226, + "step": 13340 + }, + { + "epoch": 36.6510989010989, + "grad_norm": 7.213133811950684, + "learning_rate": 3.1674450549450554e-05, + "loss": 0.1721, + "step": 13341 + }, + { + "epoch": 36.65384615384615, + "grad_norm": 12.92705249786377, + "learning_rate": 3.1673076923076924e-05, + "loss": 0.4621, + "step": 13342 + }, + { + "epoch": 36.65659340659341, + "grad_norm": 8.40000057220459, + "learning_rate": 3.16717032967033e-05, + "loss": 0.2523, + "step": 13343 + }, + { + "epoch": 36.65934065934066, + "grad_norm": 23.94628143310547, + "learning_rate": 3.167032967032967e-05, + "loss": 0.8055, + "step": 13344 + }, + { + "epoch": 36.66208791208791, + "grad_norm": 10.178285598754883, + "learning_rate": 3.166895604395605e-05, + "loss": 0.3971, + "step": 13345 + }, + { + "epoch": 36.66483516483517, + "grad_norm": 21.467634201049805, + "learning_rate": 3.166758241758242e-05, + "loss": 0.6997, + "step": 13346 + }, + { + "epoch": 36.667582417582416, + "grad_norm": 14.797761917114258, + "learning_rate": 3.166620879120879e-05, + "loss": 0.4645, + "step": 13347 + }, + { + "epoch": 36.67032967032967, + "grad_norm": 14.284686088562012, + "learning_rate": 3.1664835164835164e-05, + "loss": 0.3464, + "step": 13348 + }, + { + "epoch": 36.67307692307692, + "grad_norm": 4.56424617767334, + "learning_rate": 3.166346153846154e-05, + "loss": 0.0671, + "step": 13349 + }, + { + "epoch": 36.675824175824175, + "grad_norm": 10.644233703613281, + "learning_rate": 3.166208791208791e-05, + "loss": 0.2905, + "step": 13350 + }, + { + "epoch": 36.67857142857143, + "grad_norm": 12.969590187072754, + "learning_rate": 3.166071428571429e-05, + "loss": 0.3891, + "step": 13351 + }, + { + "epoch": 36.68131868131868, + "grad_norm": 12.393533706665039, + "learning_rate": 3.165934065934066e-05, + "loss": 0.2432, + "step": 13352 + }, + { + "epoch": 36.684065934065934, + "grad_norm": 12.829976081848145, + "learning_rate": 3.1657967032967034e-05, + "loss": 0.3034, + "step": 13353 + }, + { + "epoch": 36.68681318681319, + "grad_norm": 16.532970428466797, + "learning_rate": 3.165659340659341e-05, + "loss": 0.3906, + "step": 13354 + }, + { + "epoch": 36.68956043956044, + "grad_norm": 14.000411987304688, + "learning_rate": 3.165521978021978e-05, + "loss": 0.3946, + "step": 13355 + }, + { + "epoch": 36.69230769230769, + "grad_norm": 17.84621810913086, + "learning_rate": 3.165384615384616e-05, + "loss": 0.3846, + "step": 13356 + }, + { + "epoch": 36.69505494505494, + "grad_norm": 11.138107299804688, + "learning_rate": 3.165247252747253e-05, + "loss": 0.2876, + "step": 13357 + }, + { + "epoch": 36.6978021978022, + "grad_norm": 8.698634147644043, + "learning_rate": 3.1651098901098905e-05, + "loss": 0.1406, + "step": 13358 + }, + { + "epoch": 36.70054945054945, + "grad_norm": 7.1017889976501465, + "learning_rate": 3.1649725274725275e-05, + "loss": 0.1843, + "step": 13359 + }, + { + "epoch": 36.7032967032967, + "grad_norm": 14.713237762451172, + "learning_rate": 3.164835164835165e-05, + "loss": 0.3418, + "step": 13360 + }, + { + "epoch": 36.706043956043956, + "grad_norm": 9.513764381408691, + "learning_rate": 3.164697802197802e-05, + "loss": 0.1306, + "step": 13361 + }, + { + "epoch": 36.70879120879121, + "grad_norm": 13.919963836669922, + "learning_rate": 3.164560439560439e-05, + "loss": 0.3546, + "step": 13362 + }, + { + "epoch": 36.71153846153846, + "grad_norm": 8.298151969909668, + "learning_rate": 3.164423076923077e-05, + "loss": 0.1633, + "step": 13363 + }, + { + "epoch": 36.714285714285715, + "grad_norm": 12.99411392211914, + "learning_rate": 3.1642857142857145e-05, + "loss": 0.4228, + "step": 13364 + }, + { + "epoch": 36.717032967032964, + "grad_norm": 15.075167655944824, + "learning_rate": 3.1641483516483515e-05, + "loss": 0.5862, + "step": 13365 + }, + { + "epoch": 36.71978021978022, + "grad_norm": 13.722319602966309, + "learning_rate": 3.164010989010989e-05, + "loss": 0.1943, + "step": 13366 + }, + { + "epoch": 36.722527472527474, + "grad_norm": 12.45893669128418, + "learning_rate": 3.163873626373626e-05, + "loss": 0.3098, + "step": 13367 + }, + { + "epoch": 36.72527472527472, + "grad_norm": 11.805439949035645, + "learning_rate": 3.163736263736264e-05, + "loss": 0.3492, + "step": 13368 + }, + { + "epoch": 36.72802197802198, + "grad_norm": 9.7885160446167, + "learning_rate": 3.1635989010989016e-05, + "loss": 0.2395, + "step": 13369 + }, + { + "epoch": 36.73076923076923, + "grad_norm": 9.694814682006836, + "learning_rate": 3.1634615384615386e-05, + "loss": 0.1857, + "step": 13370 + }, + { + "epoch": 36.73351648351648, + "grad_norm": 13.139274597167969, + "learning_rate": 3.163324175824176e-05, + "loss": 0.369, + "step": 13371 + }, + { + "epoch": 36.73626373626374, + "grad_norm": 12.917815208435059, + "learning_rate": 3.163186813186813e-05, + "loss": 0.344, + "step": 13372 + }, + { + "epoch": 36.73901098901099, + "grad_norm": 14.9091157913208, + "learning_rate": 3.163049450549451e-05, + "loss": 0.5841, + "step": 13373 + }, + { + "epoch": 36.74175824175824, + "grad_norm": 11.853092193603516, + "learning_rate": 3.162912087912088e-05, + "loss": 0.4407, + "step": 13374 + }, + { + "epoch": 36.744505494505496, + "grad_norm": 17.15570640563965, + "learning_rate": 3.1627747252747256e-05, + "loss": 0.4337, + "step": 13375 + }, + { + "epoch": 36.747252747252745, + "grad_norm": 15.235921859741211, + "learning_rate": 3.1626373626373626e-05, + "loss": 0.237, + "step": 13376 + }, + { + "epoch": 36.75, + "grad_norm": 23.888944625854492, + "learning_rate": 3.1624999999999996e-05, + "loss": 0.5943, + "step": 13377 + }, + { + "epoch": 36.752747252747255, + "grad_norm": 8.233369827270508, + "learning_rate": 3.162362637362637e-05, + "loss": 0.2722, + "step": 13378 + }, + { + "epoch": 36.755494505494504, + "grad_norm": 16.1263427734375, + "learning_rate": 3.162225274725275e-05, + "loss": 0.5325, + "step": 13379 + }, + { + "epoch": 36.75824175824176, + "grad_norm": 12.625539779663086, + "learning_rate": 3.162087912087912e-05, + "loss": 0.5826, + "step": 13380 + }, + { + "epoch": 36.76098901098901, + "grad_norm": 5.956910133361816, + "learning_rate": 3.16195054945055e-05, + "loss": 0.1342, + "step": 13381 + }, + { + "epoch": 36.76373626373626, + "grad_norm": 18.761302947998047, + "learning_rate": 3.161813186813187e-05, + "loss": 0.4759, + "step": 13382 + }, + { + "epoch": 36.76648351648352, + "grad_norm": 22.795137405395508, + "learning_rate": 3.1616758241758243e-05, + "loss": 0.7234, + "step": 13383 + }, + { + "epoch": 36.76923076923077, + "grad_norm": 10.94077205657959, + "learning_rate": 3.161538461538462e-05, + "loss": 0.2249, + "step": 13384 + }, + { + "epoch": 36.77197802197802, + "grad_norm": 12.884119033813477, + "learning_rate": 3.161401098901099e-05, + "loss": 0.2996, + "step": 13385 + }, + { + "epoch": 36.77472527472528, + "grad_norm": 2.80802583694458, + "learning_rate": 3.161263736263737e-05, + "loss": 0.0518, + "step": 13386 + }, + { + "epoch": 36.777472527472526, + "grad_norm": 6.591110706329346, + "learning_rate": 3.161126373626374e-05, + "loss": 0.1471, + "step": 13387 + }, + { + "epoch": 36.78021978021978, + "grad_norm": 14.744791030883789, + "learning_rate": 3.1609890109890114e-05, + "loss": 0.5238, + "step": 13388 + }, + { + "epoch": 36.782967032967036, + "grad_norm": 9.037400245666504, + "learning_rate": 3.1608516483516484e-05, + "loss": 0.2377, + "step": 13389 + }, + { + "epoch": 36.785714285714285, + "grad_norm": 13.570674896240234, + "learning_rate": 3.160714285714286e-05, + "loss": 0.3115, + "step": 13390 + }, + { + "epoch": 36.78846153846154, + "grad_norm": 7.024182319641113, + "learning_rate": 3.160576923076923e-05, + "loss": 0.1493, + "step": 13391 + }, + { + "epoch": 36.79120879120879, + "grad_norm": 12.807373046875, + "learning_rate": 3.16043956043956e-05, + "loss": 0.2897, + "step": 13392 + }, + { + "epoch": 36.793956043956044, + "grad_norm": 14.49801254272461, + "learning_rate": 3.160302197802198e-05, + "loss": 0.587, + "step": 13393 + }, + { + "epoch": 36.7967032967033, + "grad_norm": 12.545326232910156, + "learning_rate": 3.1601648351648354e-05, + "loss": 0.4737, + "step": 13394 + }, + { + "epoch": 36.79945054945055, + "grad_norm": 18.21341896057129, + "learning_rate": 3.1600274725274724e-05, + "loss": 0.5624, + "step": 13395 + }, + { + "epoch": 36.8021978021978, + "grad_norm": 8.618033409118652, + "learning_rate": 3.15989010989011e-05, + "loss": 0.2143, + "step": 13396 + }, + { + "epoch": 36.80494505494506, + "grad_norm": 11.02292537689209, + "learning_rate": 3.159752747252747e-05, + "loss": 0.252, + "step": 13397 + }, + { + "epoch": 36.80769230769231, + "grad_norm": 12.581990242004395, + "learning_rate": 3.159615384615385e-05, + "loss": 0.3242, + "step": 13398 + }, + { + "epoch": 36.81043956043956, + "grad_norm": 17.575716018676758, + "learning_rate": 3.1594780219780225e-05, + "loss": 0.5206, + "step": 13399 + }, + { + "epoch": 36.81318681318681, + "grad_norm": 10.899744033813477, + "learning_rate": 3.1593406593406595e-05, + "loss": 0.2742, + "step": 13400 + }, + { + "epoch": 36.815934065934066, + "grad_norm": 16.054670333862305, + "learning_rate": 3.159203296703297e-05, + "loss": 0.3202, + "step": 13401 + }, + { + "epoch": 36.81868131868132, + "grad_norm": 2.333857774734497, + "learning_rate": 3.159065934065934e-05, + "loss": 0.043, + "step": 13402 + }, + { + "epoch": 36.82142857142857, + "grad_norm": 17.49982452392578, + "learning_rate": 3.158928571428572e-05, + "loss": 0.6073, + "step": 13403 + }, + { + "epoch": 36.824175824175825, + "grad_norm": 17.975597381591797, + "learning_rate": 3.158791208791209e-05, + "loss": 0.5003, + "step": 13404 + }, + { + "epoch": 36.82692307692308, + "grad_norm": 9.006049156188965, + "learning_rate": 3.1586538461538465e-05, + "loss": 0.2174, + "step": 13405 + }, + { + "epoch": 36.82967032967033, + "grad_norm": 10.7898530960083, + "learning_rate": 3.1585164835164835e-05, + "loss": 0.159, + "step": 13406 + }, + { + "epoch": 36.832417582417584, + "grad_norm": 6.15164852142334, + "learning_rate": 3.1583791208791205e-05, + "loss": 0.1009, + "step": 13407 + }, + { + "epoch": 36.83516483516483, + "grad_norm": 5.491118907928467, + "learning_rate": 3.158241758241758e-05, + "loss": 0.0962, + "step": 13408 + }, + { + "epoch": 36.83791208791209, + "grad_norm": 17.66145133972168, + "learning_rate": 3.158104395604396e-05, + "loss": 0.5444, + "step": 13409 + }, + { + "epoch": 36.84065934065934, + "grad_norm": 14.151700973510742, + "learning_rate": 3.157967032967033e-05, + "loss": 0.7189, + "step": 13410 + }, + { + "epoch": 36.84340659340659, + "grad_norm": 22.952911376953125, + "learning_rate": 3.1578296703296706e-05, + "loss": 0.587, + "step": 13411 + }, + { + "epoch": 36.84615384615385, + "grad_norm": 17.516080856323242, + "learning_rate": 3.1576923076923076e-05, + "loss": 0.3315, + "step": 13412 + }, + { + "epoch": 36.8489010989011, + "grad_norm": 19.51557731628418, + "learning_rate": 3.157554945054945e-05, + "loss": 0.8264, + "step": 13413 + }, + { + "epoch": 36.85164835164835, + "grad_norm": 10.075136184692383, + "learning_rate": 3.157417582417583e-05, + "loss": 0.3309, + "step": 13414 + }, + { + "epoch": 36.854395604395606, + "grad_norm": 9.09196949005127, + "learning_rate": 3.15728021978022e-05, + "loss": 0.1475, + "step": 13415 + }, + { + "epoch": 36.857142857142854, + "grad_norm": 9.183338165283203, + "learning_rate": 3.1571428571428576e-05, + "loss": 0.1388, + "step": 13416 + }, + { + "epoch": 36.85989010989011, + "grad_norm": 10.221976280212402, + "learning_rate": 3.1570054945054946e-05, + "loss": 0.2481, + "step": 13417 + }, + { + "epoch": 36.862637362637365, + "grad_norm": 17.336315155029297, + "learning_rate": 3.156868131868132e-05, + "loss": 0.5932, + "step": 13418 + }, + { + "epoch": 36.86538461538461, + "grad_norm": 2.426652669906616, + "learning_rate": 3.156730769230769e-05, + "loss": 0.0459, + "step": 13419 + }, + { + "epoch": 36.86813186813187, + "grad_norm": 7.312464714050293, + "learning_rate": 3.156593406593407e-05, + "loss": 0.1536, + "step": 13420 + }, + { + "epoch": 36.870879120879124, + "grad_norm": 6.319157123565674, + "learning_rate": 3.156456043956044e-05, + "loss": 0.1881, + "step": 13421 + }, + { + "epoch": 36.87362637362637, + "grad_norm": 9.261896133422852, + "learning_rate": 3.156318681318681e-05, + "loss": 0.2615, + "step": 13422 + }, + { + "epoch": 36.87637362637363, + "grad_norm": 5.837242603302002, + "learning_rate": 3.1561813186813186e-05, + "loss": 0.1379, + "step": 13423 + }, + { + "epoch": 36.879120879120876, + "grad_norm": 15.803464889526367, + "learning_rate": 3.156043956043956e-05, + "loss": 0.5351, + "step": 13424 + }, + { + "epoch": 36.88186813186813, + "grad_norm": 6.614943504333496, + "learning_rate": 3.155906593406593e-05, + "loss": 0.1311, + "step": 13425 + }, + { + "epoch": 36.88461538461539, + "grad_norm": 6.392230987548828, + "learning_rate": 3.155769230769231e-05, + "loss": 0.1379, + "step": 13426 + }, + { + "epoch": 36.887362637362635, + "grad_norm": 16.6419620513916, + "learning_rate": 3.155631868131868e-05, + "loss": 0.5192, + "step": 13427 + }, + { + "epoch": 36.89010989010989, + "grad_norm": 9.126222610473633, + "learning_rate": 3.155494505494506e-05, + "loss": 0.273, + "step": 13428 + }, + { + "epoch": 36.892857142857146, + "grad_norm": 8.59221363067627, + "learning_rate": 3.1553571428571434e-05, + "loss": 0.1873, + "step": 13429 + }, + { + "epoch": 36.895604395604394, + "grad_norm": 17.479063034057617, + "learning_rate": 3.1552197802197804e-05, + "loss": 0.5038, + "step": 13430 + }, + { + "epoch": 36.89835164835165, + "grad_norm": 9.176227569580078, + "learning_rate": 3.155082417582418e-05, + "loss": 0.1806, + "step": 13431 + }, + { + "epoch": 36.9010989010989, + "grad_norm": 10.1744384765625, + "learning_rate": 3.154945054945055e-05, + "loss": 0.2552, + "step": 13432 + }, + { + "epoch": 36.90384615384615, + "grad_norm": 25.441940307617188, + "learning_rate": 3.154807692307693e-05, + "loss": 0.9735, + "step": 13433 + }, + { + "epoch": 36.90659340659341, + "grad_norm": 9.311455726623535, + "learning_rate": 3.15467032967033e-05, + "loss": 0.2565, + "step": 13434 + }, + { + "epoch": 36.90934065934066, + "grad_norm": 18.293315887451172, + "learning_rate": 3.1545329670329674e-05, + "loss": 0.3474, + "step": 13435 + }, + { + "epoch": 36.91208791208791, + "grad_norm": 17.292369842529297, + "learning_rate": 3.1543956043956044e-05, + "loss": 0.4335, + "step": 13436 + }, + { + "epoch": 36.91483516483517, + "grad_norm": 4.887317180633545, + "learning_rate": 3.1542582417582414e-05, + "loss": 0.124, + "step": 13437 + }, + { + "epoch": 36.917582417582416, + "grad_norm": 13.913670539855957, + "learning_rate": 3.154120879120879e-05, + "loss": 0.2835, + "step": 13438 + }, + { + "epoch": 36.92032967032967, + "grad_norm": 11.110634803771973, + "learning_rate": 3.153983516483517e-05, + "loss": 0.2818, + "step": 13439 + }, + { + "epoch": 36.92307692307692, + "grad_norm": 5.058565616607666, + "learning_rate": 3.153846153846154e-05, + "loss": 0.1248, + "step": 13440 + }, + { + "epoch": 36.925824175824175, + "grad_norm": 20.418291091918945, + "learning_rate": 3.1537087912087915e-05, + "loss": 0.4806, + "step": 13441 + }, + { + "epoch": 36.92857142857143, + "grad_norm": 3.8776745796203613, + "learning_rate": 3.1535714285714285e-05, + "loss": 0.05, + "step": 13442 + }, + { + "epoch": 36.93131868131868, + "grad_norm": 15.65703010559082, + "learning_rate": 3.153434065934066e-05, + "loss": 0.3337, + "step": 13443 + }, + { + "epoch": 36.934065934065934, + "grad_norm": 15.219748497009277, + "learning_rate": 3.153296703296704e-05, + "loss": 0.4173, + "step": 13444 + }, + { + "epoch": 36.93681318681319, + "grad_norm": 13.79675579071045, + "learning_rate": 3.153159340659341e-05, + "loss": 0.2861, + "step": 13445 + }, + { + "epoch": 36.93956043956044, + "grad_norm": 10.097132682800293, + "learning_rate": 3.1530219780219785e-05, + "loss": 0.1952, + "step": 13446 + }, + { + "epoch": 36.94230769230769, + "grad_norm": 6.220803260803223, + "learning_rate": 3.1528846153846155e-05, + "loss": 0.1179, + "step": 13447 + }, + { + "epoch": 36.94505494505494, + "grad_norm": 9.465810775756836, + "learning_rate": 3.152747252747253e-05, + "loss": 0.1691, + "step": 13448 + }, + { + "epoch": 36.9478021978022, + "grad_norm": 19.449848175048828, + "learning_rate": 3.15260989010989e-05, + "loss": 0.5823, + "step": 13449 + }, + { + "epoch": 36.95054945054945, + "grad_norm": 5.966644287109375, + "learning_rate": 3.152472527472528e-05, + "loss": 0.1225, + "step": 13450 + }, + { + "epoch": 36.9532967032967, + "grad_norm": 12.959949493408203, + "learning_rate": 3.152335164835165e-05, + "loss": 0.2166, + "step": 13451 + }, + { + "epoch": 36.956043956043956, + "grad_norm": 8.920249938964844, + "learning_rate": 3.152197802197802e-05, + "loss": 0.1761, + "step": 13452 + }, + { + "epoch": 36.95879120879121, + "grad_norm": 12.703454971313477, + "learning_rate": 3.1520604395604395e-05, + "loss": 0.2847, + "step": 13453 + }, + { + "epoch": 36.96153846153846, + "grad_norm": 18.636272430419922, + "learning_rate": 3.151923076923077e-05, + "loss": 0.5367, + "step": 13454 + }, + { + "epoch": 36.964285714285715, + "grad_norm": 5.160364151000977, + "learning_rate": 3.151785714285714e-05, + "loss": 0.098, + "step": 13455 + }, + { + "epoch": 36.967032967032964, + "grad_norm": 9.653491020202637, + "learning_rate": 3.151648351648352e-05, + "loss": 0.2949, + "step": 13456 + }, + { + "epoch": 36.96978021978022, + "grad_norm": 5.86399507522583, + "learning_rate": 3.151510989010989e-05, + "loss": 0.1518, + "step": 13457 + }, + { + "epoch": 36.972527472527474, + "grad_norm": 13.315560340881348, + "learning_rate": 3.1513736263736266e-05, + "loss": 0.3502, + "step": 13458 + }, + { + "epoch": 36.97527472527472, + "grad_norm": 9.792170524597168, + "learning_rate": 3.151236263736264e-05, + "loss": 0.1381, + "step": 13459 + }, + { + "epoch": 36.97802197802198, + "grad_norm": 8.689334869384766, + "learning_rate": 3.151098901098901e-05, + "loss": 0.2668, + "step": 13460 + }, + { + "epoch": 36.98076923076923, + "grad_norm": 13.310343742370605, + "learning_rate": 3.150961538461539e-05, + "loss": 0.254, + "step": 13461 + }, + { + "epoch": 36.98351648351648, + "grad_norm": 8.936946868896484, + "learning_rate": 3.150824175824176e-05, + "loss": 0.1877, + "step": 13462 + }, + { + "epoch": 36.98626373626374, + "grad_norm": 22.03775978088379, + "learning_rate": 3.1506868131868136e-05, + "loss": 0.6045, + "step": 13463 + }, + { + "epoch": 36.98901098901099, + "grad_norm": 8.221369743347168, + "learning_rate": 3.1505494505494506e-05, + "loss": 0.1772, + "step": 13464 + }, + { + "epoch": 36.99175824175824, + "grad_norm": 14.615727424621582, + "learning_rate": 3.1504120879120876e-05, + "loss": 0.5487, + "step": 13465 + }, + { + "epoch": 36.994505494505496, + "grad_norm": 17.817697525024414, + "learning_rate": 3.150274725274725e-05, + "loss": 0.6718, + "step": 13466 + }, + { + "epoch": 36.997252747252745, + "grad_norm": 26.881505966186523, + "learning_rate": 3.150137362637362e-05, + "loss": 0.6427, + "step": 13467 + }, + { + "epoch": 37.0, + "grad_norm": 8.05712604522705, + "learning_rate": 3.15e-05, + "loss": 0.1061, + "step": 13468 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.6046831955922864, + "eval_f1": 0.599438335701644, + "eval_f1_DuraRiadoRio_64x64": 0.5023255813953489, + "eval_f1_Mole_64x64": 0.5833333333333334, + "eval_f1_Quebrado_64x64": 0.5887850467289719, + "eval_f1_RiadoRio_64x64": 0.5296442687747036, + "eval_f1_RioFechado_64x64": 0.7931034482758621, + "eval_loss": 1.9913824796676636, + "eval_precision": 0.7035246821218095, + "eval_precision_DuraRiadoRio_64x64": 0.7605633802816901, + "eval_precision_Mole_64x64": 0.4166666666666667, + "eval_precision_Quebrado_64x64": 0.9, + "eval_precision_RiadoRio_64x64": 0.6633663366336634, + "eval_precision_RioFechado_64x64": 0.777027027027027, + "eval_recall": 0.6070741701672021, + "eval_recall_DuraRiadoRio_64x64": 0.375, + "eval_recall_Mole_64x64": 0.9722222222222222, + "eval_recall_Quebrado_64x64": 0.4375, + "eval_recall_RiadoRio_64x64": 0.4407894736842105, + "eval_recall_RioFechado_64x64": 0.8098591549295775, + "eval_runtime": 1.7831, + "eval_samples_per_second": 407.153, + "eval_steps_per_second": 25.798, + "step": 13468 + }, + { + "epoch": 37.002747252747255, + "grad_norm": 21.949996948242188, + "learning_rate": 3.149862637362638e-05, + "loss": 0.7164, + "step": 13469 + }, + { + "epoch": 37.005494505494504, + "grad_norm": 15.092403411865234, + "learning_rate": 3.149725274725275e-05, + "loss": 0.4913, + "step": 13470 + }, + { + "epoch": 37.00824175824176, + "grad_norm": 17.481704711914062, + "learning_rate": 3.1495879120879124e-05, + "loss": 0.4746, + "step": 13471 + }, + { + "epoch": 37.010989010989015, + "grad_norm": 6.757904529571533, + "learning_rate": 3.1494505494505494e-05, + "loss": 0.1811, + "step": 13472 + }, + { + "epoch": 37.01373626373626, + "grad_norm": 10.98668384552002, + "learning_rate": 3.149313186813187e-05, + "loss": 0.246, + "step": 13473 + }, + { + "epoch": 37.01648351648352, + "grad_norm": 17.988021850585938, + "learning_rate": 3.149175824175825e-05, + "loss": 0.5375, + "step": 13474 + }, + { + "epoch": 37.01923076923077, + "grad_norm": 14.928749084472656, + "learning_rate": 3.149038461538462e-05, + "loss": 0.3875, + "step": 13475 + }, + { + "epoch": 37.02197802197802, + "grad_norm": 9.61915397644043, + "learning_rate": 3.1489010989010994e-05, + "loss": 0.3036, + "step": 13476 + }, + { + "epoch": 37.02472527472528, + "grad_norm": 14.88655948638916, + "learning_rate": 3.1487637362637364e-05, + "loss": 0.5319, + "step": 13477 + }, + { + "epoch": 37.027472527472526, + "grad_norm": 11.749478340148926, + "learning_rate": 3.148626373626374e-05, + "loss": 0.2566, + "step": 13478 + }, + { + "epoch": 37.03021978021978, + "grad_norm": 5.764830589294434, + "learning_rate": 3.148489010989011e-05, + "loss": 0.124, + "step": 13479 + }, + { + "epoch": 37.032967032967036, + "grad_norm": 5.994945526123047, + "learning_rate": 3.148351648351648e-05, + "loss": 0.0998, + "step": 13480 + }, + { + "epoch": 37.035714285714285, + "grad_norm": 10.527342796325684, + "learning_rate": 3.148214285714286e-05, + "loss": 0.3126, + "step": 13481 + }, + { + "epoch": 37.03846153846154, + "grad_norm": 13.453896522521973, + "learning_rate": 3.148076923076923e-05, + "loss": 0.2821, + "step": 13482 + }, + { + "epoch": 37.04120879120879, + "grad_norm": 1.7659544944763184, + "learning_rate": 3.1479395604395604e-05, + "loss": 0.0266, + "step": 13483 + }, + { + "epoch": 37.043956043956044, + "grad_norm": 25.706531524658203, + "learning_rate": 3.147802197802198e-05, + "loss": 0.7283, + "step": 13484 + }, + { + "epoch": 37.0467032967033, + "grad_norm": 9.855033874511719, + "learning_rate": 3.147664835164835e-05, + "loss": 0.3102, + "step": 13485 + }, + { + "epoch": 37.04945054945055, + "grad_norm": 30.840147018432617, + "learning_rate": 3.147527472527473e-05, + "loss": 0.5872, + "step": 13486 + }, + { + "epoch": 37.0521978021978, + "grad_norm": 13.287590026855469, + "learning_rate": 3.14739010989011e-05, + "loss": 0.2524, + "step": 13487 + }, + { + "epoch": 37.05494505494506, + "grad_norm": 15.628469467163086, + "learning_rate": 3.1472527472527475e-05, + "loss": 0.5712, + "step": 13488 + }, + { + "epoch": 37.05769230769231, + "grad_norm": 15.595312118530273, + "learning_rate": 3.147115384615385e-05, + "loss": 0.3082, + "step": 13489 + }, + { + "epoch": 37.06043956043956, + "grad_norm": 15.064717292785645, + "learning_rate": 3.146978021978022e-05, + "loss": 0.3581, + "step": 13490 + }, + { + "epoch": 37.06318681318681, + "grad_norm": 8.778341293334961, + "learning_rate": 3.14684065934066e-05, + "loss": 0.1693, + "step": 13491 + }, + { + "epoch": 37.065934065934066, + "grad_norm": 11.929815292358398, + "learning_rate": 3.146703296703297e-05, + "loss": 0.25, + "step": 13492 + }, + { + "epoch": 37.06868131868132, + "grad_norm": 13.112380027770996, + "learning_rate": 3.1465659340659345e-05, + "loss": 0.4144, + "step": 13493 + }, + { + "epoch": 37.07142857142857, + "grad_norm": 17.732746124267578, + "learning_rate": 3.1464285714285715e-05, + "loss": 0.8278, + "step": 13494 + }, + { + "epoch": 37.074175824175825, + "grad_norm": 8.691727638244629, + "learning_rate": 3.1462912087912085e-05, + "loss": 0.2617, + "step": 13495 + }, + { + "epoch": 37.07692307692308, + "grad_norm": 9.966930389404297, + "learning_rate": 3.146153846153846e-05, + "loss": 0.3208, + "step": 13496 + }, + { + "epoch": 37.07967032967033, + "grad_norm": 12.154440879821777, + "learning_rate": 3.146016483516483e-05, + "loss": 0.3216, + "step": 13497 + }, + { + "epoch": 37.082417582417584, + "grad_norm": 15.64085578918457, + "learning_rate": 3.145879120879121e-05, + "loss": 0.3989, + "step": 13498 + }, + { + "epoch": 37.08516483516483, + "grad_norm": 9.688626289367676, + "learning_rate": 3.1457417582417586e-05, + "loss": 0.1981, + "step": 13499 + }, + { + "epoch": 37.08791208791209, + "grad_norm": 5.165668487548828, + "learning_rate": 3.1456043956043956e-05, + "loss": 0.124, + "step": 13500 + }, + { + "epoch": 37.09065934065934, + "grad_norm": 10.923182487487793, + "learning_rate": 3.145467032967033e-05, + "loss": 0.2858, + "step": 13501 + }, + { + "epoch": 37.09340659340659, + "grad_norm": 7.027985572814941, + "learning_rate": 3.14532967032967e-05, + "loss": 0.1729, + "step": 13502 + }, + { + "epoch": 37.09615384615385, + "grad_norm": 22.071491241455078, + "learning_rate": 3.145192307692308e-05, + "loss": 0.6963, + "step": 13503 + }, + { + "epoch": 37.0989010989011, + "grad_norm": 19.24947166442871, + "learning_rate": 3.1450549450549456e-05, + "loss": 0.6482, + "step": 13504 + }, + { + "epoch": 37.10164835164835, + "grad_norm": 14.106364250183105, + "learning_rate": 3.1449175824175826e-05, + "loss": 0.2514, + "step": 13505 + }, + { + "epoch": 37.104395604395606, + "grad_norm": 22.574493408203125, + "learning_rate": 3.14478021978022e-05, + "loss": 0.4517, + "step": 13506 + }, + { + "epoch": 37.107142857142854, + "grad_norm": 12.484859466552734, + "learning_rate": 3.144642857142857e-05, + "loss": 0.3751, + "step": 13507 + }, + { + "epoch": 37.10989010989011, + "grad_norm": 6.978127956390381, + "learning_rate": 3.144505494505495e-05, + "loss": 0.1511, + "step": 13508 + }, + { + "epoch": 37.112637362637365, + "grad_norm": 16.654144287109375, + "learning_rate": 3.144368131868132e-05, + "loss": 0.39, + "step": 13509 + }, + { + "epoch": 37.11538461538461, + "grad_norm": 14.526350975036621, + "learning_rate": 3.144230769230769e-05, + "loss": 0.3482, + "step": 13510 + }, + { + "epoch": 37.11813186813187, + "grad_norm": 16.395776748657227, + "learning_rate": 3.1440934065934067e-05, + "loss": 0.4777, + "step": 13511 + }, + { + "epoch": 37.120879120879124, + "grad_norm": 13.021993637084961, + "learning_rate": 3.143956043956044e-05, + "loss": 0.3528, + "step": 13512 + }, + { + "epoch": 37.12362637362637, + "grad_norm": 13.466064453125, + "learning_rate": 3.1438186813186813e-05, + "loss": 0.5358, + "step": 13513 + }, + { + "epoch": 37.12637362637363, + "grad_norm": 6.64082145690918, + "learning_rate": 3.143681318681319e-05, + "loss": 0.1192, + "step": 13514 + }, + { + "epoch": 37.129120879120876, + "grad_norm": 15.616130828857422, + "learning_rate": 3.143543956043956e-05, + "loss": 0.4196, + "step": 13515 + }, + { + "epoch": 37.13186813186813, + "grad_norm": 10.351500511169434, + "learning_rate": 3.143406593406594e-05, + "loss": 0.2088, + "step": 13516 + }, + { + "epoch": 37.13461538461539, + "grad_norm": 9.32182788848877, + "learning_rate": 3.143269230769231e-05, + "loss": 0.1788, + "step": 13517 + }, + { + "epoch": 37.137362637362635, + "grad_norm": 11.304842948913574, + "learning_rate": 3.1431318681318684e-05, + "loss": 0.191, + "step": 13518 + }, + { + "epoch": 37.14010989010989, + "grad_norm": 9.79751968383789, + "learning_rate": 3.142994505494506e-05, + "loss": 0.2485, + "step": 13519 + }, + { + "epoch": 37.142857142857146, + "grad_norm": 2.98205828666687, + "learning_rate": 3.142857142857143e-05, + "loss": 0.0658, + "step": 13520 + }, + { + "epoch": 37.145604395604394, + "grad_norm": 17.343536376953125, + "learning_rate": 3.142719780219781e-05, + "loss": 0.3899, + "step": 13521 + }, + { + "epoch": 37.14835164835165, + "grad_norm": 9.723299980163574, + "learning_rate": 3.142582417582418e-05, + "loss": 0.2696, + "step": 13522 + }, + { + "epoch": 37.1510989010989, + "grad_norm": 6.396164894104004, + "learning_rate": 3.1424450549450554e-05, + "loss": 0.2062, + "step": 13523 + }, + { + "epoch": 37.15384615384615, + "grad_norm": 21.46136474609375, + "learning_rate": 3.1423076923076924e-05, + "loss": 0.5531, + "step": 13524 + }, + { + "epoch": 37.15659340659341, + "grad_norm": 12.08133316040039, + "learning_rate": 3.1421703296703294e-05, + "loss": 0.2475, + "step": 13525 + }, + { + "epoch": 37.15934065934066, + "grad_norm": 14.622385025024414, + "learning_rate": 3.142032967032967e-05, + "loss": 0.4155, + "step": 13526 + }, + { + "epoch": 37.16208791208791, + "grad_norm": 21.798601150512695, + "learning_rate": 3.141895604395604e-05, + "loss": 0.8085, + "step": 13527 + }, + { + "epoch": 37.16483516483517, + "grad_norm": 1.7345622777938843, + "learning_rate": 3.141758241758242e-05, + "loss": 0.0416, + "step": 13528 + }, + { + "epoch": 37.167582417582416, + "grad_norm": 17.354372024536133, + "learning_rate": 3.1416208791208795e-05, + "loss": 0.4636, + "step": 13529 + }, + { + "epoch": 37.17032967032967, + "grad_norm": 11.677336692810059, + "learning_rate": 3.1414835164835165e-05, + "loss": 0.166, + "step": 13530 + }, + { + "epoch": 37.17307692307692, + "grad_norm": 6.065888404846191, + "learning_rate": 3.141346153846154e-05, + "loss": 0.142, + "step": 13531 + }, + { + "epoch": 37.175824175824175, + "grad_norm": 15.64947509765625, + "learning_rate": 3.141208791208791e-05, + "loss": 0.3537, + "step": 13532 + }, + { + "epoch": 37.17857142857143, + "grad_norm": 11.27960205078125, + "learning_rate": 3.141071428571429e-05, + "loss": 0.5061, + "step": 13533 + }, + { + "epoch": 37.18131868131868, + "grad_norm": 10.106928825378418, + "learning_rate": 3.1409340659340665e-05, + "loss": 0.2609, + "step": 13534 + }, + { + "epoch": 37.184065934065934, + "grad_norm": 9.671475410461426, + "learning_rate": 3.1407967032967035e-05, + "loss": 0.1933, + "step": 13535 + }, + { + "epoch": 37.18681318681319, + "grad_norm": 9.941874504089355, + "learning_rate": 3.140659340659341e-05, + "loss": 0.2334, + "step": 13536 + }, + { + "epoch": 37.18956043956044, + "grad_norm": 10.889480590820312, + "learning_rate": 3.140521978021978e-05, + "loss": 0.2937, + "step": 13537 + }, + { + "epoch": 37.19230769230769, + "grad_norm": 8.937973976135254, + "learning_rate": 3.140384615384616e-05, + "loss": 0.1693, + "step": 13538 + }, + { + "epoch": 37.19505494505494, + "grad_norm": 17.858407974243164, + "learning_rate": 3.140247252747253e-05, + "loss": 0.5885, + "step": 13539 + }, + { + "epoch": 37.1978021978022, + "grad_norm": 15.615572929382324, + "learning_rate": 3.14010989010989e-05, + "loss": 0.4512, + "step": 13540 + }, + { + "epoch": 37.20054945054945, + "grad_norm": 7.36846923828125, + "learning_rate": 3.1399725274725276e-05, + "loss": 0.1816, + "step": 13541 + }, + { + "epoch": 37.2032967032967, + "grad_norm": 3.139859437942505, + "learning_rate": 3.1398351648351646e-05, + "loss": 0.0518, + "step": 13542 + }, + { + "epoch": 37.206043956043956, + "grad_norm": 12.862273216247559, + "learning_rate": 3.139697802197802e-05, + "loss": 0.3008, + "step": 13543 + }, + { + "epoch": 37.20879120879121, + "grad_norm": 6.366140842437744, + "learning_rate": 3.139560439560439e-05, + "loss": 0.138, + "step": 13544 + }, + { + "epoch": 37.21153846153846, + "grad_norm": 13.196488380432129, + "learning_rate": 3.139423076923077e-05, + "loss": 0.2423, + "step": 13545 + }, + { + "epoch": 37.214285714285715, + "grad_norm": 11.938882827758789, + "learning_rate": 3.1392857142857146e-05, + "loss": 0.2109, + "step": 13546 + }, + { + "epoch": 37.217032967032964, + "grad_norm": 7.444945335388184, + "learning_rate": 3.1391483516483516e-05, + "loss": 0.1443, + "step": 13547 + }, + { + "epoch": 37.21978021978022, + "grad_norm": 11.815577507019043, + "learning_rate": 3.139010989010989e-05, + "loss": 0.2878, + "step": 13548 + }, + { + "epoch": 37.222527472527474, + "grad_norm": 12.434412956237793, + "learning_rate": 3.138873626373626e-05, + "loss": 0.4089, + "step": 13549 + }, + { + "epoch": 37.22527472527472, + "grad_norm": 11.484323501586914, + "learning_rate": 3.138736263736264e-05, + "loss": 0.1944, + "step": 13550 + }, + { + "epoch": 37.22802197802198, + "grad_norm": 17.50873565673828, + "learning_rate": 3.1385989010989016e-05, + "loss": 0.6467, + "step": 13551 + }, + { + "epoch": 37.23076923076923, + "grad_norm": 11.667557716369629, + "learning_rate": 3.1384615384615386e-05, + "loss": 0.2291, + "step": 13552 + }, + { + "epoch": 37.23351648351648, + "grad_norm": 6.98803186416626, + "learning_rate": 3.138324175824176e-05, + "loss": 0.1623, + "step": 13553 + }, + { + "epoch": 37.23626373626374, + "grad_norm": 6.389225482940674, + "learning_rate": 3.138186813186813e-05, + "loss": 0.1767, + "step": 13554 + }, + { + "epoch": 37.239010989010985, + "grad_norm": 11.712510108947754, + "learning_rate": 3.13804945054945e-05, + "loss": 0.2091, + "step": 13555 + }, + { + "epoch": 37.24175824175824, + "grad_norm": 11.791131973266602, + "learning_rate": 3.137912087912088e-05, + "loss": 0.3893, + "step": 13556 + }, + { + "epoch": 37.244505494505496, + "grad_norm": 11.008746147155762, + "learning_rate": 3.137774725274725e-05, + "loss": 0.2918, + "step": 13557 + }, + { + "epoch": 37.247252747252745, + "grad_norm": 6.960263729095459, + "learning_rate": 3.137637362637363e-05, + "loss": 0.2174, + "step": 13558 + }, + { + "epoch": 37.25, + "grad_norm": 7.7073163986206055, + "learning_rate": 3.1375e-05, + "loss": 0.1571, + "step": 13559 + }, + { + "epoch": 37.252747252747255, + "grad_norm": 12.35814380645752, + "learning_rate": 3.1373626373626374e-05, + "loss": 0.2997, + "step": 13560 + }, + { + "epoch": 37.255494505494504, + "grad_norm": 3.7650606632232666, + "learning_rate": 3.137225274725275e-05, + "loss": 0.1023, + "step": 13561 + }, + { + "epoch": 37.25824175824176, + "grad_norm": 5.359525680541992, + "learning_rate": 3.137087912087912e-05, + "loss": 0.1339, + "step": 13562 + }, + { + "epoch": 37.260989010989015, + "grad_norm": 10.72961711883545, + "learning_rate": 3.13695054945055e-05, + "loss": 0.2553, + "step": 13563 + }, + { + "epoch": 37.26373626373626, + "grad_norm": 13.879487991333008, + "learning_rate": 3.136813186813187e-05, + "loss": 0.443, + "step": 13564 + }, + { + "epoch": 37.26648351648352, + "grad_norm": 14.989295959472656, + "learning_rate": 3.1366758241758244e-05, + "loss": 0.539, + "step": 13565 + }, + { + "epoch": 37.26923076923077, + "grad_norm": 11.499567985534668, + "learning_rate": 3.136538461538462e-05, + "loss": 0.2791, + "step": 13566 + }, + { + "epoch": 37.27197802197802, + "grad_norm": 6.034580230712891, + "learning_rate": 3.136401098901099e-05, + "loss": 0.1114, + "step": 13567 + }, + { + "epoch": 37.27472527472528, + "grad_norm": 22.77189064025879, + "learning_rate": 3.136263736263737e-05, + "loss": 0.6605, + "step": 13568 + }, + { + "epoch": 37.277472527472526, + "grad_norm": 12.28980541229248, + "learning_rate": 3.136126373626374e-05, + "loss": 0.3882, + "step": 13569 + }, + { + "epoch": 37.28021978021978, + "grad_norm": 12.404659271240234, + "learning_rate": 3.135989010989011e-05, + "loss": 0.3848, + "step": 13570 + }, + { + "epoch": 37.282967032967036, + "grad_norm": 14.005913734436035, + "learning_rate": 3.1358516483516485e-05, + "loss": 0.23, + "step": 13571 + }, + { + "epoch": 37.285714285714285, + "grad_norm": 14.58917236328125, + "learning_rate": 3.1357142857142855e-05, + "loss": 0.5292, + "step": 13572 + }, + { + "epoch": 37.28846153846154, + "grad_norm": 10.695098876953125, + "learning_rate": 3.135576923076923e-05, + "loss": 0.1704, + "step": 13573 + }, + { + "epoch": 37.29120879120879, + "grad_norm": 14.63117790222168, + "learning_rate": 3.13543956043956e-05, + "loss": 0.3793, + "step": 13574 + }, + { + "epoch": 37.293956043956044, + "grad_norm": 11.495869636535645, + "learning_rate": 3.135302197802198e-05, + "loss": 0.2435, + "step": 13575 + }, + { + "epoch": 37.2967032967033, + "grad_norm": 7.178900718688965, + "learning_rate": 3.1351648351648355e-05, + "loss": 0.1538, + "step": 13576 + }, + { + "epoch": 37.29945054945055, + "grad_norm": 6.777801990509033, + "learning_rate": 3.1350274725274725e-05, + "loss": 0.1599, + "step": 13577 + }, + { + "epoch": 37.3021978021978, + "grad_norm": 14.230561256408691, + "learning_rate": 3.13489010989011e-05, + "loss": 0.3095, + "step": 13578 + }, + { + "epoch": 37.30494505494506, + "grad_norm": 11.67146110534668, + "learning_rate": 3.134752747252747e-05, + "loss": 0.4813, + "step": 13579 + }, + { + "epoch": 37.30769230769231, + "grad_norm": 16.201345443725586, + "learning_rate": 3.134615384615385e-05, + "loss": 0.3823, + "step": 13580 + }, + { + "epoch": 37.31043956043956, + "grad_norm": 10.676923751831055, + "learning_rate": 3.1344780219780225e-05, + "loss": 0.363, + "step": 13581 + }, + { + "epoch": 37.31318681318681, + "grad_norm": 12.232617378234863, + "learning_rate": 3.1343406593406595e-05, + "loss": 0.4489, + "step": 13582 + }, + { + "epoch": 37.315934065934066, + "grad_norm": 13.710597038269043, + "learning_rate": 3.134203296703297e-05, + "loss": 0.3293, + "step": 13583 + }, + { + "epoch": 37.31868131868132, + "grad_norm": 11.268733978271484, + "learning_rate": 3.134065934065934e-05, + "loss": 0.2459, + "step": 13584 + }, + { + "epoch": 37.32142857142857, + "grad_norm": 10.620990753173828, + "learning_rate": 3.133928571428571e-05, + "loss": 0.1487, + "step": 13585 + }, + { + "epoch": 37.324175824175825, + "grad_norm": 10.605805397033691, + "learning_rate": 3.133791208791209e-05, + "loss": 0.2449, + "step": 13586 + }, + { + "epoch": 37.32692307692308, + "grad_norm": 4.75423526763916, + "learning_rate": 3.133653846153846e-05, + "loss": 0.1047, + "step": 13587 + }, + { + "epoch": 37.32967032967033, + "grad_norm": 6.440215587615967, + "learning_rate": 3.1335164835164836e-05, + "loss": 0.1143, + "step": 13588 + }, + { + "epoch": 37.332417582417584, + "grad_norm": 17.635011672973633, + "learning_rate": 3.1333791208791206e-05, + "loss": 0.3431, + "step": 13589 + }, + { + "epoch": 37.33516483516483, + "grad_norm": 2.463698148727417, + "learning_rate": 3.133241758241758e-05, + "loss": 0.0611, + "step": 13590 + }, + { + "epoch": 37.33791208791209, + "grad_norm": 14.034646987915039, + "learning_rate": 3.133104395604396e-05, + "loss": 0.3546, + "step": 13591 + }, + { + "epoch": 37.34065934065934, + "grad_norm": 9.181548118591309, + "learning_rate": 3.132967032967033e-05, + "loss": 0.232, + "step": 13592 + }, + { + "epoch": 37.34340659340659, + "grad_norm": 13.080597877502441, + "learning_rate": 3.1328296703296706e-05, + "loss": 0.2603, + "step": 13593 + }, + { + "epoch": 37.34615384615385, + "grad_norm": 21.610136032104492, + "learning_rate": 3.1326923076923076e-05, + "loss": 0.6999, + "step": 13594 + }, + { + "epoch": 37.3489010989011, + "grad_norm": 11.26864242553711, + "learning_rate": 3.132554945054945e-05, + "loss": 0.2858, + "step": 13595 + }, + { + "epoch": 37.35164835164835, + "grad_norm": 7.3453803062438965, + "learning_rate": 3.132417582417583e-05, + "loss": 0.1279, + "step": 13596 + }, + { + "epoch": 37.354395604395606, + "grad_norm": 9.78784465789795, + "learning_rate": 3.13228021978022e-05, + "loss": 0.2879, + "step": 13597 + }, + { + "epoch": 37.357142857142854, + "grad_norm": 19.083078384399414, + "learning_rate": 3.132142857142858e-05, + "loss": 0.4262, + "step": 13598 + }, + { + "epoch": 37.35989010989011, + "grad_norm": 15.26470947265625, + "learning_rate": 3.132005494505495e-05, + "loss": 0.407, + "step": 13599 + }, + { + "epoch": 37.362637362637365, + "grad_norm": 9.39515209197998, + "learning_rate": 3.131868131868132e-05, + "loss": 0.1356, + "step": 13600 + }, + { + "epoch": 37.36538461538461, + "grad_norm": 8.378613471984863, + "learning_rate": 3.1317307692307694e-05, + "loss": 0.1783, + "step": 13601 + }, + { + "epoch": 37.36813186813187, + "grad_norm": 14.282367706298828, + "learning_rate": 3.1315934065934064e-05, + "loss": 0.4019, + "step": 13602 + }, + { + "epoch": 37.370879120879124, + "grad_norm": 13.796000480651855, + "learning_rate": 3.131456043956044e-05, + "loss": 0.2757, + "step": 13603 + }, + { + "epoch": 37.37362637362637, + "grad_norm": 13.627020835876465, + "learning_rate": 3.131318681318681e-05, + "loss": 0.4258, + "step": 13604 + }, + { + "epoch": 37.37637362637363, + "grad_norm": 15.317996978759766, + "learning_rate": 3.131181318681319e-05, + "loss": 0.3568, + "step": 13605 + }, + { + "epoch": 37.379120879120876, + "grad_norm": 5.655836582183838, + "learning_rate": 3.1310439560439564e-05, + "loss": 0.0901, + "step": 13606 + }, + { + "epoch": 37.38186813186813, + "grad_norm": 14.775081634521484, + "learning_rate": 3.1309065934065934e-05, + "loss": 0.7361, + "step": 13607 + }, + { + "epoch": 37.38461538461539, + "grad_norm": 17.05482292175293, + "learning_rate": 3.130769230769231e-05, + "loss": 0.6781, + "step": 13608 + }, + { + "epoch": 37.387362637362635, + "grad_norm": 10.976304054260254, + "learning_rate": 3.130631868131868e-05, + "loss": 0.2271, + "step": 13609 + }, + { + "epoch": 37.39010989010989, + "grad_norm": 16.201141357421875, + "learning_rate": 3.130494505494506e-05, + "loss": 0.4116, + "step": 13610 + }, + { + "epoch": 37.392857142857146, + "grad_norm": 12.353788375854492, + "learning_rate": 3.1303571428571434e-05, + "loss": 0.3813, + "step": 13611 + }, + { + "epoch": 37.395604395604394, + "grad_norm": 5.743734359741211, + "learning_rate": 3.1302197802197804e-05, + "loss": 0.0914, + "step": 13612 + }, + { + "epoch": 37.39835164835165, + "grad_norm": 11.212475776672363, + "learning_rate": 3.130082417582418e-05, + "loss": 0.208, + "step": 13613 + }, + { + "epoch": 37.4010989010989, + "grad_norm": 13.991737365722656, + "learning_rate": 3.129945054945055e-05, + "loss": 0.3162, + "step": 13614 + }, + { + "epoch": 37.40384615384615, + "grad_norm": 10.838690757751465, + "learning_rate": 3.129807692307692e-05, + "loss": 0.2659, + "step": 13615 + }, + { + "epoch": 37.40659340659341, + "grad_norm": 14.418946266174316, + "learning_rate": 3.12967032967033e-05, + "loss": 0.4503, + "step": 13616 + }, + { + "epoch": 37.40934065934066, + "grad_norm": 18.386178970336914, + "learning_rate": 3.129532967032967e-05, + "loss": 0.4388, + "step": 13617 + }, + { + "epoch": 37.41208791208791, + "grad_norm": 14.451375961303711, + "learning_rate": 3.1293956043956045e-05, + "loss": 0.446, + "step": 13618 + }, + { + "epoch": 37.41483516483517, + "grad_norm": 12.49815559387207, + "learning_rate": 3.1292582417582415e-05, + "loss": 0.2432, + "step": 13619 + }, + { + "epoch": 37.417582417582416, + "grad_norm": 14.13853931427002, + "learning_rate": 3.129120879120879e-05, + "loss": 0.3553, + "step": 13620 + }, + { + "epoch": 37.42032967032967, + "grad_norm": 15.69680118560791, + "learning_rate": 3.128983516483517e-05, + "loss": 0.2311, + "step": 13621 + }, + { + "epoch": 37.42307692307692, + "grad_norm": 15.508161544799805, + "learning_rate": 3.128846153846154e-05, + "loss": 0.4007, + "step": 13622 + }, + { + "epoch": 37.425824175824175, + "grad_norm": 21.4284725189209, + "learning_rate": 3.1287087912087915e-05, + "loss": 0.5578, + "step": 13623 + }, + { + "epoch": 37.42857142857143, + "grad_norm": 2.2227365970611572, + "learning_rate": 3.1285714285714285e-05, + "loss": 0.0517, + "step": 13624 + }, + { + "epoch": 37.43131868131868, + "grad_norm": 16.0842342376709, + "learning_rate": 3.128434065934066e-05, + "loss": 0.2931, + "step": 13625 + }, + { + "epoch": 37.434065934065934, + "grad_norm": 17.938060760498047, + "learning_rate": 3.128296703296704e-05, + "loss": 0.4198, + "step": 13626 + }, + { + "epoch": 37.43681318681319, + "grad_norm": 16.21240234375, + "learning_rate": 3.128159340659341e-05, + "loss": 0.2753, + "step": 13627 + }, + { + "epoch": 37.43956043956044, + "grad_norm": 20.422595977783203, + "learning_rate": 3.1280219780219786e-05, + "loss": 0.6334, + "step": 13628 + }, + { + "epoch": 37.44230769230769, + "grad_norm": 16.995616912841797, + "learning_rate": 3.1278846153846156e-05, + "loss": 0.527, + "step": 13629 + }, + { + "epoch": 37.44505494505494, + "grad_norm": 7.912477016448975, + "learning_rate": 3.1277472527472526e-05, + "loss": 0.1891, + "step": 13630 + }, + { + "epoch": 37.4478021978022, + "grad_norm": 6.72304105758667, + "learning_rate": 3.12760989010989e-05, + "loss": 0.1196, + "step": 13631 + }, + { + "epoch": 37.45054945054945, + "grad_norm": 5.790647506713867, + "learning_rate": 3.127472527472527e-05, + "loss": 0.1157, + "step": 13632 + }, + { + "epoch": 37.4532967032967, + "grad_norm": 6.80424165725708, + "learning_rate": 3.127335164835165e-05, + "loss": 0.1452, + "step": 13633 + }, + { + "epoch": 37.456043956043956, + "grad_norm": 12.40166187286377, + "learning_rate": 3.127197802197802e-05, + "loss": 0.3137, + "step": 13634 + }, + { + "epoch": 37.45879120879121, + "grad_norm": 7.888879776000977, + "learning_rate": 3.1270604395604396e-05, + "loss": 0.1975, + "step": 13635 + }, + { + "epoch": 37.46153846153846, + "grad_norm": 12.175313949584961, + "learning_rate": 3.126923076923077e-05, + "loss": 0.2716, + "step": 13636 + }, + { + "epoch": 37.464285714285715, + "grad_norm": 4.8415327072143555, + "learning_rate": 3.126785714285714e-05, + "loss": 0.0968, + "step": 13637 + }, + { + "epoch": 37.467032967032964, + "grad_norm": 4.0673933029174805, + "learning_rate": 3.126648351648352e-05, + "loss": 0.0479, + "step": 13638 + }, + { + "epoch": 37.46978021978022, + "grad_norm": 6.8523125648498535, + "learning_rate": 3.126510989010989e-05, + "loss": 0.1642, + "step": 13639 + }, + { + "epoch": 37.472527472527474, + "grad_norm": 15.76728630065918, + "learning_rate": 3.1263736263736267e-05, + "loss": 0.4692, + "step": 13640 + }, + { + "epoch": 37.47527472527472, + "grad_norm": 17.59588623046875, + "learning_rate": 3.126236263736264e-05, + "loss": 0.5995, + "step": 13641 + }, + { + "epoch": 37.47802197802198, + "grad_norm": 15.476917266845703, + "learning_rate": 3.1260989010989013e-05, + "loss": 0.7211, + "step": 13642 + }, + { + "epoch": 37.48076923076923, + "grad_norm": 11.860130310058594, + "learning_rate": 3.125961538461539e-05, + "loss": 0.2605, + "step": 13643 + }, + { + "epoch": 37.48351648351648, + "grad_norm": 13.892992973327637, + "learning_rate": 3.125824175824176e-05, + "loss": 0.1215, + "step": 13644 + }, + { + "epoch": 37.48626373626374, + "grad_norm": 5.871065616607666, + "learning_rate": 3.125686813186813e-05, + "loss": 0.1349, + "step": 13645 + }, + { + "epoch": 37.489010989010985, + "grad_norm": 14.215169906616211, + "learning_rate": 3.125549450549451e-05, + "loss": 0.3208, + "step": 13646 + }, + { + "epoch": 37.49175824175824, + "grad_norm": 12.864694595336914, + "learning_rate": 3.125412087912088e-05, + "loss": 0.3644, + "step": 13647 + }, + { + "epoch": 37.494505494505496, + "grad_norm": 11.618371963500977, + "learning_rate": 3.1252747252747254e-05, + "loss": 0.265, + "step": 13648 + }, + { + "epoch": 37.497252747252745, + "grad_norm": 14.091907501220703, + "learning_rate": 3.1251373626373624e-05, + "loss": 0.4315, + "step": 13649 + }, + { + "epoch": 37.5, + "grad_norm": 13.2869873046875, + "learning_rate": 3.125e-05, + "loss": 0.1583, + "step": 13650 + }, + { + "epoch": 37.502747252747255, + "grad_norm": 10.394962310791016, + "learning_rate": 3.124862637362638e-05, + "loss": 0.1925, + "step": 13651 + }, + { + "epoch": 37.505494505494504, + "grad_norm": 18.89365577697754, + "learning_rate": 3.124725274725275e-05, + "loss": 0.5793, + "step": 13652 + }, + { + "epoch": 37.50824175824176, + "grad_norm": 17.448179244995117, + "learning_rate": 3.1245879120879124e-05, + "loss": 0.45, + "step": 13653 + }, + { + "epoch": 37.51098901098901, + "grad_norm": 3.7445638179779053, + "learning_rate": 3.1244505494505494e-05, + "loss": 0.0589, + "step": 13654 + }, + { + "epoch": 37.51373626373626, + "grad_norm": 18.636768341064453, + "learning_rate": 3.124313186813187e-05, + "loss": 0.5888, + "step": 13655 + }, + { + "epoch": 37.51648351648352, + "grad_norm": 10.358555793762207, + "learning_rate": 3.124175824175825e-05, + "loss": 0.1569, + "step": 13656 + }, + { + "epoch": 37.51923076923077, + "grad_norm": 10.4166841506958, + "learning_rate": 3.124038461538462e-05, + "loss": 0.1983, + "step": 13657 + }, + { + "epoch": 37.52197802197802, + "grad_norm": 8.344518661499023, + "learning_rate": 3.1239010989010995e-05, + "loss": 0.1015, + "step": 13658 + }, + { + "epoch": 37.52472527472528, + "grad_norm": 17.365482330322266, + "learning_rate": 3.1237637362637365e-05, + "loss": 0.4285, + "step": 13659 + }, + { + "epoch": 37.527472527472526, + "grad_norm": 11.37143325805664, + "learning_rate": 3.1236263736263735e-05, + "loss": 0.201, + "step": 13660 + }, + { + "epoch": 37.53021978021978, + "grad_norm": 6.381519794464111, + "learning_rate": 3.123489010989011e-05, + "loss": 0.1617, + "step": 13661 + }, + { + "epoch": 37.532967032967036, + "grad_norm": 8.316235542297363, + "learning_rate": 3.123351648351648e-05, + "loss": 0.2911, + "step": 13662 + }, + { + "epoch": 37.535714285714285, + "grad_norm": 9.338414192199707, + "learning_rate": 3.123214285714286e-05, + "loss": 0.2133, + "step": 13663 + }, + { + "epoch": 37.53846153846154, + "grad_norm": 1.6816614866256714, + "learning_rate": 3.123076923076923e-05, + "loss": 0.0291, + "step": 13664 + }, + { + "epoch": 37.54120879120879, + "grad_norm": 13.618733406066895, + "learning_rate": 3.1229395604395605e-05, + "loss": 0.2312, + "step": 13665 + }, + { + "epoch": 37.543956043956044, + "grad_norm": 16.58054542541504, + "learning_rate": 3.122802197802198e-05, + "loss": 0.3744, + "step": 13666 + }, + { + "epoch": 37.5467032967033, + "grad_norm": 13.961955070495605, + "learning_rate": 3.122664835164835e-05, + "loss": 0.3735, + "step": 13667 + }, + { + "epoch": 37.54945054945055, + "grad_norm": 8.847962379455566, + "learning_rate": 3.122527472527473e-05, + "loss": 0.175, + "step": 13668 + }, + { + "epoch": 37.5521978021978, + "grad_norm": 9.228575706481934, + "learning_rate": 3.12239010989011e-05, + "loss": 0.1138, + "step": 13669 + }, + { + "epoch": 37.55494505494506, + "grad_norm": 13.048410415649414, + "learning_rate": 3.1222527472527476e-05, + "loss": 0.5402, + "step": 13670 + }, + { + "epoch": 37.55769230769231, + "grad_norm": 17.88092613220215, + "learning_rate": 3.122115384615385e-05, + "loss": 0.3843, + "step": 13671 + }, + { + "epoch": 37.56043956043956, + "grad_norm": 15.012811660766602, + "learning_rate": 3.121978021978022e-05, + "loss": 0.456, + "step": 13672 + }, + { + "epoch": 37.56318681318681, + "grad_norm": 19.60700225830078, + "learning_rate": 3.12184065934066e-05, + "loss": 0.4864, + "step": 13673 + }, + { + "epoch": 37.565934065934066, + "grad_norm": 10.141178131103516, + "learning_rate": 3.121703296703297e-05, + "loss": 0.3578, + "step": 13674 + }, + { + "epoch": 37.56868131868132, + "grad_norm": 7.109487056732178, + "learning_rate": 3.121565934065934e-05, + "loss": 0.1255, + "step": 13675 + }, + { + "epoch": 37.57142857142857, + "grad_norm": 13.801712036132812, + "learning_rate": 3.1214285714285716e-05, + "loss": 0.4107, + "step": 13676 + }, + { + "epoch": 37.574175824175825, + "grad_norm": 9.59296703338623, + "learning_rate": 3.1212912087912086e-05, + "loss": 0.2502, + "step": 13677 + }, + { + "epoch": 37.57692307692308, + "grad_norm": 9.179302215576172, + "learning_rate": 3.121153846153846e-05, + "loss": 0.3256, + "step": 13678 + }, + { + "epoch": 37.57967032967033, + "grad_norm": 15.261613845825195, + "learning_rate": 3.121016483516483e-05, + "loss": 0.4798, + "step": 13679 + }, + { + "epoch": 37.582417582417584, + "grad_norm": 9.359046936035156, + "learning_rate": 3.120879120879121e-05, + "loss": 0.2644, + "step": 13680 + }, + { + "epoch": 37.58516483516483, + "grad_norm": 13.502229690551758, + "learning_rate": 3.1207417582417586e-05, + "loss": 0.4186, + "step": 13681 + }, + { + "epoch": 37.58791208791209, + "grad_norm": 7.815938472747803, + "learning_rate": 3.1206043956043956e-05, + "loss": 0.2917, + "step": 13682 + }, + { + "epoch": 37.59065934065934, + "grad_norm": 8.953287124633789, + "learning_rate": 3.120467032967033e-05, + "loss": 0.2044, + "step": 13683 + }, + { + "epoch": 37.59340659340659, + "grad_norm": 11.107131958007812, + "learning_rate": 3.12032967032967e-05, + "loss": 0.2019, + "step": 13684 + }, + { + "epoch": 37.59615384615385, + "grad_norm": 16.82883071899414, + "learning_rate": 3.120192307692308e-05, + "loss": 0.6975, + "step": 13685 + }, + { + "epoch": 37.5989010989011, + "grad_norm": 11.145034790039062, + "learning_rate": 3.120054945054946e-05, + "loss": 0.2689, + "step": 13686 + }, + { + "epoch": 37.60164835164835, + "grad_norm": 13.900261878967285, + "learning_rate": 3.119917582417583e-05, + "loss": 0.2602, + "step": 13687 + }, + { + "epoch": 37.604395604395606, + "grad_norm": 9.290243148803711, + "learning_rate": 3.1197802197802204e-05, + "loss": 0.2989, + "step": 13688 + }, + { + "epoch": 37.607142857142854, + "grad_norm": 16.554723739624023, + "learning_rate": 3.1196428571428574e-05, + "loss": 0.2862, + "step": 13689 + }, + { + "epoch": 37.60989010989011, + "grad_norm": 11.272116661071777, + "learning_rate": 3.1195054945054944e-05, + "loss": 0.3566, + "step": 13690 + }, + { + "epoch": 37.612637362637365, + "grad_norm": 7.614053726196289, + "learning_rate": 3.119368131868132e-05, + "loss": 0.1072, + "step": 13691 + }, + { + "epoch": 37.61538461538461, + "grad_norm": 13.252381324768066, + "learning_rate": 3.119230769230769e-05, + "loss": 0.2703, + "step": 13692 + }, + { + "epoch": 37.61813186813187, + "grad_norm": 11.698161125183105, + "learning_rate": 3.119093406593407e-05, + "loss": 0.1323, + "step": 13693 + }, + { + "epoch": 37.620879120879124, + "grad_norm": 13.845340728759766, + "learning_rate": 3.118956043956044e-05, + "loss": 0.2589, + "step": 13694 + }, + { + "epoch": 37.62362637362637, + "grad_norm": 7.06975793838501, + "learning_rate": 3.1188186813186814e-05, + "loss": 0.106, + "step": 13695 + }, + { + "epoch": 37.62637362637363, + "grad_norm": 6.94193172454834, + "learning_rate": 3.118681318681319e-05, + "loss": 0.108, + "step": 13696 + }, + { + "epoch": 37.629120879120876, + "grad_norm": 7.7534098625183105, + "learning_rate": 3.118543956043956e-05, + "loss": 0.2441, + "step": 13697 + }, + { + "epoch": 37.63186813186813, + "grad_norm": 9.60329818725586, + "learning_rate": 3.118406593406594e-05, + "loss": 0.2829, + "step": 13698 + }, + { + "epoch": 37.63461538461539, + "grad_norm": 25.047733306884766, + "learning_rate": 3.118269230769231e-05, + "loss": 0.6624, + "step": 13699 + }, + { + "epoch": 37.637362637362635, + "grad_norm": 21.222396850585938, + "learning_rate": 3.1181318681318685e-05, + "loss": 0.7312, + "step": 13700 + }, + { + "epoch": 37.64010989010989, + "grad_norm": 23.413183212280273, + "learning_rate": 3.117994505494506e-05, + "loss": 0.7229, + "step": 13701 + }, + { + "epoch": 37.642857142857146, + "grad_norm": 18.336091995239258, + "learning_rate": 3.117857142857143e-05, + "loss": 0.6607, + "step": 13702 + }, + { + "epoch": 37.645604395604394, + "grad_norm": 14.646210670471191, + "learning_rate": 3.117719780219781e-05, + "loss": 0.5052, + "step": 13703 + }, + { + "epoch": 37.64835164835165, + "grad_norm": 3.5715973377227783, + "learning_rate": 3.117582417582418e-05, + "loss": 0.0406, + "step": 13704 + }, + { + "epoch": 37.6510989010989, + "grad_norm": 7.861111164093018, + "learning_rate": 3.117445054945055e-05, + "loss": 0.1845, + "step": 13705 + }, + { + "epoch": 37.65384615384615, + "grad_norm": 11.88375473022461, + "learning_rate": 3.1173076923076925e-05, + "loss": 0.2592, + "step": 13706 + }, + { + "epoch": 37.65659340659341, + "grad_norm": 5.170490741729736, + "learning_rate": 3.1171703296703295e-05, + "loss": 0.1289, + "step": 13707 + }, + { + "epoch": 37.65934065934066, + "grad_norm": 8.149626731872559, + "learning_rate": 3.117032967032967e-05, + "loss": 0.1158, + "step": 13708 + }, + { + "epoch": 37.66208791208791, + "grad_norm": 20.741914749145508, + "learning_rate": 3.116895604395604e-05, + "loss": 0.8483, + "step": 13709 + }, + { + "epoch": 37.66483516483517, + "grad_norm": 10.221122741699219, + "learning_rate": 3.116758241758242e-05, + "loss": 0.2216, + "step": 13710 + }, + { + "epoch": 37.667582417582416, + "grad_norm": 11.674391746520996, + "learning_rate": 3.1166208791208795e-05, + "loss": 0.2777, + "step": 13711 + }, + { + "epoch": 37.67032967032967, + "grad_norm": 4.008917331695557, + "learning_rate": 3.1164835164835165e-05, + "loss": 0.0725, + "step": 13712 + }, + { + "epoch": 37.67307692307692, + "grad_norm": 9.322710990905762, + "learning_rate": 3.116346153846154e-05, + "loss": 0.2611, + "step": 13713 + }, + { + "epoch": 37.675824175824175, + "grad_norm": 9.239014625549316, + "learning_rate": 3.116208791208791e-05, + "loss": 0.1018, + "step": 13714 + }, + { + "epoch": 37.67857142857143, + "grad_norm": 14.654722213745117, + "learning_rate": 3.116071428571429e-05, + "loss": 0.3092, + "step": 13715 + }, + { + "epoch": 37.68131868131868, + "grad_norm": 9.02880859375, + "learning_rate": 3.1159340659340666e-05, + "loss": 0.2094, + "step": 13716 + }, + { + "epoch": 37.684065934065934, + "grad_norm": 13.915560722351074, + "learning_rate": 3.1157967032967036e-05, + "loss": 0.2452, + "step": 13717 + }, + { + "epoch": 37.68681318681319, + "grad_norm": 11.293723106384277, + "learning_rate": 3.115659340659341e-05, + "loss": 0.309, + "step": 13718 + }, + { + "epoch": 37.68956043956044, + "grad_norm": 17.476261138916016, + "learning_rate": 3.115521978021978e-05, + "loss": 0.7003, + "step": 13719 + }, + { + "epoch": 37.69230769230769, + "grad_norm": 6.9993109703063965, + "learning_rate": 3.115384615384615e-05, + "loss": 0.1851, + "step": 13720 + }, + { + "epoch": 37.69505494505494, + "grad_norm": 10.225573539733887, + "learning_rate": 3.115247252747253e-05, + "loss": 0.2798, + "step": 13721 + }, + { + "epoch": 37.6978021978022, + "grad_norm": 19.651046752929688, + "learning_rate": 3.11510989010989e-05, + "loss": 0.5629, + "step": 13722 + }, + { + "epoch": 37.70054945054945, + "grad_norm": 16.005937576293945, + "learning_rate": 3.1149725274725276e-05, + "loss": 0.4829, + "step": 13723 + }, + { + "epoch": 37.7032967032967, + "grad_norm": 15.365710258483887, + "learning_rate": 3.1148351648351646e-05, + "loss": 0.4283, + "step": 13724 + }, + { + "epoch": 37.706043956043956, + "grad_norm": 16.050626754760742, + "learning_rate": 3.114697802197802e-05, + "loss": 0.6154, + "step": 13725 + }, + { + "epoch": 37.70879120879121, + "grad_norm": 3.797819137573242, + "learning_rate": 3.11456043956044e-05, + "loss": 0.0655, + "step": 13726 + }, + { + "epoch": 37.71153846153846, + "grad_norm": 29.38303565979004, + "learning_rate": 3.114423076923077e-05, + "loss": 0.5137, + "step": 13727 + }, + { + "epoch": 37.714285714285715, + "grad_norm": 14.968523025512695, + "learning_rate": 3.114285714285715e-05, + "loss": 0.3367, + "step": 13728 + }, + { + "epoch": 37.717032967032964, + "grad_norm": 9.331917762756348, + "learning_rate": 3.114148351648352e-05, + "loss": 0.113, + "step": 13729 + }, + { + "epoch": 37.71978021978022, + "grad_norm": 9.267616271972656, + "learning_rate": 3.1140109890109894e-05, + "loss": 0.1935, + "step": 13730 + }, + { + "epoch": 37.722527472527474, + "grad_norm": 5.3477277755737305, + "learning_rate": 3.113873626373627e-05, + "loss": 0.0945, + "step": 13731 + }, + { + "epoch": 37.72527472527472, + "grad_norm": 11.846911430358887, + "learning_rate": 3.113736263736264e-05, + "loss": 0.2671, + "step": 13732 + }, + { + "epoch": 37.72802197802198, + "grad_norm": 4.808002471923828, + "learning_rate": 3.113598901098902e-05, + "loss": 0.0823, + "step": 13733 + }, + { + "epoch": 37.73076923076923, + "grad_norm": 18.44194984436035, + "learning_rate": 3.113461538461539e-05, + "loss": 0.5091, + "step": 13734 + }, + { + "epoch": 37.73351648351648, + "grad_norm": 16.132898330688477, + "learning_rate": 3.113324175824176e-05, + "loss": 0.4916, + "step": 13735 + }, + { + "epoch": 37.73626373626374, + "grad_norm": 9.702642440795898, + "learning_rate": 3.1131868131868134e-05, + "loss": 0.1979, + "step": 13736 + }, + { + "epoch": 37.73901098901099, + "grad_norm": 13.395936012268066, + "learning_rate": 3.1130494505494504e-05, + "loss": 0.2537, + "step": 13737 + }, + { + "epoch": 37.74175824175824, + "grad_norm": 14.31861686706543, + "learning_rate": 3.112912087912088e-05, + "loss": 0.4901, + "step": 13738 + }, + { + "epoch": 37.744505494505496, + "grad_norm": 20.15060806274414, + "learning_rate": 3.112774725274725e-05, + "loss": 0.6894, + "step": 13739 + }, + { + "epoch": 37.747252747252745, + "grad_norm": 3.416090250015259, + "learning_rate": 3.112637362637363e-05, + "loss": 0.0466, + "step": 13740 + }, + { + "epoch": 37.75, + "grad_norm": 2.5370547771453857, + "learning_rate": 3.1125000000000004e-05, + "loss": 0.044, + "step": 13741 + }, + { + "epoch": 37.752747252747255, + "grad_norm": 13.899224281311035, + "learning_rate": 3.1123626373626374e-05, + "loss": 0.3156, + "step": 13742 + }, + { + "epoch": 37.755494505494504, + "grad_norm": 15.247467041015625, + "learning_rate": 3.112225274725275e-05, + "loss": 0.4245, + "step": 13743 + }, + { + "epoch": 37.75824175824176, + "grad_norm": 8.858016014099121, + "learning_rate": 3.112087912087912e-05, + "loss": 0.1344, + "step": 13744 + }, + { + "epoch": 37.76098901098901, + "grad_norm": 15.116009712219238, + "learning_rate": 3.11195054945055e-05, + "loss": 0.3178, + "step": 13745 + }, + { + "epoch": 37.76373626373626, + "grad_norm": 8.907881736755371, + "learning_rate": 3.1118131868131875e-05, + "loss": 0.2597, + "step": 13746 + }, + { + "epoch": 37.76648351648352, + "grad_norm": 15.600292205810547, + "learning_rate": 3.1116758241758245e-05, + "loss": 0.1932, + "step": 13747 + }, + { + "epoch": 37.76923076923077, + "grad_norm": 22.172693252563477, + "learning_rate": 3.111538461538462e-05, + "loss": 0.5779, + "step": 13748 + }, + { + "epoch": 37.77197802197802, + "grad_norm": 11.40174388885498, + "learning_rate": 3.111401098901099e-05, + "loss": 0.2779, + "step": 13749 + }, + { + "epoch": 37.77472527472528, + "grad_norm": 22.23088836669922, + "learning_rate": 3.111263736263736e-05, + "loss": 0.7274, + "step": 13750 + }, + { + "epoch": 37.777472527472526, + "grad_norm": 15.037898063659668, + "learning_rate": 3.111126373626374e-05, + "loss": 0.2408, + "step": 13751 + }, + { + "epoch": 37.78021978021978, + "grad_norm": 5.781530857086182, + "learning_rate": 3.110989010989011e-05, + "loss": 0.1145, + "step": 13752 + }, + { + "epoch": 37.782967032967036, + "grad_norm": 15.381714820861816, + "learning_rate": 3.1108516483516485e-05, + "loss": 0.3668, + "step": 13753 + }, + { + "epoch": 37.785714285714285, + "grad_norm": 8.08631706237793, + "learning_rate": 3.1107142857142855e-05, + "loss": 0.1802, + "step": 13754 + }, + { + "epoch": 37.78846153846154, + "grad_norm": 19.891843795776367, + "learning_rate": 3.110576923076923e-05, + "loss": 0.5117, + "step": 13755 + }, + { + "epoch": 37.79120879120879, + "grad_norm": 10.80496597290039, + "learning_rate": 3.110439560439561e-05, + "loss": 0.3019, + "step": 13756 + }, + { + "epoch": 37.793956043956044, + "grad_norm": 7.025374412536621, + "learning_rate": 3.110302197802198e-05, + "loss": 0.1189, + "step": 13757 + }, + { + "epoch": 37.7967032967033, + "grad_norm": 10.456620216369629, + "learning_rate": 3.1101648351648356e-05, + "loss": 0.4333, + "step": 13758 + }, + { + "epoch": 37.79945054945055, + "grad_norm": 9.689163208007812, + "learning_rate": 3.1100274725274726e-05, + "loss": 0.2512, + "step": 13759 + }, + { + "epoch": 37.8021978021978, + "grad_norm": 16.443681716918945, + "learning_rate": 3.10989010989011e-05, + "loss": 0.3873, + "step": 13760 + }, + { + "epoch": 37.80494505494506, + "grad_norm": 10.226588249206543, + "learning_rate": 3.109752747252748e-05, + "loss": 0.3282, + "step": 13761 + }, + { + "epoch": 37.80769230769231, + "grad_norm": 14.514812469482422, + "learning_rate": 3.109615384615385e-05, + "loss": 0.4785, + "step": 13762 + }, + { + "epoch": 37.81043956043956, + "grad_norm": 9.077760696411133, + "learning_rate": 3.1094780219780226e-05, + "loss": 0.1919, + "step": 13763 + }, + { + "epoch": 37.81318681318681, + "grad_norm": 6.719432353973389, + "learning_rate": 3.1093406593406596e-05, + "loss": 0.185, + "step": 13764 + }, + { + "epoch": 37.815934065934066, + "grad_norm": 14.174612998962402, + "learning_rate": 3.1092032967032966e-05, + "loss": 0.248, + "step": 13765 + }, + { + "epoch": 37.81868131868132, + "grad_norm": 15.289856910705566, + "learning_rate": 3.1090659340659336e-05, + "loss": 0.4722, + "step": 13766 + }, + { + "epoch": 37.82142857142857, + "grad_norm": 10.574617385864258, + "learning_rate": 3.108928571428571e-05, + "loss": 0.3152, + "step": 13767 + }, + { + "epoch": 37.824175824175825, + "grad_norm": 17.29977798461914, + "learning_rate": 3.108791208791209e-05, + "loss": 0.5962, + "step": 13768 + }, + { + "epoch": 37.82692307692308, + "grad_norm": 21.343503952026367, + "learning_rate": 3.108653846153846e-05, + "loss": 0.6155, + "step": 13769 + }, + { + "epoch": 37.82967032967033, + "grad_norm": 22.22993278503418, + "learning_rate": 3.1085164835164837e-05, + "loss": 0.5409, + "step": 13770 + }, + { + "epoch": 37.832417582417584, + "grad_norm": 16.810007095336914, + "learning_rate": 3.1083791208791207e-05, + "loss": 0.3781, + "step": 13771 + }, + { + "epoch": 37.83516483516483, + "grad_norm": 13.594401359558105, + "learning_rate": 3.108241758241758e-05, + "loss": 0.3279, + "step": 13772 + }, + { + "epoch": 37.83791208791209, + "grad_norm": 17.70195960998535, + "learning_rate": 3.108104395604396e-05, + "loss": 0.2926, + "step": 13773 + }, + { + "epoch": 37.84065934065934, + "grad_norm": 11.209431648254395, + "learning_rate": 3.107967032967033e-05, + "loss": 0.2383, + "step": 13774 + }, + { + "epoch": 37.84340659340659, + "grad_norm": 16.829904556274414, + "learning_rate": 3.107829670329671e-05, + "loss": 0.403, + "step": 13775 + }, + { + "epoch": 37.84615384615385, + "grad_norm": 19.126991271972656, + "learning_rate": 3.107692307692308e-05, + "loss": 0.5179, + "step": 13776 + }, + { + "epoch": 37.8489010989011, + "grad_norm": 16.09735679626465, + "learning_rate": 3.1075549450549454e-05, + "loss": 0.3254, + "step": 13777 + }, + { + "epoch": 37.85164835164835, + "grad_norm": 17.029586791992188, + "learning_rate": 3.107417582417583e-05, + "loss": 0.4956, + "step": 13778 + }, + { + "epoch": 37.854395604395606, + "grad_norm": 7.324935436248779, + "learning_rate": 3.10728021978022e-05, + "loss": 0.195, + "step": 13779 + }, + { + "epoch": 37.857142857142854, + "grad_norm": 18.06420135498047, + "learning_rate": 3.107142857142857e-05, + "loss": 0.5437, + "step": 13780 + }, + { + "epoch": 37.85989010989011, + "grad_norm": 13.551776885986328, + "learning_rate": 3.107005494505494e-05, + "loss": 0.2276, + "step": 13781 + }, + { + "epoch": 37.862637362637365, + "grad_norm": 14.935647964477539, + "learning_rate": 3.106868131868132e-05, + "loss": 0.4372, + "step": 13782 + }, + { + "epoch": 37.86538461538461, + "grad_norm": 5.379157066345215, + "learning_rate": 3.1067307692307694e-05, + "loss": 0.1058, + "step": 13783 + }, + { + "epoch": 37.86813186813187, + "grad_norm": 16.983747482299805, + "learning_rate": 3.1065934065934064e-05, + "loss": 0.3935, + "step": 13784 + }, + { + "epoch": 37.870879120879124, + "grad_norm": 9.79310417175293, + "learning_rate": 3.106456043956044e-05, + "loss": 0.3123, + "step": 13785 + }, + { + "epoch": 37.87362637362637, + "grad_norm": 18.778982162475586, + "learning_rate": 3.106318681318681e-05, + "loss": 0.5853, + "step": 13786 + }, + { + "epoch": 37.87637362637363, + "grad_norm": 11.483969688415527, + "learning_rate": 3.106181318681319e-05, + "loss": 0.3754, + "step": 13787 + }, + { + "epoch": 37.879120879120876, + "grad_norm": 9.632183074951172, + "learning_rate": 3.1060439560439565e-05, + "loss": 0.1831, + "step": 13788 + }, + { + "epoch": 37.88186813186813, + "grad_norm": 10.944544792175293, + "learning_rate": 3.1059065934065935e-05, + "loss": 0.1929, + "step": 13789 + }, + { + "epoch": 37.88461538461539, + "grad_norm": 14.20201301574707, + "learning_rate": 3.105769230769231e-05, + "loss": 0.3652, + "step": 13790 + }, + { + "epoch": 37.887362637362635, + "grad_norm": 8.343483924865723, + "learning_rate": 3.105631868131868e-05, + "loss": 0.23, + "step": 13791 + }, + { + "epoch": 37.89010989010989, + "grad_norm": 9.075742721557617, + "learning_rate": 3.105494505494506e-05, + "loss": 0.226, + "step": 13792 + }, + { + "epoch": 37.892857142857146, + "grad_norm": 11.847430229187012, + "learning_rate": 3.1053571428571435e-05, + "loss": 0.2873, + "step": 13793 + }, + { + "epoch": 37.895604395604394, + "grad_norm": 6.355279922485352, + "learning_rate": 3.1052197802197805e-05, + "loss": 0.1546, + "step": 13794 + }, + { + "epoch": 37.89835164835165, + "grad_norm": 18.21607780456543, + "learning_rate": 3.1050824175824175e-05, + "loss": 0.6876, + "step": 13795 + }, + { + "epoch": 37.9010989010989, + "grad_norm": 9.440620422363281, + "learning_rate": 3.1049450549450545e-05, + "loss": 0.2168, + "step": 13796 + }, + { + "epoch": 37.90384615384615, + "grad_norm": 9.557043075561523, + "learning_rate": 3.104807692307692e-05, + "loss": 0.2478, + "step": 13797 + }, + { + "epoch": 37.90659340659341, + "grad_norm": 16.18665885925293, + "learning_rate": 3.10467032967033e-05, + "loss": 0.5019, + "step": 13798 + }, + { + "epoch": 37.90934065934066, + "grad_norm": 13.303751945495605, + "learning_rate": 3.104532967032967e-05, + "loss": 0.341, + "step": 13799 + }, + { + "epoch": 37.91208791208791, + "grad_norm": 9.744019508361816, + "learning_rate": 3.1043956043956046e-05, + "loss": 0.1177, + "step": 13800 + }, + { + "epoch": 37.91483516483517, + "grad_norm": 15.26290512084961, + "learning_rate": 3.1042582417582416e-05, + "loss": 0.3384, + "step": 13801 + }, + { + "epoch": 37.917582417582416, + "grad_norm": 14.952733039855957, + "learning_rate": 3.104120879120879e-05, + "loss": 0.3481, + "step": 13802 + }, + { + "epoch": 37.92032967032967, + "grad_norm": 8.230775833129883, + "learning_rate": 3.103983516483517e-05, + "loss": 0.1867, + "step": 13803 + }, + { + "epoch": 37.92307692307692, + "grad_norm": 8.49480152130127, + "learning_rate": 3.103846153846154e-05, + "loss": 0.1059, + "step": 13804 + }, + { + "epoch": 37.925824175824175, + "grad_norm": 10.885501861572266, + "learning_rate": 3.1037087912087916e-05, + "loss": 0.2811, + "step": 13805 + }, + { + "epoch": 37.92857142857143, + "grad_norm": 10.50080394744873, + "learning_rate": 3.1035714285714286e-05, + "loss": 0.1845, + "step": 13806 + }, + { + "epoch": 37.93131868131868, + "grad_norm": 10.589225769042969, + "learning_rate": 3.103434065934066e-05, + "loss": 0.2431, + "step": 13807 + }, + { + "epoch": 37.934065934065934, + "grad_norm": 16.44634437561035, + "learning_rate": 3.103296703296704e-05, + "loss": 0.3273, + "step": 13808 + }, + { + "epoch": 37.93681318681319, + "grad_norm": 13.830111503601074, + "learning_rate": 3.103159340659341e-05, + "loss": 0.3545, + "step": 13809 + }, + { + "epoch": 37.93956043956044, + "grad_norm": 8.557943344116211, + "learning_rate": 3.103021978021978e-05, + "loss": 0.183, + "step": 13810 + }, + { + "epoch": 37.94230769230769, + "grad_norm": 9.61967658996582, + "learning_rate": 3.102884615384615e-05, + "loss": 0.2183, + "step": 13811 + }, + { + "epoch": 37.94505494505494, + "grad_norm": 15.936676025390625, + "learning_rate": 3.1027472527472526e-05, + "loss": 0.3687, + "step": 13812 + }, + { + "epoch": 37.9478021978022, + "grad_norm": 8.093914985656738, + "learning_rate": 3.10260989010989e-05, + "loss": 0.1783, + "step": 13813 + }, + { + "epoch": 37.95054945054945, + "grad_norm": 12.492136001586914, + "learning_rate": 3.102472527472527e-05, + "loss": 0.2833, + "step": 13814 + }, + { + "epoch": 37.9532967032967, + "grad_norm": 6.217257976531982, + "learning_rate": 3.102335164835165e-05, + "loss": 0.1169, + "step": 13815 + }, + { + "epoch": 37.956043956043956, + "grad_norm": 17.689741134643555, + "learning_rate": 3.102197802197802e-05, + "loss": 0.2954, + "step": 13816 + }, + { + "epoch": 37.95879120879121, + "grad_norm": 9.307428359985352, + "learning_rate": 3.10206043956044e-05, + "loss": 0.2762, + "step": 13817 + }, + { + "epoch": 37.96153846153846, + "grad_norm": 8.814764976501465, + "learning_rate": 3.1019230769230774e-05, + "loss": 0.1683, + "step": 13818 + }, + { + "epoch": 37.964285714285715, + "grad_norm": 3.650951385498047, + "learning_rate": 3.1017857142857144e-05, + "loss": 0.0591, + "step": 13819 + }, + { + "epoch": 37.967032967032964, + "grad_norm": 3.201805830001831, + "learning_rate": 3.101648351648352e-05, + "loss": 0.0606, + "step": 13820 + }, + { + "epoch": 37.96978021978022, + "grad_norm": 12.110108375549316, + "learning_rate": 3.101510989010989e-05, + "loss": 0.3036, + "step": 13821 + }, + { + "epoch": 37.972527472527474, + "grad_norm": 12.362971305847168, + "learning_rate": 3.101373626373627e-05, + "loss": 0.183, + "step": 13822 + }, + { + "epoch": 37.97527472527472, + "grad_norm": 18.727724075317383, + "learning_rate": 3.1012362637362644e-05, + "loss": 0.5227, + "step": 13823 + }, + { + "epoch": 37.97802197802198, + "grad_norm": 4.878434181213379, + "learning_rate": 3.1010989010989014e-05, + "loss": 0.1251, + "step": 13824 + }, + { + "epoch": 37.98076923076923, + "grad_norm": 6.438190460205078, + "learning_rate": 3.1009615384615384e-05, + "loss": 0.0958, + "step": 13825 + }, + { + "epoch": 37.98351648351648, + "grad_norm": 10.92275333404541, + "learning_rate": 3.1008241758241754e-05, + "loss": 0.2365, + "step": 13826 + }, + { + "epoch": 37.98626373626374, + "grad_norm": 20.42432975769043, + "learning_rate": 3.100686813186813e-05, + "loss": 0.9203, + "step": 13827 + }, + { + "epoch": 37.98901098901099, + "grad_norm": 8.597731590270996, + "learning_rate": 3.100549450549451e-05, + "loss": 0.1542, + "step": 13828 + }, + { + "epoch": 37.99175824175824, + "grad_norm": 1.663639783859253, + "learning_rate": 3.100412087912088e-05, + "loss": 0.0368, + "step": 13829 + }, + { + "epoch": 37.994505494505496, + "grad_norm": 8.68018627166748, + "learning_rate": 3.1002747252747255e-05, + "loss": 0.2613, + "step": 13830 + }, + { + "epoch": 37.997252747252745, + "grad_norm": 8.875914573669434, + "learning_rate": 3.1001373626373625e-05, + "loss": 0.1846, + "step": 13831 + }, + { + "epoch": 38.0, + "grad_norm": 63.43025588989258, + "learning_rate": 3.1e-05, + "loss": 1.1014, + "step": 13832 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.8278236914600551, + "eval_f1": 0.8284088013257989, + "eval_f1_DuraRiadoRio_64x64": 0.7534246575342466, + "eval_f1_Mole_64x64": 0.8682170542635659, + "eval_f1_Quebrado_64x64": 0.9016949152542373, + "eval_f1_RiadoRio_64x64": 0.7290969899665551, + "eval_f1_RioFechado_64x64": 0.8896103896103896, + "eval_loss": 0.7707196474075317, + "eval_precision": 0.8346583778079217, + "eval_precision_DuraRiadoRio_64x64": 0.7432432432432432, + "eval_precision_Mole_64x64": 0.9824561403508771, + "eval_precision_Quebrado_64x64": 0.8807947019867549, + "eval_precision_RiadoRio_64x64": 0.7414965986394558, + "eval_precision_RioFechado_64x64": 0.8253012048192772, + "eval_recall": 0.8294343546660077, + "eval_recall_DuraRiadoRio_64x64": 0.7638888888888888, + "eval_recall_Mole_64x64": 0.7777777777777778, + "eval_recall_Quebrado_64x64": 0.9236111111111112, + "eval_recall_RiadoRio_64x64": 0.7171052631578947, + "eval_recall_RioFechado_64x64": 0.9647887323943662, + "eval_runtime": 1.7458, + "eval_samples_per_second": 415.847, + "eval_steps_per_second": 26.348, + "step": 13832 + }, + { + "epoch": 38.002747252747255, + "grad_norm": 3.1443402767181396, + "learning_rate": 3.099862637362638e-05, + "loss": 0.0344, + "step": 13833 + }, + { + "epoch": 38.005494505494504, + "grad_norm": 18.57944107055664, + "learning_rate": 3.099725274725275e-05, + "loss": 0.3718, + "step": 13834 + }, + { + "epoch": 38.00824175824176, + "grad_norm": 16.156347274780273, + "learning_rate": 3.0995879120879125e-05, + "loss": 0.3077, + "step": 13835 + }, + { + "epoch": 38.010989010989015, + "grad_norm": 5.031259536743164, + "learning_rate": 3.0994505494505495e-05, + "loss": 0.1319, + "step": 13836 + }, + { + "epoch": 38.01373626373626, + "grad_norm": 6.444417953491211, + "learning_rate": 3.099313186813187e-05, + "loss": 0.2163, + "step": 13837 + }, + { + "epoch": 38.01648351648352, + "grad_norm": 13.405813217163086, + "learning_rate": 3.099175824175825e-05, + "loss": 0.4435, + "step": 13838 + }, + { + "epoch": 38.01923076923077, + "grad_norm": 8.73972225189209, + "learning_rate": 3.099038461538462e-05, + "loss": 0.1691, + "step": 13839 + }, + { + "epoch": 38.02197802197802, + "grad_norm": 15.511445045471191, + "learning_rate": 3.098901098901099e-05, + "loss": 0.452, + "step": 13840 + }, + { + "epoch": 38.02472527472528, + "grad_norm": 5.037198066711426, + "learning_rate": 3.098763736263736e-05, + "loss": 0.1077, + "step": 13841 + }, + { + "epoch": 38.027472527472526, + "grad_norm": 7.188210964202881, + "learning_rate": 3.0986263736263735e-05, + "loss": 0.2424, + "step": 13842 + }, + { + "epoch": 38.03021978021978, + "grad_norm": 8.232136726379395, + "learning_rate": 3.098489010989011e-05, + "loss": 0.1005, + "step": 13843 + }, + { + "epoch": 38.032967032967036, + "grad_norm": 6.4773736000061035, + "learning_rate": 3.098351648351648e-05, + "loss": 0.1835, + "step": 13844 + }, + { + "epoch": 38.035714285714285, + "grad_norm": 4.58348274230957, + "learning_rate": 3.098214285714286e-05, + "loss": 0.1419, + "step": 13845 + }, + { + "epoch": 38.03846153846154, + "grad_norm": 16.30483055114746, + "learning_rate": 3.098076923076923e-05, + "loss": 0.6595, + "step": 13846 + }, + { + "epoch": 38.04120879120879, + "grad_norm": 12.082194328308105, + "learning_rate": 3.0979395604395606e-05, + "loss": 0.1964, + "step": 13847 + }, + { + "epoch": 38.043956043956044, + "grad_norm": 6.627590656280518, + "learning_rate": 3.097802197802198e-05, + "loss": 0.0754, + "step": 13848 + }, + { + "epoch": 38.0467032967033, + "grad_norm": 8.245274543762207, + "learning_rate": 3.097664835164835e-05, + "loss": 0.2951, + "step": 13849 + }, + { + "epoch": 38.04945054945055, + "grad_norm": 24.868656158447266, + "learning_rate": 3.097527472527473e-05, + "loss": 0.5728, + "step": 13850 + }, + { + "epoch": 38.0521978021978, + "grad_norm": 10.1641263961792, + "learning_rate": 3.09739010989011e-05, + "loss": 0.3046, + "step": 13851 + }, + { + "epoch": 38.05494505494506, + "grad_norm": 14.893158912658691, + "learning_rate": 3.0972527472527476e-05, + "loss": 0.2276, + "step": 13852 + }, + { + "epoch": 38.05769230769231, + "grad_norm": 17.322021484375, + "learning_rate": 3.097115384615385e-05, + "loss": 0.355, + "step": 13853 + }, + { + "epoch": 38.06043956043956, + "grad_norm": 11.843807220458984, + "learning_rate": 3.096978021978022e-05, + "loss": 0.2323, + "step": 13854 + }, + { + "epoch": 38.06318681318681, + "grad_norm": 10.907572746276855, + "learning_rate": 3.096840659340659e-05, + "loss": 0.3351, + "step": 13855 + }, + { + "epoch": 38.065934065934066, + "grad_norm": 21.179399490356445, + "learning_rate": 3.096703296703296e-05, + "loss": 0.8295, + "step": 13856 + }, + { + "epoch": 38.06868131868132, + "grad_norm": 8.28403377532959, + "learning_rate": 3.096565934065934e-05, + "loss": 0.1673, + "step": 13857 + }, + { + "epoch": 38.07142857142857, + "grad_norm": 14.944853782653809, + "learning_rate": 3.096428571428572e-05, + "loss": 0.3493, + "step": 13858 + }, + { + "epoch": 38.074175824175825, + "grad_norm": 11.068791389465332, + "learning_rate": 3.096291208791209e-05, + "loss": 0.1726, + "step": 13859 + }, + { + "epoch": 38.07692307692308, + "grad_norm": 12.504859924316406, + "learning_rate": 3.0961538461538464e-05, + "loss": 0.4365, + "step": 13860 + }, + { + "epoch": 38.07967032967033, + "grad_norm": 7.2308349609375, + "learning_rate": 3.0960164835164834e-05, + "loss": 0.1926, + "step": 13861 + }, + { + "epoch": 38.082417582417584, + "grad_norm": 5.494581699371338, + "learning_rate": 3.095879120879121e-05, + "loss": 0.1009, + "step": 13862 + }, + { + "epoch": 38.08516483516483, + "grad_norm": 20.94745445251465, + "learning_rate": 3.095741758241759e-05, + "loss": 0.6521, + "step": 13863 + }, + { + "epoch": 38.08791208791209, + "grad_norm": 9.560503959655762, + "learning_rate": 3.095604395604396e-05, + "loss": 0.1689, + "step": 13864 + }, + { + "epoch": 38.09065934065934, + "grad_norm": 10.30218505859375, + "learning_rate": 3.0954670329670334e-05, + "loss": 0.3066, + "step": 13865 + }, + { + "epoch": 38.09340659340659, + "grad_norm": 15.19284439086914, + "learning_rate": 3.0953296703296704e-05, + "loss": 0.326, + "step": 13866 + }, + { + "epoch": 38.09615384615385, + "grad_norm": 13.965215682983398, + "learning_rate": 3.095192307692308e-05, + "loss": 0.3952, + "step": 13867 + }, + { + "epoch": 38.0989010989011, + "grad_norm": 4.93416690826416, + "learning_rate": 3.095054945054946e-05, + "loss": 0.1174, + "step": 13868 + }, + { + "epoch": 38.10164835164835, + "grad_norm": 13.876327514648438, + "learning_rate": 3.094917582417583e-05, + "loss": 0.2163, + "step": 13869 + }, + { + "epoch": 38.104395604395606, + "grad_norm": 18.80154037475586, + "learning_rate": 3.09478021978022e-05, + "loss": 0.5841, + "step": 13870 + }, + { + "epoch": 38.107142857142854, + "grad_norm": 10.840314865112305, + "learning_rate": 3.094642857142857e-05, + "loss": 0.3171, + "step": 13871 + }, + { + "epoch": 38.10989010989011, + "grad_norm": 10.9077787399292, + "learning_rate": 3.0945054945054944e-05, + "loss": 0.1924, + "step": 13872 + }, + { + "epoch": 38.112637362637365, + "grad_norm": 11.582700729370117, + "learning_rate": 3.094368131868132e-05, + "loss": 0.2452, + "step": 13873 + }, + { + "epoch": 38.11538461538461, + "grad_norm": 16.777788162231445, + "learning_rate": 3.094230769230769e-05, + "loss": 0.5869, + "step": 13874 + }, + { + "epoch": 38.11813186813187, + "grad_norm": 19.68215560913086, + "learning_rate": 3.094093406593407e-05, + "loss": 0.5709, + "step": 13875 + }, + { + "epoch": 38.120879120879124, + "grad_norm": 12.595020294189453, + "learning_rate": 3.093956043956044e-05, + "loss": 0.2526, + "step": 13876 + }, + { + "epoch": 38.12362637362637, + "grad_norm": 5.53353214263916, + "learning_rate": 3.0938186813186815e-05, + "loss": 0.1193, + "step": 13877 + }, + { + "epoch": 38.12637362637363, + "grad_norm": 19.855422973632812, + "learning_rate": 3.093681318681319e-05, + "loss": 0.4107, + "step": 13878 + }, + { + "epoch": 38.129120879120876, + "grad_norm": 8.959635734558105, + "learning_rate": 3.093543956043956e-05, + "loss": 0.1311, + "step": 13879 + }, + { + "epoch": 38.13186813186813, + "grad_norm": 12.836097717285156, + "learning_rate": 3.093406593406594e-05, + "loss": 0.2381, + "step": 13880 + }, + { + "epoch": 38.13461538461539, + "grad_norm": 7.088799476623535, + "learning_rate": 3.093269230769231e-05, + "loss": 0.1515, + "step": 13881 + }, + { + "epoch": 38.137362637362635, + "grad_norm": 10.994688987731934, + "learning_rate": 3.0931318681318685e-05, + "loss": 0.3723, + "step": 13882 + }, + { + "epoch": 38.14010989010989, + "grad_norm": 17.749061584472656, + "learning_rate": 3.092994505494506e-05, + "loss": 0.327, + "step": 13883 + }, + { + "epoch": 38.142857142857146, + "grad_norm": 4.847165584564209, + "learning_rate": 3.092857142857143e-05, + "loss": 0.0985, + "step": 13884 + }, + { + "epoch": 38.145604395604394, + "grad_norm": 13.879688262939453, + "learning_rate": 3.09271978021978e-05, + "loss": 0.4566, + "step": 13885 + }, + { + "epoch": 38.14835164835165, + "grad_norm": 13.302593231201172, + "learning_rate": 3.092582417582417e-05, + "loss": 0.2885, + "step": 13886 + }, + { + "epoch": 38.1510989010989, + "grad_norm": 7.428018569946289, + "learning_rate": 3.092445054945055e-05, + "loss": 0.1035, + "step": 13887 + }, + { + "epoch": 38.15384615384615, + "grad_norm": 14.093825340270996, + "learning_rate": 3.0923076923076926e-05, + "loss": 0.33, + "step": 13888 + }, + { + "epoch": 38.15659340659341, + "grad_norm": 7.1483235359191895, + "learning_rate": 3.0921703296703296e-05, + "loss": 0.1229, + "step": 13889 + }, + { + "epoch": 38.15934065934066, + "grad_norm": 21.873973846435547, + "learning_rate": 3.092032967032967e-05, + "loss": 0.5037, + "step": 13890 + }, + { + "epoch": 38.16208791208791, + "grad_norm": 15.093896865844727, + "learning_rate": 3.091895604395604e-05, + "loss": 0.3916, + "step": 13891 + }, + { + "epoch": 38.16483516483517, + "grad_norm": 14.89997386932373, + "learning_rate": 3.091758241758242e-05, + "loss": 0.4579, + "step": 13892 + }, + { + "epoch": 38.167582417582416, + "grad_norm": 5.325314044952393, + "learning_rate": 3.0916208791208796e-05, + "loss": 0.1096, + "step": 13893 + }, + { + "epoch": 38.17032967032967, + "grad_norm": 15.824214935302734, + "learning_rate": 3.0914835164835166e-05, + "loss": 0.6169, + "step": 13894 + }, + { + "epoch": 38.17307692307692, + "grad_norm": 7.14728307723999, + "learning_rate": 3.091346153846154e-05, + "loss": 0.2271, + "step": 13895 + }, + { + "epoch": 38.175824175824175, + "grad_norm": 9.760754585266113, + "learning_rate": 3.091208791208791e-05, + "loss": 0.1273, + "step": 13896 + }, + { + "epoch": 38.17857142857143, + "grad_norm": 23.000883102416992, + "learning_rate": 3.091071428571429e-05, + "loss": 0.6949, + "step": 13897 + }, + { + "epoch": 38.18131868131868, + "grad_norm": 16.480224609375, + "learning_rate": 3.0909340659340667e-05, + "loss": 0.4308, + "step": 13898 + }, + { + "epoch": 38.184065934065934, + "grad_norm": 16.72010040283203, + "learning_rate": 3.0907967032967037e-05, + "loss": 0.366, + "step": 13899 + }, + { + "epoch": 38.18681318681319, + "grad_norm": 3.650070905685425, + "learning_rate": 3.0906593406593407e-05, + "loss": 0.0513, + "step": 13900 + }, + { + "epoch": 38.18956043956044, + "grad_norm": 20.218551635742188, + "learning_rate": 3.0905219780219777e-05, + "loss": 0.7268, + "step": 13901 + }, + { + "epoch": 38.19230769230769, + "grad_norm": 15.249152183532715, + "learning_rate": 3.090384615384615e-05, + "loss": 0.3464, + "step": 13902 + }, + { + "epoch": 38.19505494505494, + "grad_norm": 19.931377410888672, + "learning_rate": 3.090247252747253e-05, + "loss": 0.4546, + "step": 13903 + }, + { + "epoch": 38.1978021978022, + "grad_norm": 16.804933547973633, + "learning_rate": 3.09010989010989e-05, + "loss": 0.2922, + "step": 13904 + }, + { + "epoch": 38.20054945054945, + "grad_norm": 17.257801055908203, + "learning_rate": 3.089972527472528e-05, + "loss": 1.0223, + "step": 13905 + }, + { + "epoch": 38.2032967032967, + "grad_norm": 11.526005744934082, + "learning_rate": 3.089835164835165e-05, + "loss": 0.2403, + "step": 13906 + }, + { + "epoch": 38.206043956043956, + "grad_norm": 8.4403715133667, + "learning_rate": 3.0896978021978024e-05, + "loss": 0.2062, + "step": 13907 + }, + { + "epoch": 38.20879120879121, + "grad_norm": 17.519550323486328, + "learning_rate": 3.08956043956044e-05, + "loss": 0.4225, + "step": 13908 + }, + { + "epoch": 38.21153846153846, + "grad_norm": 10.421107292175293, + "learning_rate": 3.089423076923077e-05, + "loss": 0.2512, + "step": 13909 + }, + { + "epoch": 38.214285714285715, + "grad_norm": 15.3814058303833, + "learning_rate": 3.089285714285715e-05, + "loss": 0.329, + "step": 13910 + }, + { + "epoch": 38.217032967032964, + "grad_norm": 17.753753662109375, + "learning_rate": 3.089148351648352e-05, + "loss": 0.4068, + "step": 13911 + }, + { + "epoch": 38.21978021978022, + "grad_norm": 1.9869089126586914, + "learning_rate": 3.0890109890109894e-05, + "loss": 0.0286, + "step": 13912 + }, + { + "epoch": 38.222527472527474, + "grad_norm": 6.3096232414245605, + "learning_rate": 3.0888736263736264e-05, + "loss": 0.1354, + "step": 13913 + }, + { + "epoch": 38.22527472527472, + "grad_norm": 8.929186820983887, + "learning_rate": 3.088736263736264e-05, + "loss": 0.2825, + "step": 13914 + }, + { + "epoch": 38.22802197802198, + "grad_norm": 16.414697647094727, + "learning_rate": 3.088598901098901e-05, + "loss": 0.4625, + "step": 13915 + }, + { + "epoch": 38.23076923076923, + "grad_norm": 2.2177205085754395, + "learning_rate": 3.088461538461538e-05, + "loss": 0.0364, + "step": 13916 + }, + { + "epoch": 38.23351648351648, + "grad_norm": 9.881589889526367, + "learning_rate": 3.088324175824176e-05, + "loss": 0.1944, + "step": 13917 + }, + { + "epoch": 38.23626373626374, + "grad_norm": 15.930807113647461, + "learning_rate": 3.0881868131868135e-05, + "loss": 0.5956, + "step": 13918 + }, + { + "epoch": 38.239010989010985, + "grad_norm": 7.987655162811279, + "learning_rate": 3.0880494505494505e-05, + "loss": 0.1276, + "step": 13919 + }, + { + "epoch": 38.24175824175824, + "grad_norm": 3.915768623352051, + "learning_rate": 3.087912087912088e-05, + "loss": 0.1344, + "step": 13920 + }, + { + "epoch": 38.244505494505496, + "grad_norm": 13.416327476501465, + "learning_rate": 3.087774725274725e-05, + "loss": 0.2923, + "step": 13921 + }, + { + "epoch": 38.247252747252745, + "grad_norm": 8.545279502868652, + "learning_rate": 3.087637362637363e-05, + "loss": 0.2325, + "step": 13922 + }, + { + "epoch": 38.25, + "grad_norm": 4.113905906677246, + "learning_rate": 3.0875000000000005e-05, + "loss": 0.1007, + "step": 13923 + }, + { + "epoch": 38.252747252747255, + "grad_norm": 3.811856746673584, + "learning_rate": 3.0873626373626375e-05, + "loss": 0.0585, + "step": 13924 + }, + { + "epoch": 38.255494505494504, + "grad_norm": 12.098705291748047, + "learning_rate": 3.087225274725275e-05, + "loss": 0.2918, + "step": 13925 + }, + { + "epoch": 38.25824175824176, + "grad_norm": 15.680261611938477, + "learning_rate": 3.087087912087912e-05, + "loss": 0.4161, + "step": 13926 + }, + { + "epoch": 38.260989010989015, + "grad_norm": 12.027600288391113, + "learning_rate": 3.08695054945055e-05, + "loss": 0.2974, + "step": 13927 + }, + { + "epoch": 38.26373626373626, + "grad_norm": 13.783242225646973, + "learning_rate": 3.086813186813187e-05, + "loss": 0.2551, + "step": 13928 + }, + { + "epoch": 38.26648351648352, + "grad_norm": 11.175714492797852, + "learning_rate": 3.0866758241758246e-05, + "loss": 0.2016, + "step": 13929 + }, + { + "epoch": 38.26923076923077, + "grad_norm": 17.80233383178711, + "learning_rate": 3.0865384615384616e-05, + "loss": 1.0028, + "step": 13930 + }, + { + "epoch": 38.27197802197802, + "grad_norm": 13.852346420288086, + "learning_rate": 3.0864010989010986e-05, + "loss": 0.3955, + "step": 13931 + }, + { + "epoch": 38.27472527472528, + "grad_norm": 14.271721839904785, + "learning_rate": 3.086263736263736e-05, + "loss": 0.2669, + "step": 13932 + }, + { + "epoch": 38.277472527472526, + "grad_norm": 9.4474458694458, + "learning_rate": 3.086126373626374e-05, + "loss": 0.3339, + "step": 13933 + }, + { + "epoch": 38.28021978021978, + "grad_norm": 14.16673755645752, + "learning_rate": 3.085989010989011e-05, + "loss": 0.5084, + "step": 13934 + }, + { + "epoch": 38.282967032967036, + "grad_norm": 8.860960006713867, + "learning_rate": 3.0858516483516486e-05, + "loss": 0.323, + "step": 13935 + }, + { + "epoch": 38.285714285714285, + "grad_norm": 20.64606285095215, + "learning_rate": 3.0857142857142856e-05, + "loss": 0.6223, + "step": 13936 + }, + { + "epoch": 38.28846153846154, + "grad_norm": 5.93600606918335, + "learning_rate": 3.085576923076923e-05, + "loss": 0.1041, + "step": 13937 + }, + { + "epoch": 38.29120879120879, + "grad_norm": 13.979361534118652, + "learning_rate": 3.085439560439561e-05, + "loss": 0.3383, + "step": 13938 + }, + { + "epoch": 38.293956043956044, + "grad_norm": 15.571977615356445, + "learning_rate": 3.085302197802198e-05, + "loss": 0.5141, + "step": 13939 + }, + { + "epoch": 38.2967032967033, + "grad_norm": 27.02988624572754, + "learning_rate": 3.0851648351648356e-05, + "loss": 0.6322, + "step": 13940 + }, + { + "epoch": 38.29945054945055, + "grad_norm": 18.221342086791992, + "learning_rate": 3.0850274725274726e-05, + "loss": 0.4281, + "step": 13941 + }, + { + "epoch": 38.3021978021978, + "grad_norm": 9.357452392578125, + "learning_rate": 3.08489010989011e-05, + "loss": 0.1138, + "step": 13942 + }, + { + "epoch": 38.30494505494506, + "grad_norm": 15.30685043334961, + "learning_rate": 3.084752747252747e-05, + "loss": 0.2514, + "step": 13943 + }, + { + "epoch": 38.30769230769231, + "grad_norm": 15.603915214538574, + "learning_rate": 3.084615384615385e-05, + "loss": 0.4823, + "step": 13944 + }, + { + "epoch": 38.31043956043956, + "grad_norm": 11.980952262878418, + "learning_rate": 3.084478021978022e-05, + "loss": 0.2627, + "step": 13945 + }, + { + "epoch": 38.31318681318681, + "grad_norm": 3.4429588317871094, + "learning_rate": 3.084340659340659e-05, + "loss": 0.0558, + "step": 13946 + }, + { + "epoch": 38.315934065934066, + "grad_norm": 9.058734893798828, + "learning_rate": 3.084203296703297e-05, + "loss": 0.1405, + "step": 13947 + }, + { + "epoch": 38.31868131868132, + "grad_norm": 13.530159950256348, + "learning_rate": 3.0840659340659344e-05, + "loss": 0.3628, + "step": 13948 + }, + { + "epoch": 38.32142857142857, + "grad_norm": 4.712198734283447, + "learning_rate": 3.0839285714285714e-05, + "loss": 0.0789, + "step": 13949 + }, + { + "epoch": 38.324175824175825, + "grad_norm": 14.744120597839355, + "learning_rate": 3.083791208791209e-05, + "loss": 0.5527, + "step": 13950 + }, + { + "epoch": 38.32692307692308, + "grad_norm": 16.99314308166504, + "learning_rate": 3.083653846153846e-05, + "loss": 0.556, + "step": 13951 + }, + { + "epoch": 38.32967032967033, + "grad_norm": 10.5444974899292, + "learning_rate": 3.083516483516484e-05, + "loss": 0.3223, + "step": 13952 + }, + { + "epoch": 38.332417582417584, + "grad_norm": 12.47486686706543, + "learning_rate": 3.0833791208791214e-05, + "loss": 0.1832, + "step": 13953 + }, + { + "epoch": 38.33516483516483, + "grad_norm": 9.214345932006836, + "learning_rate": 3.0832417582417584e-05, + "loss": 0.3081, + "step": 13954 + }, + { + "epoch": 38.33791208791209, + "grad_norm": 6.035243988037109, + "learning_rate": 3.083104395604396e-05, + "loss": 0.0957, + "step": 13955 + }, + { + "epoch": 38.34065934065934, + "grad_norm": 15.615618705749512, + "learning_rate": 3.082967032967033e-05, + "loss": 0.3711, + "step": 13956 + }, + { + "epoch": 38.34340659340659, + "grad_norm": 16.131044387817383, + "learning_rate": 3.082829670329671e-05, + "loss": 0.3722, + "step": 13957 + }, + { + "epoch": 38.34615384615385, + "grad_norm": 5.037200450897217, + "learning_rate": 3.082692307692308e-05, + "loss": 0.0817, + "step": 13958 + }, + { + "epoch": 38.3489010989011, + "grad_norm": 7.465909957885742, + "learning_rate": 3.0825549450549455e-05, + "loss": 0.2007, + "step": 13959 + }, + { + "epoch": 38.35164835164835, + "grad_norm": 8.315342903137207, + "learning_rate": 3.0824175824175825e-05, + "loss": 0.1275, + "step": 13960 + }, + { + "epoch": 38.354395604395606, + "grad_norm": 17.2163028717041, + "learning_rate": 3.0822802197802195e-05, + "loss": 0.4794, + "step": 13961 + }, + { + "epoch": 38.357142857142854, + "grad_norm": 16.01808738708496, + "learning_rate": 3.082142857142857e-05, + "loss": 0.4688, + "step": 13962 + }, + { + "epoch": 38.35989010989011, + "grad_norm": 4.354678153991699, + "learning_rate": 3.082005494505495e-05, + "loss": 0.0843, + "step": 13963 + }, + { + "epoch": 38.362637362637365, + "grad_norm": 13.117176055908203, + "learning_rate": 3.081868131868132e-05, + "loss": 0.2266, + "step": 13964 + }, + { + "epoch": 38.36538461538461, + "grad_norm": 6.949027061462402, + "learning_rate": 3.0817307692307695e-05, + "loss": 0.0883, + "step": 13965 + }, + { + "epoch": 38.36813186813187, + "grad_norm": 11.725706100463867, + "learning_rate": 3.0815934065934065e-05, + "loss": 0.1845, + "step": 13966 + }, + { + "epoch": 38.370879120879124, + "grad_norm": 10.615066528320312, + "learning_rate": 3.081456043956044e-05, + "loss": 0.2143, + "step": 13967 + }, + { + "epoch": 38.37362637362637, + "grad_norm": 3.1822245121002197, + "learning_rate": 3.081318681318682e-05, + "loss": 0.0549, + "step": 13968 + }, + { + "epoch": 38.37637362637363, + "grad_norm": 6.686784267425537, + "learning_rate": 3.081181318681319e-05, + "loss": 0.093, + "step": 13969 + }, + { + "epoch": 38.379120879120876, + "grad_norm": 10.317113876342773, + "learning_rate": 3.0810439560439565e-05, + "loss": 0.3112, + "step": 13970 + }, + { + "epoch": 38.38186813186813, + "grad_norm": 8.414388656616211, + "learning_rate": 3.0809065934065935e-05, + "loss": 0.1466, + "step": 13971 + }, + { + "epoch": 38.38461538461539, + "grad_norm": 5.947246551513672, + "learning_rate": 3.080769230769231e-05, + "loss": 0.1317, + "step": 13972 + }, + { + "epoch": 38.387362637362635, + "grad_norm": 6.157400608062744, + "learning_rate": 3.080631868131868e-05, + "loss": 0.1185, + "step": 13973 + }, + { + "epoch": 38.39010989010989, + "grad_norm": 5.281242847442627, + "learning_rate": 3.080494505494506e-05, + "loss": 0.0887, + "step": 13974 + }, + { + "epoch": 38.392857142857146, + "grad_norm": 16.273696899414062, + "learning_rate": 3.080357142857143e-05, + "loss": 0.4958, + "step": 13975 + }, + { + "epoch": 38.395604395604394, + "grad_norm": 20.58348274230957, + "learning_rate": 3.08021978021978e-05, + "loss": 0.6423, + "step": 13976 + }, + { + "epoch": 38.39835164835165, + "grad_norm": 19.104263305664062, + "learning_rate": 3.0800824175824176e-05, + "loss": 0.552, + "step": 13977 + }, + { + "epoch": 38.4010989010989, + "grad_norm": 14.097746849060059, + "learning_rate": 3.079945054945055e-05, + "loss": 0.4749, + "step": 13978 + }, + { + "epoch": 38.40384615384615, + "grad_norm": 11.50009822845459, + "learning_rate": 3.079807692307692e-05, + "loss": 0.3444, + "step": 13979 + }, + { + "epoch": 38.40659340659341, + "grad_norm": 15.986539840698242, + "learning_rate": 3.07967032967033e-05, + "loss": 0.5701, + "step": 13980 + }, + { + "epoch": 38.40934065934066, + "grad_norm": 12.595379829406738, + "learning_rate": 3.079532967032967e-05, + "loss": 0.2167, + "step": 13981 + }, + { + "epoch": 38.41208791208791, + "grad_norm": 4.087740898132324, + "learning_rate": 3.0793956043956046e-05, + "loss": 0.0536, + "step": 13982 + }, + { + "epoch": 38.41483516483517, + "grad_norm": 9.419866561889648, + "learning_rate": 3.079258241758242e-05, + "loss": 0.1605, + "step": 13983 + }, + { + "epoch": 38.417582417582416, + "grad_norm": 7.905567169189453, + "learning_rate": 3.079120879120879e-05, + "loss": 0.0997, + "step": 13984 + }, + { + "epoch": 38.42032967032967, + "grad_norm": 17.92059898376465, + "learning_rate": 3.078983516483517e-05, + "loss": 0.4447, + "step": 13985 + }, + { + "epoch": 38.42307692307692, + "grad_norm": 17.338333129882812, + "learning_rate": 3.078846153846154e-05, + "loss": 0.4909, + "step": 13986 + }, + { + "epoch": 38.425824175824175, + "grad_norm": 13.915741920471191, + "learning_rate": 3.078708791208792e-05, + "loss": 0.1862, + "step": 13987 + }, + { + "epoch": 38.42857142857143, + "grad_norm": 26.732398986816406, + "learning_rate": 3.078571428571429e-05, + "loss": 0.7553, + "step": 13988 + }, + { + "epoch": 38.43131868131868, + "grad_norm": 16.370220184326172, + "learning_rate": 3.0784340659340663e-05, + "loss": 0.4782, + "step": 13989 + }, + { + "epoch": 38.434065934065934, + "grad_norm": 12.275895118713379, + "learning_rate": 3.0782967032967034e-05, + "loss": 0.2575, + "step": 13990 + }, + { + "epoch": 38.43681318681319, + "grad_norm": 3.5657708644866943, + "learning_rate": 3.0781593406593404e-05, + "loss": 0.1026, + "step": 13991 + }, + { + "epoch": 38.43956043956044, + "grad_norm": 10.18969440460205, + "learning_rate": 3.078021978021978e-05, + "loss": 0.2389, + "step": 13992 + }, + { + "epoch": 38.44230769230769, + "grad_norm": 10.221291542053223, + "learning_rate": 3.077884615384615e-05, + "loss": 0.1628, + "step": 13993 + }, + { + "epoch": 38.44505494505494, + "grad_norm": 13.749948501586914, + "learning_rate": 3.077747252747253e-05, + "loss": 0.2504, + "step": 13994 + }, + { + "epoch": 38.4478021978022, + "grad_norm": 5.222842693328857, + "learning_rate": 3.0776098901098904e-05, + "loss": 0.0955, + "step": 13995 + }, + { + "epoch": 38.45054945054945, + "grad_norm": 14.179256439208984, + "learning_rate": 3.0774725274725274e-05, + "loss": 0.3917, + "step": 13996 + }, + { + "epoch": 38.4532967032967, + "grad_norm": 17.813817977905273, + "learning_rate": 3.077335164835165e-05, + "loss": 0.3235, + "step": 13997 + }, + { + "epoch": 38.456043956043956, + "grad_norm": 16.75058937072754, + "learning_rate": 3.077197802197802e-05, + "loss": 0.7077, + "step": 13998 + }, + { + "epoch": 38.45879120879121, + "grad_norm": 12.754358291625977, + "learning_rate": 3.07706043956044e-05, + "loss": 0.2274, + "step": 13999 + }, + { + "epoch": 38.46153846153846, + "grad_norm": 9.605341911315918, + "learning_rate": 3.0769230769230774e-05, + "loss": 0.2841, + "step": 14000 + }, + { + "epoch": 38.464285714285715, + "grad_norm": 13.4288330078125, + "learning_rate": 3.0767857142857144e-05, + "loss": 0.3733, + "step": 14001 + }, + { + "epoch": 38.467032967032964, + "grad_norm": 2.5360794067382812, + "learning_rate": 3.076648351648352e-05, + "loss": 0.0347, + "step": 14002 + }, + { + "epoch": 38.46978021978022, + "grad_norm": 9.435691833496094, + "learning_rate": 3.076510989010989e-05, + "loss": 0.1976, + "step": 14003 + }, + { + "epoch": 38.472527472527474, + "grad_norm": 14.81626033782959, + "learning_rate": 3.076373626373627e-05, + "loss": 0.4235, + "step": 14004 + }, + { + "epoch": 38.47527472527472, + "grad_norm": 12.20383358001709, + "learning_rate": 3.076236263736264e-05, + "loss": 0.3247, + "step": 14005 + }, + { + "epoch": 38.47802197802198, + "grad_norm": 15.970928192138672, + "learning_rate": 3.076098901098901e-05, + "loss": 0.3303, + "step": 14006 + }, + { + "epoch": 38.48076923076923, + "grad_norm": 2.609607219696045, + "learning_rate": 3.0759615384615385e-05, + "loss": 0.0382, + "step": 14007 + }, + { + "epoch": 38.48351648351648, + "grad_norm": 14.729731559753418, + "learning_rate": 3.0758241758241755e-05, + "loss": 0.3704, + "step": 14008 + }, + { + "epoch": 38.48626373626374, + "grad_norm": 16.48432731628418, + "learning_rate": 3.075686813186813e-05, + "loss": 0.4498, + "step": 14009 + }, + { + "epoch": 38.489010989010985, + "grad_norm": 14.990503311157227, + "learning_rate": 3.075549450549451e-05, + "loss": 0.276, + "step": 14010 + }, + { + "epoch": 38.49175824175824, + "grad_norm": 13.641175270080566, + "learning_rate": 3.075412087912088e-05, + "loss": 0.2883, + "step": 14011 + }, + { + "epoch": 38.494505494505496, + "grad_norm": 10.07369613647461, + "learning_rate": 3.0752747252747255e-05, + "loss": 0.2911, + "step": 14012 + }, + { + "epoch": 38.497252747252745, + "grad_norm": 10.736010551452637, + "learning_rate": 3.0751373626373625e-05, + "loss": 0.212, + "step": 14013 + }, + { + "epoch": 38.5, + "grad_norm": 7.125535011291504, + "learning_rate": 3.075e-05, + "loss": 0.1323, + "step": 14014 + }, + { + "epoch": 38.502747252747255, + "grad_norm": 12.819551467895508, + "learning_rate": 3.074862637362638e-05, + "loss": 0.2782, + "step": 14015 + }, + { + "epoch": 38.505494505494504, + "grad_norm": 11.319451332092285, + "learning_rate": 3.074725274725275e-05, + "loss": 0.1921, + "step": 14016 + }, + { + "epoch": 38.50824175824176, + "grad_norm": 19.310081481933594, + "learning_rate": 3.0745879120879126e-05, + "loss": 0.446, + "step": 14017 + }, + { + "epoch": 38.51098901098901, + "grad_norm": 16.223779678344727, + "learning_rate": 3.0744505494505496e-05, + "loss": 0.4381, + "step": 14018 + }, + { + "epoch": 38.51373626373626, + "grad_norm": 5.460299491882324, + "learning_rate": 3.074313186813187e-05, + "loss": 0.0994, + "step": 14019 + }, + { + "epoch": 38.51648351648352, + "grad_norm": 18.027820587158203, + "learning_rate": 3.074175824175824e-05, + "loss": 0.4587, + "step": 14020 + }, + { + "epoch": 38.51923076923077, + "grad_norm": 3.536090135574341, + "learning_rate": 3.074038461538461e-05, + "loss": 0.1094, + "step": 14021 + }, + { + "epoch": 38.52197802197802, + "grad_norm": 16.449073791503906, + "learning_rate": 3.073901098901099e-05, + "loss": 0.507, + "step": 14022 + }, + { + "epoch": 38.52472527472528, + "grad_norm": 10.651792526245117, + "learning_rate": 3.073763736263736e-05, + "loss": 0.3102, + "step": 14023 + }, + { + "epoch": 38.527472527472526, + "grad_norm": 12.889908790588379, + "learning_rate": 3.0736263736263736e-05, + "loss": 0.3348, + "step": 14024 + }, + { + "epoch": 38.53021978021978, + "grad_norm": 3.77767014503479, + "learning_rate": 3.073489010989011e-05, + "loss": 0.0419, + "step": 14025 + }, + { + "epoch": 38.532967032967036, + "grad_norm": 5.15432596206665, + "learning_rate": 3.073351648351648e-05, + "loss": 0.0861, + "step": 14026 + }, + { + "epoch": 38.535714285714285, + "grad_norm": 22.834903717041016, + "learning_rate": 3.073214285714286e-05, + "loss": 0.4554, + "step": 14027 + }, + { + "epoch": 38.53846153846154, + "grad_norm": 22.6854305267334, + "learning_rate": 3.073076923076923e-05, + "loss": 0.466, + "step": 14028 + }, + { + "epoch": 38.54120879120879, + "grad_norm": 10.220126152038574, + "learning_rate": 3.0729395604395607e-05, + "loss": 0.269, + "step": 14029 + }, + { + "epoch": 38.543956043956044, + "grad_norm": 19.92936134338379, + "learning_rate": 3.072802197802198e-05, + "loss": 0.4061, + "step": 14030 + }, + { + "epoch": 38.5467032967033, + "grad_norm": 10.831275939941406, + "learning_rate": 3.072664835164835e-05, + "loss": 0.2271, + "step": 14031 + }, + { + "epoch": 38.54945054945055, + "grad_norm": 9.965742111206055, + "learning_rate": 3.072527472527473e-05, + "loss": 0.3139, + "step": 14032 + }, + { + "epoch": 38.5521978021978, + "grad_norm": 14.259824752807617, + "learning_rate": 3.07239010989011e-05, + "loss": 0.221, + "step": 14033 + }, + { + "epoch": 38.55494505494506, + "grad_norm": 15.440057754516602, + "learning_rate": 3.072252747252748e-05, + "loss": 0.2744, + "step": 14034 + }, + { + "epoch": 38.55769230769231, + "grad_norm": 7.6688408851623535, + "learning_rate": 3.072115384615385e-05, + "loss": 0.1676, + "step": 14035 + }, + { + "epoch": 38.56043956043956, + "grad_norm": 14.172880172729492, + "learning_rate": 3.071978021978022e-05, + "loss": 0.5406, + "step": 14036 + }, + { + "epoch": 38.56318681318681, + "grad_norm": 2.5445799827575684, + "learning_rate": 3.0718406593406594e-05, + "loss": 0.0392, + "step": 14037 + }, + { + "epoch": 38.565934065934066, + "grad_norm": 5.621213436126709, + "learning_rate": 3.0717032967032964e-05, + "loss": 0.1001, + "step": 14038 + }, + { + "epoch": 38.56868131868132, + "grad_norm": 23.08145523071289, + "learning_rate": 3.071565934065934e-05, + "loss": 1.0094, + "step": 14039 + }, + { + "epoch": 38.57142857142857, + "grad_norm": 8.302075386047363, + "learning_rate": 3.071428571428572e-05, + "loss": 0.2063, + "step": 14040 + }, + { + "epoch": 38.574175824175825, + "grad_norm": 10.224230766296387, + "learning_rate": 3.071291208791209e-05, + "loss": 0.266, + "step": 14041 + }, + { + "epoch": 38.57692307692308, + "grad_norm": 14.80549430847168, + "learning_rate": 3.0711538461538464e-05, + "loss": 0.2003, + "step": 14042 + }, + { + "epoch": 38.57967032967033, + "grad_norm": 6.2972540855407715, + "learning_rate": 3.0710164835164834e-05, + "loss": 0.1008, + "step": 14043 + }, + { + "epoch": 38.582417582417584, + "grad_norm": 22.151611328125, + "learning_rate": 3.070879120879121e-05, + "loss": 0.5951, + "step": 14044 + }, + { + "epoch": 38.58516483516483, + "grad_norm": 12.385269165039062, + "learning_rate": 3.070741758241759e-05, + "loss": 0.4028, + "step": 14045 + }, + { + "epoch": 38.58791208791209, + "grad_norm": 5.238076686859131, + "learning_rate": 3.070604395604396e-05, + "loss": 0.2122, + "step": 14046 + }, + { + "epoch": 38.59065934065934, + "grad_norm": 5.135241985321045, + "learning_rate": 3.0704670329670335e-05, + "loss": 0.0777, + "step": 14047 + }, + { + "epoch": 38.59340659340659, + "grad_norm": 10.948684692382812, + "learning_rate": 3.0703296703296705e-05, + "loss": 0.3981, + "step": 14048 + }, + { + "epoch": 38.59615384615385, + "grad_norm": 14.111071586608887, + "learning_rate": 3.070192307692308e-05, + "loss": 0.4076, + "step": 14049 + }, + { + "epoch": 38.5989010989011, + "grad_norm": 22.179929733276367, + "learning_rate": 3.070054945054945e-05, + "loss": 0.7038, + "step": 14050 + }, + { + "epoch": 38.60164835164835, + "grad_norm": 11.514859199523926, + "learning_rate": 3.069917582417582e-05, + "loss": 0.2451, + "step": 14051 + }, + { + "epoch": 38.604395604395606, + "grad_norm": 16.648090362548828, + "learning_rate": 3.06978021978022e-05, + "loss": 0.5921, + "step": 14052 + }, + { + "epoch": 38.607142857142854, + "grad_norm": 21.335731506347656, + "learning_rate": 3.069642857142857e-05, + "loss": 0.4201, + "step": 14053 + }, + { + "epoch": 38.60989010989011, + "grad_norm": 6.359676361083984, + "learning_rate": 3.0695054945054945e-05, + "loss": 0.1185, + "step": 14054 + }, + { + "epoch": 38.612637362637365, + "grad_norm": 8.954038619995117, + "learning_rate": 3.069368131868132e-05, + "loss": 0.1213, + "step": 14055 + }, + { + "epoch": 38.61538461538461, + "grad_norm": 11.782971382141113, + "learning_rate": 3.069230769230769e-05, + "loss": 0.3178, + "step": 14056 + }, + { + "epoch": 38.61813186813187, + "grad_norm": 16.570804595947266, + "learning_rate": 3.069093406593407e-05, + "loss": 0.2271, + "step": 14057 + }, + { + "epoch": 38.620879120879124, + "grad_norm": 18.73274803161621, + "learning_rate": 3.068956043956044e-05, + "loss": 0.4656, + "step": 14058 + }, + { + "epoch": 38.62362637362637, + "grad_norm": 11.24124526977539, + "learning_rate": 3.0688186813186816e-05, + "loss": 0.2737, + "step": 14059 + }, + { + "epoch": 38.62637362637363, + "grad_norm": 4.815664291381836, + "learning_rate": 3.068681318681319e-05, + "loss": 0.0699, + "step": 14060 + }, + { + "epoch": 38.629120879120876, + "grad_norm": 11.622849464416504, + "learning_rate": 3.068543956043956e-05, + "loss": 0.279, + "step": 14061 + }, + { + "epoch": 38.63186813186813, + "grad_norm": 7.7839884757995605, + "learning_rate": 3.068406593406594e-05, + "loss": 0.1548, + "step": 14062 + }, + { + "epoch": 38.63461538461539, + "grad_norm": 5.055644512176514, + "learning_rate": 3.068269230769231e-05, + "loss": 0.0992, + "step": 14063 + }, + { + "epoch": 38.637362637362635, + "grad_norm": 10.449320793151855, + "learning_rate": 3.0681318681318686e-05, + "loss": 0.3139, + "step": 14064 + }, + { + "epoch": 38.64010989010989, + "grad_norm": 3.7266151905059814, + "learning_rate": 3.0679945054945056e-05, + "loss": 0.0695, + "step": 14065 + }, + { + "epoch": 38.642857142857146, + "grad_norm": 13.1604585647583, + "learning_rate": 3.0678571428571426e-05, + "loss": 0.467, + "step": 14066 + }, + { + "epoch": 38.645604395604394, + "grad_norm": 9.279614448547363, + "learning_rate": 3.06771978021978e-05, + "loss": 0.2232, + "step": 14067 + }, + { + "epoch": 38.64835164835165, + "grad_norm": 17.048717498779297, + "learning_rate": 3.067582417582417e-05, + "loss": 0.3973, + "step": 14068 + }, + { + "epoch": 38.6510989010989, + "grad_norm": 9.158658981323242, + "learning_rate": 3.067445054945055e-05, + "loss": 0.1698, + "step": 14069 + }, + { + "epoch": 38.65384615384615, + "grad_norm": 4.191307067871094, + "learning_rate": 3.0673076923076926e-05, + "loss": 0.0773, + "step": 14070 + }, + { + "epoch": 38.65659340659341, + "grad_norm": 13.53862476348877, + "learning_rate": 3.0671703296703296e-05, + "loss": 0.2887, + "step": 14071 + }, + { + "epoch": 38.65934065934066, + "grad_norm": 15.669641494750977, + "learning_rate": 3.067032967032967e-05, + "loss": 0.339, + "step": 14072 + }, + { + "epoch": 38.66208791208791, + "grad_norm": 14.706789016723633, + "learning_rate": 3.066895604395604e-05, + "loss": 0.2032, + "step": 14073 + }, + { + "epoch": 38.66483516483517, + "grad_norm": 23.491744995117188, + "learning_rate": 3.066758241758242e-05, + "loss": 0.5545, + "step": 14074 + }, + { + "epoch": 38.667582417582416, + "grad_norm": 9.979464530944824, + "learning_rate": 3.06662087912088e-05, + "loss": 0.243, + "step": 14075 + }, + { + "epoch": 38.67032967032967, + "grad_norm": 7.098805904388428, + "learning_rate": 3.066483516483517e-05, + "loss": 0.1188, + "step": 14076 + }, + { + "epoch": 38.67307692307692, + "grad_norm": 12.892067909240723, + "learning_rate": 3.0663461538461544e-05, + "loss": 0.4073, + "step": 14077 + }, + { + "epoch": 38.675824175824175, + "grad_norm": 20.898393630981445, + "learning_rate": 3.0662087912087914e-05, + "loss": 0.8088, + "step": 14078 + }, + { + "epoch": 38.67857142857143, + "grad_norm": 24.085725784301758, + "learning_rate": 3.066071428571429e-05, + "loss": 0.4733, + "step": 14079 + }, + { + "epoch": 38.68131868131868, + "grad_norm": 5.319276332855225, + "learning_rate": 3.065934065934066e-05, + "loss": 0.1037, + "step": 14080 + }, + { + "epoch": 38.684065934065934, + "grad_norm": 15.759316444396973, + "learning_rate": 3.065796703296703e-05, + "loss": 0.2845, + "step": 14081 + }, + { + "epoch": 38.68681318681319, + "grad_norm": 6.036018371582031, + "learning_rate": 3.065659340659341e-05, + "loss": 0.0924, + "step": 14082 + }, + { + "epoch": 38.68956043956044, + "grad_norm": 12.67905044555664, + "learning_rate": 3.065521978021978e-05, + "loss": 0.2771, + "step": 14083 + }, + { + "epoch": 38.69230769230769, + "grad_norm": 16.879276275634766, + "learning_rate": 3.0653846153846154e-05, + "loss": 0.3584, + "step": 14084 + }, + { + "epoch": 38.69505494505494, + "grad_norm": 20.654033660888672, + "learning_rate": 3.065247252747253e-05, + "loss": 0.6327, + "step": 14085 + }, + { + "epoch": 38.6978021978022, + "grad_norm": 10.659873962402344, + "learning_rate": 3.06510989010989e-05, + "loss": 0.3045, + "step": 14086 + }, + { + "epoch": 38.70054945054945, + "grad_norm": 8.378775596618652, + "learning_rate": 3.064972527472528e-05, + "loss": 0.103, + "step": 14087 + }, + { + "epoch": 38.7032967032967, + "grad_norm": 9.725234985351562, + "learning_rate": 3.064835164835165e-05, + "loss": 0.1516, + "step": 14088 + }, + { + "epoch": 38.706043956043956, + "grad_norm": 13.63185977935791, + "learning_rate": 3.0646978021978025e-05, + "loss": 0.1449, + "step": 14089 + }, + { + "epoch": 38.70879120879121, + "grad_norm": 13.175631523132324, + "learning_rate": 3.06456043956044e-05, + "loss": 0.3258, + "step": 14090 + }, + { + "epoch": 38.71153846153846, + "grad_norm": 6.40770959854126, + "learning_rate": 3.064423076923077e-05, + "loss": 0.1255, + "step": 14091 + }, + { + "epoch": 38.714285714285715, + "grad_norm": 9.492792129516602, + "learning_rate": 3.064285714285715e-05, + "loss": 0.1514, + "step": 14092 + }, + { + "epoch": 38.717032967032964, + "grad_norm": 12.92025375366211, + "learning_rate": 3.064148351648352e-05, + "loss": 0.348, + "step": 14093 + }, + { + "epoch": 38.71978021978022, + "grad_norm": 11.941499710083008, + "learning_rate": 3.064010989010989e-05, + "loss": 0.2328, + "step": 14094 + }, + { + "epoch": 38.722527472527474, + "grad_norm": 14.627127647399902, + "learning_rate": 3.0638736263736265e-05, + "loss": 0.5228, + "step": 14095 + }, + { + "epoch": 38.72527472527472, + "grad_norm": 16.808446884155273, + "learning_rate": 3.0637362637362635e-05, + "loss": 0.2986, + "step": 14096 + }, + { + "epoch": 38.72802197802198, + "grad_norm": 18.73430824279785, + "learning_rate": 3.063598901098901e-05, + "loss": 0.4353, + "step": 14097 + }, + { + "epoch": 38.73076923076923, + "grad_norm": 6.43031644821167, + "learning_rate": 3.063461538461538e-05, + "loss": 0.0837, + "step": 14098 + }, + { + "epoch": 38.73351648351648, + "grad_norm": 11.568617820739746, + "learning_rate": 3.063324175824176e-05, + "loss": 0.3207, + "step": 14099 + }, + { + "epoch": 38.73626373626374, + "grad_norm": 15.186162948608398, + "learning_rate": 3.0631868131868135e-05, + "loss": 0.4554, + "step": 14100 + }, + { + "epoch": 38.73901098901099, + "grad_norm": 5.358273029327393, + "learning_rate": 3.0630494505494505e-05, + "loss": 0.0805, + "step": 14101 + }, + { + "epoch": 38.74175824175824, + "grad_norm": 11.576417922973633, + "learning_rate": 3.062912087912088e-05, + "loss": 0.3147, + "step": 14102 + }, + { + "epoch": 38.744505494505496, + "grad_norm": 12.987367630004883, + "learning_rate": 3.062774725274725e-05, + "loss": 0.4669, + "step": 14103 + }, + { + "epoch": 38.747252747252745, + "grad_norm": 10.89536190032959, + "learning_rate": 3.062637362637363e-05, + "loss": 0.1764, + "step": 14104 + }, + { + "epoch": 38.75, + "grad_norm": 11.18057918548584, + "learning_rate": 3.0625000000000006e-05, + "loss": 0.2417, + "step": 14105 + }, + { + "epoch": 38.752747252747255, + "grad_norm": 16.616830825805664, + "learning_rate": 3.0623626373626376e-05, + "loss": 0.6957, + "step": 14106 + }, + { + "epoch": 38.755494505494504, + "grad_norm": 6.9280009269714355, + "learning_rate": 3.062225274725275e-05, + "loss": 0.1017, + "step": 14107 + }, + { + "epoch": 38.75824175824176, + "grad_norm": 24.434167861938477, + "learning_rate": 3.062087912087912e-05, + "loss": 0.7103, + "step": 14108 + }, + { + "epoch": 38.76098901098901, + "grad_norm": 1.7884927988052368, + "learning_rate": 3.061950549450549e-05, + "loss": 0.026, + "step": 14109 + }, + { + "epoch": 38.76373626373626, + "grad_norm": 18.544527053833008, + "learning_rate": 3.061813186813187e-05, + "loss": 0.5197, + "step": 14110 + }, + { + "epoch": 38.76648351648352, + "grad_norm": 9.536636352539062, + "learning_rate": 3.061675824175824e-05, + "loss": 0.3251, + "step": 14111 + }, + { + "epoch": 38.76923076923077, + "grad_norm": 3.5708532333374023, + "learning_rate": 3.0615384615384616e-05, + "loss": 0.0582, + "step": 14112 + }, + { + "epoch": 38.77197802197802, + "grad_norm": 11.673056602478027, + "learning_rate": 3.0614010989010986e-05, + "loss": 0.2848, + "step": 14113 + }, + { + "epoch": 38.77472527472528, + "grad_norm": 14.348955154418945, + "learning_rate": 3.061263736263736e-05, + "loss": 0.6138, + "step": 14114 + }, + { + "epoch": 38.777472527472526, + "grad_norm": 2.6719777584075928, + "learning_rate": 3.061126373626374e-05, + "loss": 0.0491, + "step": 14115 + }, + { + "epoch": 38.78021978021978, + "grad_norm": 6.632945537567139, + "learning_rate": 3.060989010989011e-05, + "loss": 0.1294, + "step": 14116 + }, + { + "epoch": 38.782967032967036, + "grad_norm": 21.575178146362305, + "learning_rate": 3.060851648351649e-05, + "loss": 0.6624, + "step": 14117 + }, + { + "epoch": 38.785714285714285, + "grad_norm": 9.236099243164062, + "learning_rate": 3.060714285714286e-05, + "loss": 0.141, + "step": 14118 + }, + { + "epoch": 38.78846153846154, + "grad_norm": 12.558366775512695, + "learning_rate": 3.0605769230769233e-05, + "loss": 0.2705, + "step": 14119 + }, + { + "epoch": 38.79120879120879, + "grad_norm": 15.34091854095459, + "learning_rate": 3.060439560439561e-05, + "loss": 0.4098, + "step": 14120 + }, + { + "epoch": 38.793956043956044, + "grad_norm": 17.336374282836914, + "learning_rate": 3.060302197802198e-05, + "loss": 0.2609, + "step": 14121 + }, + { + "epoch": 38.7967032967033, + "grad_norm": 20.183671951293945, + "learning_rate": 3.060164835164836e-05, + "loss": 0.6198, + "step": 14122 + }, + { + "epoch": 38.79945054945055, + "grad_norm": 20.150279998779297, + "learning_rate": 3.060027472527473e-05, + "loss": 0.4591, + "step": 14123 + }, + { + "epoch": 38.8021978021978, + "grad_norm": 19.496288299560547, + "learning_rate": 3.05989010989011e-05, + "loss": 0.5977, + "step": 14124 + }, + { + "epoch": 38.80494505494506, + "grad_norm": 4.010040760040283, + "learning_rate": 3.0597527472527474e-05, + "loss": 0.053, + "step": 14125 + }, + { + "epoch": 38.80769230769231, + "grad_norm": 1.9673759937286377, + "learning_rate": 3.0596153846153844e-05, + "loss": 0.0424, + "step": 14126 + }, + { + "epoch": 38.81043956043956, + "grad_norm": 16.541032791137695, + "learning_rate": 3.059478021978022e-05, + "loss": 0.4765, + "step": 14127 + }, + { + "epoch": 38.81318681318681, + "grad_norm": 8.635404586791992, + "learning_rate": 3.059340659340659e-05, + "loss": 0.1456, + "step": 14128 + }, + { + "epoch": 38.815934065934066, + "grad_norm": 14.914666175842285, + "learning_rate": 3.059203296703297e-05, + "loss": 0.4226, + "step": 14129 + }, + { + "epoch": 38.81868131868132, + "grad_norm": 8.052335739135742, + "learning_rate": 3.0590659340659344e-05, + "loss": 0.1922, + "step": 14130 + }, + { + "epoch": 38.82142857142857, + "grad_norm": 7.2525410652160645, + "learning_rate": 3.0589285714285714e-05, + "loss": 0.1173, + "step": 14131 + }, + { + "epoch": 38.824175824175825, + "grad_norm": 8.743548393249512, + "learning_rate": 3.058791208791209e-05, + "loss": 0.1688, + "step": 14132 + }, + { + "epoch": 38.82692307692308, + "grad_norm": 14.347586631774902, + "learning_rate": 3.058653846153846e-05, + "loss": 0.3245, + "step": 14133 + }, + { + "epoch": 38.82967032967033, + "grad_norm": 11.626349449157715, + "learning_rate": 3.058516483516484e-05, + "loss": 0.2227, + "step": 14134 + }, + { + "epoch": 38.832417582417584, + "grad_norm": 9.003921508789062, + "learning_rate": 3.0583791208791215e-05, + "loss": 0.1926, + "step": 14135 + }, + { + "epoch": 38.83516483516483, + "grad_norm": 17.04304313659668, + "learning_rate": 3.0582417582417585e-05, + "loss": 0.598, + "step": 14136 + }, + { + "epoch": 38.83791208791209, + "grad_norm": 8.195928573608398, + "learning_rate": 3.058104395604396e-05, + "loss": 0.126, + "step": 14137 + }, + { + "epoch": 38.84065934065934, + "grad_norm": 8.418603897094727, + "learning_rate": 3.057967032967033e-05, + "loss": 0.2192, + "step": 14138 + }, + { + "epoch": 38.84340659340659, + "grad_norm": 12.639692306518555, + "learning_rate": 3.05782967032967e-05, + "loss": 0.3504, + "step": 14139 + }, + { + "epoch": 38.84615384615385, + "grad_norm": 19.035388946533203, + "learning_rate": 3.057692307692308e-05, + "loss": 0.6076, + "step": 14140 + }, + { + "epoch": 38.8489010989011, + "grad_norm": 8.675719261169434, + "learning_rate": 3.057554945054945e-05, + "loss": 0.1291, + "step": 14141 + }, + { + "epoch": 38.85164835164835, + "grad_norm": 14.793675422668457, + "learning_rate": 3.0574175824175825e-05, + "loss": 0.3183, + "step": 14142 + }, + { + "epoch": 38.854395604395606, + "grad_norm": 13.56364917755127, + "learning_rate": 3.0572802197802195e-05, + "loss": 0.3492, + "step": 14143 + }, + { + "epoch": 38.857142857142854, + "grad_norm": 16.5098876953125, + "learning_rate": 3.057142857142857e-05, + "loss": 0.4402, + "step": 14144 + }, + { + "epoch": 38.85989010989011, + "grad_norm": 15.59523868560791, + "learning_rate": 3.057005494505495e-05, + "loss": 0.261, + "step": 14145 + }, + { + "epoch": 38.862637362637365, + "grad_norm": 13.315164566040039, + "learning_rate": 3.056868131868132e-05, + "loss": 0.3111, + "step": 14146 + }, + { + "epoch": 38.86538461538461, + "grad_norm": 13.439995765686035, + "learning_rate": 3.0567307692307696e-05, + "loss": 0.1997, + "step": 14147 + }, + { + "epoch": 38.86813186813187, + "grad_norm": 9.72756290435791, + "learning_rate": 3.0565934065934066e-05, + "loss": 0.1441, + "step": 14148 + }, + { + "epoch": 38.870879120879124, + "grad_norm": 14.039978981018066, + "learning_rate": 3.056456043956044e-05, + "loss": 0.3112, + "step": 14149 + }, + { + "epoch": 38.87362637362637, + "grad_norm": 8.206703186035156, + "learning_rate": 3.056318681318682e-05, + "loss": 0.0862, + "step": 14150 + }, + { + "epoch": 38.87637362637363, + "grad_norm": 27.00273895263672, + "learning_rate": 3.056181318681319e-05, + "loss": 0.7219, + "step": 14151 + }, + { + "epoch": 38.879120879120876, + "grad_norm": 19.01654815673828, + "learning_rate": 3.0560439560439566e-05, + "loss": 0.3136, + "step": 14152 + }, + { + "epoch": 38.88186813186813, + "grad_norm": 16.258403778076172, + "learning_rate": 3.0559065934065936e-05, + "loss": 0.6209, + "step": 14153 + }, + { + "epoch": 38.88461538461539, + "grad_norm": 12.654980659484863, + "learning_rate": 3.0557692307692306e-05, + "loss": 0.2939, + "step": 14154 + }, + { + "epoch": 38.887362637362635, + "grad_norm": 11.581578254699707, + "learning_rate": 3.055631868131868e-05, + "loss": 0.2259, + "step": 14155 + }, + { + "epoch": 38.89010989010989, + "grad_norm": 21.877717971801758, + "learning_rate": 3.055494505494505e-05, + "loss": 0.6413, + "step": 14156 + }, + { + "epoch": 38.892857142857146, + "grad_norm": 10.45051383972168, + "learning_rate": 3.055357142857143e-05, + "loss": 0.2304, + "step": 14157 + }, + { + "epoch": 38.895604395604394, + "grad_norm": 16.79854965209961, + "learning_rate": 3.05521978021978e-05, + "loss": 0.4033, + "step": 14158 + }, + { + "epoch": 38.89835164835165, + "grad_norm": 9.827366828918457, + "learning_rate": 3.0550824175824177e-05, + "loss": 0.2005, + "step": 14159 + }, + { + "epoch": 38.9010989010989, + "grad_norm": 20.778141021728516, + "learning_rate": 3.054945054945055e-05, + "loss": 0.3019, + "step": 14160 + }, + { + "epoch": 38.90384615384615, + "grad_norm": 8.462202072143555, + "learning_rate": 3.054807692307692e-05, + "loss": 0.1393, + "step": 14161 + }, + { + "epoch": 38.90659340659341, + "grad_norm": 14.995759963989258, + "learning_rate": 3.05467032967033e-05, + "loss": 0.2309, + "step": 14162 + }, + { + "epoch": 38.90934065934066, + "grad_norm": 3.647237539291382, + "learning_rate": 3.054532967032967e-05, + "loss": 0.0418, + "step": 14163 + }, + { + "epoch": 38.91208791208791, + "grad_norm": 11.44542407989502, + "learning_rate": 3.054395604395605e-05, + "loss": 0.3567, + "step": 14164 + }, + { + "epoch": 38.91483516483517, + "grad_norm": 17.84136390686035, + "learning_rate": 3.0542582417582424e-05, + "loss": 0.5859, + "step": 14165 + }, + { + "epoch": 38.917582417582416, + "grad_norm": 14.736663818359375, + "learning_rate": 3.0541208791208794e-05, + "loss": 0.273, + "step": 14166 + }, + { + "epoch": 38.92032967032967, + "grad_norm": 11.90268611907959, + "learning_rate": 3.053983516483517e-05, + "loss": 0.2065, + "step": 14167 + }, + { + "epoch": 38.92307692307692, + "grad_norm": 6.940710067749023, + "learning_rate": 3.053846153846154e-05, + "loss": 0.1617, + "step": 14168 + }, + { + "epoch": 38.925824175824175, + "grad_norm": 6.996801853179932, + "learning_rate": 3.053708791208791e-05, + "loss": 0.1019, + "step": 14169 + }, + { + "epoch": 38.92857142857143, + "grad_norm": 12.611064910888672, + "learning_rate": 3.053571428571429e-05, + "loss": 0.4849, + "step": 14170 + }, + { + "epoch": 38.93131868131868, + "grad_norm": 12.833438873291016, + "learning_rate": 3.053434065934066e-05, + "loss": 0.2963, + "step": 14171 + }, + { + "epoch": 38.934065934065934, + "grad_norm": 7.779490947723389, + "learning_rate": 3.0532967032967034e-05, + "loss": 0.1569, + "step": 14172 + }, + { + "epoch": 38.93681318681319, + "grad_norm": 16.559738159179688, + "learning_rate": 3.0531593406593404e-05, + "loss": 0.4684, + "step": 14173 + }, + { + "epoch": 38.93956043956044, + "grad_norm": 20.716825485229492, + "learning_rate": 3.053021978021978e-05, + "loss": 0.823, + "step": 14174 + }, + { + "epoch": 38.94230769230769, + "grad_norm": 2.721339225769043, + "learning_rate": 3.052884615384616e-05, + "loss": 0.04, + "step": 14175 + }, + { + "epoch": 38.94505494505494, + "grad_norm": 17.512636184692383, + "learning_rate": 3.052747252747253e-05, + "loss": 0.4245, + "step": 14176 + }, + { + "epoch": 38.9478021978022, + "grad_norm": 14.405922889709473, + "learning_rate": 3.0526098901098905e-05, + "loss": 0.3573, + "step": 14177 + }, + { + "epoch": 38.95054945054945, + "grad_norm": 10.894588470458984, + "learning_rate": 3.0524725274725275e-05, + "loss": 0.199, + "step": 14178 + }, + { + "epoch": 38.9532967032967, + "grad_norm": 5.456475734710693, + "learning_rate": 3.052335164835165e-05, + "loss": 0.0786, + "step": 14179 + }, + { + "epoch": 38.956043956043956, + "grad_norm": 11.337703704833984, + "learning_rate": 3.052197802197803e-05, + "loss": 0.3914, + "step": 14180 + }, + { + "epoch": 38.95879120879121, + "grad_norm": 10.284159660339355, + "learning_rate": 3.05206043956044e-05, + "loss": 0.2368, + "step": 14181 + }, + { + "epoch": 38.96153846153846, + "grad_norm": 18.572538375854492, + "learning_rate": 3.0519230769230775e-05, + "loss": 0.6046, + "step": 14182 + }, + { + "epoch": 38.964285714285715, + "grad_norm": 17.33784294128418, + "learning_rate": 3.0517857142857145e-05, + "loss": 0.3231, + "step": 14183 + }, + { + "epoch": 38.967032967032964, + "grad_norm": 13.78150463104248, + "learning_rate": 3.051648351648352e-05, + "loss": 0.3134, + "step": 14184 + }, + { + "epoch": 38.96978021978022, + "grad_norm": 8.56196403503418, + "learning_rate": 3.0515109890109895e-05, + "loss": 0.1986, + "step": 14185 + }, + { + "epoch": 38.972527472527474, + "grad_norm": 16.534347534179688, + "learning_rate": 3.0513736263736265e-05, + "loss": 0.3674, + "step": 14186 + }, + { + "epoch": 38.97527472527472, + "grad_norm": 16.451414108276367, + "learning_rate": 3.051236263736264e-05, + "loss": 0.4434, + "step": 14187 + }, + { + "epoch": 38.97802197802198, + "grad_norm": 10.80240249633789, + "learning_rate": 3.0510989010989012e-05, + "loss": 0.2872, + "step": 14188 + }, + { + "epoch": 38.98076923076923, + "grad_norm": 7.983685493469238, + "learning_rate": 3.0509615384615386e-05, + "loss": 0.3184, + "step": 14189 + }, + { + "epoch": 38.98351648351648, + "grad_norm": 9.56747055053711, + "learning_rate": 3.0508241758241762e-05, + "loss": 0.1947, + "step": 14190 + }, + { + "epoch": 38.98626373626374, + "grad_norm": 14.429801940917969, + "learning_rate": 3.0506868131868132e-05, + "loss": 0.384, + "step": 14191 + }, + { + "epoch": 38.98901098901099, + "grad_norm": 15.798835754394531, + "learning_rate": 3.050549450549451e-05, + "loss": 0.3165, + "step": 14192 + }, + { + "epoch": 38.99175824175824, + "grad_norm": 4.361967086791992, + "learning_rate": 3.050412087912088e-05, + "loss": 0.0903, + "step": 14193 + }, + { + "epoch": 38.994505494505496, + "grad_norm": 8.735729217529297, + "learning_rate": 3.0502747252747256e-05, + "loss": 0.2318, + "step": 14194 + }, + { + "epoch": 38.997252747252745, + "grad_norm": 22.280427932739258, + "learning_rate": 3.050137362637363e-05, + "loss": 0.7286, + "step": 14195 + }, + { + "epoch": 39.0, + "grad_norm": 49.20146179199219, + "learning_rate": 3.05e-05, + "loss": 0.4693, + "step": 14196 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.7796143250688705, + "eval_f1": 0.7750896031041731, + "eval_f1_DuraRiadoRio_64x64": 0.6513761467889908, + "eval_f1_Mole_64x64": 0.8642857142857143, + "eval_f1_Quebrado_64x64": 0.8540925266903915, + "eval_f1_RiadoRio_64x64": 0.7266881028938906, + "eval_f1_RioFechado_64x64": 0.7790055248618785, + "eval_loss": 0.8031544089317322, + "eval_precision": 0.8153357330759974, + "eval_precision_DuraRiadoRio_64x64": 0.9594594594594594, + "eval_precision_Mole_64x64": 0.8897058823529411, + "eval_precision_Quebrado_64x64": 0.8759124087591241, + "eval_precision_RiadoRio_64x64": 0.710691823899371, + "eval_precision_RioFechado_64x64": 0.6409090909090909, + "eval_recall": 0.7806090931554237, + "eval_recall_DuraRiadoRio_64x64": 0.4930555555555556, + "eval_recall_Mole_64x64": 0.8402777777777778, + "eval_recall_Quebrado_64x64": 0.8333333333333334, + "eval_recall_RiadoRio_64x64": 0.743421052631579, + "eval_recall_RioFechado_64x64": 0.9929577464788732, + "eval_runtime": 1.7253, + "eval_samples_per_second": 420.79, + "eval_steps_per_second": 26.662, + "step": 14196 + }, + { + "epoch": 39.002747252747255, + "grad_norm": 17.461706161499023, + "learning_rate": 3.0498626373626376e-05, + "loss": 0.4646, + "step": 14197 + }, + { + "epoch": 39.005494505494504, + "grad_norm": 9.82794189453125, + "learning_rate": 3.0497252747252746e-05, + "loss": 0.2464, + "step": 14198 + }, + { + "epoch": 39.00824175824176, + "grad_norm": 5.445162773132324, + "learning_rate": 3.0495879120879123e-05, + "loss": 0.1183, + "step": 14199 + }, + { + "epoch": 39.010989010989015, + "grad_norm": 16.51742172241211, + "learning_rate": 3.04945054945055e-05, + "loss": 0.4114, + "step": 14200 + }, + { + "epoch": 39.01373626373626, + "grad_norm": 15.025115013122559, + "learning_rate": 3.049313186813187e-05, + "loss": 0.2437, + "step": 14201 + }, + { + "epoch": 39.01648351648352, + "grad_norm": 22.677953720092773, + "learning_rate": 3.0491758241758243e-05, + "loss": 0.9979, + "step": 14202 + }, + { + "epoch": 39.01923076923077, + "grad_norm": 7.7212018966674805, + "learning_rate": 3.0490384615384617e-05, + "loss": 0.119, + "step": 14203 + }, + { + "epoch": 39.02197802197802, + "grad_norm": 23.699262619018555, + "learning_rate": 3.048901098901099e-05, + "loss": 0.9102, + "step": 14204 + }, + { + "epoch": 39.02472527472528, + "grad_norm": 8.7118501663208, + "learning_rate": 3.0487637362637367e-05, + "loss": 0.2183, + "step": 14205 + }, + { + "epoch": 39.027472527472526, + "grad_norm": 9.652982711791992, + "learning_rate": 3.0486263736263737e-05, + "loss": 0.2862, + "step": 14206 + }, + { + "epoch": 39.03021978021978, + "grad_norm": 10.553731918334961, + "learning_rate": 3.0484890109890114e-05, + "loss": 0.4011, + "step": 14207 + }, + { + "epoch": 39.032967032967036, + "grad_norm": 15.618587493896484, + "learning_rate": 3.0483516483516484e-05, + "loss": 0.5803, + "step": 14208 + }, + { + "epoch": 39.035714285714285, + "grad_norm": 4.336781024932861, + "learning_rate": 3.048214285714286e-05, + "loss": 0.0686, + "step": 14209 + }, + { + "epoch": 39.03846153846154, + "grad_norm": 16.023481369018555, + "learning_rate": 3.0480769230769234e-05, + "loss": 0.3498, + "step": 14210 + }, + { + "epoch": 39.04120879120879, + "grad_norm": 7.530786514282227, + "learning_rate": 3.0479395604395604e-05, + "loss": 0.1833, + "step": 14211 + }, + { + "epoch": 39.043956043956044, + "grad_norm": 11.23493766784668, + "learning_rate": 3.047802197802198e-05, + "loss": 0.2638, + "step": 14212 + }, + { + "epoch": 39.0467032967033, + "grad_norm": 12.628923416137695, + "learning_rate": 3.047664835164835e-05, + "loss": 0.2912, + "step": 14213 + }, + { + "epoch": 39.04945054945055, + "grad_norm": 12.215790748596191, + "learning_rate": 3.0475274725274727e-05, + "loss": 0.4117, + "step": 14214 + }, + { + "epoch": 39.0521978021978, + "grad_norm": 18.82015609741211, + "learning_rate": 3.0473901098901104e-05, + "loss": 0.3239, + "step": 14215 + }, + { + "epoch": 39.05494505494506, + "grad_norm": 10.436567306518555, + "learning_rate": 3.0472527472527474e-05, + "loss": 0.3597, + "step": 14216 + }, + { + "epoch": 39.05769230769231, + "grad_norm": 17.79328155517578, + "learning_rate": 3.0471153846153848e-05, + "loss": 0.3717, + "step": 14217 + }, + { + "epoch": 39.06043956043956, + "grad_norm": 12.216925621032715, + "learning_rate": 3.046978021978022e-05, + "loss": 0.2704, + "step": 14218 + }, + { + "epoch": 39.06318681318681, + "grad_norm": 8.323862075805664, + "learning_rate": 3.0468406593406594e-05, + "loss": 0.1207, + "step": 14219 + }, + { + "epoch": 39.065934065934066, + "grad_norm": 11.080153465270996, + "learning_rate": 3.0467032967032965e-05, + "loss": 0.1742, + "step": 14220 + }, + { + "epoch": 39.06868131868132, + "grad_norm": 8.024446487426758, + "learning_rate": 3.046565934065934e-05, + "loss": 0.1083, + "step": 14221 + }, + { + "epoch": 39.07142857142857, + "grad_norm": 12.468477249145508, + "learning_rate": 3.0464285714285718e-05, + "loss": 0.3729, + "step": 14222 + }, + { + "epoch": 39.074175824175825, + "grad_norm": 23.80876922607422, + "learning_rate": 3.0462912087912088e-05, + "loss": 0.7306, + "step": 14223 + }, + { + "epoch": 39.07692307692308, + "grad_norm": 19.18185806274414, + "learning_rate": 3.0461538461538465e-05, + "loss": 0.2969, + "step": 14224 + }, + { + "epoch": 39.07967032967033, + "grad_norm": 5.894464015960693, + "learning_rate": 3.0460164835164835e-05, + "loss": 0.1141, + "step": 14225 + }, + { + "epoch": 39.082417582417584, + "grad_norm": 4.27400541305542, + "learning_rate": 3.045879120879121e-05, + "loss": 0.0566, + "step": 14226 + }, + { + "epoch": 39.08516483516483, + "grad_norm": 5.092344284057617, + "learning_rate": 3.0457417582417585e-05, + "loss": 0.0782, + "step": 14227 + }, + { + "epoch": 39.08791208791209, + "grad_norm": 5.894431114196777, + "learning_rate": 3.0456043956043955e-05, + "loss": 0.1009, + "step": 14228 + }, + { + "epoch": 39.09065934065934, + "grad_norm": 11.484325408935547, + "learning_rate": 3.0454670329670332e-05, + "loss": 0.2766, + "step": 14229 + }, + { + "epoch": 39.09340659340659, + "grad_norm": 11.001535415649414, + "learning_rate": 3.0453296703296702e-05, + "loss": 0.2942, + "step": 14230 + }, + { + "epoch": 39.09615384615385, + "grad_norm": 15.551858901977539, + "learning_rate": 3.045192307692308e-05, + "loss": 0.6003, + "step": 14231 + }, + { + "epoch": 39.0989010989011, + "grad_norm": 7.070679187774658, + "learning_rate": 3.0450549450549452e-05, + "loss": 0.1282, + "step": 14232 + }, + { + "epoch": 39.10164835164835, + "grad_norm": 7.420501232147217, + "learning_rate": 3.0449175824175826e-05, + "loss": 0.2124, + "step": 14233 + }, + { + "epoch": 39.104395604395606, + "grad_norm": 11.088910102844238, + "learning_rate": 3.04478021978022e-05, + "loss": 0.3865, + "step": 14234 + }, + { + "epoch": 39.107142857142854, + "grad_norm": 13.315025329589844, + "learning_rate": 3.044642857142857e-05, + "loss": 0.3698, + "step": 14235 + }, + { + "epoch": 39.10989010989011, + "grad_norm": 10.990256309509277, + "learning_rate": 3.0445054945054946e-05, + "loss": 0.403, + "step": 14236 + }, + { + "epoch": 39.112637362637365, + "grad_norm": 9.694419860839844, + "learning_rate": 3.0443681318681323e-05, + "loss": 0.1995, + "step": 14237 + }, + { + "epoch": 39.11538461538461, + "grad_norm": 13.624608039855957, + "learning_rate": 3.0442307692307693e-05, + "loss": 0.3004, + "step": 14238 + }, + { + "epoch": 39.11813186813187, + "grad_norm": 15.142497062683105, + "learning_rate": 3.044093406593407e-05, + "loss": 0.3385, + "step": 14239 + }, + { + "epoch": 39.120879120879124, + "grad_norm": 11.968976020812988, + "learning_rate": 3.043956043956044e-05, + "loss": 0.214, + "step": 14240 + }, + { + "epoch": 39.12362637362637, + "grad_norm": 9.852198600769043, + "learning_rate": 3.0438186813186813e-05, + "loss": 0.2081, + "step": 14241 + }, + { + "epoch": 39.12637362637363, + "grad_norm": 7.900165557861328, + "learning_rate": 3.043681318681319e-05, + "loss": 0.1772, + "step": 14242 + }, + { + "epoch": 39.129120879120876, + "grad_norm": 16.986268997192383, + "learning_rate": 3.043543956043956e-05, + "loss": 0.3126, + "step": 14243 + }, + { + "epoch": 39.13186813186813, + "grad_norm": 5.541239261627197, + "learning_rate": 3.0434065934065936e-05, + "loss": 0.1135, + "step": 14244 + }, + { + "epoch": 39.13461538461539, + "grad_norm": 6.1925177574157715, + "learning_rate": 3.0432692307692306e-05, + "loss": 0.1549, + "step": 14245 + }, + { + "epoch": 39.137362637362635, + "grad_norm": 8.75261116027832, + "learning_rate": 3.0431318681318683e-05, + "loss": 0.2518, + "step": 14246 + }, + { + "epoch": 39.14010989010989, + "grad_norm": 9.523130416870117, + "learning_rate": 3.0429945054945057e-05, + "loss": 0.2352, + "step": 14247 + }, + { + "epoch": 39.142857142857146, + "grad_norm": 8.80178451538086, + "learning_rate": 3.042857142857143e-05, + "loss": 0.1766, + "step": 14248 + }, + { + "epoch": 39.145604395604394, + "grad_norm": 19.315168380737305, + "learning_rate": 3.0427197802197803e-05, + "loss": 0.3761, + "step": 14249 + }, + { + "epoch": 39.14835164835165, + "grad_norm": 3.9316608905792236, + "learning_rate": 3.0425824175824173e-05, + "loss": 0.0681, + "step": 14250 + }, + { + "epoch": 39.1510989010989, + "grad_norm": 6.879398822784424, + "learning_rate": 3.042445054945055e-05, + "loss": 0.1267, + "step": 14251 + }, + { + "epoch": 39.15384615384615, + "grad_norm": 12.947894096374512, + "learning_rate": 3.0423076923076927e-05, + "loss": 0.2209, + "step": 14252 + }, + { + "epoch": 39.15659340659341, + "grad_norm": 8.802316665649414, + "learning_rate": 3.0421703296703297e-05, + "loss": 0.2345, + "step": 14253 + }, + { + "epoch": 39.15934065934066, + "grad_norm": 9.040358543395996, + "learning_rate": 3.0420329670329674e-05, + "loss": 0.1845, + "step": 14254 + }, + { + "epoch": 39.16208791208791, + "grad_norm": 8.950652122497559, + "learning_rate": 3.0418956043956044e-05, + "loss": 0.1508, + "step": 14255 + }, + { + "epoch": 39.16483516483517, + "grad_norm": 10.458331108093262, + "learning_rate": 3.0417582417582417e-05, + "loss": 0.1806, + "step": 14256 + }, + { + "epoch": 39.167582417582416, + "grad_norm": 9.012353897094727, + "learning_rate": 3.0416208791208794e-05, + "loss": 0.2023, + "step": 14257 + }, + { + "epoch": 39.17032967032967, + "grad_norm": 15.466757774353027, + "learning_rate": 3.0414835164835164e-05, + "loss": 0.444, + "step": 14258 + }, + { + "epoch": 39.17307692307692, + "grad_norm": 20.81153678894043, + "learning_rate": 3.041346153846154e-05, + "loss": 0.3353, + "step": 14259 + }, + { + "epoch": 39.175824175824175, + "grad_norm": 16.011524200439453, + "learning_rate": 3.041208791208791e-05, + "loss": 0.5605, + "step": 14260 + }, + { + "epoch": 39.17857142857143, + "grad_norm": 12.120978355407715, + "learning_rate": 3.0410714285714288e-05, + "loss": 0.1509, + "step": 14261 + }, + { + "epoch": 39.18131868131868, + "grad_norm": 14.101698875427246, + "learning_rate": 3.040934065934066e-05, + "loss": 0.2945, + "step": 14262 + }, + { + "epoch": 39.184065934065934, + "grad_norm": 4.727941036224365, + "learning_rate": 3.0407967032967035e-05, + "loss": 0.0841, + "step": 14263 + }, + { + "epoch": 39.18681318681319, + "grad_norm": 8.639090538024902, + "learning_rate": 3.0406593406593408e-05, + "loss": 0.1716, + "step": 14264 + }, + { + "epoch": 39.18956043956044, + "grad_norm": 17.62723731994629, + "learning_rate": 3.0405219780219778e-05, + "loss": 0.4434, + "step": 14265 + }, + { + "epoch": 39.19230769230769, + "grad_norm": 12.071415901184082, + "learning_rate": 3.0403846153846155e-05, + "loss": 0.2713, + "step": 14266 + }, + { + "epoch": 39.19505494505494, + "grad_norm": 9.665696144104004, + "learning_rate": 3.040247252747253e-05, + "loss": 0.1088, + "step": 14267 + }, + { + "epoch": 39.1978021978022, + "grad_norm": 6.065885543823242, + "learning_rate": 3.04010989010989e-05, + "loss": 0.0652, + "step": 14268 + }, + { + "epoch": 39.20054945054945, + "grad_norm": 20.22014808654785, + "learning_rate": 3.039972527472528e-05, + "loss": 0.4609, + "step": 14269 + }, + { + "epoch": 39.2032967032967, + "grad_norm": 6.59035062789917, + "learning_rate": 3.039835164835165e-05, + "loss": 0.1438, + "step": 14270 + }, + { + "epoch": 39.206043956043956, + "grad_norm": 15.718900680541992, + "learning_rate": 3.0396978021978022e-05, + "loss": 0.4417, + "step": 14271 + }, + { + "epoch": 39.20879120879121, + "grad_norm": 15.409828186035156, + "learning_rate": 3.03956043956044e-05, + "loss": 0.5144, + "step": 14272 + }, + { + "epoch": 39.21153846153846, + "grad_norm": 2.655921220779419, + "learning_rate": 3.039423076923077e-05, + "loss": 0.0297, + "step": 14273 + }, + { + "epoch": 39.214285714285715, + "grad_norm": 13.152722358703613, + "learning_rate": 3.0392857142857145e-05, + "loss": 0.4133, + "step": 14274 + }, + { + "epoch": 39.217032967032964, + "grad_norm": 16.170425415039062, + "learning_rate": 3.0391483516483515e-05, + "loss": 0.3715, + "step": 14275 + }, + { + "epoch": 39.21978021978022, + "grad_norm": 9.762767791748047, + "learning_rate": 3.0390109890109892e-05, + "loss": 0.1812, + "step": 14276 + }, + { + "epoch": 39.222527472527474, + "grad_norm": 5.984654903411865, + "learning_rate": 3.0388736263736266e-05, + "loss": 0.1269, + "step": 14277 + }, + { + "epoch": 39.22527472527472, + "grad_norm": 14.954398155212402, + "learning_rate": 3.038736263736264e-05, + "loss": 0.4253, + "step": 14278 + }, + { + "epoch": 39.22802197802198, + "grad_norm": 21.071596145629883, + "learning_rate": 3.0385989010989012e-05, + "loss": 0.536, + "step": 14279 + }, + { + "epoch": 39.23076923076923, + "grad_norm": 18.359050750732422, + "learning_rate": 3.0384615384615382e-05, + "loss": 0.5175, + "step": 14280 + }, + { + "epoch": 39.23351648351648, + "grad_norm": 12.265772819519043, + "learning_rate": 3.038324175824176e-05, + "loss": 0.3011, + "step": 14281 + }, + { + "epoch": 39.23626373626374, + "grad_norm": 5.538055419921875, + "learning_rate": 3.0381868131868136e-05, + "loss": 0.0905, + "step": 14282 + }, + { + "epoch": 39.239010989010985, + "grad_norm": 15.394150733947754, + "learning_rate": 3.0380494505494506e-05, + "loss": 0.3607, + "step": 14283 + }, + { + "epoch": 39.24175824175824, + "grad_norm": 4.37316370010376, + "learning_rate": 3.0379120879120883e-05, + "loss": 0.0608, + "step": 14284 + }, + { + "epoch": 39.244505494505496, + "grad_norm": 10.710271835327148, + "learning_rate": 3.0377747252747253e-05, + "loss": 0.1355, + "step": 14285 + }, + { + "epoch": 39.247252747252745, + "grad_norm": 9.46681022644043, + "learning_rate": 3.0376373626373626e-05, + "loss": 0.1407, + "step": 14286 + }, + { + "epoch": 39.25, + "grad_norm": 19.365617752075195, + "learning_rate": 3.0375000000000003e-05, + "loss": 0.4748, + "step": 14287 + }, + { + "epoch": 39.252747252747255, + "grad_norm": 11.10642147064209, + "learning_rate": 3.0373626373626373e-05, + "loss": 0.1838, + "step": 14288 + }, + { + "epoch": 39.255494505494504, + "grad_norm": 20.801692962646484, + "learning_rate": 3.037225274725275e-05, + "loss": 0.5771, + "step": 14289 + }, + { + "epoch": 39.25824175824176, + "grad_norm": 6.532235622406006, + "learning_rate": 3.037087912087912e-05, + "loss": 0.1044, + "step": 14290 + }, + { + "epoch": 39.260989010989015, + "grad_norm": 14.836112022399902, + "learning_rate": 3.0369505494505497e-05, + "loss": 0.4535, + "step": 14291 + }, + { + "epoch": 39.26373626373626, + "grad_norm": 16.59644317626953, + "learning_rate": 3.036813186813187e-05, + "loss": 0.404, + "step": 14292 + }, + { + "epoch": 39.26648351648352, + "grad_norm": 2.6327269077301025, + "learning_rate": 3.0366758241758244e-05, + "loss": 0.0461, + "step": 14293 + }, + { + "epoch": 39.26923076923077, + "grad_norm": 8.95003890991211, + "learning_rate": 3.0365384615384617e-05, + "loss": 0.1468, + "step": 14294 + }, + { + "epoch": 39.27197802197802, + "grad_norm": 13.898309707641602, + "learning_rate": 3.0364010989010987e-05, + "loss": 0.2728, + "step": 14295 + }, + { + "epoch": 39.27472527472528, + "grad_norm": 7.20745325088501, + "learning_rate": 3.0362637362637364e-05, + "loss": 0.1011, + "step": 14296 + }, + { + "epoch": 39.277472527472526, + "grad_norm": 7.100238800048828, + "learning_rate": 3.036126373626374e-05, + "loss": 0.1319, + "step": 14297 + }, + { + "epoch": 39.28021978021978, + "grad_norm": 16.010282516479492, + "learning_rate": 3.035989010989011e-05, + "loss": 0.3375, + "step": 14298 + }, + { + "epoch": 39.282967032967036, + "grad_norm": 11.887340545654297, + "learning_rate": 3.0358516483516487e-05, + "loss": 0.2956, + "step": 14299 + }, + { + "epoch": 39.285714285714285, + "grad_norm": 13.815665245056152, + "learning_rate": 3.0357142857142857e-05, + "loss": 0.465, + "step": 14300 + }, + { + "epoch": 39.28846153846154, + "grad_norm": 15.767646789550781, + "learning_rate": 3.035576923076923e-05, + "loss": 0.3876, + "step": 14301 + }, + { + "epoch": 39.29120879120879, + "grad_norm": 20.52989959716797, + "learning_rate": 3.0354395604395608e-05, + "loss": 0.6108, + "step": 14302 + }, + { + "epoch": 39.293956043956044, + "grad_norm": 20.962512969970703, + "learning_rate": 3.0353021978021978e-05, + "loss": 0.5444, + "step": 14303 + }, + { + "epoch": 39.2967032967033, + "grad_norm": 12.450486183166504, + "learning_rate": 3.0351648351648354e-05, + "loss": 0.3407, + "step": 14304 + }, + { + "epoch": 39.29945054945055, + "grad_norm": 5.550187587738037, + "learning_rate": 3.0350274725274724e-05, + "loss": 0.1078, + "step": 14305 + }, + { + "epoch": 39.3021978021978, + "grad_norm": 11.610548973083496, + "learning_rate": 3.03489010989011e-05, + "loss": 0.2887, + "step": 14306 + }, + { + "epoch": 39.30494505494506, + "grad_norm": 16.237056732177734, + "learning_rate": 3.0347527472527475e-05, + "loss": 0.4507, + "step": 14307 + }, + { + "epoch": 39.30769230769231, + "grad_norm": 12.04773235321045, + "learning_rate": 3.0346153846153848e-05, + "loss": 0.649, + "step": 14308 + }, + { + "epoch": 39.31043956043956, + "grad_norm": 9.969131469726562, + "learning_rate": 3.034478021978022e-05, + "loss": 0.2312, + "step": 14309 + }, + { + "epoch": 39.31318681318681, + "grad_norm": 14.050819396972656, + "learning_rate": 3.034340659340659e-05, + "loss": 0.2925, + "step": 14310 + }, + { + "epoch": 39.315934065934066, + "grad_norm": 1.66858971118927, + "learning_rate": 3.0342032967032968e-05, + "loss": 0.0292, + "step": 14311 + }, + { + "epoch": 39.31868131868132, + "grad_norm": 8.45433235168457, + "learning_rate": 3.0340659340659345e-05, + "loss": 0.1257, + "step": 14312 + }, + { + "epoch": 39.32142857142857, + "grad_norm": 10.441107749938965, + "learning_rate": 3.0339285714285715e-05, + "loss": 0.2831, + "step": 14313 + }, + { + "epoch": 39.324175824175825, + "grad_norm": 10.021581649780273, + "learning_rate": 3.0337912087912092e-05, + "loss": 0.1441, + "step": 14314 + }, + { + "epoch": 39.32692307692308, + "grad_norm": 28.909412384033203, + "learning_rate": 3.0336538461538462e-05, + "loss": 0.3689, + "step": 14315 + }, + { + "epoch": 39.32967032967033, + "grad_norm": 14.028352737426758, + "learning_rate": 3.0335164835164835e-05, + "loss": 0.2405, + "step": 14316 + }, + { + "epoch": 39.332417582417584, + "grad_norm": 14.518414497375488, + "learning_rate": 3.0333791208791212e-05, + "loss": 0.2569, + "step": 14317 + }, + { + "epoch": 39.33516483516483, + "grad_norm": 5.3087663650512695, + "learning_rate": 3.0332417582417582e-05, + "loss": 0.0825, + "step": 14318 + }, + { + "epoch": 39.33791208791209, + "grad_norm": 5.012192726135254, + "learning_rate": 3.033104395604396e-05, + "loss": 0.0754, + "step": 14319 + }, + { + "epoch": 39.34065934065934, + "grad_norm": 17.765426635742188, + "learning_rate": 3.032967032967033e-05, + "loss": 0.2913, + "step": 14320 + }, + { + "epoch": 39.34340659340659, + "grad_norm": 16.57659339904785, + "learning_rate": 3.0328296703296706e-05, + "loss": 0.3877, + "step": 14321 + }, + { + "epoch": 39.34615384615385, + "grad_norm": 16.240270614624023, + "learning_rate": 3.032692307692308e-05, + "loss": 0.3171, + "step": 14322 + }, + { + "epoch": 39.3489010989011, + "grad_norm": 9.20895767211914, + "learning_rate": 3.0325549450549453e-05, + "loss": 0.1691, + "step": 14323 + }, + { + "epoch": 39.35164835164835, + "grad_norm": 6.77410888671875, + "learning_rate": 3.0324175824175826e-05, + "loss": 0.089, + "step": 14324 + }, + { + "epoch": 39.354395604395606, + "grad_norm": 7.800548553466797, + "learning_rate": 3.0322802197802196e-05, + "loss": 0.2666, + "step": 14325 + }, + { + "epoch": 39.357142857142854, + "grad_norm": 15.383866310119629, + "learning_rate": 3.0321428571428573e-05, + "loss": 0.3427, + "step": 14326 + }, + { + "epoch": 39.35989010989011, + "grad_norm": 6.8436713218688965, + "learning_rate": 3.032005494505495e-05, + "loss": 0.0888, + "step": 14327 + }, + { + "epoch": 39.362637362637365, + "grad_norm": 11.366857528686523, + "learning_rate": 3.031868131868132e-05, + "loss": 0.1908, + "step": 14328 + }, + { + "epoch": 39.36538461538461, + "grad_norm": 9.645730018615723, + "learning_rate": 3.0317307692307696e-05, + "loss": 0.245, + "step": 14329 + }, + { + "epoch": 39.36813186813187, + "grad_norm": 11.713151931762695, + "learning_rate": 3.0315934065934066e-05, + "loss": 0.2616, + "step": 14330 + }, + { + "epoch": 39.370879120879124, + "grad_norm": 11.177519798278809, + "learning_rate": 3.031456043956044e-05, + "loss": 0.1493, + "step": 14331 + }, + { + "epoch": 39.37362637362637, + "grad_norm": 19.627147674560547, + "learning_rate": 3.0313186813186817e-05, + "loss": 0.5758, + "step": 14332 + }, + { + "epoch": 39.37637362637363, + "grad_norm": 10.628297805786133, + "learning_rate": 3.0311813186813187e-05, + "loss": 0.094, + "step": 14333 + }, + { + "epoch": 39.379120879120876, + "grad_norm": 9.589438438415527, + "learning_rate": 3.0310439560439563e-05, + "loss": 0.1379, + "step": 14334 + }, + { + "epoch": 39.38186813186813, + "grad_norm": 17.291128158569336, + "learning_rate": 3.0309065934065933e-05, + "loss": 0.6776, + "step": 14335 + }, + { + "epoch": 39.38461538461539, + "grad_norm": 20.09469223022461, + "learning_rate": 3.030769230769231e-05, + "loss": 0.7233, + "step": 14336 + }, + { + "epoch": 39.387362637362635, + "grad_norm": 14.381021499633789, + "learning_rate": 3.0306318681318684e-05, + "loss": 0.2891, + "step": 14337 + }, + { + "epoch": 39.39010989010989, + "grad_norm": 2.274125814437866, + "learning_rate": 3.0304945054945054e-05, + "loss": 0.0406, + "step": 14338 + }, + { + "epoch": 39.392857142857146, + "grad_norm": 5.630418300628662, + "learning_rate": 3.030357142857143e-05, + "loss": 0.1012, + "step": 14339 + }, + { + "epoch": 39.395604395604394, + "grad_norm": 5.439669132232666, + "learning_rate": 3.03021978021978e-05, + "loss": 0.143, + "step": 14340 + }, + { + "epoch": 39.39835164835165, + "grad_norm": 16.541399002075195, + "learning_rate": 3.0300824175824177e-05, + "loss": 0.4969, + "step": 14341 + }, + { + "epoch": 39.4010989010989, + "grad_norm": 3.7944021224975586, + "learning_rate": 3.0299450549450554e-05, + "loss": 0.0617, + "step": 14342 + }, + { + "epoch": 39.40384615384615, + "grad_norm": 10.134132385253906, + "learning_rate": 3.0298076923076924e-05, + "loss": 0.2751, + "step": 14343 + }, + { + "epoch": 39.40659340659341, + "grad_norm": 14.054547309875488, + "learning_rate": 3.02967032967033e-05, + "loss": 0.3986, + "step": 14344 + }, + { + "epoch": 39.40934065934066, + "grad_norm": 10.35342025756836, + "learning_rate": 3.029532967032967e-05, + "loss": 0.1729, + "step": 14345 + }, + { + "epoch": 39.41208791208791, + "grad_norm": 12.24304485321045, + "learning_rate": 3.0293956043956044e-05, + "loss": 0.2758, + "step": 14346 + }, + { + "epoch": 39.41483516483517, + "grad_norm": 2.1895384788513184, + "learning_rate": 3.029258241758242e-05, + "loss": 0.0381, + "step": 14347 + }, + { + "epoch": 39.417582417582416, + "grad_norm": 8.012432098388672, + "learning_rate": 3.029120879120879e-05, + "loss": 0.2368, + "step": 14348 + }, + { + "epoch": 39.42032967032967, + "grad_norm": 8.136817932128906, + "learning_rate": 3.0289835164835168e-05, + "loss": 0.2967, + "step": 14349 + }, + { + "epoch": 39.42307692307692, + "grad_norm": 19.653043746948242, + "learning_rate": 3.0288461538461538e-05, + "loss": 0.881, + "step": 14350 + }, + { + "epoch": 39.425824175824175, + "grad_norm": 14.613256454467773, + "learning_rate": 3.0287087912087915e-05, + "loss": 0.4957, + "step": 14351 + }, + { + "epoch": 39.42857142857143, + "grad_norm": 6.275825023651123, + "learning_rate": 3.0285714285714288e-05, + "loss": 0.1869, + "step": 14352 + }, + { + "epoch": 39.43131868131868, + "grad_norm": 19.523681640625, + "learning_rate": 3.0284340659340658e-05, + "loss": 0.3473, + "step": 14353 + }, + { + "epoch": 39.434065934065934, + "grad_norm": 13.044546127319336, + "learning_rate": 3.0282967032967035e-05, + "loss": 0.4004, + "step": 14354 + }, + { + "epoch": 39.43681318681319, + "grad_norm": 7.988754749298096, + "learning_rate": 3.0281593406593405e-05, + "loss": 0.1521, + "step": 14355 + }, + { + "epoch": 39.43956043956044, + "grad_norm": 18.02717399597168, + "learning_rate": 3.0280219780219782e-05, + "loss": 0.5987, + "step": 14356 + }, + { + "epoch": 39.44230769230769, + "grad_norm": 12.78439998626709, + "learning_rate": 3.027884615384616e-05, + "loss": 0.1919, + "step": 14357 + }, + { + "epoch": 39.44505494505494, + "grad_norm": 10.632868766784668, + "learning_rate": 3.027747252747253e-05, + "loss": 0.1948, + "step": 14358 + }, + { + "epoch": 39.4478021978022, + "grad_norm": 13.956012725830078, + "learning_rate": 3.0276098901098905e-05, + "loss": 0.2627, + "step": 14359 + }, + { + "epoch": 39.45054945054945, + "grad_norm": 8.378414154052734, + "learning_rate": 3.0274725274725275e-05, + "loss": 0.1482, + "step": 14360 + }, + { + "epoch": 39.4532967032967, + "grad_norm": 13.448935508728027, + "learning_rate": 3.027335164835165e-05, + "loss": 0.3019, + "step": 14361 + }, + { + "epoch": 39.456043956043956, + "grad_norm": 7.967562198638916, + "learning_rate": 3.0271978021978026e-05, + "loss": 0.1072, + "step": 14362 + }, + { + "epoch": 39.45879120879121, + "grad_norm": 7.557505130767822, + "learning_rate": 3.0270604395604396e-05, + "loss": 0.1231, + "step": 14363 + }, + { + "epoch": 39.46153846153846, + "grad_norm": 8.227314949035645, + "learning_rate": 3.0269230769230772e-05, + "loss": 0.2491, + "step": 14364 + }, + { + "epoch": 39.464285714285715, + "grad_norm": 14.955263137817383, + "learning_rate": 3.0267857142857142e-05, + "loss": 0.7636, + "step": 14365 + }, + { + "epoch": 39.467032967032964, + "grad_norm": 8.021382331848145, + "learning_rate": 3.026648351648352e-05, + "loss": 0.1002, + "step": 14366 + }, + { + "epoch": 39.46978021978022, + "grad_norm": 20.359956741333008, + "learning_rate": 3.0265109890109893e-05, + "loss": 0.9127, + "step": 14367 + }, + { + "epoch": 39.472527472527474, + "grad_norm": 6.36135196685791, + "learning_rate": 3.0263736263736263e-05, + "loss": 0.1967, + "step": 14368 + }, + { + "epoch": 39.47527472527472, + "grad_norm": 15.508070945739746, + "learning_rate": 3.026236263736264e-05, + "loss": 0.5559, + "step": 14369 + }, + { + "epoch": 39.47802197802198, + "grad_norm": 13.588520050048828, + "learning_rate": 3.026098901098901e-05, + "loss": 0.2285, + "step": 14370 + }, + { + "epoch": 39.48076923076923, + "grad_norm": 2.7282350063323975, + "learning_rate": 3.0259615384615386e-05, + "loss": 0.0433, + "step": 14371 + }, + { + "epoch": 39.48351648351648, + "grad_norm": 15.976016998291016, + "learning_rate": 3.0258241758241763e-05, + "loss": 0.5851, + "step": 14372 + }, + { + "epoch": 39.48626373626374, + "grad_norm": 23.53739356994629, + "learning_rate": 3.0256868131868133e-05, + "loss": 1.0406, + "step": 14373 + }, + { + "epoch": 39.489010989010985, + "grad_norm": 9.962200164794922, + "learning_rate": 3.025549450549451e-05, + "loss": 0.4337, + "step": 14374 + }, + { + "epoch": 39.49175824175824, + "grad_norm": 12.7207612991333, + "learning_rate": 3.025412087912088e-05, + "loss": 0.3729, + "step": 14375 + }, + { + "epoch": 39.494505494505496, + "grad_norm": 10.083551406860352, + "learning_rate": 3.0252747252747253e-05, + "loss": 0.3717, + "step": 14376 + }, + { + "epoch": 39.497252747252745, + "grad_norm": 8.137104988098145, + "learning_rate": 3.025137362637363e-05, + "loss": 0.1558, + "step": 14377 + }, + { + "epoch": 39.5, + "grad_norm": 6.895599365234375, + "learning_rate": 3.025e-05, + "loss": 0.1349, + "step": 14378 + }, + { + "epoch": 39.502747252747255, + "grad_norm": 8.596466064453125, + "learning_rate": 3.0248626373626377e-05, + "loss": 0.1658, + "step": 14379 + }, + { + "epoch": 39.505494505494504, + "grad_norm": 12.931357383728027, + "learning_rate": 3.0247252747252747e-05, + "loss": 0.348, + "step": 14380 + }, + { + "epoch": 39.50824175824176, + "grad_norm": 2.7613275051116943, + "learning_rate": 3.0245879120879124e-05, + "loss": 0.0424, + "step": 14381 + }, + { + "epoch": 39.51098901098901, + "grad_norm": 7.632529258728027, + "learning_rate": 3.0244505494505497e-05, + "loss": 0.1307, + "step": 14382 + }, + { + "epoch": 39.51373626373626, + "grad_norm": 11.453011512756348, + "learning_rate": 3.0243131868131867e-05, + "loss": 0.1875, + "step": 14383 + }, + { + "epoch": 39.51648351648352, + "grad_norm": 14.230929374694824, + "learning_rate": 3.0241758241758244e-05, + "loss": 0.2794, + "step": 14384 + }, + { + "epoch": 39.51923076923077, + "grad_norm": 8.723838806152344, + "learning_rate": 3.0240384615384614e-05, + "loss": 0.095, + "step": 14385 + }, + { + "epoch": 39.52197802197802, + "grad_norm": 14.491779327392578, + "learning_rate": 3.023901098901099e-05, + "loss": 0.337, + "step": 14386 + }, + { + "epoch": 39.52472527472528, + "grad_norm": 17.916316986083984, + "learning_rate": 3.0237637362637367e-05, + "loss": 0.4552, + "step": 14387 + }, + { + "epoch": 39.527472527472526, + "grad_norm": 11.225390434265137, + "learning_rate": 3.0236263736263738e-05, + "loss": 0.151, + "step": 14388 + }, + { + "epoch": 39.53021978021978, + "grad_norm": 14.72496223449707, + "learning_rate": 3.0234890109890114e-05, + "loss": 0.5197, + "step": 14389 + }, + { + "epoch": 39.532967032967036, + "grad_norm": 7.500596523284912, + "learning_rate": 3.0233516483516484e-05, + "loss": 0.1618, + "step": 14390 + }, + { + "epoch": 39.535714285714285, + "grad_norm": 15.50782299041748, + "learning_rate": 3.0232142857142858e-05, + "loss": 0.6164, + "step": 14391 + }, + { + "epoch": 39.53846153846154, + "grad_norm": 10.94243049621582, + "learning_rate": 3.0230769230769235e-05, + "loss": 0.3015, + "step": 14392 + }, + { + "epoch": 39.54120879120879, + "grad_norm": 3.826603889465332, + "learning_rate": 3.0229395604395605e-05, + "loss": 0.05, + "step": 14393 + }, + { + "epoch": 39.543956043956044, + "grad_norm": 11.58164119720459, + "learning_rate": 3.022802197802198e-05, + "loss": 0.2587, + "step": 14394 + }, + { + "epoch": 39.5467032967033, + "grad_norm": 7.217215538024902, + "learning_rate": 3.022664835164835e-05, + "loss": 0.1386, + "step": 14395 + }, + { + "epoch": 39.54945054945055, + "grad_norm": 12.952973365783691, + "learning_rate": 3.0225274725274728e-05, + "loss": 0.2956, + "step": 14396 + }, + { + "epoch": 39.5521978021978, + "grad_norm": 18.88902473449707, + "learning_rate": 3.02239010989011e-05, + "loss": 0.4757, + "step": 14397 + }, + { + "epoch": 39.55494505494506, + "grad_norm": 7.855182647705078, + "learning_rate": 3.022252747252747e-05, + "loss": 0.2042, + "step": 14398 + }, + { + "epoch": 39.55769230769231, + "grad_norm": 16.65684700012207, + "learning_rate": 3.022115384615385e-05, + "loss": 0.4694, + "step": 14399 + }, + { + "epoch": 39.56043956043956, + "grad_norm": 13.001230239868164, + "learning_rate": 3.021978021978022e-05, + "loss": 0.1899, + "step": 14400 + }, + { + "epoch": 39.56318681318681, + "grad_norm": 20.572538375854492, + "learning_rate": 3.0218406593406595e-05, + "loss": 0.5374, + "step": 14401 + }, + { + "epoch": 39.565934065934066, + "grad_norm": 16.487586975097656, + "learning_rate": 3.0217032967032972e-05, + "loss": 0.282, + "step": 14402 + }, + { + "epoch": 39.56868131868132, + "grad_norm": 17.32998275756836, + "learning_rate": 3.0215659340659342e-05, + "loss": 0.7535, + "step": 14403 + }, + { + "epoch": 39.57142857142857, + "grad_norm": 5.949780464172363, + "learning_rate": 3.021428571428572e-05, + "loss": 0.0949, + "step": 14404 + }, + { + "epoch": 39.574175824175825, + "grad_norm": 24.53368377685547, + "learning_rate": 3.021291208791209e-05, + "loss": 0.6882, + "step": 14405 + }, + { + "epoch": 39.57692307692308, + "grad_norm": 18.185686111450195, + "learning_rate": 3.0211538461538462e-05, + "loss": 0.4233, + "step": 14406 + }, + { + "epoch": 39.57967032967033, + "grad_norm": 9.10204029083252, + "learning_rate": 3.021016483516484e-05, + "loss": 0.1595, + "step": 14407 + }, + { + "epoch": 39.582417582417584, + "grad_norm": 10.901374816894531, + "learning_rate": 3.020879120879121e-05, + "loss": 0.2541, + "step": 14408 + }, + { + "epoch": 39.58516483516483, + "grad_norm": 7.739591598510742, + "learning_rate": 3.0207417582417586e-05, + "loss": 0.2079, + "step": 14409 + }, + { + "epoch": 39.58791208791209, + "grad_norm": 16.919307708740234, + "learning_rate": 3.0206043956043956e-05, + "loss": 0.3068, + "step": 14410 + }, + { + "epoch": 39.59065934065934, + "grad_norm": 14.357885360717773, + "learning_rate": 3.0204670329670333e-05, + "loss": 0.2231, + "step": 14411 + }, + { + "epoch": 39.59340659340659, + "grad_norm": 7.17685079574585, + "learning_rate": 3.0203296703296706e-05, + "loss": 0.139, + "step": 14412 + }, + { + "epoch": 39.59615384615385, + "grad_norm": 12.361244201660156, + "learning_rate": 3.0201923076923076e-05, + "loss": 0.2404, + "step": 14413 + }, + { + "epoch": 39.5989010989011, + "grad_norm": 10.882574081420898, + "learning_rate": 3.0200549450549453e-05, + "loss": 0.2097, + "step": 14414 + }, + { + "epoch": 39.60164835164835, + "grad_norm": 19.461727142333984, + "learning_rate": 3.0199175824175823e-05, + "loss": 0.4732, + "step": 14415 + }, + { + "epoch": 39.604395604395606, + "grad_norm": 9.156563758850098, + "learning_rate": 3.01978021978022e-05, + "loss": 0.2259, + "step": 14416 + }, + { + "epoch": 39.607142857142854, + "grad_norm": 14.44710636138916, + "learning_rate": 3.0196428571428576e-05, + "loss": 0.3746, + "step": 14417 + }, + { + "epoch": 39.60989010989011, + "grad_norm": 17.33330726623535, + "learning_rate": 3.0195054945054946e-05, + "loss": 0.3969, + "step": 14418 + }, + { + "epoch": 39.612637362637365, + "grad_norm": 15.805832862854004, + "learning_rate": 3.0193681318681323e-05, + "loss": 0.2793, + "step": 14419 + }, + { + "epoch": 39.61538461538461, + "grad_norm": 22.4156436920166, + "learning_rate": 3.0192307692307693e-05, + "loss": 0.7856, + "step": 14420 + }, + { + "epoch": 39.61813186813187, + "grad_norm": 20.046886444091797, + "learning_rate": 3.0190934065934067e-05, + "loss": 0.5838, + "step": 14421 + }, + { + "epoch": 39.620879120879124, + "grad_norm": 16.89693832397461, + "learning_rate": 3.0189560439560444e-05, + "loss": 0.4725, + "step": 14422 + }, + { + "epoch": 39.62362637362637, + "grad_norm": 7.017240047454834, + "learning_rate": 3.0188186813186814e-05, + "loss": 0.1218, + "step": 14423 + }, + { + "epoch": 39.62637362637363, + "grad_norm": 18.39144515991211, + "learning_rate": 3.018681318681319e-05, + "loss": 0.3565, + "step": 14424 + }, + { + "epoch": 39.629120879120876, + "grad_norm": 9.465444564819336, + "learning_rate": 3.018543956043956e-05, + "loss": 0.2136, + "step": 14425 + }, + { + "epoch": 39.63186813186813, + "grad_norm": 4.1823506355285645, + "learning_rate": 3.0184065934065937e-05, + "loss": 0.0616, + "step": 14426 + }, + { + "epoch": 39.63461538461539, + "grad_norm": 18.81719970703125, + "learning_rate": 3.018269230769231e-05, + "loss": 0.6422, + "step": 14427 + }, + { + "epoch": 39.637362637362635, + "grad_norm": 16.03238296508789, + "learning_rate": 3.018131868131868e-05, + "loss": 0.4029, + "step": 14428 + }, + { + "epoch": 39.64010989010989, + "grad_norm": 10.11860179901123, + "learning_rate": 3.0179945054945057e-05, + "loss": 0.2327, + "step": 14429 + }, + { + "epoch": 39.642857142857146, + "grad_norm": 12.20702838897705, + "learning_rate": 3.0178571428571427e-05, + "loss": 0.2461, + "step": 14430 + }, + { + "epoch": 39.645604395604394, + "grad_norm": 22.177202224731445, + "learning_rate": 3.0177197802197804e-05, + "loss": 0.9034, + "step": 14431 + }, + { + "epoch": 39.64835164835165, + "grad_norm": 7.995608806610107, + "learning_rate": 3.017582417582418e-05, + "loss": 0.2365, + "step": 14432 + }, + { + "epoch": 39.6510989010989, + "grad_norm": 12.282835006713867, + "learning_rate": 3.017445054945055e-05, + "loss": 0.205, + "step": 14433 + }, + { + "epoch": 39.65384615384615, + "grad_norm": 18.47810173034668, + "learning_rate": 3.0173076923076928e-05, + "loss": 0.5529, + "step": 14434 + }, + { + "epoch": 39.65659340659341, + "grad_norm": 8.293774604797363, + "learning_rate": 3.0171703296703298e-05, + "loss": 0.2413, + "step": 14435 + }, + { + "epoch": 39.65934065934066, + "grad_norm": 5.38978910446167, + "learning_rate": 3.017032967032967e-05, + "loss": 0.091, + "step": 14436 + }, + { + "epoch": 39.66208791208791, + "grad_norm": 8.164247512817383, + "learning_rate": 3.0168956043956048e-05, + "loss": 0.2668, + "step": 14437 + }, + { + "epoch": 39.66483516483517, + "grad_norm": 5.818987846374512, + "learning_rate": 3.0167582417582418e-05, + "loss": 0.1007, + "step": 14438 + }, + { + "epoch": 39.667582417582416, + "grad_norm": 18.488216400146484, + "learning_rate": 3.0166208791208795e-05, + "loss": 0.3913, + "step": 14439 + }, + { + "epoch": 39.67032967032967, + "grad_norm": 6.413449764251709, + "learning_rate": 3.0164835164835165e-05, + "loss": 0.2311, + "step": 14440 + }, + { + "epoch": 39.67307692307692, + "grad_norm": 12.0812406539917, + "learning_rate": 3.016346153846154e-05, + "loss": 0.3031, + "step": 14441 + }, + { + "epoch": 39.675824175824175, + "grad_norm": 14.83395767211914, + "learning_rate": 3.0162087912087915e-05, + "loss": 0.3322, + "step": 14442 + }, + { + "epoch": 39.67857142857143, + "grad_norm": 8.965544700622559, + "learning_rate": 3.0160714285714285e-05, + "loss": 0.2977, + "step": 14443 + }, + { + "epoch": 39.68131868131868, + "grad_norm": 22.844318389892578, + "learning_rate": 3.0159340659340662e-05, + "loss": 0.8878, + "step": 14444 + }, + { + "epoch": 39.684065934065934, + "grad_norm": 15.2172269821167, + "learning_rate": 3.0157967032967032e-05, + "loss": 0.4176, + "step": 14445 + }, + { + "epoch": 39.68681318681319, + "grad_norm": 14.134871482849121, + "learning_rate": 3.015659340659341e-05, + "loss": 0.2846, + "step": 14446 + }, + { + "epoch": 39.68956043956044, + "grad_norm": 9.522197723388672, + "learning_rate": 3.015521978021978e-05, + "loss": 0.1421, + "step": 14447 + }, + { + "epoch": 39.69230769230769, + "grad_norm": 17.735544204711914, + "learning_rate": 3.0153846153846155e-05, + "loss": 0.4505, + "step": 14448 + }, + { + "epoch": 39.69505494505494, + "grad_norm": 16.135169982910156, + "learning_rate": 3.0152472527472532e-05, + "loss": 0.4684, + "step": 14449 + }, + { + "epoch": 39.6978021978022, + "grad_norm": 2.5891525745391846, + "learning_rate": 3.0151098901098902e-05, + "loss": 0.0471, + "step": 14450 + }, + { + "epoch": 39.70054945054945, + "grad_norm": 13.21806526184082, + "learning_rate": 3.0149725274725276e-05, + "loss": 0.2767, + "step": 14451 + }, + { + "epoch": 39.7032967032967, + "grad_norm": 12.127655029296875, + "learning_rate": 3.0148351648351646e-05, + "loss": 0.2625, + "step": 14452 + }, + { + "epoch": 39.706043956043956, + "grad_norm": 14.546238899230957, + "learning_rate": 3.0146978021978023e-05, + "loss": 0.3803, + "step": 14453 + }, + { + "epoch": 39.70879120879121, + "grad_norm": 13.257452964782715, + "learning_rate": 3.01456043956044e-05, + "loss": 0.3214, + "step": 14454 + }, + { + "epoch": 39.71153846153846, + "grad_norm": 14.77890396118164, + "learning_rate": 3.014423076923077e-05, + "loss": 0.3529, + "step": 14455 + }, + { + "epoch": 39.714285714285715, + "grad_norm": 10.341043472290039, + "learning_rate": 3.0142857142857146e-05, + "loss": 0.1985, + "step": 14456 + }, + { + "epoch": 39.717032967032964, + "grad_norm": 6.4701738357543945, + "learning_rate": 3.0141483516483516e-05, + "loss": 0.0879, + "step": 14457 + }, + { + "epoch": 39.71978021978022, + "grad_norm": 7.512858867645264, + "learning_rate": 3.014010989010989e-05, + "loss": 0.2642, + "step": 14458 + }, + { + "epoch": 39.722527472527474, + "grad_norm": 23.975982666015625, + "learning_rate": 3.0138736263736266e-05, + "loss": 0.6431, + "step": 14459 + }, + { + "epoch": 39.72527472527472, + "grad_norm": 5.975916385650635, + "learning_rate": 3.0137362637362636e-05, + "loss": 0.0936, + "step": 14460 + }, + { + "epoch": 39.72802197802198, + "grad_norm": 5.738742351531982, + "learning_rate": 3.0135989010989013e-05, + "loss": 0.2056, + "step": 14461 + }, + { + "epoch": 39.73076923076923, + "grad_norm": 8.550691604614258, + "learning_rate": 3.0134615384615383e-05, + "loss": 0.1318, + "step": 14462 + }, + { + "epoch": 39.73351648351648, + "grad_norm": 8.037220001220703, + "learning_rate": 3.013324175824176e-05, + "loss": 0.1867, + "step": 14463 + }, + { + "epoch": 39.73626373626374, + "grad_norm": 10.46589469909668, + "learning_rate": 3.0131868131868133e-05, + "loss": 0.2376, + "step": 14464 + }, + { + "epoch": 39.73901098901099, + "grad_norm": 14.928295135498047, + "learning_rate": 3.0130494505494507e-05, + "loss": 0.5043, + "step": 14465 + }, + { + "epoch": 39.74175824175824, + "grad_norm": 7.298081874847412, + "learning_rate": 3.012912087912088e-05, + "loss": 0.1594, + "step": 14466 + }, + { + "epoch": 39.744505494505496, + "grad_norm": 12.442228317260742, + "learning_rate": 3.012774725274725e-05, + "loss": 0.2472, + "step": 14467 + }, + { + "epoch": 39.747252747252745, + "grad_norm": 13.905932426452637, + "learning_rate": 3.0126373626373627e-05, + "loss": 0.3661, + "step": 14468 + }, + { + "epoch": 39.75, + "grad_norm": 17.122461318969727, + "learning_rate": 3.0125000000000004e-05, + "loss": 0.4049, + "step": 14469 + }, + { + "epoch": 39.752747252747255, + "grad_norm": 11.32388973236084, + "learning_rate": 3.0123626373626374e-05, + "loss": 0.2567, + "step": 14470 + }, + { + "epoch": 39.755494505494504, + "grad_norm": 14.198902130126953, + "learning_rate": 3.012225274725275e-05, + "loss": 0.4828, + "step": 14471 + }, + { + "epoch": 39.75824175824176, + "grad_norm": 16.491859436035156, + "learning_rate": 3.012087912087912e-05, + "loss": 0.4499, + "step": 14472 + }, + { + "epoch": 39.76098901098901, + "grad_norm": 12.310738563537598, + "learning_rate": 3.0119505494505494e-05, + "loss": 0.1535, + "step": 14473 + }, + { + "epoch": 39.76373626373626, + "grad_norm": 21.035154342651367, + "learning_rate": 3.011813186813187e-05, + "loss": 0.247, + "step": 14474 + }, + { + "epoch": 39.76648351648352, + "grad_norm": 12.161182403564453, + "learning_rate": 3.011675824175824e-05, + "loss": 0.2249, + "step": 14475 + }, + { + "epoch": 39.76923076923077, + "grad_norm": 13.284642219543457, + "learning_rate": 3.0115384615384618e-05, + "loss": 0.3089, + "step": 14476 + }, + { + "epoch": 39.77197802197802, + "grad_norm": 7.1209716796875, + "learning_rate": 3.0114010989010988e-05, + "loss": 0.189, + "step": 14477 + }, + { + "epoch": 39.77472527472528, + "grad_norm": 5.446249485015869, + "learning_rate": 3.0112637362637364e-05, + "loss": 0.1708, + "step": 14478 + }, + { + "epoch": 39.777472527472526, + "grad_norm": 19.950645446777344, + "learning_rate": 3.0111263736263738e-05, + "loss": 0.5068, + "step": 14479 + }, + { + "epoch": 39.78021978021978, + "grad_norm": 13.823877334594727, + "learning_rate": 3.010989010989011e-05, + "loss": 0.4896, + "step": 14480 + }, + { + "epoch": 39.782967032967036, + "grad_norm": 22.195173263549805, + "learning_rate": 3.0108516483516485e-05, + "loss": 0.7523, + "step": 14481 + }, + { + "epoch": 39.785714285714285, + "grad_norm": 16.77321434020996, + "learning_rate": 3.0107142857142855e-05, + "loss": 0.4701, + "step": 14482 + }, + { + "epoch": 39.78846153846154, + "grad_norm": 10.182882308959961, + "learning_rate": 3.010576923076923e-05, + "loss": 0.3979, + "step": 14483 + }, + { + "epoch": 39.79120879120879, + "grad_norm": 16.52496337890625, + "learning_rate": 3.0104395604395608e-05, + "loss": 0.3997, + "step": 14484 + }, + { + "epoch": 39.793956043956044, + "grad_norm": 23.42267417907715, + "learning_rate": 3.010302197802198e-05, + "loss": 0.5633, + "step": 14485 + }, + { + "epoch": 39.7967032967033, + "grad_norm": 18.486827850341797, + "learning_rate": 3.0101648351648355e-05, + "loss": 0.2794, + "step": 14486 + }, + { + "epoch": 39.79945054945055, + "grad_norm": 13.372533798217773, + "learning_rate": 3.0100274725274725e-05, + "loss": 0.2751, + "step": 14487 + }, + { + "epoch": 39.8021978021978, + "grad_norm": 13.235381126403809, + "learning_rate": 3.00989010989011e-05, + "loss": 0.2079, + "step": 14488 + }, + { + "epoch": 39.80494505494506, + "grad_norm": 8.652449607849121, + "learning_rate": 3.0097527472527475e-05, + "loss": 0.1398, + "step": 14489 + }, + { + "epoch": 39.80769230769231, + "grad_norm": 3.0684750080108643, + "learning_rate": 3.0096153846153845e-05, + "loss": 0.0313, + "step": 14490 + }, + { + "epoch": 39.81043956043956, + "grad_norm": 25.91447639465332, + "learning_rate": 3.0094780219780222e-05, + "loss": 0.8343, + "step": 14491 + }, + { + "epoch": 39.81318681318681, + "grad_norm": 18.330400466918945, + "learning_rate": 3.0093406593406592e-05, + "loss": 0.8172, + "step": 14492 + }, + { + "epoch": 39.815934065934066, + "grad_norm": 11.66086196899414, + "learning_rate": 3.009203296703297e-05, + "loss": 0.178, + "step": 14493 + }, + { + "epoch": 39.81868131868132, + "grad_norm": 11.104039192199707, + "learning_rate": 3.0090659340659342e-05, + "loss": 0.3292, + "step": 14494 + }, + { + "epoch": 39.82142857142857, + "grad_norm": 10.079973220825195, + "learning_rate": 3.0089285714285716e-05, + "loss": 0.192, + "step": 14495 + }, + { + "epoch": 39.824175824175825, + "grad_norm": 13.171930313110352, + "learning_rate": 3.008791208791209e-05, + "loss": 0.1799, + "step": 14496 + }, + { + "epoch": 39.82692307692308, + "grad_norm": 5.942821025848389, + "learning_rate": 3.008653846153846e-05, + "loss": 0.1416, + "step": 14497 + }, + { + "epoch": 39.82967032967033, + "grad_norm": 20.432397842407227, + "learning_rate": 3.0085164835164836e-05, + "loss": 0.6229, + "step": 14498 + }, + { + "epoch": 39.832417582417584, + "grad_norm": 14.549875259399414, + "learning_rate": 3.0083791208791213e-05, + "loss": 0.3496, + "step": 14499 + }, + { + "epoch": 39.83516483516483, + "grad_norm": 10.502863883972168, + "learning_rate": 3.0082417582417583e-05, + "loss": 0.2832, + "step": 14500 + }, + { + "epoch": 39.83791208791209, + "grad_norm": 12.526490211486816, + "learning_rate": 3.008104395604396e-05, + "loss": 0.1618, + "step": 14501 + }, + { + "epoch": 39.84065934065934, + "grad_norm": 1.7440974712371826, + "learning_rate": 3.007967032967033e-05, + "loss": 0.0326, + "step": 14502 + }, + { + "epoch": 39.84340659340659, + "grad_norm": 4.677880764007568, + "learning_rate": 3.0078296703296703e-05, + "loss": 0.0827, + "step": 14503 + }, + { + "epoch": 39.84615384615385, + "grad_norm": 14.241460800170898, + "learning_rate": 3.007692307692308e-05, + "loss": 0.3792, + "step": 14504 + }, + { + "epoch": 39.8489010989011, + "grad_norm": 15.666401863098145, + "learning_rate": 3.007554945054945e-05, + "loss": 0.3754, + "step": 14505 + }, + { + "epoch": 39.85164835164835, + "grad_norm": 7.3982038497924805, + "learning_rate": 3.0074175824175827e-05, + "loss": 0.2428, + "step": 14506 + }, + { + "epoch": 39.854395604395606, + "grad_norm": 18.45895004272461, + "learning_rate": 3.0072802197802197e-05, + "loss": 0.4327, + "step": 14507 + }, + { + "epoch": 39.857142857142854, + "grad_norm": 15.61581039428711, + "learning_rate": 3.0071428571428573e-05, + "loss": 0.3167, + "step": 14508 + }, + { + "epoch": 39.85989010989011, + "grad_norm": 3.1239898204803467, + "learning_rate": 3.0070054945054947e-05, + "loss": 0.0455, + "step": 14509 + }, + { + "epoch": 39.862637362637365, + "grad_norm": 10.627362251281738, + "learning_rate": 3.006868131868132e-05, + "loss": 0.2245, + "step": 14510 + }, + { + "epoch": 39.86538461538461, + "grad_norm": 13.770556449890137, + "learning_rate": 3.0067307692307694e-05, + "loss": 0.469, + "step": 14511 + }, + { + "epoch": 39.86813186813187, + "grad_norm": 15.656790733337402, + "learning_rate": 3.0065934065934064e-05, + "loss": 0.3929, + "step": 14512 + }, + { + "epoch": 39.870879120879124, + "grad_norm": 11.969674110412598, + "learning_rate": 3.006456043956044e-05, + "loss": 0.287, + "step": 14513 + }, + { + "epoch": 39.87362637362637, + "grad_norm": 10.195810317993164, + "learning_rate": 3.0063186813186817e-05, + "loss": 0.1562, + "step": 14514 + }, + { + "epoch": 39.87637362637363, + "grad_norm": 8.54440689086914, + "learning_rate": 3.0061813186813187e-05, + "loss": 0.1489, + "step": 14515 + }, + { + "epoch": 39.879120879120876, + "grad_norm": 16.34000587463379, + "learning_rate": 3.0060439560439564e-05, + "loss": 0.4036, + "step": 14516 + }, + { + "epoch": 39.88186813186813, + "grad_norm": 17.932802200317383, + "learning_rate": 3.0059065934065934e-05, + "loss": 0.5853, + "step": 14517 + }, + { + "epoch": 39.88461538461539, + "grad_norm": 14.8379487991333, + "learning_rate": 3.0057692307692308e-05, + "loss": 0.4886, + "step": 14518 + }, + { + "epoch": 39.887362637362635, + "grad_norm": 9.19736385345459, + "learning_rate": 3.0056318681318684e-05, + "loss": 0.2023, + "step": 14519 + }, + { + "epoch": 39.89010989010989, + "grad_norm": 18.272695541381836, + "learning_rate": 3.0054945054945054e-05, + "loss": 0.5785, + "step": 14520 + }, + { + "epoch": 39.892857142857146, + "grad_norm": 18.41361427307129, + "learning_rate": 3.005357142857143e-05, + "loss": 0.4249, + "step": 14521 + }, + { + "epoch": 39.895604395604394, + "grad_norm": 3.419966459274292, + "learning_rate": 3.00521978021978e-05, + "loss": 0.0451, + "step": 14522 + }, + { + "epoch": 39.89835164835165, + "grad_norm": 19.103656768798828, + "learning_rate": 3.0050824175824178e-05, + "loss": 0.7628, + "step": 14523 + }, + { + "epoch": 39.9010989010989, + "grad_norm": 14.430853843688965, + "learning_rate": 3.004945054945055e-05, + "loss": 0.3843, + "step": 14524 + }, + { + "epoch": 39.90384615384615, + "grad_norm": 12.222345352172852, + "learning_rate": 3.0048076923076925e-05, + "loss": 0.2441, + "step": 14525 + }, + { + "epoch": 39.90659340659341, + "grad_norm": 15.984295845031738, + "learning_rate": 3.0046703296703298e-05, + "loss": 0.3111, + "step": 14526 + }, + { + "epoch": 39.90934065934066, + "grad_norm": 23.59355926513672, + "learning_rate": 3.0045329670329668e-05, + "loss": 0.4485, + "step": 14527 + }, + { + "epoch": 39.91208791208791, + "grad_norm": 12.682143211364746, + "learning_rate": 3.0043956043956045e-05, + "loss": 0.2371, + "step": 14528 + }, + { + "epoch": 39.91483516483517, + "grad_norm": 8.466572761535645, + "learning_rate": 3.0042582417582422e-05, + "loss": 0.1697, + "step": 14529 + }, + { + "epoch": 39.917582417582416, + "grad_norm": 5.44485330581665, + "learning_rate": 3.0041208791208792e-05, + "loss": 0.1315, + "step": 14530 + }, + { + "epoch": 39.92032967032967, + "grad_norm": 11.518308639526367, + "learning_rate": 3.003983516483517e-05, + "loss": 0.1453, + "step": 14531 + }, + { + "epoch": 39.92307692307692, + "grad_norm": 15.096404075622559, + "learning_rate": 3.003846153846154e-05, + "loss": 0.3143, + "step": 14532 + }, + { + "epoch": 39.925824175824175, + "grad_norm": 10.273693084716797, + "learning_rate": 3.0037087912087912e-05, + "loss": 0.2488, + "step": 14533 + }, + { + "epoch": 39.92857142857143, + "grad_norm": 5.590338706970215, + "learning_rate": 3.003571428571429e-05, + "loss": 0.0895, + "step": 14534 + }, + { + "epoch": 39.93131868131868, + "grad_norm": 8.769675254821777, + "learning_rate": 3.003434065934066e-05, + "loss": 0.1547, + "step": 14535 + }, + { + "epoch": 39.934065934065934, + "grad_norm": 10.971609115600586, + "learning_rate": 3.0032967032967036e-05, + "loss": 0.2697, + "step": 14536 + }, + { + "epoch": 39.93681318681319, + "grad_norm": 9.554670333862305, + "learning_rate": 3.0031593406593406e-05, + "loss": 0.1801, + "step": 14537 + }, + { + "epoch": 39.93956043956044, + "grad_norm": 16.523176193237305, + "learning_rate": 3.0030219780219782e-05, + "loss": 0.316, + "step": 14538 + }, + { + "epoch": 39.94230769230769, + "grad_norm": 5.942075252532959, + "learning_rate": 3.0028846153846156e-05, + "loss": 0.1052, + "step": 14539 + }, + { + "epoch": 39.94505494505494, + "grad_norm": 10.887479782104492, + "learning_rate": 3.002747252747253e-05, + "loss": 0.1739, + "step": 14540 + }, + { + "epoch": 39.9478021978022, + "grad_norm": 21.021339416503906, + "learning_rate": 3.0026098901098903e-05, + "loss": 0.5443, + "step": 14541 + }, + { + "epoch": 39.95054945054945, + "grad_norm": 21.832942962646484, + "learning_rate": 3.0024725274725273e-05, + "loss": 0.8333, + "step": 14542 + }, + { + "epoch": 39.9532967032967, + "grad_norm": 15.741280555725098, + "learning_rate": 3.002335164835165e-05, + "loss": 0.3566, + "step": 14543 + }, + { + "epoch": 39.956043956043956, + "grad_norm": 17.244739532470703, + "learning_rate": 3.0021978021978026e-05, + "loss": 0.1954, + "step": 14544 + }, + { + "epoch": 39.95879120879121, + "grad_norm": 17.72710609436035, + "learning_rate": 3.0020604395604396e-05, + "loss": 0.3703, + "step": 14545 + }, + { + "epoch": 39.96153846153846, + "grad_norm": 5.785271644592285, + "learning_rate": 3.0019230769230773e-05, + "loss": 0.0601, + "step": 14546 + }, + { + "epoch": 39.964285714285715, + "grad_norm": 8.242931365966797, + "learning_rate": 3.0017857142857143e-05, + "loss": 0.2275, + "step": 14547 + }, + { + "epoch": 39.967032967032964, + "grad_norm": 14.162135124206543, + "learning_rate": 3.0016483516483516e-05, + "loss": 0.4576, + "step": 14548 + }, + { + "epoch": 39.96978021978022, + "grad_norm": 13.024443626403809, + "learning_rate": 3.0015109890109893e-05, + "loss": 0.4625, + "step": 14549 + }, + { + "epoch": 39.972527472527474, + "grad_norm": 20.9265193939209, + "learning_rate": 3.0013736263736263e-05, + "loss": 0.6475, + "step": 14550 + }, + { + "epoch": 39.97527472527472, + "grad_norm": 10.069381713867188, + "learning_rate": 3.001236263736264e-05, + "loss": 0.1632, + "step": 14551 + }, + { + "epoch": 39.97802197802198, + "grad_norm": 6.827116966247559, + "learning_rate": 3.001098901098901e-05, + "loss": 0.1236, + "step": 14552 + }, + { + "epoch": 39.98076923076923, + "grad_norm": 17.704782485961914, + "learning_rate": 3.0009615384615387e-05, + "loss": 0.5031, + "step": 14553 + }, + { + "epoch": 39.98351648351648, + "grad_norm": 19.72224235534668, + "learning_rate": 3.000824175824176e-05, + "loss": 0.4255, + "step": 14554 + }, + { + "epoch": 39.98626373626374, + "grad_norm": 14.214129447937012, + "learning_rate": 3.0006868131868134e-05, + "loss": 0.2476, + "step": 14555 + }, + { + "epoch": 39.98901098901099, + "grad_norm": 7.248871326446533, + "learning_rate": 3.0005494505494507e-05, + "loss": 0.1532, + "step": 14556 + }, + { + "epoch": 39.99175824175824, + "grad_norm": 13.599209785461426, + "learning_rate": 3.0004120879120877e-05, + "loss": 0.4567, + "step": 14557 + }, + { + "epoch": 39.994505494505496, + "grad_norm": 7.090104103088379, + "learning_rate": 3.0002747252747254e-05, + "loss": 0.1142, + "step": 14558 + }, + { + "epoch": 39.997252747252745, + "grad_norm": 12.048318862915039, + "learning_rate": 3.000137362637363e-05, + "loss": 0.0883, + "step": 14559 + }, + { + "epoch": 40.0, + "grad_norm": 3.1508429050445557, + "learning_rate": 3e-05, + "loss": 0.03, + "step": 14560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.6212121212121212, + "eval_f1": 0.6087283146137128, + "eval_f1_DuraRiadoRio_64x64": 0.5221674876847291, + "eval_f1_Mole_64x64": 0.55, + "eval_f1_Quebrado_64x64": 0.7629427792915532, + "eval_f1_RiadoRio_64x64": 0.5764966740576497, + "eval_f1_RioFechado_64x64": 0.6320346320346321, + "eval_loss": 2.2705280780792236, + "eval_precision": 0.752651592053671, + "eval_precision_DuraRiadoRio_64x64": 0.8983050847457628, + "eval_precision_Mole_64x64": 0.9821428571428571, + "eval_precision_Quebrado_64x64": 0.6278026905829597, + "eval_precision_RiadoRio_64x64": 0.43478260869565216, + "eval_precision_RioFechado_64x64": 0.8202247191011236, + "eval_recall": 0.6183139774318425, + "eval_recall_DuraRiadoRio_64x64": 0.3680555555555556, + "eval_recall_Mole_64x64": 0.3819444444444444, + "eval_recall_Quebrado_64x64": 0.9722222222222222, + "eval_recall_RiadoRio_64x64": 0.8552631578947368, + "eval_recall_RioFechado_64x64": 0.5140845070422535, + "eval_runtime": 1.7397, + "eval_samples_per_second": 417.308, + "eval_steps_per_second": 26.441, + "step": 14560 + }, + { + "epoch": 40.002747252747255, + "grad_norm": 17.89122772216797, + "learning_rate": 2.9998626373626378e-05, + "loss": 0.2816, + "step": 14561 + }, + { + "epoch": 40.005494505494504, + "grad_norm": 9.865513801574707, + "learning_rate": 2.9997252747252748e-05, + "loss": 0.2156, + "step": 14562 + }, + { + "epoch": 40.00824175824176, + "grad_norm": 20.5864200592041, + "learning_rate": 2.999587912087912e-05, + "loss": 0.6934, + "step": 14563 + }, + { + "epoch": 40.010989010989015, + "grad_norm": 14.351116180419922, + "learning_rate": 2.9994505494505498e-05, + "loss": 0.3727, + "step": 14564 + }, + { + "epoch": 40.01373626373626, + "grad_norm": 17.100709915161133, + "learning_rate": 2.9993131868131868e-05, + "loss": 0.4511, + "step": 14565 + }, + { + "epoch": 40.01648351648352, + "grad_norm": 12.181400299072266, + "learning_rate": 2.9991758241758245e-05, + "loss": 0.2071, + "step": 14566 + }, + { + "epoch": 40.01923076923077, + "grad_norm": 7.985023498535156, + "learning_rate": 2.9990384615384615e-05, + "loss": 0.1447, + "step": 14567 + }, + { + "epoch": 40.02197802197802, + "grad_norm": 19.766387939453125, + "learning_rate": 2.998901098901099e-05, + "loss": 0.6432, + "step": 14568 + }, + { + "epoch": 40.02472527472528, + "grad_norm": 11.191910743713379, + "learning_rate": 2.9987637362637365e-05, + "loss": 0.368, + "step": 14569 + }, + { + "epoch": 40.027472527472526, + "grad_norm": 14.53110122680664, + "learning_rate": 2.9986263736263738e-05, + "loss": 0.4222, + "step": 14570 + }, + { + "epoch": 40.03021978021978, + "grad_norm": 12.030014991760254, + "learning_rate": 2.998489010989011e-05, + "loss": 0.4889, + "step": 14571 + }, + { + "epoch": 40.032967032967036, + "grad_norm": 16.131834030151367, + "learning_rate": 2.998351648351648e-05, + "loss": 0.2942, + "step": 14572 + }, + { + "epoch": 40.035714285714285, + "grad_norm": 11.584287643432617, + "learning_rate": 2.998214285714286e-05, + "loss": 0.5094, + "step": 14573 + }, + { + "epoch": 40.03846153846154, + "grad_norm": 3.0045554637908936, + "learning_rate": 2.9980769230769235e-05, + "loss": 0.0523, + "step": 14574 + }, + { + "epoch": 40.04120879120879, + "grad_norm": 19.521738052368164, + "learning_rate": 2.9979395604395605e-05, + "loss": 0.622, + "step": 14575 + }, + { + "epoch": 40.043956043956044, + "grad_norm": 19.726116180419922, + "learning_rate": 2.9978021978021982e-05, + "loss": 0.5326, + "step": 14576 + }, + { + "epoch": 40.0467032967033, + "grad_norm": 10.044360160827637, + "learning_rate": 2.9976648351648352e-05, + "loss": 0.3719, + "step": 14577 + }, + { + "epoch": 40.04945054945055, + "grad_norm": 11.240869522094727, + "learning_rate": 2.9975274725274725e-05, + "loss": 0.2159, + "step": 14578 + }, + { + "epoch": 40.0521978021978, + "grad_norm": 13.286259651184082, + "learning_rate": 2.9973901098901102e-05, + "loss": 0.2972, + "step": 14579 + }, + { + "epoch": 40.05494505494506, + "grad_norm": 11.779135704040527, + "learning_rate": 2.9972527472527472e-05, + "loss": 0.2561, + "step": 14580 + }, + { + "epoch": 40.05769230769231, + "grad_norm": 12.750520706176758, + "learning_rate": 2.997115384615385e-05, + "loss": 0.3368, + "step": 14581 + }, + { + "epoch": 40.06043956043956, + "grad_norm": 16.194883346557617, + "learning_rate": 2.996978021978022e-05, + "loss": 0.3833, + "step": 14582 + }, + { + "epoch": 40.06318681318681, + "grad_norm": 15.575078010559082, + "learning_rate": 2.9968406593406596e-05, + "loss": 0.2917, + "step": 14583 + }, + { + "epoch": 40.065934065934066, + "grad_norm": 7.995515823364258, + "learning_rate": 2.996703296703297e-05, + "loss": 0.1149, + "step": 14584 + }, + { + "epoch": 40.06868131868132, + "grad_norm": 13.456765174865723, + "learning_rate": 2.9965659340659343e-05, + "loss": 0.368, + "step": 14585 + }, + { + "epoch": 40.07142857142857, + "grad_norm": 9.149375915527344, + "learning_rate": 2.9964285714285716e-05, + "loss": 0.3015, + "step": 14586 + }, + { + "epoch": 40.074175824175825, + "grad_norm": 2.804258108139038, + "learning_rate": 2.9962912087912086e-05, + "loss": 0.0498, + "step": 14587 + }, + { + "epoch": 40.07692307692308, + "grad_norm": 11.591483116149902, + "learning_rate": 2.9961538461538463e-05, + "loss": 0.5192, + "step": 14588 + }, + { + "epoch": 40.07967032967033, + "grad_norm": 10.685778617858887, + "learning_rate": 2.996016483516484e-05, + "loss": 0.3185, + "step": 14589 + }, + { + "epoch": 40.082417582417584, + "grad_norm": 13.84553337097168, + "learning_rate": 2.995879120879121e-05, + "loss": 0.2401, + "step": 14590 + }, + { + "epoch": 40.08516483516483, + "grad_norm": 10.539667129516602, + "learning_rate": 2.9957417582417587e-05, + "loss": 0.1542, + "step": 14591 + }, + { + "epoch": 40.08791208791209, + "grad_norm": 10.390331268310547, + "learning_rate": 2.9956043956043957e-05, + "loss": 0.1575, + "step": 14592 + }, + { + "epoch": 40.09065934065934, + "grad_norm": 16.916841506958008, + "learning_rate": 2.995467032967033e-05, + "loss": 0.4119, + "step": 14593 + }, + { + "epoch": 40.09340659340659, + "grad_norm": 15.82672119140625, + "learning_rate": 2.9953296703296707e-05, + "loss": 0.3797, + "step": 14594 + }, + { + "epoch": 40.09615384615385, + "grad_norm": 6.335203647613525, + "learning_rate": 2.9951923076923077e-05, + "loss": 0.1443, + "step": 14595 + }, + { + "epoch": 40.0989010989011, + "grad_norm": 18.500837326049805, + "learning_rate": 2.9950549450549454e-05, + "loss": 0.688, + "step": 14596 + }, + { + "epoch": 40.10164835164835, + "grad_norm": 7.817336559295654, + "learning_rate": 2.9949175824175824e-05, + "loss": 0.1639, + "step": 14597 + }, + { + "epoch": 40.104395604395606, + "grad_norm": 13.738926887512207, + "learning_rate": 2.99478021978022e-05, + "loss": 0.2658, + "step": 14598 + }, + { + "epoch": 40.107142857142854, + "grad_norm": 4.894653797149658, + "learning_rate": 2.9946428571428574e-05, + "loss": 0.0858, + "step": 14599 + }, + { + "epoch": 40.10989010989011, + "grad_norm": 14.34789752960205, + "learning_rate": 2.9945054945054947e-05, + "loss": 0.4026, + "step": 14600 + }, + { + "epoch": 40.112637362637365, + "grad_norm": 13.674788475036621, + "learning_rate": 2.994368131868132e-05, + "loss": 0.3353, + "step": 14601 + }, + { + "epoch": 40.11538461538461, + "grad_norm": 2.1349074840545654, + "learning_rate": 2.994230769230769e-05, + "loss": 0.0327, + "step": 14602 + }, + { + "epoch": 40.11813186813187, + "grad_norm": 8.801925659179688, + "learning_rate": 2.9940934065934067e-05, + "loss": 0.2116, + "step": 14603 + }, + { + "epoch": 40.120879120879124, + "grad_norm": 14.03787612915039, + "learning_rate": 2.9939560439560444e-05, + "loss": 0.3737, + "step": 14604 + }, + { + "epoch": 40.12362637362637, + "grad_norm": 12.56447982788086, + "learning_rate": 2.9938186813186814e-05, + "loss": 0.4243, + "step": 14605 + }, + { + "epoch": 40.12637362637363, + "grad_norm": 14.846349716186523, + "learning_rate": 2.993681318681319e-05, + "loss": 0.4213, + "step": 14606 + }, + { + "epoch": 40.129120879120876, + "grad_norm": 8.774797439575195, + "learning_rate": 2.993543956043956e-05, + "loss": 0.248, + "step": 14607 + }, + { + "epoch": 40.13186813186813, + "grad_norm": 5.952369689941406, + "learning_rate": 2.9934065934065934e-05, + "loss": 0.0635, + "step": 14608 + }, + { + "epoch": 40.13461538461539, + "grad_norm": 15.647326469421387, + "learning_rate": 2.993269230769231e-05, + "loss": 0.5032, + "step": 14609 + }, + { + "epoch": 40.137362637362635, + "grad_norm": 13.580079078674316, + "learning_rate": 2.993131868131868e-05, + "loss": 0.183, + "step": 14610 + }, + { + "epoch": 40.14010989010989, + "grad_norm": 6.446803092956543, + "learning_rate": 2.9929945054945058e-05, + "loss": 0.1698, + "step": 14611 + }, + { + "epoch": 40.142857142857146, + "grad_norm": 9.18923282623291, + "learning_rate": 2.9928571428571428e-05, + "loss": 0.1614, + "step": 14612 + }, + { + "epoch": 40.145604395604394, + "grad_norm": 6.296559810638428, + "learning_rate": 2.9927197802197805e-05, + "loss": 0.0651, + "step": 14613 + }, + { + "epoch": 40.14835164835165, + "grad_norm": 10.338244438171387, + "learning_rate": 2.9925824175824178e-05, + "loss": 0.445, + "step": 14614 + }, + { + "epoch": 40.1510989010989, + "grad_norm": 5.843624591827393, + "learning_rate": 2.992445054945055e-05, + "loss": 0.1483, + "step": 14615 + }, + { + "epoch": 40.15384615384615, + "grad_norm": 17.342060089111328, + "learning_rate": 2.9923076923076925e-05, + "loss": 0.5058, + "step": 14616 + }, + { + "epoch": 40.15659340659341, + "grad_norm": 14.105237007141113, + "learning_rate": 2.9921703296703295e-05, + "loss": 0.2535, + "step": 14617 + }, + { + "epoch": 40.15934065934066, + "grad_norm": 9.362022399902344, + "learning_rate": 2.9920329670329672e-05, + "loss": 0.3868, + "step": 14618 + }, + { + "epoch": 40.16208791208791, + "grad_norm": 10.421785354614258, + "learning_rate": 2.991895604395605e-05, + "loss": 0.1549, + "step": 14619 + }, + { + "epoch": 40.16483516483517, + "grad_norm": 2.362919330596924, + "learning_rate": 2.991758241758242e-05, + "loss": 0.0532, + "step": 14620 + }, + { + "epoch": 40.167582417582416, + "grad_norm": 18.723085403442383, + "learning_rate": 2.9916208791208796e-05, + "loss": 0.3719, + "step": 14621 + }, + { + "epoch": 40.17032967032967, + "grad_norm": 6.236783027648926, + "learning_rate": 2.9914835164835166e-05, + "loss": 0.2269, + "step": 14622 + }, + { + "epoch": 40.17307692307692, + "grad_norm": 17.038480758666992, + "learning_rate": 2.991346153846154e-05, + "loss": 0.6947, + "step": 14623 + }, + { + "epoch": 40.175824175824175, + "grad_norm": 25.80664825439453, + "learning_rate": 2.9912087912087916e-05, + "loss": 1.2988, + "step": 14624 + }, + { + "epoch": 40.17857142857143, + "grad_norm": 10.572751998901367, + "learning_rate": 2.9910714285714286e-05, + "loss": 0.2235, + "step": 14625 + }, + { + "epoch": 40.18131868131868, + "grad_norm": 5.09040641784668, + "learning_rate": 2.9909340659340663e-05, + "loss": 0.1399, + "step": 14626 + }, + { + "epoch": 40.184065934065934, + "grad_norm": 9.36746883392334, + "learning_rate": 2.9907967032967033e-05, + "loss": 0.2197, + "step": 14627 + }, + { + "epoch": 40.18681318681319, + "grad_norm": 6.00798225402832, + "learning_rate": 2.990659340659341e-05, + "loss": 0.2082, + "step": 14628 + }, + { + "epoch": 40.18956043956044, + "grad_norm": 6.8737406730651855, + "learning_rate": 2.9905219780219783e-05, + "loss": 0.1558, + "step": 14629 + }, + { + "epoch": 40.19230769230769, + "grad_norm": 8.262412071228027, + "learning_rate": 2.9903846153846156e-05, + "loss": 0.1819, + "step": 14630 + }, + { + "epoch": 40.19505494505494, + "grad_norm": 11.986331939697266, + "learning_rate": 2.990247252747253e-05, + "loss": 0.1777, + "step": 14631 + }, + { + "epoch": 40.1978021978022, + "grad_norm": 11.711502075195312, + "learning_rate": 2.99010989010989e-05, + "loss": 0.3074, + "step": 14632 + }, + { + "epoch": 40.20054945054945, + "grad_norm": 14.389466285705566, + "learning_rate": 2.9899725274725276e-05, + "loss": 0.4817, + "step": 14633 + }, + { + "epoch": 40.2032967032967, + "grad_norm": 3.3973164558410645, + "learning_rate": 2.9898351648351653e-05, + "loss": 0.0521, + "step": 14634 + }, + { + "epoch": 40.206043956043956, + "grad_norm": 5.732739448547363, + "learning_rate": 2.9896978021978023e-05, + "loss": 0.1215, + "step": 14635 + }, + { + "epoch": 40.20879120879121, + "grad_norm": 7.438292026519775, + "learning_rate": 2.98956043956044e-05, + "loss": 0.1873, + "step": 14636 + }, + { + "epoch": 40.21153846153846, + "grad_norm": 14.576166152954102, + "learning_rate": 2.989423076923077e-05, + "loss": 0.2864, + "step": 14637 + }, + { + "epoch": 40.214285714285715, + "grad_norm": 23.69700050354004, + "learning_rate": 2.9892857142857143e-05, + "loss": 0.3087, + "step": 14638 + }, + { + "epoch": 40.217032967032964, + "grad_norm": 14.533401489257812, + "learning_rate": 2.989148351648352e-05, + "loss": 0.1654, + "step": 14639 + }, + { + "epoch": 40.21978021978022, + "grad_norm": 16.970380783081055, + "learning_rate": 2.989010989010989e-05, + "loss": 0.5875, + "step": 14640 + }, + { + "epoch": 40.222527472527474, + "grad_norm": 7.779328346252441, + "learning_rate": 2.9888736263736267e-05, + "loss": 0.1959, + "step": 14641 + }, + { + "epoch": 40.22527472527472, + "grad_norm": 5.239048004150391, + "learning_rate": 2.9887362637362637e-05, + "loss": 0.0708, + "step": 14642 + }, + { + "epoch": 40.22802197802198, + "grad_norm": 19.280141830444336, + "learning_rate": 2.9885989010989014e-05, + "loss": 0.4223, + "step": 14643 + }, + { + "epoch": 40.23076923076923, + "grad_norm": 16.12792205810547, + "learning_rate": 2.9884615384615387e-05, + "loss": 0.4032, + "step": 14644 + }, + { + "epoch": 40.23351648351648, + "grad_norm": 12.030492782592773, + "learning_rate": 2.988324175824176e-05, + "loss": 0.2634, + "step": 14645 + }, + { + "epoch": 40.23626373626374, + "grad_norm": 9.373787879943848, + "learning_rate": 2.9881868131868134e-05, + "loss": 0.2913, + "step": 14646 + }, + { + "epoch": 40.239010989010985, + "grad_norm": 8.796512603759766, + "learning_rate": 2.9880494505494504e-05, + "loss": 0.1434, + "step": 14647 + }, + { + "epoch": 40.24175824175824, + "grad_norm": 9.104995727539062, + "learning_rate": 2.987912087912088e-05, + "loss": 0.122, + "step": 14648 + }, + { + "epoch": 40.244505494505496, + "grad_norm": 12.450023651123047, + "learning_rate": 2.9877747252747258e-05, + "loss": 0.3417, + "step": 14649 + }, + { + "epoch": 40.247252747252745, + "grad_norm": 8.820220947265625, + "learning_rate": 2.9876373626373628e-05, + "loss": 0.1794, + "step": 14650 + }, + { + "epoch": 40.25, + "grad_norm": 16.64107322692871, + "learning_rate": 2.9875000000000004e-05, + "loss": 0.3772, + "step": 14651 + }, + { + "epoch": 40.252747252747255, + "grad_norm": 21.82402229309082, + "learning_rate": 2.9873626373626375e-05, + "loss": 0.425, + "step": 14652 + }, + { + "epoch": 40.255494505494504, + "grad_norm": 14.025518417358398, + "learning_rate": 2.9872252747252748e-05, + "loss": 0.4567, + "step": 14653 + }, + { + "epoch": 40.25824175824176, + "grad_norm": 13.211334228515625, + "learning_rate": 2.9870879120879125e-05, + "loss": 0.295, + "step": 14654 + }, + { + "epoch": 40.260989010989015, + "grad_norm": 15.001401901245117, + "learning_rate": 2.9869505494505495e-05, + "loss": 0.2882, + "step": 14655 + }, + { + "epoch": 40.26373626373626, + "grad_norm": 14.28398609161377, + "learning_rate": 2.986813186813187e-05, + "loss": 0.2357, + "step": 14656 + }, + { + "epoch": 40.26648351648352, + "grad_norm": 22.328243255615234, + "learning_rate": 2.986675824175824e-05, + "loss": 0.9828, + "step": 14657 + }, + { + "epoch": 40.26923076923077, + "grad_norm": 5.289479732513428, + "learning_rate": 2.986538461538462e-05, + "loss": 0.1028, + "step": 14658 + }, + { + "epoch": 40.27197802197802, + "grad_norm": 12.528610229492188, + "learning_rate": 2.9864010989010992e-05, + "loss": 0.4972, + "step": 14659 + }, + { + "epoch": 40.27472527472528, + "grad_norm": 16.230857849121094, + "learning_rate": 2.9862637362637362e-05, + "loss": 0.3803, + "step": 14660 + }, + { + "epoch": 40.277472527472526, + "grad_norm": 10.041306495666504, + "learning_rate": 2.986126373626374e-05, + "loss": 0.2155, + "step": 14661 + }, + { + "epoch": 40.28021978021978, + "grad_norm": 15.817363739013672, + "learning_rate": 2.985989010989011e-05, + "loss": 0.4013, + "step": 14662 + }, + { + "epoch": 40.282967032967036, + "grad_norm": 8.272188186645508, + "learning_rate": 2.9858516483516485e-05, + "loss": 0.1922, + "step": 14663 + }, + { + "epoch": 40.285714285714285, + "grad_norm": 7.691586017608643, + "learning_rate": 2.9857142857142862e-05, + "loss": 0.1412, + "step": 14664 + }, + { + "epoch": 40.28846153846154, + "grad_norm": 3.484144449234009, + "learning_rate": 2.9855769230769232e-05, + "loss": 0.082, + "step": 14665 + }, + { + "epoch": 40.29120879120879, + "grad_norm": 18.899017333984375, + "learning_rate": 2.985439560439561e-05, + "loss": 0.3377, + "step": 14666 + }, + { + "epoch": 40.293956043956044, + "grad_norm": 13.032648086547852, + "learning_rate": 2.985302197802198e-05, + "loss": 0.4498, + "step": 14667 + }, + { + "epoch": 40.2967032967033, + "grad_norm": 10.924822807312012, + "learning_rate": 2.9851648351648352e-05, + "loss": 0.169, + "step": 14668 + }, + { + "epoch": 40.29945054945055, + "grad_norm": 11.863165855407715, + "learning_rate": 2.985027472527473e-05, + "loss": 0.1818, + "step": 14669 + }, + { + "epoch": 40.3021978021978, + "grad_norm": 12.335180282592773, + "learning_rate": 2.98489010989011e-05, + "loss": 0.2604, + "step": 14670 + }, + { + "epoch": 40.30494505494506, + "grad_norm": 11.19330883026123, + "learning_rate": 2.9847527472527476e-05, + "loss": 0.2846, + "step": 14671 + }, + { + "epoch": 40.30769230769231, + "grad_norm": 7.538136959075928, + "learning_rate": 2.9846153846153846e-05, + "loss": 0.2606, + "step": 14672 + }, + { + "epoch": 40.31043956043956, + "grad_norm": 7.211813449859619, + "learning_rate": 2.9844780219780223e-05, + "loss": 0.1347, + "step": 14673 + }, + { + "epoch": 40.31318681318681, + "grad_norm": 5.20348596572876, + "learning_rate": 2.9843406593406593e-05, + "loss": 0.0538, + "step": 14674 + }, + { + "epoch": 40.315934065934066, + "grad_norm": 22.79124641418457, + "learning_rate": 2.9842032967032966e-05, + "loss": 0.4881, + "step": 14675 + }, + { + "epoch": 40.31868131868132, + "grad_norm": 21.045652389526367, + "learning_rate": 2.9840659340659343e-05, + "loss": 0.711, + "step": 14676 + }, + { + "epoch": 40.32142857142857, + "grad_norm": 10.14280891418457, + "learning_rate": 2.9839285714285713e-05, + "loss": 0.2122, + "step": 14677 + }, + { + "epoch": 40.324175824175825, + "grad_norm": 6.7194414138793945, + "learning_rate": 2.983791208791209e-05, + "loss": 0.1533, + "step": 14678 + }, + { + "epoch": 40.32692307692308, + "grad_norm": 16.544872283935547, + "learning_rate": 2.983653846153846e-05, + "loss": 0.4933, + "step": 14679 + }, + { + "epoch": 40.32967032967033, + "grad_norm": 14.497753143310547, + "learning_rate": 2.9835164835164837e-05, + "loss": 0.4546, + "step": 14680 + }, + { + "epoch": 40.332417582417584, + "grad_norm": 20.972156524658203, + "learning_rate": 2.9833791208791213e-05, + "loss": 0.7518, + "step": 14681 + }, + { + "epoch": 40.33516483516483, + "grad_norm": 11.859637260437012, + "learning_rate": 2.9832417582417583e-05, + "loss": 0.2506, + "step": 14682 + }, + { + "epoch": 40.33791208791209, + "grad_norm": 17.16925811767578, + "learning_rate": 2.9831043956043957e-05, + "loss": 0.6823, + "step": 14683 + }, + { + "epoch": 40.34065934065934, + "grad_norm": 4.628447532653809, + "learning_rate": 2.9829670329670327e-05, + "loss": 0.0891, + "step": 14684 + }, + { + "epoch": 40.34340659340659, + "grad_norm": 9.775022506713867, + "learning_rate": 2.9828296703296704e-05, + "loss": 0.1615, + "step": 14685 + }, + { + "epoch": 40.34615384615385, + "grad_norm": 17.741920471191406, + "learning_rate": 2.982692307692308e-05, + "loss": 0.2664, + "step": 14686 + }, + { + "epoch": 40.3489010989011, + "grad_norm": 12.347993850708008, + "learning_rate": 2.982554945054945e-05, + "loss": 0.344, + "step": 14687 + }, + { + "epoch": 40.35164835164835, + "grad_norm": 15.255753517150879, + "learning_rate": 2.9824175824175827e-05, + "loss": 0.4017, + "step": 14688 + }, + { + "epoch": 40.354395604395606, + "grad_norm": 8.52116870880127, + "learning_rate": 2.9822802197802197e-05, + "loss": 0.1712, + "step": 14689 + }, + { + "epoch": 40.357142857142854, + "grad_norm": 9.555185317993164, + "learning_rate": 2.982142857142857e-05, + "loss": 0.2469, + "step": 14690 + }, + { + "epoch": 40.35989010989011, + "grad_norm": 14.980964660644531, + "learning_rate": 2.9820054945054948e-05, + "loss": 0.2849, + "step": 14691 + }, + { + "epoch": 40.362637362637365, + "grad_norm": 14.854171752929688, + "learning_rate": 2.9818681318681318e-05, + "loss": 0.4942, + "step": 14692 + }, + { + "epoch": 40.36538461538461, + "grad_norm": 14.487110137939453, + "learning_rate": 2.9817307692307694e-05, + "loss": 0.3925, + "step": 14693 + }, + { + "epoch": 40.36813186813187, + "grad_norm": 12.615935325622559, + "learning_rate": 2.9815934065934064e-05, + "loss": 0.2442, + "step": 14694 + }, + { + "epoch": 40.370879120879124, + "grad_norm": 17.459924697875977, + "learning_rate": 2.981456043956044e-05, + "loss": 0.4943, + "step": 14695 + }, + { + "epoch": 40.37362637362637, + "grad_norm": 8.805194854736328, + "learning_rate": 2.9813186813186818e-05, + "loss": 0.1204, + "step": 14696 + }, + { + "epoch": 40.37637362637363, + "grad_norm": 10.716338157653809, + "learning_rate": 2.9811813186813188e-05, + "loss": 0.1876, + "step": 14697 + }, + { + "epoch": 40.379120879120876, + "grad_norm": 5.524768352508545, + "learning_rate": 2.981043956043956e-05, + "loss": 0.0616, + "step": 14698 + }, + { + "epoch": 40.38186813186813, + "grad_norm": 13.415855407714844, + "learning_rate": 2.980906593406593e-05, + "loss": 0.3482, + "step": 14699 + }, + { + "epoch": 40.38461538461539, + "grad_norm": 3.2734405994415283, + "learning_rate": 2.9807692307692308e-05, + "loss": 0.0656, + "step": 14700 + }, + { + "epoch": 40.387362637362635, + "grad_norm": 9.9591064453125, + "learning_rate": 2.9806318681318685e-05, + "loss": 0.294, + "step": 14701 + }, + { + "epoch": 40.39010989010989, + "grad_norm": 19.004051208496094, + "learning_rate": 2.9804945054945055e-05, + "loss": 0.7799, + "step": 14702 + }, + { + "epoch": 40.392857142857146, + "grad_norm": 14.698570251464844, + "learning_rate": 2.9803571428571432e-05, + "loss": 0.4109, + "step": 14703 + }, + { + "epoch": 40.395604395604394, + "grad_norm": 11.57473373413086, + "learning_rate": 2.9802197802197802e-05, + "loss": 0.205, + "step": 14704 + }, + { + "epoch": 40.39835164835165, + "grad_norm": 10.595921516418457, + "learning_rate": 2.9800824175824175e-05, + "loss": 0.2339, + "step": 14705 + }, + { + "epoch": 40.4010989010989, + "grad_norm": 20.019506454467773, + "learning_rate": 2.9799450549450552e-05, + "loss": 0.4678, + "step": 14706 + }, + { + "epoch": 40.40384615384615, + "grad_norm": 9.5759916305542, + "learning_rate": 2.9798076923076922e-05, + "loss": 0.1842, + "step": 14707 + }, + { + "epoch": 40.40659340659341, + "grad_norm": 20.34260368347168, + "learning_rate": 2.97967032967033e-05, + "loss": 0.3335, + "step": 14708 + }, + { + "epoch": 40.40934065934066, + "grad_norm": 12.84485912322998, + "learning_rate": 2.979532967032967e-05, + "loss": 0.1632, + "step": 14709 + }, + { + "epoch": 40.41208791208791, + "grad_norm": 17.856992721557617, + "learning_rate": 2.9793956043956046e-05, + "loss": 0.3959, + "step": 14710 + }, + { + "epoch": 40.41483516483517, + "grad_norm": 7.4157819747924805, + "learning_rate": 2.9792582417582422e-05, + "loss": 0.1171, + "step": 14711 + }, + { + "epoch": 40.417582417582416, + "grad_norm": 16.345643997192383, + "learning_rate": 2.9791208791208792e-05, + "loss": 0.3926, + "step": 14712 + }, + { + "epoch": 40.42032967032967, + "grad_norm": 12.194908142089844, + "learning_rate": 2.9789835164835166e-05, + "loss": 0.2492, + "step": 14713 + }, + { + "epoch": 40.42307692307692, + "grad_norm": 21.144699096679688, + "learning_rate": 2.9788461538461536e-05, + "loss": 0.6557, + "step": 14714 + }, + { + "epoch": 40.425824175824175, + "grad_norm": 6.337169647216797, + "learning_rate": 2.9787087912087913e-05, + "loss": 0.0798, + "step": 14715 + }, + { + "epoch": 40.42857142857143, + "grad_norm": 15.446415901184082, + "learning_rate": 2.978571428571429e-05, + "loss": 0.3568, + "step": 14716 + }, + { + "epoch": 40.43131868131868, + "grad_norm": 3.336284637451172, + "learning_rate": 2.978434065934066e-05, + "loss": 0.0562, + "step": 14717 + }, + { + "epoch": 40.434065934065934, + "grad_norm": 5.495658874511719, + "learning_rate": 2.9782967032967036e-05, + "loss": 0.143, + "step": 14718 + }, + { + "epoch": 40.43681318681319, + "grad_norm": 4.210504531860352, + "learning_rate": 2.9781593406593406e-05, + "loss": 0.1211, + "step": 14719 + }, + { + "epoch": 40.43956043956044, + "grad_norm": 7.206820487976074, + "learning_rate": 2.978021978021978e-05, + "loss": 0.149, + "step": 14720 + }, + { + "epoch": 40.44230769230769, + "grad_norm": 16.685272216796875, + "learning_rate": 2.9778846153846157e-05, + "loss": 0.3501, + "step": 14721 + }, + { + "epoch": 40.44505494505494, + "grad_norm": 18.798242568969727, + "learning_rate": 2.9777472527472527e-05, + "loss": 0.612, + "step": 14722 + }, + { + "epoch": 40.4478021978022, + "grad_norm": 11.753602981567383, + "learning_rate": 2.9776098901098903e-05, + "loss": 0.3046, + "step": 14723 + }, + { + "epoch": 40.45054945054945, + "grad_norm": 2.7436888217926025, + "learning_rate": 2.9774725274725273e-05, + "loss": 0.0481, + "step": 14724 + }, + { + "epoch": 40.4532967032967, + "grad_norm": 4.620402812957764, + "learning_rate": 2.977335164835165e-05, + "loss": 0.0977, + "step": 14725 + }, + { + "epoch": 40.456043956043956, + "grad_norm": 10.708977699279785, + "learning_rate": 2.9771978021978027e-05, + "loss": 0.1759, + "step": 14726 + }, + { + "epoch": 40.45879120879121, + "grad_norm": 5.25506067276001, + "learning_rate": 2.9770604395604397e-05, + "loss": 0.1089, + "step": 14727 + }, + { + "epoch": 40.46153846153846, + "grad_norm": 17.20186996459961, + "learning_rate": 2.976923076923077e-05, + "loss": 0.546, + "step": 14728 + }, + { + "epoch": 40.464285714285715, + "grad_norm": 8.987311363220215, + "learning_rate": 2.976785714285714e-05, + "loss": 0.2379, + "step": 14729 + }, + { + "epoch": 40.467032967032964, + "grad_norm": 20.00839614868164, + "learning_rate": 2.9766483516483517e-05, + "loss": 0.612, + "step": 14730 + }, + { + "epoch": 40.46978021978022, + "grad_norm": 8.266410827636719, + "learning_rate": 2.9765109890109894e-05, + "loss": 0.1711, + "step": 14731 + }, + { + "epoch": 40.472527472527474, + "grad_norm": 27.145313262939453, + "learning_rate": 2.9763736263736264e-05, + "loss": 0.7172, + "step": 14732 + }, + { + "epoch": 40.47527472527472, + "grad_norm": 9.896206855773926, + "learning_rate": 2.976236263736264e-05, + "loss": 0.1303, + "step": 14733 + }, + { + "epoch": 40.47802197802198, + "grad_norm": 18.29925537109375, + "learning_rate": 2.976098901098901e-05, + "loss": 0.3165, + "step": 14734 + }, + { + "epoch": 40.48076923076923, + "grad_norm": 3.0176424980163574, + "learning_rate": 2.9759615384615384e-05, + "loss": 0.0675, + "step": 14735 + }, + { + "epoch": 40.48351648351648, + "grad_norm": 13.015403747558594, + "learning_rate": 2.975824175824176e-05, + "loss": 0.4292, + "step": 14736 + }, + { + "epoch": 40.48626373626374, + "grad_norm": 14.293413162231445, + "learning_rate": 2.975686813186813e-05, + "loss": 0.5068, + "step": 14737 + }, + { + "epoch": 40.489010989010985, + "grad_norm": 20.75018882751465, + "learning_rate": 2.9755494505494508e-05, + "loss": 0.4959, + "step": 14738 + }, + { + "epoch": 40.49175824175824, + "grad_norm": 4.8069987297058105, + "learning_rate": 2.9754120879120878e-05, + "loss": 0.1009, + "step": 14739 + }, + { + "epoch": 40.494505494505496, + "grad_norm": 7.892247200012207, + "learning_rate": 2.9752747252747255e-05, + "loss": 0.2122, + "step": 14740 + }, + { + "epoch": 40.497252747252745, + "grad_norm": 13.96642017364502, + "learning_rate": 2.975137362637363e-05, + "loss": 0.2244, + "step": 14741 + }, + { + "epoch": 40.5, + "grad_norm": 9.364358901977539, + "learning_rate": 2.975e-05, + "loss": 0.1263, + "step": 14742 + }, + { + "epoch": 40.502747252747255, + "grad_norm": 7.295691967010498, + "learning_rate": 2.9748626373626375e-05, + "loss": 0.1692, + "step": 14743 + }, + { + "epoch": 40.505494505494504, + "grad_norm": 17.059890747070312, + "learning_rate": 2.9747252747252745e-05, + "loss": 0.3409, + "step": 14744 + }, + { + "epoch": 40.50824175824176, + "grad_norm": 10.519304275512695, + "learning_rate": 2.974587912087912e-05, + "loss": 0.2186, + "step": 14745 + }, + { + "epoch": 40.51098901098901, + "grad_norm": 15.107199668884277, + "learning_rate": 2.97445054945055e-05, + "loss": 0.3369, + "step": 14746 + }, + { + "epoch": 40.51373626373626, + "grad_norm": 10.926470756530762, + "learning_rate": 2.974313186813187e-05, + "loss": 0.3105, + "step": 14747 + }, + { + "epoch": 40.51648351648352, + "grad_norm": 5.448393821716309, + "learning_rate": 2.9741758241758245e-05, + "loss": 0.0895, + "step": 14748 + }, + { + "epoch": 40.51923076923077, + "grad_norm": 18.796520233154297, + "learning_rate": 2.9740384615384615e-05, + "loss": 0.5638, + "step": 14749 + }, + { + "epoch": 40.52197802197802, + "grad_norm": 6.422094821929932, + "learning_rate": 2.973901098901099e-05, + "loss": 0.1206, + "step": 14750 + }, + { + "epoch": 40.52472527472528, + "grad_norm": 15.820263862609863, + "learning_rate": 2.9737637362637366e-05, + "loss": 0.2215, + "step": 14751 + }, + { + "epoch": 40.527472527472526, + "grad_norm": 10.434236526489258, + "learning_rate": 2.9736263736263736e-05, + "loss": 0.2753, + "step": 14752 + }, + { + "epoch": 40.53021978021978, + "grad_norm": 19.111417770385742, + "learning_rate": 2.9734890109890112e-05, + "loss": 0.6555, + "step": 14753 + }, + { + "epoch": 40.532967032967036, + "grad_norm": 5.079992294311523, + "learning_rate": 2.9733516483516482e-05, + "loss": 0.1201, + "step": 14754 + }, + { + "epoch": 40.535714285714285, + "grad_norm": 5.534150123596191, + "learning_rate": 2.973214285714286e-05, + "loss": 0.073, + "step": 14755 + }, + { + "epoch": 40.53846153846154, + "grad_norm": 5.226315021514893, + "learning_rate": 2.9730769230769236e-05, + "loss": 0.1031, + "step": 14756 + }, + { + "epoch": 40.54120879120879, + "grad_norm": 7.097898960113525, + "learning_rate": 2.9729395604395606e-05, + "loss": 0.0784, + "step": 14757 + }, + { + "epoch": 40.543956043956044, + "grad_norm": 19.021276473999023, + "learning_rate": 2.972802197802198e-05, + "loss": 0.5231, + "step": 14758 + }, + { + "epoch": 40.5467032967033, + "grad_norm": 7.976159572601318, + "learning_rate": 2.972664835164835e-05, + "loss": 0.3518, + "step": 14759 + }, + { + "epoch": 40.54945054945055, + "grad_norm": 11.108391761779785, + "learning_rate": 2.9725274725274726e-05, + "loss": 0.1932, + "step": 14760 + }, + { + "epoch": 40.5521978021978, + "grad_norm": 7.947389602661133, + "learning_rate": 2.9723901098901103e-05, + "loss": 0.1076, + "step": 14761 + }, + { + "epoch": 40.55494505494506, + "grad_norm": 4.170886039733887, + "learning_rate": 2.9722527472527473e-05, + "loss": 0.0928, + "step": 14762 + }, + { + "epoch": 40.55769230769231, + "grad_norm": 9.97684383392334, + "learning_rate": 2.972115384615385e-05, + "loss": 0.1645, + "step": 14763 + }, + { + "epoch": 40.56043956043956, + "grad_norm": 16.053146362304688, + "learning_rate": 2.971978021978022e-05, + "loss": 0.76, + "step": 14764 + }, + { + "epoch": 40.56318681318681, + "grad_norm": 12.872663497924805, + "learning_rate": 2.9718406593406593e-05, + "loss": 0.2589, + "step": 14765 + }, + { + "epoch": 40.565934065934066, + "grad_norm": 9.799890518188477, + "learning_rate": 2.971703296703297e-05, + "loss": 0.1564, + "step": 14766 + }, + { + "epoch": 40.56868131868132, + "grad_norm": 7.265589714050293, + "learning_rate": 2.971565934065934e-05, + "loss": 0.1876, + "step": 14767 + }, + { + "epoch": 40.57142857142857, + "grad_norm": 21.035383224487305, + "learning_rate": 2.9714285714285717e-05, + "loss": 0.4871, + "step": 14768 + }, + { + "epoch": 40.574175824175825, + "grad_norm": 16.052804946899414, + "learning_rate": 2.9712912087912087e-05, + "loss": 0.2265, + "step": 14769 + }, + { + "epoch": 40.57692307692308, + "grad_norm": 25.011032104492188, + "learning_rate": 2.9711538461538464e-05, + "loss": 0.6306, + "step": 14770 + }, + { + "epoch": 40.57967032967033, + "grad_norm": 4.474950790405273, + "learning_rate": 2.971016483516484e-05, + "loss": 0.0798, + "step": 14771 + }, + { + "epoch": 40.582417582417584, + "grad_norm": 19.35085678100586, + "learning_rate": 2.970879120879121e-05, + "loss": 0.8143, + "step": 14772 + }, + { + "epoch": 40.58516483516483, + "grad_norm": 8.931495666503906, + "learning_rate": 2.9707417582417584e-05, + "loss": 0.1206, + "step": 14773 + }, + { + "epoch": 40.58791208791209, + "grad_norm": 18.036392211914062, + "learning_rate": 2.9706043956043954e-05, + "loss": 0.3479, + "step": 14774 + }, + { + "epoch": 40.59065934065934, + "grad_norm": 11.117386817932129, + "learning_rate": 2.970467032967033e-05, + "loss": 0.3461, + "step": 14775 + }, + { + "epoch": 40.59340659340659, + "grad_norm": 13.495428085327148, + "learning_rate": 2.9703296703296707e-05, + "loss": 0.4041, + "step": 14776 + }, + { + "epoch": 40.59615384615385, + "grad_norm": 10.021987915039062, + "learning_rate": 2.9701923076923077e-05, + "loss": 0.2197, + "step": 14777 + }, + { + "epoch": 40.5989010989011, + "grad_norm": 9.068293571472168, + "learning_rate": 2.9700549450549454e-05, + "loss": 0.2575, + "step": 14778 + }, + { + "epoch": 40.60164835164835, + "grad_norm": 14.975520133972168, + "learning_rate": 2.9699175824175824e-05, + "loss": 0.2627, + "step": 14779 + }, + { + "epoch": 40.604395604395606, + "grad_norm": 10.14875316619873, + "learning_rate": 2.9697802197802198e-05, + "loss": 0.2626, + "step": 14780 + }, + { + "epoch": 40.607142857142854, + "grad_norm": 10.559172630310059, + "learning_rate": 2.9696428571428574e-05, + "loss": 0.1592, + "step": 14781 + }, + { + "epoch": 40.60989010989011, + "grad_norm": 15.467977523803711, + "learning_rate": 2.9695054945054945e-05, + "loss": 0.4561, + "step": 14782 + }, + { + "epoch": 40.612637362637365, + "grad_norm": 19.3271427154541, + "learning_rate": 2.969368131868132e-05, + "loss": 0.6454, + "step": 14783 + }, + { + "epoch": 40.61538461538461, + "grad_norm": 6.4581708908081055, + "learning_rate": 2.969230769230769e-05, + "loss": 0.1689, + "step": 14784 + }, + { + "epoch": 40.61813186813187, + "grad_norm": 11.866332054138184, + "learning_rate": 2.9690934065934068e-05, + "loss": 0.1838, + "step": 14785 + }, + { + "epoch": 40.620879120879124, + "grad_norm": 11.79263973236084, + "learning_rate": 2.968956043956044e-05, + "loss": 0.1517, + "step": 14786 + }, + { + "epoch": 40.62362637362637, + "grad_norm": 3.456286907196045, + "learning_rate": 2.9688186813186815e-05, + "loss": 0.0607, + "step": 14787 + }, + { + "epoch": 40.62637362637363, + "grad_norm": 6.420307636260986, + "learning_rate": 2.968681318681319e-05, + "loss": 0.0836, + "step": 14788 + }, + { + "epoch": 40.629120879120876, + "grad_norm": 7.792694091796875, + "learning_rate": 2.968543956043956e-05, + "loss": 0.1603, + "step": 14789 + }, + { + "epoch": 40.63186813186813, + "grad_norm": 10.01226806640625, + "learning_rate": 2.9684065934065935e-05, + "loss": 0.2418, + "step": 14790 + }, + { + "epoch": 40.63461538461539, + "grad_norm": 11.002342224121094, + "learning_rate": 2.9682692307692312e-05, + "loss": 0.3117, + "step": 14791 + }, + { + "epoch": 40.637362637362635, + "grad_norm": 10.535445213317871, + "learning_rate": 2.9681318681318682e-05, + "loss": 0.1427, + "step": 14792 + }, + { + "epoch": 40.64010989010989, + "grad_norm": 10.353618621826172, + "learning_rate": 2.967994505494506e-05, + "loss": 0.2467, + "step": 14793 + }, + { + "epoch": 40.642857142857146, + "grad_norm": 16.954944610595703, + "learning_rate": 2.967857142857143e-05, + "loss": 0.6481, + "step": 14794 + }, + { + "epoch": 40.645604395604394, + "grad_norm": 12.386148452758789, + "learning_rate": 2.9677197802197802e-05, + "loss": 0.3809, + "step": 14795 + }, + { + "epoch": 40.64835164835165, + "grad_norm": 11.717971801757812, + "learning_rate": 2.967582417582418e-05, + "loss": 0.2457, + "step": 14796 + }, + { + "epoch": 40.6510989010989, + "grad_norm": 14.280643463134766, + "learning_rate": 2.967445054945055e-05, + "loss": 0.2215, + "step": 14797 + }, + { + "epoch": 40.65384615384615, + "grad_norm": 13.326167106628418, + "learning_rate": 2.9673076923076926e-05, + "loss": 0.4152, + "step": 14798 + }, + { + "epoch": 40.65659340659341, + "grad_norm": 14.18896198272705, + "learning_rate": 2.9671703296703296e-05, + "loss": 0.4733, + "step": 14799 + }, + { + "epoch": 40.65934065934066, + "grad_norm": 14.238141059875488, + "learning_rate": 2.9670329670329673e-05, + "loss": 0.2731, + "step": 14800 + }, + { + "epoch": 40.66208791208791, + "grad_norm": 9.755131721496582, + "learning_rate": 2.9668956043956046e-05, + "loss": 0.275, + "step": 14801 + }, + { + "epoch": 40.66483516483517, + "grad_norm": 15.29254150390625, + "learning_rate": 2.966758241758242e-05, + "loss": 0.5117, + "step": 14802 + }, + { + "epoch": 40.667582417582416, + "grad_norm": 13.141558647155762, + "learning_rate": 2.9666208791208793e-05, + "loss": 0.4216, + "step": 14803 + }, + { + "epoch": 40.67032967032967, + "grad_norm": 8.850223541259766, + "learning_rate": 2.9664835164835163e-05, + "loss": 0.1699, + "step": 14804 + }, + { + "epoch": 40.67307692307692, + "grad_norm": 11.598681449890137, + "learning_rate": 2.966346153846154e-05, + "loss": 0.3492, + "step": 14805 + }, + { + "epoch": 40.675824175824175, + "grad_norm": 11.836674690246582, + "learning_rate": 2.9662087912087916e-05, + "loss": 0.3858, + "step": 14806 + }, + { + "epoch": 40.67857142857143, + "grad_norm": 5.261388778686523, + "learning_rate": 2.9660714285714286e-05, + "loss": 0.1237, + "step": 14807 + }, + { + "epoch": 40.68131868131868, + "grad_norm": 6.7967023849487305, + "learning_rate": 2.9659340659340663e-05, + "loss": 0.0981, + "step": 14808 + }, + { + "epoch": 40.684065934065934, + "grad_norm": 11.834985733032227, + "learning_rate": 2.9657967032967033e-05, + "loss": 0.3174, + "step": 14809 + }, + { + "epoch": 40.68681318681319, + "grad_norm": 11.529104232788086, + "learning_rate": 2.9656593406593407e-05, + "loss": 0.2261, + "step": 14810 + }, + { + "epoch": 40.68956043956044, + "grad_norm": 11.864453315734863, + "learning_rate": 2.9655219780219783e-05, + "loss": 0.2953, + "step": 14811 + }, + { + "epoch": 40.69230769230769, + "grad_norm": 9.993101119995117, + "learning_rate": 2.9653846153846153e-05, + "loss": 0.1152, + "step": 14812 + }, + { + "epoch": 40.69505494505494, + "grad_norm": 15.571104049682617, + "learning_rate": 2.965247252747253e-05, + "loss": 0.4771, + "step": 14813 + }, + { + "epoch": 40.6978021978022, + "grad_norm": 11.409340858459473, + "learning_rate": 2.96510989010989e-05, + "loss": 0.1832, + "step": 14814 + }, + { + "epoch": 40.70054945054945, + "grad_norm": 13.296689987182617, + "learning_rate": 2.9649725274725277e-05, + "loss": 0.4042, + "step": 14815 + }, + { + "epoch": 40.7032967032967, + "grad_norm": 20.25571632385254, + "learning_rate": 2.964835164835165e-05, + "loss": 0.4609, + "step": 14816 + }, + { + "epoch": 40.706043956043956, + "grad_norm": 12.476327896118164, + "learning_rate": 2.9646978021978024e-05, + "loss": 0.2486, + "step": 14817 + }, + { + "epoch": 40.70879120879121, + "grad_norm": 10.773209571838379, + "learning_rate": 2.9645604395604397e-05, + "loss": 0.1551, + "step": 14818 + }, + { + "epoch": 40.71153846153846, + "grad_norm": 14.846343040466309, + "learning_rate": 2.9644230769230767e-05, + "loss": 0.2654, + "step": 14819 + }, + { + "epoch": 40.714285714285715, + "grad_norm": 8.935784339904785, + "learning_rate": 2.9642857142857144e-05, + "loss": 0.14, + "step": 14820 + }, + { + "epoch": 40.717032967032964, + "grad_norm": 20.512866973876953, + "learning_rate": 2.964148351648352e-05, + "loss": 0.6339, + "step": 14821 + }, + { + "epoch": 40.71978021978022, + "grad_norm": 17.898635864257812, + "learning_rate": 2.964010989010989e-05, + "loss": 0.4728, + "step": 14822 + }, + { + "epoch": 40.722527472527474, + "grad_norm": 14.759905815124512, + "learning_rate": 2.9638736263736268e-05, + "loss": 0.3078, + "step": 14823 + }, + { + "epoch": 40.72527472527472, + "grad_norm": 13.90107250213623, + "learning_rate": 2.9637362637362638e-05, + "loss": 0.3508, + "step": 14824 + }, + { + "epoch": 40.72802197802198, + "grad_norm": 14.913097381591797, + "learning_rate": 2.963598901098901e-05, + "loss": 0.4651, + "step": 14825 + }, + { + "epoch": 40.73076923076923, + "grad_norm": 15.306269645690918, + "learning_rate": 2.9634615384615388e-05, + "loss": 0.5048, + "step": 14826 + }, + { + "epoch": 40.73351648351648, + "grad_norm": 8.073793411254883, + "learning_rate": 2.9633241758241758e-05, + "loss": 0.1229, + "step": 14827 + }, + { + "epoch": 40.73626373626374, + "grad_norm": 3.5040972232818604, + "learning_rate": 2.9631868131868135e-05, + "loss": 0.0529, + "step": 14828 + }, + { + "epoch": 40.73901098901099, + "grad_norm": 14.973762512207031, + "learning_rate": 2.9630494505494505e-05, + "loss": 0.2423, + "step": 14829 + }, + { + "epoch": 40.74175824175824, + "grad_norm": 7.939388275146484, + "learning_rate": 2.962912087912088e-05, + "loss": 0.225, + "step": 14830 + }, + { + "epoch": 40.744505494505496, + "grad_norm": 19.378103256225586, + "learning_rate": 2.9627747252747255e-05, + "loss": 0.6126, + "step": 14831 + }, + { + "epoch": 40.747252747252745, + "grad_norm": 12.937361717224121, + "learning_rate": 2.962637362637363e-05, + "loss": 0.2972, + "step": 14832 + }, + { + "epoch": 40.75, + "grad_norm": 9.30734920501709, + "learning_rate": 2.9625000000000002e-05, + "loss": 0.2985, + "step": 14833 + }, + { + "epoch": 40.752747252747255, + "grad_norm": 10.361037254333496, + "learning_rate": 2.9623626373626372e-05, + "loss": 0.3345, + "step": 14834 + }, + { + "epoch": 40.755494505494504, + "grad_norm": 9.776388168334961, + "learning_rate": 2.962225274725275e-05, + "loss": 0.2602, + "step": 14835 + }, + { + "epoch": 40.75824175824176, + "grad_norm": 15.803227424621582, + "learning_rate": 2.9620879120879125e-05, + "loss": 0.457, + "step": 14836 + }, + { + "epoch": 40.76098901098901, + "grad_norm": 1.1371653079986572, + "learning_rate": 2.9619505494505495e-05, + "loss": 0.0194, + "step": 14837 + }, + { + "epoch": 40.76373626373626, + "grad_norm": 13.747719764709473, + "learning_rate": 2.9618131868131872e-05, + "loss": 0.3137, + "step": 14838 + }, + { + "epoch": 40.76648351648352, + "grad_norm": 24.654470443725586, + "learning_rate": 2.9616758241758242e-05, + "loss": 0.6068, + "step": 14839 + }, + { + "epoch": 40.76923076923077, + "grad_norm": 17.734867095947266, + "learning_rate": 2.9615384615384616e-05, + "loss": 0.691, + "step": 14840 + }, + { + "epoch": 40.77197802197802, + "grad_norm": 17.42448616027832, + "learning_rate": 2.9614010989010992e-05, + "loss": 0.4304, + "step": 14841 + }, + { + "epoch": 40.77472527472528, + "grad_norm": 23.204036712646484, + "learning_rate": 2.9612637362637362e-05, + "loss": 0.699, + "step": 14842 + }, + { + "epoch": 40.777472527472526, + "grad_norm": 17.1663761138916, + "learning_rate": 2.961126373626374e-05, + "loss": 0.4407, + "step": 14843 + }, + { + "epoch": 40.78021978021978, + "grad_norm": 11.87972354888916, + "learning_rate": 2.960989010989011e-05, + "loss": 0.2554, + "step": 14844 + }, + { + "epoch": 40.782967032967036, + "grad_norm": 4.079184055328369, + "learning_rate": 2.9608516483516486e-05, + "loss": 0.0946, + "step": 14845 + }, + { + "epoch": 40.785714285714285, + "grad_norm": 27.247644424438477, + "learning_rate": 2.960714285714286e-05, + "loss": 1.0083, + "step": 14846 + }, + { + "epoch": 40.78846153846154, + "grad_norm": 6.2754130363464355, + "learning_rate": 2.9605769230769233e-05, + "loss": 0.0894, + "step": 14847 + }, + { + "epoch": 40.79120879120879, + "grad_norm": 6.491919040679932, + "learning_rate": 2.9604395604395606e-05, + "loss": 0.1123, + "step": 14848 + }, + { + "epoch": 40.793956043956044, + "grad_norm": 9.764607429504395, + "learning_rate": 2.9603021978021976e-05, + "loss": 0.2139, + "step": 14849 + }, + { + "epoch": 40.7967032967033, + "grad_norm": 15.963455200195312, + "learning_rate": 2.9601648351648353e-05, + "loss": 0.3975, + "step": 14850 + }, + { + "epoch": 40.79945054945055, + "grad_norm": 12.78053092956543, + "learning_rate": 2.960027472527473e-05, + "loss": 0.4164, + "step": 14851 + }, + { + "epoch": 40.8021978021978, + "grad_norm": 13.012770652770996, + "learning_rate": 2.95989010989011e-05, + "loss": 0.1268, + "step": 14852 + }, + { + "epoch": 40.80494505494506, + "grad_norm": 7.6856770515441895, + "learning_rate": 2.9597527472527477e-05, + "loss": 0.1106, + "step": 14853 + }, + { + "epoch": 40.80769230769231, + "grad_norm": 6.915022373199463, + "learning_rate": 2.9596153846153847e-05, + "loss": 0.1313, + "step": 14854 + }, + { + "epoch": 40.81043956043956, + "grad_norm": 1.8162778615951538, + "learning_rate": 2.959478021978022e-05, + "loss": 0.0344, + "step": 14855 + }, + { + "epoch": 40.81318681318681, + "grad_norm": 9.682890892028809, + "learning_rate": 2.9593406593406597e-05, + "loss": 0.1891, + "step": 14856 + }, + { + "epoch": 40.815934065934066, + "grad_norm": 14.204009056091309, + "learning_rate": 2.9592032967032967e-05, + "loss": 0.3664, + "step": 14857 + }, + { + "epoch": 40.81868131868132, + "grad_norm": 10.62387466430664, + "learning_rate": 2.9590659340659344e-05, + "loss": 0.2, + "step": 14858 + }, + { + "epoch": 40.82142857142857, + "grad_norm": 12.052652359008789, + "learning_rate": 2.9589285714285714e-05, + "loss": 0.2657, + "step": 14859 + }, + { + "epoch": 40.824175824175825, + "grad_norm": 21.530794143676758, + "learning_rate": 2.958791208791209e-05, + "loss": 0.3593, + "step": 14860 + }, + { + "epoch": 40.82692307692308, + "grad_norm": 22.475509643554688, + "learning_rate": 2.9586538461538464e-05, + "loss": 0.7279, + "step": 14861 + }, + { + "epoch": 40.82967032967033, + "grad_norm": 5.860353946685791, + "learning_rate": 2.9585164835164837e-05, + "loss": 0.1273, + "step": 14862 + }, + { + "epoch": 40.832417582417584, + "grad_norm": 17.23482894897461, + "learning_rate": 2.958379120879121e-05, + "loss": 0.7191, + "step": 14863 + }, + { + "epoch": 40.83516483516483, + "grad_norm": 9.892925262451172, + "learning_rate": 2.958241758241758e-05, + "loss": 0.1813, + "step": 14864 + }, + { + "epoch": 40.83791208791209, + "grad_norm": 15.5864839553833, + "learning_rate": 2.9581043956043958e-05, + "loss": 0.3726, + "step": 14865 + }, + { + "epoch": 40.84065934065934, + "grad_norm": 11.528837203979492, + "learning_rate": 2.9579670329670334e-05, + "loss": 0.325, + "step": 14866 + }, + { + "epoch": 40.84340659340659, + "grad_norm": 4.890839099884033, + "learning_rate": 2.9578296703296704e-05, + "loss": 0.097, + "step": 14867 + }, + { + "epoch": 40.84615384615385, + "grad_norm": 4.419517517089844, + "learning_rate": 2.957692307692308e-05, + "loss": 0.07, + "step": 14868 + }, + { + "epoch": 40.8489010989011, + "grad_norm": 7.422081470489502, + "learning_rate": 2.957554945054945e-05, + "loss": 0.251, + "step": 14869 + }, + { + "epoch": 40.85164835164835, + "grad_norm": 13.797303199768066, + "learning_rate": 2.9574175824175825e-05, + "loss": 0.3165, + "step": 14870 + }, + { + "epoch": 40.854395604395606, + "grad_norm": 11.544970512390137, + "learning_rate": 2.95728021978022e-05, + "loss": 0.2919, + "step": 14871 + }, + { + "epoch": 40.857142857142854, + "grad_norm": 14.281599044799805, + "learning_rate": 2.957142857142857e-05, + "loss": 0.6599, + "step": 14872 + }, + { + "epoch": 40.85989010989011, + "grad_norm": 15.614384651184082, + "learning_rate": 2.9570054945054948e-05, + "loss": 0.6934, + "step": 14873 + }, + { + "epoch": 40.862637362637365, + "grad_norm": 15.147876739501953, + "learning_rate": 2.9568681318681318e-05, + "loss": 0.5431, + "step": 14874 + }, + { + "epoch": 40.86538461538461, + "grad_norm": 9.313355445861816, + "learning_rate": 2.9567307692307695e-05, + "loss": 0.2472, + "step": 14875 + }, + { + "epoch": 40.86813186813187, + "grad_norm": 17.361995697021484, + "learning_rate": 2.956593406593407e-05, + "loss": 0.4717, + "step": 14876 + }, + { + "epoch": 40.870879120879124, + "grad_norm": 18.323854446411133, + "learning_rate": 2.9564560439560442e-05, + "loss": 0.5394, + "step": 14877 + }, + { + "epoch": 40.87362637362637, + "grad_norm": 13.43687629699707, + "learning_rate": 2.9563186813186815e-05, + "loss": 0.1725, + "step": 14878 + }, + { + "epoch": 40.87637362637363, + "grad_norm": 10.236139297485352, + "learning_rate": 2.9561813186813185e-05, + "loss": 0.2492, + "step": 14879 + }, + { + "epoch": 40.879120879120876, + "grad_norm": 9.444005012512207, + "learning_rate": 2.9560439560439562e-05, + "loss": 0.2092, + "step": 14880 + }, + { + "epoch": 40.88186813186813, + "grad_norm": 29.533910751342773, + "learning_rate": 2.955906593406594e-05, + "loss": 0.7638, + "step": 14881 + }, + { + "epoch": 40.88461538461539, + "grad_norm": 10.806434631347656, + "learning_rate": 2.955769230769231e-05, + "loss": 0.1462, + "step": 14882 + }, + { + "epoch": 40.887362637362635, + "grad_norm": 5.937319278717041, + "learning_rate": 2.9556318681318686e-05, + "loss": 0.1148, + "step": 14883 + }, + { + "epoch": 40.89010989010989, + "grad_norm": 9.822107315063477, + "learning_rate": 2.9554945054945056e-05, + "loss": 0.2436, + "step": 14884 + }, + { + "epoch": 40.892857142857146, + "grad_norm": 23.3543701171875, + "learning_rate": 2.955357142857143e-05, + "loss": 0.7397, + "step": 14885 + }, + { + "epoch": 40.895604395604394, + "grad_norm": 5.169459342956543, + "learning_rate": 2.9552197802197806e-05, + "loss": 0.12, + "step": 14886 + }, + { + "epoch": 40.89835164835165, + "grad_norm": 10.317828178405762, + "learning_rate": 2.9550824175824176e-05, + "loss": 0.2017, + "step": 14887 + }, + { + "epoch": 40.9010989010989, + "grad_norm": 4.145008563995361, + "learning_rate": 2.9549450549450553e-05, + "loss": 0.1042, + "step": 14888 + }, + { + "epoch": 40.90384615384615, + "grad_norm": 19.659011840820312, + "learning_rate": 2.9548076923076923e-05, + "loss": 0.5711, + "step": 14889 + }, + { + "epoch": 40.90659340659341, + "grad_norm": 10.968124389648438, + "learning_rate": 2.95467032967033e-05, + "loss": 0.1307, + "step": 14890 + }, + { + "epoch": 40.90934065934066, + "grad_norm": 20.424081802368164, + "learning_rate": 2.9545329670329673e-05, + "loss": 0.7168, + "step": 14891 + }, + { + "epoch": 40.91208791208791, + "grad_norm": 16.52832794189453, + "learning_rate": 2.9543956043956046e-05, + "loss": 0.3, + "step": 14892 + }, + { + "epoch": 40.91483516483517, + "grad_norm": 6.93359375, + "learning_rate": 2.954258241758242e-05, + "loss": 0.1329, + "step": 14893 + }, + { + "epoch": 40.917582417582416, + "grad_norm": 10.740527153015137, + "learning_rate": 2.954120879120879e-05, + "loss": 0.1494, + "step": 14894 + }, + { + "epoch": 40.92032967032967, + "grad_norm": 7.34506893157959, + "learning_rate": 2.9539835164835167e-05, + "loss": 0.1718, + "step": 14895 + }, + { + "epoch": 40.92307692307692, + "grad_norm": 8.21036434173584, + "learning_rate": 2.9538461538461543e-05, + "loss": 0.1426, + "step": 14896 + }, + { + "epoch": 40.925824175824175, + "grad_norm": 11.545337677001953, + "learning_rate": 2.9537087912087913e-05, + "loss": 0.2947, + "step": 14897 + }, + { + "epoch": 40.92857142857143, + "grad_norm": 16.048437118530273, + "learning_rate": 2.953571428571429e-05, + "loss": 0.3317, + "step": 14898 + }, + { + "epoch": 40.93131868131868, + "grad_norm": 13.166199684143066, + "learning_rate": 2.953434065934066e-05, + "loss": 0.2962, + "step": 14899 + }, + { + "epoch": 40.934065934065934, + "grad_norm": 10.82534408569336, + "learning_rate": 2.9532967032967034e-05, + "loss": 0.3269, + "step": 14900 + }, + { + "epoch": 40.93681318681319, + "grad_norm": 17.09781837463379, + "learning_rate": 2.953159340659341e-05, + "loss": 0.3682, + "step": 14901 + }, + { + "epoch": 40.93956043956044, + "grad_norm": 20.200462341308594, + "learning_rate": 2.953021978021978e-05, + "loss": 0.8834, + "step": 14902 + }, + { + "epoch": 40.94230769230769, + "grad_norm": 6.0157976150512695, + "learning_rate": 2.9528846153846157e-05, + "loss": 0.0764, + "step": 14903 + }, + { + "epoch": 40.94505494505494, + "grad_norm": 9.615656852722168, + "learning_rate": 2.9527472527472527e-05, + "loss": 0.1604, + "step": 14904 + }, + { + "epoch": 40.9478021978022, + "grad_norm": 13.815387725830078, + "learning_rate": 2.9526098901098904e-05, + "loss": 0.38, + "step": 14905 + }, + { + "epoch": 40.95054945054945, + "grad_norm": 18.367280960083008, + "learning_rate": 2.9524725274725274e-05, + "loss": 0.696, + "step": 14906 + }, + { + "epoch": 40.9532967032967, + "grad_norm": 9.217658996582031, + "learning_rate": 2.952335164835165e-05, + "loss": 0.2106, + "step": 14907 + }, + { + "epoch": 40.956043956043956, + "grad_norm": 11.61793327331543, + "learning_rate": 2.9521978021978024e-05, + "loss": 0.3391, + "step": 14908 + }, + { + "epoch": 40.95879120879121, + "grad_norm": 12.50577449798584, + "learning_rate": 2.9520604395604394e-05, + "loss": 0.1264, + "step": 14909 + }, + { + "epoch": 40.96153846153846, + "grad_norm": 16.35674285888672, + "learning_rate": 2.951923076923077e-05, + "loss": 0.3221, + "step": 14910 + }, + { + "epoch": 40.964285714285715, + "grad_norm": 11.71704387664795, + "learning_rate": 2.951785714285714e-05, + "loss": 0.4272, + "step": 14911 + }, + { + "epoch": 40.967032967032964, + "grad_norm": 9.085674285888672, + "learning_rate": 2.9516483516483518e-05, + "loss": 0.206, + "step": 14912 + }, + { + "epoch": 40.96978021978022, + "grad_norm": 9.190374374389648, + "learning_rate": 2.9515109890109895e-05, + "loss": 0.1686, + "step": 14913 + }, + { + "epoch": 40.972527472527474, + "grad_norm": 6.594939708709717, + "learning_rate": 2.9513736263736265e-05, + "loss": 0.0992, + "step": 14914 + }, + { + "epoch": 40.97527472527472, + "grad_norm": 5.481700897216797, + "learning_rate": 2.9512362637362638e-05, + "loss": 0.0562, + "step": 14915 + }, + { + "epoch": 40.97802197802198, + "grad_norm": 17.64004135131836, + "learning_rate": 2.9510989010989008e-05, + "loss": 0.55, + "step": 14916 + }, + { + "epoch": 40.98076923076923, + "grad_norm": 6.254430294036865, + "learning_rate": 2.9509615384615385e-05, + "loss": 0.1088, + "step": 14917 + }, + { + "epoch": 40.98351648351648, + "grad_norm": 15.739588737487793, + "learning_rate": 2.9508241758241762e-05, + "loss": 0.3935, + "step": 14918 + }, + { + "epoch": 40.98626373626374, + "grad_norm": 12.014710426330566, + "learning_rate": 2.9506868131868132e-05, + "loss": 0.2971, + "step": 14919 + }, + { + "epoch": 40.98901098901099, + "grad_norm": 18.109262466430664, + "learning_rate": 2.950549450549451e-05, + "loss": 0.663, + "step": 14920 + }, + { + "epoch": 40.99175824175824, + "grad_norm": 6.597656726837158, + "learning_rate": 2.950412087912088e-05, + "loss": 0.1094, + "step": 14921 + }, + { + "epoch": 40.994505494505496, + "grad_norm": 7.482337951660156, + "learning_rate": 2.9502747252747255e-05, + "loss": 0.1649, + "step": 14922 + }, + { + "epoch": 40.997252747252745, + "grad_norm": 9.812692642211914, + "learning_rate": 2.950137362637363e-05, + "loss": 0.2603, + "step": 14923 + }, + { + "epoch": 41.0, + "grad_norm": 42.97747039794922, + "learning_rate": 2.95e-05, + "loss": 1.0635, + "step": 14924 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.7699724517906336, + "eval_f1": 0.7700618965529147, + "eval_f1_DuraRiadoRio_64x64": 0.841726618705036, + "eval_f1_Mole_64x64": 0.669683257918552, + "eval_f1_Quebrado_64x64": 0.8106060606060606, + "eval_f1_RiadoRio_64x64": 0.6949602122015915, + "eval_f1_RioFechado_64x64": 0.8333333333333334, + "eval_loss": 0.8656372427940369, + "eval_precision": 0.8145536121277999, + "eval_precision_DuraRiadoRio_64x64": 0.8731343283582089, + "eval_precision_Mole_64x64": 0.961038961038961, + "eval_precision_Quebrado_64x64": 0.8916666666666667, + "eval_precision_RiadoRio_64x64": 0.5822222222222222, + "eval_precision_RioFechado_64x64": 0.7647058823529411, + "eval_recall": 0.7693559014908163, + "eval_recall_DuraRiadoRio_64x64": 0.8125, + "eval_recall_Mole_64x64": 0.5138888888888888, + "eval_recall_Quebrado_64x64": 0.7430555555555556, + "eval_recall_RiadoRio_64x64": 0.8618421052631579, + "eval_recall_RioFechado_64x64": 0.9154929577464789, + "eval_runtime": 1.7231, + "eval_samples_per_second": 421.342, + "eval_steps_per_second": 26.697, + "step": 14924 + }, + { + "epoch": 41.002747252747255, + "grad_norm": 5.886155605316162, + "learning_rate": 2.9498626373626376e-05, + "loss": 0.0823, + "step": 14925 + }, + { + "epoch": 41.005494505494504, + "grad_norm": 11.703715324401855, + "learning_rate": 2.9497252747252746e-05, + "loss": 0.2707, + "step": 14926 + }, + { + "epoch": 41.00824175824176, + "grad_norm": 11.893681526184082, + "learning_rate": 2.9495879120879122e-05, + "loss": 0.3835, + "step": 14927 + }, + { + "epoch": 41.010989010989015, + "grad_norm": 5.069748878479004, + "learning_rate": 2.94945054945055e-05, + "loss": 0.0985, + "step": 14928 + }, + { + "epoch": 41.01373626373626, + "grad_norm": 16.31309700012207, + "learning_rate": 2.949313186813187e-05, + "loss": 0.3709, + "step": 14929 + }, + { + "epoch": 41.01648351648352, + "grad_norm": 9.52926254272461, + "learning_rate": 2.9491758241758243e-05, + "loss": 0.2567, + "step": 14930 + }, + { + "epoch": 41.01923076923077, + "grad_norm": 13.276994705200195, + "learning_rate": 2.9490384615384613e-05, + "loss": 0.4626, + "step": 14931 + }, + { + "epoch": 41.02197802197802, + "grad_norm": 8.902069091796875, + "learning_rate": 2.948901098901099e-05, + "loss": 0.1882, + "step": 14932 + }, + { + "epoch": 41.02472527472528, + "grad_norm": 4.111247539520264, + "learning_rate": 2.9487637362637366e-05, + "loss": 0.0891, + "step": 14933 + }, + { + "epoch": 41.027472527472526, + "grad_norm": 13.860937118530273, + "learning_rate": 2.9486263736263736e-05, + "loss": 0.3948, + "step": 14934 + }, + { + "epoch": 41.03021978021978, + "grad_norm": 7.886391639709473, + "learning_rate": 2.9484890109890113e-05, + "loss": 0.096, + "step": 14935 + }, + { + "epoch": 41.032967032967036, + "grad_norm": 10.940435409545898, + "learning_rate": 2.9483516483516483e-05, + "loss": 0.13, + "step": 14936 + }, + { + "epoch": 41.035714285714285, + "grad_norm": 2.447263717651367, + "learning_rate": 2.948214285714286e-05, + "loss": 0.0458, + "step": 14937 + }, + { + "epoch": 41.03846153846154, + "grad_norm": 14.874563217163086, + "learning_rate": 2.9480769230769233e-05, + "loss": 0.2662, + "step": 14938 + }, + { + "epoch": 41.04120879120879, + "grad_norm": 9.83290958404541, + "learning_rate": 2.9479395604395603e-05, + "loss": 0.1588, + "step": 14939 + }, + { + "epoch": 41.043956043956044, + "grad_norm": 5.977005958557129, + "learning_rate": 2.947802197802198e-05, + "loss": 0.2077, + "step": 14940 + }, + { + "epoch": 41.0467032967033, + "grad_norm": 11.90887451171875, + "learning_rate": 2.947664835164835e-05, + "loss": 0.3149, + "step": 14941 + }, + { + "epoch": 41.04945054945055, + "grad_norm": 6.670562744140625, + "learning_rate": 2.9475274725274727e-05, + "loss": 0.175, + "step": 14942 + }, + { + "epoch": 41.0521978021978, + "grad_norm": 12.001835823059082, + "learning_rate": 2.9473901098901104e-05, + "loss": 0.3564, + "step": 14943 + }, + { + "epoch": 41.05494505494506, + "grad_norm": 8.576651573181152, + "learning_rate": 2.9472527472527474e-05, + "loss": 0.3271, + "step": 14944 + }, + { + "epoch": 41.05769230769231, + "grad_norm": 12.044623374938965, + "learning_rate": 2.9471153846153847e-05, + "loss": 0.241, + "step": 14945 + }, + { + "epoch": 41.06043956043956, + "grad_norm": 14.162774085998535, + "learning_rate": 2.9469780219780217e-05, + "loss": 0.5687, + "step": 14946 + }, + { + "epoch": 41.06318681318681, + "grad_norm": 11.31219482421875, + "learning_rate": 2.9468406593406594e-05, + "loss": 0.5382, + "step": 14947 + }, + { + "epoch": 41.065934065934066, + "grad_norm": 11.006917953491211, + "learning_rate": 2.946703296703297e-05, + "loss": 0.1952, + "step": 14948 + }, + { + "epoch": 41.06868131868132, + "grad_norm": 3.9771039485931396, + "learning_rate": 2.946565934065934e-05, + "loss": 0.0627, + "step": 14949 + }, + { + "epoch": 41.07142857142857, + "grad_norm": 10.509916305541992, + "learning_rate": 2.9464285714285718e-05, + "loss": 0.3894, + "step": 14950 + }, + { + "epoch": 41.074175824175825, + "grad_norm": 10.068354606628418, + "learning_rate": 2.9462912087912088e-05, + "loss": 0.3169, + "step": 14951 + }, + { + "epoch": 41.07692307692308, + "grad_norm": 5.463732719421387, + "learning_rate": 2.9461538461538464e-05, + "loss": 0.1153, + "step": 14952 + }, + { + "epoch": 41.07967032967033, + "grad_norm": 13.352059364318848, + "learning_rate": 2.9460164835164838e-05, + "loss": 0.4104, + "step": 14953 + }, + { + "epoch": 41.082417582417584, + "grad_norm": 10.044231414794922, + "learning_rate": 2.9458791208791208e-05, + "loss": 0.1944, + "step": 14954 + }, + { + "epoch": 41.08516483516483, + "grad_norm": 6.029911041259766, + "learning_rate": 2.9457417582417585e-05, + "loss": 0.1168, + "step": 14955 + }, + { + "epoch": 41.08791208791209, + "grad_norm": 8.509276390075684, + "learning_rate": 2.9456043956043955e-05, + "loss": 0.1224, + "step": 14956 + }, + { + "epoch": 41.09065934065934, + "grad_norm": 8.325163841247559, + "learning_rate": 2.945467032967033e-05, + "loss": 0.1677, + "step": 14957 + }, + { + "epoch": 41.09340659340659, + "grad_norm": 13.36565113067627, + "learning_rate": 2.9453296703296708e-05, + "loss": 0.26, + "step": 14958 + }, + { + "epoch": 41.09615384615385, + "grad_norm": 13.339507102966309, + "learning_rate": 2.9451923076923078e-05, + "loss": 0.381, + "step": 14959 + }, + { + "epoch": 41.0989010989011, + "grad_norm": 9.59107780456543, + "learning_rate": 2.945054945054945e-05, + "loss": 0.2677, + "step": 14960 + }, + { + "epoch": 41.10164835164835, + "grad_norm": 12.888861656188965, + "learning_rate": 2.944917582417582e-05, + "loss": 0.2442, + "step": 14961 + }, + { + "epoch": 41.104395604395606, + "grad_norm": 22.494722366333008, + "learning_rate": 2.94478021978022e-05, + "loss": 0.8825, + "step": 14962 + }, + { + "epoch": 41.107142857142854, + "grad_norm": 13.649489402770996, + "learning_rate": 2.9446428571428575e-05, + "loss": 0.3961, + "step": 14963 + }, + { + "epoch": 41.10989010989011, + "grad_norm": 19.226099014282227, + "learning_rate": 2.9445054945054945e-05, + "loss": 0.3247, + "step": 14964 + }, + { + "epoch": 41.112637362637365, + "grad_norm": 18.228364944458008, + "learning_rate": 2.9443681318681322e-05, + "loss": 0.3658, + "step": 14965 + }, + { + "epoch": 41.11538461538461, + "grad_norm": 16.893346786499023, + "learning_rate": 2.9442307692307692e-05, + "loss": 0.6267, + "step": 14966 + }, + { + "epoch": 41.11813186813187, + "grad_norm": 17.253746032714844, + "learning_rate": 2.9440934065934065e-05, + "loss": 0.3314, + "step": 14967 + }, + { + "epoch": 41.120879120879124, + "grad_norm": 14.751272201538086, + "learning_rate": 2.9439560439560442e-05, + "loss": 0.3201, + "step": 14968 + }, + { + "epoch": 41.12362637362637, + "grad_norm": 14.636093139648438, + "learning_rate": 2.9438186813186812e-05, + "loss": 0.5259, + "step": 14969 + }, + { + "epoch": 41.12637362637363, + "grad_norm": 9.25109577178955, + "learning_rate": 2.943681318681319e-05, + "loss": 0.1617, + "step": 14970 + }, + { + "epoch": 41.129120879120876, + "grad_norm": 4.363882064819336, + "learning_rate": 2.943543956043956e-05, + "loss": 0.0954, + "step": 14971 + }, + { + "epoch": 41.13186813186813, + "grad_norm": 14.978073120117188, + "learning_rate": 2.9434065934065936e-05, + "loss": 0.5653, + "step": 14972 + }, + { + "epoch": 41.13461538461539, + "grad_norm": 16.256519317626953, + "learning_rate": 2.9432692307692313e-05, + "loss": 0.4655, + "step": 14973 + }, + { + "epoch": 41.137362637362635, + "grad_norm": 12.350983619689941, + "learning_rate": 2.9431318681318683e-05, + "loss": 0.1922, + "step": 14974 + }, + { + "epoch": 41.14010989010989, + "grad_norm": 14.038064956665039, + "learning_rate": 2.9429945054945056e-05, + "loss": 0.4018, + "step": 14975 + }, + { + "epoch": 41.142857142857146, + "grad_norm": 9.972140312194824, + "learning_rate": 2.9428571428571426e-05, + "loss": 0.308, + "step": 14976 + }, + { + "epoch": 41.145604395604394, + "grad_norm": 7.674876689910889, + "learning_rate": 2.9427197802197803e-05, + "loss": 0.243, + "step": 14977 + }, + { + "epoch": 41.14835164835165, + "grad_norm": 7.635115146636963, + "learning_rate": 2.942582417582418e-05, + "loss": 0.1961, + "step": 14978 + }, + { + "epoch": 41.1510989010989, + "grad_norm": 11.271135330200195, + "learning_rate": 2.942445054945055e-05, + "loss": 0.2594, + "step": 14979 + }, + { + "epoch": 41.15384615384615, + "grad_norm": 16.796220779418945, + "learning_rate": 2.9423076923076926e-05, + "loss": 0.3454, + "step": 14980 + }, + { + "epoch": 41.15659340659341, + "grad_norm": 13.610023498535156, + "learning_rate": 2.9421703296703297e-05, + "loss": 0.2808, + "step": 14981 + }, + { + "epoch": 41.15934065934066, + "grad_norm": 14.644647598266602, + "learning_rate": 2.942032967032967e-05, + "loss": 0.3025, + "step": 14982 + }, + { + "epoch": 41.16208791208791, + "grad_norm": 13.01927661895752, + "learning_rate": 2.9418956043956047e-05, + "loss": 0.4569, + "step": 14983 + }, + { + "epoch": 41.16483516483517, + "grad_norm": 4.059274673461914, + "learning_rate": 2.9417582417582417e-05, + "loss": 0.0719, + "step": 14984 + }, + { + "epoch": 41.167582417582416, + "grad_norm": 19.627531051635742, + "learning_rate": 2.9416208791208794e-05, + "loss": 0.5255, + "step": 14985 + }, + { + "epoch": 41.17032967032967, + "grad_norm": 17.145172119140625, + "learning_rate": 2.9414835164835164e-05, + "loss": 0.5433, + "step": 14986 + }, + { + "epoch": 41.17307692307692, + "grad_norm": 4.733212471008301, + "learning_rate": 2.941346153846154e-05, + "loss": 0.0596, + "step": 14987 + }, + { + "epoch": 41.175824175824175, + "grad_norm": 19.21042823791504, + "learning_rate": 2.9412087912087917e-05, + "loss": 0.5939, + "step": 14988 + }, + { + "epoch": 41.17857142857143, + "grad_norm": 7.518918037414551, + "learning_rate": 2.9410714285714287e-05, + "loss": 0.2397, + "step": 14989 + }, + { + "epoch": 41.18131868131868, + "grad_norm": 6.239484786987305, + "learning_rate": 2.940934065934066e-05, + "loss": 0.1963, + "step": 14990 + }, + { + "epoch": 41.184065934065934, + "grad_norm": 8.202617645263672, + "learning_rate": 2.940796703296703e-05, + "loss": 0.1582, + "step": 14991 + }, + { + "epoch": 41.18681318681319, + "grad_norm": 15.329265594482422, + "learning_rate": 2.9406593406593407e-05, + "loss": 0.5015, + "step": 14992 + }, + { + "epoch": 41.18956043956044, + "grad_norm": 19.610490798950195, + "learning_rate": 2.9405219780219784e-05, + "loss": 0.4172, + "step": 14993 + }, + { + "epoch": 41.19230769230769, + "grad_norm": 8.70756721496582, + "learning_rate": 2.9403846153846154e-05, + "loss": 0.1943, + "step": 14994 + }, + { + "epoch": 41.19505494505494, + "grad_norm": 13.187283515930176, + "learning_rate": 2.940247252747253e-05, + "loss": 0.5293, + "step": 14995 + }, + { + "epoch": 41.1978021978022, + "grad_norm": 9.224002838134766, + "learning_rate": 2.94010989010989e-05, + "loss": 0.2481, + "step": 14996 + }, + { + "epoch": 41.20054945054945, + "grad_norm": 5.009440898895264, + "learning_rate": 2.9399725274725274e-05, + "loss": 0.0719, + "step": 14997 + }, + { + "epoch": 41.2032967032967, + "grad_norm": 11.41576862335205, + "learning_rate": 2.939835164835165e-05, + "loss": 0.1874, + "step": 14998 + }, + { + "epoch": 41.206043956043956, + "grad_norm": 12.396479606628418, + "learning_rate": 2.939697802197802e-05, + "loss": 0.424, + "step": 14999 + }, + { + "epoch": 41.20879120879121, + "grad_norm": 7.010439395904541, + "learning_rate": 2.9395604395604398e-05, + "loss": 0.1448, + "step": 15000 + }, + { + "epoch": 41.21153846153846, + "grad_norm": 11.68088150024414, + "learning_rate": 2.9394230769230768e-05, + "loss": 0.2532, + "step": 15001 + }, + { + "epoch": 41.214285714285715, + "grad_norm": 8.106027603149414, + "learning_rate": 2.9392857142857145e-05, + "loss": 0.2683, + "step": 15002 + }, + { + "epoch": 41.217032967032964, + "grad_norm": 11.44754409790039, + "learning_rate": 2.939148351648352e-05, + "loss": 0.2708, + "step": 15003 + }, + { + "epoch": 41.21978021978022, + "grad_norm": 6.921726226806641, + "learning_rate": 2.939010989010989e-05, + "loss": 0.1151, + "step": 15004 + }, + { + "epoch": 41.222527472527474, + "grad_norm": 4.102952003479004, + "learning_rate": 2.9388736263736265e-05, + "loss": 0.0832, + "step": 15005 + }, + { + "epoch": 41.22527472527472, + "grad_norm": 7.2022013664245605, + "learning_rate": 2.9387362637362635e-05, + "loss": 0.2622, + "step": 15006 + }, + { + "epoch": 41.22802197802198, + "grad_norm": 3.7927215099334717, + "learning_rate": 2.9385989010989012e-05, + "loss": 0.0802, + "step": 15007 + }, + { + "epoch": 41.23076923076923, + "grad_norm": 15.125860214233398, + "learning_rate": 2.938461538461539e-05, + "loss": 0.5148, + "step": 15008 + }, + { + "epoch": 41.23351648351648, + "grad_norm": 7.173644542694092, + "learning_rate": 2.938324175824176e-05, + "loss": 0.1472, + "step": 15009 + }, + { + "epoch": 41.23626373626374, + "grad_norm": 19.084075927734375, + "learning_rate": 2.9381868131868135e-05, + "loss": 0.4303, + "step": 15010 + }, + { + "epoch": 41.239010989010985, + "grad_norm": 20.474239349365234, + "learning_rate": 2.9380494505494505e-05, + "loss": 0.6394, + "step": 15011 + }, + { + "epoch": 41.24175824175824, + "grad_norm": 18.690507888793945, + "learning_rate": 2.937912087912088e-05, + "loss": 0.489, + "step": 15012 + }, + { + "epoch": 41.244505494505496, + "grad_norm": 13.517953872680664, + "learning_rate": 2.9377747252747256e-05, + "loss": 0.4055, + "step": 15013 + }, + { + "epoch": 41.247252747252745, + "grad_norm": 14.726604461669922, + "learning_rate": 2.9376373626373626e-05, + "loss": 0.3787, + "step": 15014 + }, + { + "epoch": 41.25, + "grad_norm": 3.944321632385254, + "learning_rate": 2.9375000000000003e-05, + "loss": 0.0635, + "step": 15015 + }, + { + "epoch": 41.252747252747255, + "grad_norm": 6.060327053070068, + "learning_rate": 2.9373626373626373e-05, + "loss": 0.1686, + "step": 15016 + }, + { + "epoch": 41.255494505494504, + "grad_norm": 14.97139835357666, + "learning_rate": 2.937225274725275e-05, + "loss": 0.3345, + "step": 15017 + }, + { + "epoch": 41.25824175824176, + "grad_norm": 4.131651401519775, + "learning_rate": 2.9370879120879126e-05, + "loss": 0.0897, + "step": 15018 + }, + { + "epoch": 41.260989010989015, + "grad_norm": 7.084799289703369, + "learning_rate": 2.9369505494505496e-05, + "loss": 0.1917, + "step": 15019 + }, + { + "epoch": 41.26373626373626, + "grad_norm": 15.904537200927734, + "learning_rate": 2.936813186813187e-05, + "loss": 0.5927, + "step": 15020 + }, + { + "epoch": 41.26648351648352, + "grad_norm": 15.360403060913086, + "learning_rate": 2.936675824175824e-05, + "loss": 0.3799, + "step": 15021 + }, + { + "epoch": 41.26923076923077, + "grad_norm": 10.684798240661621, + "learning_rate": 2.9365384615384616e-05, + "loss": 0.2101, + "step": 15022 + }, + { + "epoch": 41.27197802197802, + "grad_norm": 10.593915939331055, + "learning_rate": 2.9364010989010993e-05, + "loss": 0.1187, + "step": 15023 + }, + { + "epoch": 41.27472527472528, + "grad_norm": 14.139348030090332, + "learning_rate": 2.9362637362637363e-05, + "loss": 0.2579, + "step": 15024 + }, + { + "epoch": 41.277472527472526, + "grad_norm": 8.3692045211792, + "learning_rate": 2.936126373626374e-05, + "loss": 0.2082, + "step": 15025 + }, + { + "epoch": 41.28021978021978, + "grad_norm": 15.353959083557129, + "learning_rate": 2.935989010989011e-05, + "loss": 0.2372, + "step": 15026 + }, + { + "epoch": 41.282967032967036, + "grad_norm": 19.145109176635742, + "learning_rate": 2.9358516483516483e-05, + "loss": 0.6363, + "step": 15027 + }, + { + "epoch": 41.285714285714285, + "grad_norm": 22.355838775634766, + "learning_rate": 2.935714285714286e-05, + "loss": 0.8532, + "step": 15028 + }, + { + "epoch": 41.28846153846154, + "grad_norm": 15.768282890319824, + "learning_rate": 2.935576923076923e-05, + "loss": 0.3499, + "step": 15029 + }, + { + "epoch": 41.29120879120879, + "grad_norm": 10.21997356414795, + "learning_rate": 2.9354395604395607e-05, + "loss": 0.2616, + "step": 15030 + }, + { + "epoch": 41.293956043956044, + "grad_norm": 15.346979141235352, + "learning_rate": 2.9353021978021977e-05, + "loss": 0.1983, + "step": 15031 + }, + { + "epoch": 41.2967032967033, + "grad_norm": 16.285015106201172, + "learning_rate": 2.9351648351648354e-05, + "loss": 0.7345, + "step": 15032 + }, + { + "epoch": 41.29945054945055, + "grad_norm": 15.74637508392334, + "learning_rate": 2.935027472527473e-05, + "loss": 0.3029, + "step": 15033 + }, + { + "epoch": 41.3021978021978, + "grad_norm": 2.754185199737549, + "learning_rate": 2.93489010989011e-05, + "loss": 0.0481, + "step": 15034 + }, + { + "epoch": 41.30494505494506, + "grad_norm": 18.504587173461914, + "learning_rate": 2.9347527472527474e-05, + "loss": 0.3564, + "step": 15035 + }, + { + "epoch": 41.30769230769231, + "grad_norm": 13.766457557678223, + "learning_rate": 2.9346153846153844e-05, + "loss": 0.2021, + "step": 15036 + }, + { + "epoch": 41.31043956043956, + "grad_norm": 6.0442962646484375, + "learning_rate": 2.934478021978022e-05, + "loss": 0.1293, + "step": 15037 + }, + { + "epoch": 41.31318681318681, + "grad_norm": 13.40839672088623, + "learning_rate": 2.9343406593406598e-05, + "loss": 0.4407, + "step": 15038 + }, + { + "epoch": 41.315934065934066, + "grad_norm": 14.164055824279785, + "learning_rate": 2.9342032967032968e-05, + "loss": 0.1868, + "step": 15039 + }, + { + "epoch": 41.31868131868132, + "grad_norm": 6.0861639976501465, + "learning_rate": 2.9340659340659344e-05, + "loss": 0.1241, + "step": 15040 + }, + { + "epoch": 41.32142857142857, + "grad_norm": 14.853589057922363, + "learning_rate": 2.9339285714285714e-05, + "loss": 0.3741, + "step": 15041 + }, + { + "epoch": 41.324175824175825, + "grad_norm": 9.829648971557617, + "learning_rate": 2.9337912087912088e-05, + "loss": 0.1332, + "step": 15042 + }, + { + "epoch": 41.32692307692308, + "grad_norm": 11.50899600982666, + "learning_rate": 2.9336538461538465e-05, + "loss": 0.2531, + "step": 15043 + }, + { + "epoch": 41.32967032967033, + "grad_norm": 14.118827819824219, + "learning_rate": 2.9335164835164835e-05, + "loss": 0.3123, + "step": 15044 + }, + { + "epoch": 41.332417582417584, + "grad_norm": 16.396196365356445, + "learning_rate": 2.933379120879121e-05, + "loss": 0.3473, + "step": 15045 + }, + { + "epoch": 41.33516483516483, + "grad_norm": 9.539790153503418, + "learning_rate": 2.933241758241758e-05, + "loss": 0.2445, + "step": 15046 + }, + { + "epoch": 41.33791208791209, + "grad_norm": 15.696150779724121, + "learning_rate": 2.9331043956043958e-05, + "loss": 0.3693, + "step": 15047 + }, + { + "epoch": 41.34065934065934, + "grad_norm": 25.399883270263672, + "learning_rate": 2.9329670329670335e-05, + "loss": 1.009, + "step": 15048 + }, + { + "epoch": 41.34340659340659, + "grad_norm": 5.756139278411865, + "learning_rate": 2.9328296703296705e-05, + "loss": 0.1249, + "step": 15049 + }, + { + "epoch": 41.34615384615385, + "grad_norm": 9.622876167297363, + "learning_rate": 2.932692307692308e-05, + "loss": 0.2626, + "step": 15050 + }, + { + "epoch": 41.3489010989011, + "grad_norm": 11.421133995056152, + "learning_rate": 2.932554945054945e-05, + "loss": 0.2784, + "step": 15051 + }, + { + "epoch": 41.35164835164835, + "grad_norm": 8.200340270996094, + "learning_rate": 2.9324175824175825e-05, + "loss": 0.1292, + "step": 15052 + }, + { + "epoch": 41.354395604395606, + "grad_norm": 2.3987526893615723, + "learning_rate": 2.9322802197802202e-05, + "loss": 0.0445, + "step": 15053 + }, + { + "epoch": 41.357142857142854, + "grad_norm": 13.888603210449219, + "learning_rate": 2.9321428571428572e-05, + "loss": 0.2584, + "step": 15054 + }, + { + "epoch": 41.35989010989011, + "grad_norm": 14.708780288696289, + "learning_rate": 2.932005494505495e-05, + "loss": 0.2362, + "step": 15055 + }, + { + "epoch": 41.362637362637365, + "grad_norm": 11.321642875671387, + "learning_rate": 2.931868131868132e-05, + "loss": 0.1467, + "step": 15056 + }, + { + "epoch": 41.36538461538461, + "grad_norm": 13.659170150756836, + "learning_rate": 2.9317307692307692e-05, + "loss": 0.3054, + "step": 15057 + }, + { + "epoch": 41.36813186813187, + "grad_norm": 6.527159690856934, + "learning_rate": 2.931593406593407e-05, + "loss": 0.1089, + "step": 15058 + }, + { + "epoch": 41.370879120879124, + "grad_norm": 8.595970153808594, + "learning_rate": 2.931456043956044e-05, + "loss": 0.1255, + "step": 15059 + }, + { + "epoch": 41.37362637362637, + "grad_norm": 13.641311645507812, + "learning_rate": 2.9313186813186816e-05, + "loss": 0.3129, + "step": 15060 + }, + { + "epoch": 41.37637362637363, + "grad_norm": 5.678434371948242, + "learning_rate": 2.9311813186813186e-05, + "loss": 0.1225, + "step": 15061 + }, + { + "epoch": 41.379120879120876, + "grad_norm": 4.547367572784424, + "learning_rate": 2.9310439560439563e-05, + "loss": 0.1286, + "step": 15062 + }, + { + "epoch": 41.38186813186813, + "grad_norm": 13.911660194396973, + "learning_rate": 2.930906593406594e-05, + "loss": 0.4512, + "step": 15063 + }, + { + "epoch": 41.38461538461539, + "grad_norm": 20.506877899169922, + "learning_rate": 2.930769230769231e-05, + "loss": 0.7432, + "step": 15064 + }, + { + "epoch": 41.387362637362635, + "grad_norm": 7.179445743560791, + "learning_rate": 2.9306318681318683e-05, + "loss": 0.1317, + "step": 15065 + }, + { + "epoch": 41.39010989010989, + "grad_norm": 25.89524269104004, + "learning_rate": 2.9304945054945053e-05, + "loss": 0.8769, + "step": 15066 + }, + { + "epoch": 41.392857142857146, + "grad_norm": 8.642451286315918, + "learning_rate": 2.930357142857143e-05, + "loss": 0.215, + "step": 15067 + }, + { + "epoch": 41.395604395604394, + "grad_norm": 17.512971878051758, + "learning_rate": 2.9302197802197807e-05, + "loss": 0.4216, + "step": 15068 + }, + { + "epoch": 41.39835164835165, + "grad_norm": 6.524459362030029, + "learning_rate": 2.9300824175824177e-05, + "loss": 0.1203, + "step": 15069 + }, + { + "epoch": 41.4010989010989, + "grad_norm": 12.031135559082031, + "learning_rate": 2.9299450549450553e-05, + "loss": 0.4444, + "step": 15070 + }, + { + "epoch": 41.40384615384615, + "grad_norm": 8.254566192626953, + "learning_rate": 2.9298076923076923e-05, + "loss": 0.2382, + "step": 15071 + }, + { + "epoch": 41.40659340659341, + "grad_norm": 3.3802130222320557, + "learning_rate": 2.9296703296703297e-05, + "loss": 0.0488, + "step": 15072 + }, + { + "epoch": 41.40934065934066, + "grad_norm": 25.40291976928711, + "learning_rate": 2.9295329670329674e-05, + "loss": 0.4807, + "step": 15073 + }, + { + "epoch": 41.41208791208791, + "grad_norm": 4.824748992919922, + "learning_rate": 2.9293956043956044e-05, + "loss": 0.0685, + "step": 15074 + }, + { + "epoch": 41.41483516483517, + "grad_norm": 5.417832851409912, + "learning_rate": 2.929258241758242e-05, + "loss": 0.0788, + "step": 15075 + }, + { + "epoch": 41.417582417582416, + "grad_norm": 8.472315788269043, + "learning_rate": 2.929120879120879e-05, + "loss": 0.2564, + "step": 15076 + }, + { + "epoch": 41.42032967032967, + "grad_norm": 20.887161254882812, + "learning_rate": 2.9289835164835167e-05, + "loss": 0.6288, + "step": 15077 + }, + { + "epoch": 41.42307692307692, + "grad_norm": 8.835000038146973, + "learning_rate": 2.9288461538461544e-05, + "loss": 0.2267, + "step": 15078 + }, + { + "epoch": 41.425824175824175, + "grad_norm": 8.981173515319824, + "learning_rate": 2.9287087912087914e-05, + "loss": 0.2092, + "step": 15079 + }, + { + "epoch": 41.42857142857143, + "grad_norm": 24.07554054260254, + "learning_rate": 2.9285714285714288e-05, + "loss": 1.1163, + "step": 15080 + }, + { + "epoch": 41.43131868131868, + "grad_norm": 7.207056045532227, + "learning_rate": 2.9284340659340658e-05, + "loss": 0.1787, + "step": 15081 + }, + { + "epoch": 41.434065934065934, + "grad_norm": 9.43759822845459, + "learning_rate": 2.9282967032967034e-05, + "loss": 0.2412, + "step": 15082 + }, + { + "epoch": 41.43681318681319, + "grad_norm": 12.03708553314209, + "learning_rate": 2.928159340659341e-05, + "loss": 0.1527, + "step": 15083 + }, + { + "epoch": 41.43956043956044, + "grad_norm": 19.75638771057129, + "learning_rate": 2.928021978021978e-05, + "loss": 0.6009, + "step": 15084 + }, + { + "epoch": 41.44230769230769, + "grad_norm": 18.492704391479492, + "learning_rate": 2.9278846153846158e-05, + "loss": 0.4443, + "step": 15085 + }, + { + "epoch": 41.44505494505494, + "grad_norm": 3.564040422439575, + "learning_rate": 2.9277472527472528e-05, + "loss": 0.0926, + "step": 15086 + }, + { + "epoch": 41.4478021978022, + "grad_norm": 3.2077882289886475, + "learning_rate": 2.92760989010989e-05, + "loss": 0.0704, + "step": 15087 + }, + { + "epoch": 41.45054945054945, + "grad_norm": 8.741981506347656, + "learning_rate": 2.9274725274725278e-05, + "loss": 0.1878, + "step": 15088 + }, + { + "epoch": 41.4532967032967, + "grad_norm": 6.958609580993652, + "learning_rate": 2.9273351648351648e-05, + "loss": 0.1883, + "step": 15089 + }, + { + "epoch": 41.456043956043956, + "grad_norm": 6.30433464050293, + "learning_rate": 2.9271978021978025e-05, + "loss": 0.131, + "step": 15090 + }, + { + "epoch": 41.45879120879121, + "grad_norm": 10.099464416503906, + "learning_rate": 2.9270604395604395e-05, + "loss": 0.2061, + "step": 15091 + }, + { + "epoch": 41.46153846153846, + "grad_norm": 5.7801194190979, + "learning_rate": 2.9269230769230772e-05, + "loss": 0.1409, + "step": 15092 + }, + { + "epoch": 41.464285714285715, + "grad_norm": 19.652292251586914, + "learning_rate": 2.9267857142857145e-05, + "loss": 0.6602, + "step": 15093 + }, + { + "epoch": 41.467032967032964, + "grad_norm": 16.9371280670166, + "learning_rate": 2.926648351648352e-05, + "loss": 0.258, + "step": 15094 + }, + { + "epoch": 41.46978021978022, + "grad_norm": 11.283180236816406, + "learning_rate": 2.9265109890109892e-05, + "loss": 0.3291, + "step": 15095 + }, + { + "epoch": 41.472527472527474, + "grad_norm": 13.069146156311035, + "learning_rate": 2.9263736263736262e-05, + "loss": 0.2084, + "step": 15096 + }, + { + "epoch": 41.47527472527472, + "grad_norm": 8.102466583251953, + "learning_rate": 2.926236263736264e-05, + "loss": 0.0801, + "step": 15097 + }, + { + "epoch": 41.47802197802198, + "grad_norm": 19.658565521240234, + "learning_rate": 2.9260989010989016e-05, + "loss": 0.5822, + "step": 15098 + }, + { + "epoch": 41.48076923076923, + "grad_norm": 7.343356132507324, + "learning_rate": 2.9259615384615386e-05, + "loss": 0.186, + "step": 15099 + }, + { + "epoch": 41.48351648351648, + "grad_norm": 13.231341361999512, + "learning_rate": 2.9258241758241762e-05, + "loss": 0.3916, + "step": 15100 + }, + { + "epoch": 41.48626373626374, + "grad_norm": 14.623147964477539, + "learning_rate": 2.9256868131868132e-05, + "loss": 0.3815, + "step": 15101 + }, + { + "epoch": 41.489010989010985, + "grad_norm": 13.514344215393066, + "learning_rate": 2.9255494505494506e-05, + "loss": 0.3457, + "step": 15102 + }, + { + "epoch": 41.49175824175824, + "grad_norm": 14.837995529174805, + "learning_rate": 2.9254120879120883e-05, + "loss": 0.212, + "step": 15103 + }, + { + "epoch": 41.494505494505496, + "grad_norm": 9.665617942810059, + "learning_rate": 2.9252747252747253e-05, + "loss": 0.1442, + "step": 15104 + }, + { + "epoch": 41.497252747252745, + "grad_norm": 7.928717136383057, + "learning_rate": 2.925137362637363e-05, + "loss": 0.1382, + "step": 15105 + }, + { + "epoch": 41.5, + "grad_norm": 12.088995933532715, + "learning_rate": 2.925e-05, + "loss": 0.3468, + "step": 15106 + }, + { + "epoch": 41.502747252747255, + "grad_norm": 8.915684700012207, + "learning_rate": 2.9248626373626376e-05, + "loss": 0.1487, + "step": 15107 + }, + { + "epoch": 41.505494505494504, + "grad_norm": 17.075504302978516, + "learning_rate": 2.924725274725275e-05, + "loss": 0.4996, + "step": 15108 + }, + { + "epoch": 41.50824175824176, + "grad_norm": 12.707450866699219, + "learning_rate": 2.9245879120879123e-05, + "loss": 0.3157, + "step": 15109 + }, + { + "epoch": 41.51098901098901, + "grad_norm": 16.884138107299805, + "learning_rate": 2.9244505494505496e-05, + "loss": 0.3105, + "step": 15110 + }, + { + "epoch": 41.51373626373626, + "grad_norm": 25.246252059936523, + "learning_rate": 2.9243131868131867e-05, + "loss": 0.7842, + "step": 15111 + }, + { + "epoch": 41.51648351648352, + "grad_norm": 26.588973999023438, + "learning_rate": 2.9241758241758243e-05, + "loss": 0.591, + "step": 15112 + }, + { + "epoch": 41.51923076923077, + "grad_norm": 4.936933994293213, + "learning_rate": 2.924038461538462e-05, + "loss": 0.0656, + "step": 15113 + }, + { + "epoch": 41.52197802197802, + "grad_norm": 22.159072875976562, + "learning_rate": 2.923901098901099e-05, + "loss": 0.5589, + "step": 15114 + }, + { + "epoch": 41.52472527472528, + "grad_norm": 28.677213668823242, + "learning_rate": 2.9237637362637367e-05, + "loss": 1.0317, + "step": 15115 + }, + { + "epoch": 41.527472527472526, + "grad_norm": 7.255558013916016, + "learning_rate": 2.9236263736263737e-05, + "loss": 0.1578, + "step": 15116 + }, + { + "epoch": 41.53021978021978, + "grad_norm": 7.399598598480225, + "learning_rate": 2.923489010989011e-05, + "loss": 0.129, + "step": 15117 + }, + { + "epoch": 41.532967032967036, + "grad_norm": 8.037327766418457, + "learning_rate": 2.9233516483516487e-05, + "loss": 0.3357, + "step": 15118 + }, + { + "epoch": 41.535714285714285, + "grad_norm": 10.35141372680664, + "learning_rate": 2.9232142857142857e-05, + "loss": 0.3551, + "step": 15119 + }, + { + "epoch": 41.53846153846154, + "grad_norm": 12.66061019897461, + "learning_rate": 2.9230769230769234e-05, + "loss": 0.4849, + "step": 15120 + }, + { + "epoch": 41.54120879120879, + "grad_norm": 18.1637020111084, + "learning_rate": 2.9229395604395604e-05, + "loss": 0.6186, + "step": 15121 + }, + { + "epoch": 41.543956043956044, + "grad_norm": 12.24120044708252, + "learning_rate": 2.922802197802198e-05, + "loss": 0.1618, + "step": 15122 + }, + { + "epoch": 41.5467032967033, + "grad_norm": 3.7027764320373535, + "learning_rate": 2.9226648351648354e-05, + "loss": 0.0499, + "step": 15123 + }, + { + "epoch": 41.54945054945055, + "grad_norm": 9.965867042541504, + "learning_rate": 2.9225274725274728e-05, + "loss": 0.1514, + "step": 15124 + }, + { + "epoch": 41.5521978021978, + "grad_norm": 6.416388988494873, + "learning_rate": 2.92239010989011e-05, + "loss": 0.2071, + "step": 15125 + }, + { + "epoch": 41.55494505494506, + "grad_norm": 18.015844345092773, + "learning_rate": 2.922252747252747e-05, + "loss": 0.4188, + "step": 15126 + }, + { + "epoch": 41.55769230769231, + "grad_norm": 12.519267082214355, + "learning_rate": 2.9221153846153848e-05, + "loss": 0.4329, + "step": 15127 + }, + { + "epoch": 41.56043956043956, + "grad_norm": 11.195757865905762, + "learning_rate": 2.9219780219780225e-05, + "loss": 0.2127, + "step": 15128 + }, + { + "epoch": 41.56318681318681, + "grad_norm": 13.509904861450195, + "learning_rate": 2.9218406593406595e-05, + "loss": 0.2029, + "step": 15129 + }, + { + "epoch": 41.565934065934066, + "grad_norm": 20.08000373840332, + "learning_rate": 2.921703296703297e-05, + "loss": 0.961, + "step": 15130 + }, + { + "epoch": 41.56868131868132, + "grad_norm": 11.775564193725586, + "learning_rate": 2.921565934065934e-05, + "loss": 0.3682, + "step": 15131 + }, + { + "epoch": 41.57142857142857, + "grad_norm": 13.515804290771484, + "learning_rate": 2.9214285714285715e-05, + "loss": 0.3491, + "step": 15132 + }, + { + "epoch": 41.574175824175825, + "grad_norm": 15.821109771728516, + "learning_rate": 2.9212912087912088e-05, + "loss": 0.6589, + "step": 15133 + }, + { + "epoch": 41.57692307692308, + "grad_norm": 14.78685188293457, + "learning_rate": 2.921153846153846e-05, + "loss": 0.3574, + "step": 15134 + }, + { + "epoch": 41.57967032967033, + "grad_norm": 11.5833101272583, + "learning_rate": 2.921016483516484e-05, + "loss": 0.3035, + "step": 15135 + }, + { + "epoch": 41.582417582417584, + "grad_norm": 17.142202377319336, + "learning_rate": 2.920879120879121e-05, + "loss": 0.3422, + "step": 15136 + }, + { + "epoch": 41.58516483516483, + "grad_norm": 10.091214179992676, + "learning_rate": 2.9207417582417585e-05, + "loss": 0.1831, + "step": 15137 + }, + { + "epoch": 41.58791208791209, + "grad_norm": 15.147793769836426, + "learning_rate": 2.9206043956043955e-05, + "loss": 0.3835, + "step": 15138 + }, + { + "epoch": 41.59065934065934, + "grad_norm": 7.252460956573486, + "learning_rate": 2.9204670329670332e-05, + "loss": 0.1317, + "step": 15139 + }, + { + "epoch": 41.59340659340659, + "grad_norm": 10.842906951904297, + "learning_rate": 2.9203296703296705e-05, + "loss": 0.3161, + "step": 15140 + }, + { + "epoch": 41.59615384615385, + "grad_norm": 10.357043266296387, + "learning_rate": 2.9201923076923075e-05, + "loss": 0.2749, + "step": 15141 + }, + { + "epoch": 41.5989010989011, + "grad_norm": 15.564537048339844, + "learning_rate": 2.9200549450549452e-05, + "loss": 0.2893, + "step": 15142 + }, + { + "epoch": 41.60164835164835, + "grad_norm": 13.217684745788574, + "learning_rate": 2.9199175824175822e-05, + "loss": 0.5189, + "step": 15143 + }, + { + "epoch": 41.604395604395606, + "grad_norm": 15.62958812713623, + "learning_rate": 2.91978021978022e-05, + "loss": 0.4741, + "step": 15144 + }, + { + "epoch": 41.607142857142854, + "grad_norm": 14.60549545288086, + "learning_rate": 2.9196428571428576e-05, + "loss": 0.3044, + "step": 15145 + }, + { + "epoch": 41.60989010989011, + "grad_norm": 10.055486679077148, + "learning_rate": 2.9195054945054946e-05, + "loss": 0.1925, + "step": 15146 + }, + { + "epoch": 41.612637362637365, + "grad_norm": 8.901859283447266, + "learning_rate": 2.919368131868132e-05, + "loss": 0.1449, + "step": 15147 + }, + { + "epoch": 41.61538461538461, + "grad_norm": 13.765636444091797, + "learning_rate": 2.919230769230769e-05, + "loss": 0.3352, + "step": 15148 + }, + { + "epoch": 41.61813186813187, + "grad_norm": 15.791524887084961, + "learning_rate": 2.9190934065934066e-05, + "loss": 0.5321, + "step": 15149 + }, + { + "epoch": 41.620879120879124, + "grad_norm": 8.975919723510742, + "learning_rate": 2.9189560439560443e-05, + "loss": 0.2133, + "step": 15150 + }, + { + "epoch": 41.62362637362637, + "grad_norm": 12.086139678955078, + "learning_rate": 2.9188186813186813e-05, + "loss": 0.2663, + "step": 15151 + }, + { + "epoch": 41.62637362637363, + "grad_norm": 14.379966735839844, + "learning_rate": 2.918681318681319e-05, + "loss": 0.2607, + "step": 15152 + }, + { + "epoch": 41.629120879120876, + "grad_norm": 9.494826316833496, + "learning_rate": 2.918543956043956e-05, + "loss": 0.2236, + "step": 15153 + }, + { + "epoch": 41.63186813186813, + "grad_norm": 13.403681755065918, + "learning_rate": 2.9184065934065937e-05, + "loss": 0.3937, + "step": 15154 + }, + { + "epoch": 41.63461538461539, + "grad_norm": 20.061052322387695, + "learning_rate": 2.918269230769231e-05, + "loss": 0.322, + "step": 15155 + }, + { + "epoch": 41.637362637362635, + "grad_norm": 14.391279220581055, + "learning_rate": 2.918131868131868e-05, + "loss": 0.337, + "step": 15156 + }, + { + "epoch": 41.64010989010989, + "grad_norm": 13.691137313842773, + "learning_rate": 2.9179945054945057e-05, + "loss": 0.2618, + "step": 15157 + }, + { + "epoch": 41.642857142857146, + "grad_norm": 15.407308578491211, + "learning_rate": 2.9178571428571427e-05, + "loss": 0.3836, + "step": 15158 + }, + { + "epoch": 41.645604395604394, + "grad_norm": 9.375140190124512, + "learning_rate": 2.9177197802197804e-05, + "loss": 0.2263, + "step": 15159 + }, + { + "epoch": 41.64835164835165, + "grad_norm": 6.421125888824463, + "learning_rate": 2.917582417582418e-05, + "loss": 0.2393, + "step": 15160 + }, + { + "epoch": 41.6510989010989, + "grad_norm": 15.770298957824707, + "learning_rate": 2.917445054945055e-05, + "loss": 0.9705, + "step": 15161 + }, + { + "epoch": 41.65384615384615, + "grad_norm": 23.79906463623047, + "learning_rate": 2.9173076923076924e-05, + "loss": 0.7318, + "step": 15162 + }, + { + "epoch": 41.65659340659341, + "grad_norm": 14.851798057556152, + "learning_rate": 2.9171703296703294e-05, + "loss": 0.315, + "step": 15163 + }, + { + "epoch": 41.65934065934066, + "grad_norm": 18.444849014282227, + "learning_rate": 2.917032967032967e-05, + "loss": 0.6657, + "step": 15164 + }, + { + "epoch": 41.66208791208791, + "grad_norm": 4.793229579925537, + "learning_rate": 2.9168956043956047e-05, + "loss": 0.0641, + "step": 15165 + }, + { + "epoch": 41.66483516483517, + "grad_norm": 10.67817497253418, + "learning_rate": 2.9167582417582417e-05, + "loss": 0.3371, + "step": 15166 + }, + { + "epoch": 41.667582417582416, + "grad_norm": 15.604864120483398, + "learning_rate": 2.9166208791208794e-05, + "loss": 0.6016, + "step": 15167 + }, + { + "epoch": 41.67032967032967, + "grad_norm": 9.801040649414062, + "learning_rate": 2.9164835164835164e-05, + "loss": 0.2502, + "step": 15168 + }, + { + "epoch": 41.67307692307692, + "grad_norm": 8.752243995666504, + "learning_rate": 2.916346153846154e-05, + "loss": 0.145, + "step": 15169 + }, + { + "epoch": 41.675824175824175, + "grad_norm": 9.183998107910156, + "learning_rate": 2.9162087912087914e-05, + "loss": 0.1601, + "step": 15170 + }, + { + "epoch": 41.67857142857143, + "grad_norm": 14.965824127197266, + "learning_rate": 2.9160714285714284e-05, + "loss": 0.3823, + "step": 15171 + }, + { + "epoch": 41.68131868131868, + "grad_norm": 18.384977340698242, + "learning_rate": 2.915934065934066e-05, + "loss": 0.3967, + "step": 15172 + }, + { + "epoch": 41.684065934065934, + "grad_norm": 19.479551315307617, + "learning_rate": 2.915796703296703e-05, + "loss": 0.615, + "step": 15173 + }, + { + "epoch": 41.68681318681319, + "grad_norm": 7.117578983306885, + "learning_rate": 2.9156593406593408e-05, + "loss": 0.1769, + "step": 15174 + }, + { + "epoch": 41.68956043956044, + "grad_norm": 5.92280387878418, + "learning_rate": 2.9155219780219785e-05, + "loss": 0.1585, + "step": 15175 + }, + { + "epoch": 41.69230769230769, + "grad_norm": 24.154537200927734, + "learning_rate": 2.9153846153846155e-05, + "loss": 0.7176, + "step": 15176 + }, + { + "epoch": 41.69505494505494, + "grad_norm": 17.92036247253418, + "learning_rate": 2.9152472527472528e-05, + "loss": 0.7251, + "step": 15177 + }, + { + "epoch": 41.6978021978022, + "grad_norm": 23.34292984008789, + "learning_rate": 2.91510989010989e-05, + "loss": 0.411, + "step": 15178 + }, + { + "epoch": 41.70054945054945, + "grad_norm": 30.74140167236328, + "learning_rate": 2.9149725274725275e-05, + "loss": 1.0699, + "step": 15179 + }, + { + "epoch": 41.7032967032967, + "grad_norm": 11.43822193145752, + "learning_rate": 2.9148351648351652e-05, + "loss": 0.3064, + "step": 15180 + }, + { + "epoch": 41.706043956043956, + "grad_norm": 13.307406425476074, + "learning_rate": 2.9146978021978022e-05, + "loss": 0.3732, + "step": 15181 + }, + { + "epoch": 41.70879120879121, + "grad_norm": 8.598546028137207, + "learning_rate": 2.91456043956044e-05, + "loss": 0.1564, + "step": 15182 + }, + { + "epoch": 41.71153846153846, + "grad_norm": 10.023621559143066, + "learning_rate": 2.914423076923077e-05, + "loss": 0.2244, + "step": 15183 + }, + { + "epoch": 41.714285714285715, + "grad_norm": 16.157323837280273, + "learning_rate": 2.9142857142857146e-05, + "loss": 0.3874, + "step": 15184 + }, + { + "epoch": 41.717032967032964, + "grad_norm": 14.610156059265137, + "learning_rate": 2.914148351648352e-05, + "loss": 0.2779, + "step": 15185 + }, + { + "epoch": 41.71978021978022, + "grad_norm": 16.343032836914062, + "learning_rate": 2.914010989010989e-05, + "loss": 0.3257, + "step": 15186 + }, + { + "epoch": 41.722527472527474, + "grad_norm": 18.245023727416992, + "learning_rate": 2.9138736263736266e-05, + "loss": 0.5256, + "step": 15187 + }, + { + "epoch": 41.72527472527472, + "grad_norm": 9.659332275390625, + "learning_rate": 2.9137362637362636e-05, + "loss": 0.2806, + "step": 15188 + }, + { + "epoch": 41.72802197802198, + "grad_norm": 2.0672318935394287, + "learning_rate": 2.9135989010989013e-05, + "loss": 0.0426, + "step": 15189 + }, + { + "epoch": 41.73076923076923, + "grad_norm": 9.489117622375488, + "learning_rate": 2.913461538461539e-05, + "loss": 0.1571, + "step": 15190 + }, + { + "epoch": 41.73351648351648, + "grad_norm": 9.643675804138184, + "learning_rate": 2.913324175824176e-05, + "loss": 0.199, + "step": 15191 + }, + { + "epoch": 41.73626373626374, + "grad_norm": 7.182672023773193, + "learning_rate": 2.9131868131868133e-05, + "loss": 0.1588, + "step": 15192 + }, + { + "epoch": 41.73901098901099, + "grad_norm": 11.88379955291748, + "learning_rate": 2.9130494505494503e-05, + "loss": 0.207, + "step": 15193 + }, + { + "epoch": 41.74175824175824, + "grad_norm": 10.651863098144531, + "learning_rate": 2.912912087912088e-05, + "loss": 0.2868, + "step": 15194 + }, + { + "epoch": 41.744505494505496, + "grad_norm": 15.805852890014648, + "learning_rate": 2.9127747252747256e-05, + "loss": 0.38, + "step": 15195 + }, + { + "epoch": 41.747252747252745, + "grad_norm": 19.272695541381836, + "learning_rate": 2.9126373626373626e-05, + "loss": 0.4852, + "step": 15196 + }, + { + "epoch": 41.75, + "grad_norm": 4.204962730407715, + "learning_rate": 2.9125000000000003e-05, + "loss": 0.0744, + "step": 15197 + }, + { + "epoch": 41.752747252747255, + "grad_norm": 13.223029136657715, + "learning_rate": 2.9123626373626373e-05, + "loss": 0.2562, + "step": 15198 + }, + { + "epoch": 41.755494505494504, + "grad_norm": 31.75754165649414, + "learning_rate": 2.912225274725275e-05, + "loss": 1.4059, + "step": 15199 + }, + { + "epoch": 41.75824175824176, + "grad_norm": 11.919597625732422, + "learning_rate": 2.9120879120879123e-05, + "loss": 0.1306, + "step": 15200 + }, + { + "epoch": 41.76098901098901, + "grad_norm": 14.049091339111328, + "learning_rate": 2.9119505494505493e-05, + "loss": 0.3363, + "step": 15201 + }, + { + "epoch": 41.76373626373626, + "grad_norm": 14.59237289428711, + "learning_rate": 2.911813186813187e-05, + "loss": 0.3979, + "step": 15202 + }, + { + "epoch": 41.76648351648352, + "grad_norm": 9.445693016052246, + "learning_rate": 2.911675824175824e-05, + "loss": 0.2652, + "step": 15203 + }, + { + "epoch": 41.76923076923077, + "grad_norm": 3.4445691108703613, + "learning_rate": 2.9115384615384617e-05, + "loss": 0.0699, + "step": 15204 + }, + { + "epoch": 41.77197802197802, + "grad_norm": 11.515154838562012, + "learning_rate": 2.9114010989010994e-05, + "loss": 0.1728, + "step": 15205 + }, + { + "epoch": 41.77472527472528, + "grad_norm": 13.630777359008789, + "learning_rate": 2.9112637362637364e-05, + "loss": 0.2776, + "step": 15206 + }, + { + "epoch": 41.777472527472526, + "grad_norm": 14.05666732788086, + "learning_rate": 2.9111263736263737e-05, + "loss": 0.2558, + "step": 15207 + }, + { + "epoch": 41.78021978021978, + "grad_norm": 13.58846664428711, + "learning_rate": 2.9109890109890107e-05, + "loss": 0.1885, + "step": 15208 + }, + { + "epoch": 41.782967032967036, + "grad_norm": 17.98591423034668, + "learning_rate": 2.9108516483516484e-05, + "loss": 0.7152, + "step": 15209 + }, + { + "epoch": 41.785714285714285, + "grad_norm": 3.627370834350586, + "learning_rate": 2.910714285714286e-05, + "loss": 0.0596, + "step": 15210 + }, + { + "epoch": 41.78846153846154, + "grad_norm": 5.601871490478516, + "learning_rate": 2.910576923076923e-05, + "loss": 0.1644, + "step": 15211 + }, + { + "epoch": 41.79120879120879, + "grad_norm": 6.513457775115967, + "learning_rate": 2.9104395604395608e-05, + "loss": 0.1641, + "step": 15212 + }, + { + "epoch": 41.793956043956044, + "grad_norm": 12.616049766540527, + "learning_rate": 2.9103021978021978e-05, + "loss": 0.3285, + "step": 15213 + }, + { + "epoch": 41.7967032967033, + "grad_norm": 2.6652934551239014, + "learning_rate": 2.9101648351648355e-05, + "loss": 0.0504, + "step": 15214 + }, + { + "epoch": 41.79945054945055, + "grad_norm": 13.239304542541504, + "learning_rate": 2.9100274725274728e-05, + "loss": 0.3472, + "step": 15215 + }, + { + "epoch": 41.8021978021978, + "grad_norm": 15.524580001831055, + "learning_rate": 2.9098901098901098e-05, + "loss": 0.683, + "step": 15216 + }, + { + "epoch": 41.80494505494506, + "grad_norm": 11.621617317199707, + "learning_rate": 2.9097527472527475e-05, + "loss": 0.3403, + "step": 15217 + }, + { + "epoch": 41.80769230769231, + "grad_norm": 11.799848556518555, + "learning_rate": 2.9096153846153845e-05, + "loss": 0.2236, + "step": 15218 + }, + { + "epoch": 41.81043956043956, + "grad_norm": 20.249828338623047, + "learning_rate": 2.909478021978022e-05, + "loss": 0.3116, + "step": 15219 + }, + { + "epoch": 41.81318681318681, + "grad_norm": 16.803129196166992, + "learning_rate": 2.90934065934066e-05, + "loss": 0.5195, + "step": 15220 + }, + { + "epoch": 41.815934065934066, + "grad_norm": 9.765750885009766, + "learning_rate": 2.909203296703297e-05, + "loss": 0.3109, + "step": 15221 + }, + { + "epoch": 41.81868131868132, + "grad_norm": 13.722692489624023, + "learning_rate": 2.9090659340659342e-05, + "loss": 0.3932, + "step": 15222 + }, + { + "epoch": 41.82142857142857, + "grad_norm": 19.57485008239746, + "learning_rate": 2.9089285714285712e-05, + "loss": 0.3175, + "step": 15223 + }, + { + "epoch": 41.824175824175825, + "grad_norm": 20.29928207397461, + "learning_rate": 2.908791208791209e-05, + "loss": 0.3924, + "step": 15224 + }, + { + "epoch": 41.82692307692308, + "grad_norm": 21.116077423095703, + "learning_rate": 2.9086538461538465e-05, + "loss": 0.5997, + "step": 15225 + }, + { + "epoch": 41.82967032967033, + "grad_norm": 13.752671241760254, + "learning_rate": 2.9085164835164835e-05, + "loss": 0.3516, + "step": 15226 + }, + { + "epoch": 41.832417582417584, + "grad_norm": 16.791332244873047, + "learning_rate": 2.9083791208791212e-05, + "loss": 0.4668, + "step": 15227 + }, + { + "epoch": 41.83516483516483, + "grad_norm": 5.274090766906738, + "learning_rate": 2.9082417582417582e-05, + "loss": 0.0919, + "step": 15228 + }, + { + "epoch": 41.83791208791209, + "grad_norm": 2.230710029602051, + "learning_rate": 2.908104395604396e-05, + "loss": 0.0407, + "step": 15229 + }, + { + "epoch": 41.84065934065934, + "grad_norm": 2.452855110168457, + "learning_rate": 2.9079670329670332e-05, + "loss": 0.0461, + "step": 15230 + }, + { + "epoch": 41.84340659340659, + "grad_norm": 8.649210929870605, + "learning_rate": 2.9078296703296702e-05, + "loss": 0.2737, + "step": 15231 + }, + { + "epoch": 41.84615384615385, + "grad_norm": 7.958536148071289, + "learning_rate": 2.907692307692308e-05, + "loss": 0.1222, + "step": 15232 + }, + { + "epoch": 41.8489010989011, + "grad_norm": 6.135647773742676, + "learning_rate": 2.907554945054945e-05, + "loss": 0.1386, + "step": 15233 + }, + { + "epoch": 41.85164835164835, + "grad_norm": 4.444911956787109, + "learning_rate": 2.9074175824175826e-05, + "loss": 0.0675, + "step": 15234 + }, + { + "epoch": 41.854395604395606, + "grad_norm": 15.157458305358887, + "learning_rate": 2.9072802197802203e-05, + "loss": 0.2874, + "step": 15235 + }, + { + "epoch": 41.857142857142854, + "grad_norm": 14.244193077087402, + "learning_rate": 2.9071428571428573e-05, + "loss": 0.4519, + "step": 15236 + }, + { + "epoch": 41.85989010989011, + "grad_norm": 9.263208389282227, + "learning_rate": 2.9070054945054946e-05, + "loss": 0.2137, + "step": 15237 + }, + { + "epoch": 41.862637362637365, + "grad_norm": 22.109575271606445, + "learning_rate": 2.9068681318681316e-05, + "loss": 0.4662, + "step": 15238 + }, + { + "epoch": 41.86538461538461, + "grad_norm": 7.175282001495361, + "learning_rate": 2.9067307692307693e-05, + "loss": 0.1368, + "step": 15239 + }, + { + "epoch": 41.86813186813187, + "grad_norm": 11.402799606323242, + "learning_rate": 2.906593406593407e-05, + "loss": 0.2131, + "step": 15240 + }, + { + "epoch": 41.870879120879124, + "grad_norm": 10.545417785644531, + "learning_rate": 2.906456043956044e-05, + "loss": 0.2003, + "step": 15241 + }, + { + "epoch": 41.87362637362637, + "grad_norm": 13.346170425415039, + "learning_rate": 2.9063186813186817e-05, + "loss": 0.4121, + "step": 15242 + }, + { + "epoch": 41.87637362637363, + "grad_norm": 7.792884826660156, + "learning_rate": 2.9061813186813187e-05, + "loss": 0.1549, + "step": 15243 + }, + { + "epoch": 41.879120879120876, + "grad_norm": 11.88123607635498, + "learning_rate": 2.9060439560439563e-05, + "loss": 0.1533, + "step": 15244 + }, + { + "epoch": 41.88186813186813, + "grad_norm": 28.093717575073242, + "learning_rate": 2.9059065934065937e-05, + "loss": 0.9888, + "step": 15245 + }, + { + "epoch": 41.88461538461539, + "grad_norm": 16.432703018188477, + "learning_rate": 2.9057692307692307e-05, + "loss": 0.404, + "step": 15246 + }, + { + "epoch": 41.887362637362635, + "grad_norm": 12.76030158996582, + "learning_rate": 2.9056318681318684e-05, + "loss": 0.4791, + "step": 15247 + }, + { + "epoch": 41.89010989010989, + "grad_norm": 5.960045337677002, + "learning_rate": 2.9054945054945054e-05, + "loss": 0.1443, + "step": 15248 + }, + { + "epoch": 41.892857142857146, + "grad_norm": 22.472970962524414, + "learning_rate": 2.905357142857143e-05, + "loss": 0.3559, + "step": 15249 + }, + { + "epoch": 41.895604395604394, + "grad_norm": 8.215176582336426, + "learning_rate": 2.9052197802197807e-05, + "loss": 0.1745, + "step": 15250 + }, + { + "epoch": 41.89835164835165, + "grad_norm": 20.205825805664062, + "learning_rate": 2.9050824175824177e-05, + "loss": 0.4892, + "step": 15251 + }, + { + "epoch": 41.9010989010989, + "grad_norm": 5.523078441619873, + "learning_rate": 2.904945054945055e-05, + "loss": 0.118, + "step": 15252 + }, + { + "epoch": 41.90384615384615, + "grad_norm": 16.95836639404297, + "learning_rate": 2.904807692307692e-05, + "loss": 0.3811, + "step": 15253 + }, + { + "epoch": 41.90659340659341, + "grad_norm": 10.05534839630127, + "learning_rate": 2.9046703296703298e-05, + "loss": 0.1749, + "step": 15254 + }, + { + "epoch": 41.90934065934066, + "grad_norm": 11.302571296691895, + "learning_rate": 2.9045329670329674e-05, + "loss": 0.2665, + "step": 15255 + }, + { + "epoch": 41.91208791208791, + "grad_norm": 6.907739162445068, + "learning_rate": 2.9043956043956044e-05, + "loss": 0.1042, + "step": 15256 + }, + { + "epoch": 41.91483516483517, + "grad_norm": 19.066858291625977, + "learning_rate": 2.904258241758242e-05, + "loss": 0.5054, + "step": 15257 + }, + { + "epoch": 41.917582417582416, + "grad_norm": 14.350614547729492, + "learning_rate": 2.904120879120879e-05, + "loss": 0.2942, + "step": 15258 + }, + { + "epoch": 41.92032967032967, + "grad_norm": 19.814313888549805, + "learning_rate": 2.9039835164835168e-05, + "loss": 0.4259, + "step": 15259 + }, + { + "epoch": 41.92307692307692, + "grad_norm": 17.704303741455078, + "learning_rate": 2.903846153846154e-05, + "loss": 0.4442, + "step": 15260 + }, + { + "epoch": 41.925824175824175, + "grad_norm": 6.603689670562744, + "learning_rate": 2.903708791208791e-05, + "loss": 0.2112, + "step": 15261 + }, + { + "epoch": 41.92857142857143, + "grad_norm": 10.553215026855469, + "learning_rate": 2.9035714285714288e-05, + "loss": 0.429, + "step": 15262 + }, + { + "epoch": 41.93131868131868, + "grad_norm": 16.645145416259766, + "learning_rate": 2.9034340659340658e-05, + "loss": 0.3171, + "step": 15263 + }, + { + "epoch": 41.934065934065934, + "grad_norm": 14.706446647644043, + "learning_rate": 2.9032967032967035e-05, + "loss": 0.3169, + "step": 15264 + }, + { + "epoch": 41.93681318681319, + "grad_norm": 15.306182861328125, + "learning_rate": 2.9031593406593412e-05, + "loss": 0.3966, + "step": 15265 + }, + { + "epoch": 41.93956043956044, + "grad_norm": 12.156318664550781, + "learning_rate": 2.9030219780219782e-05, + "loss": 0.3075, + "step": 15266 + }, + { + "epoch": 41.94230769230769, + "grad_norm": 15.84872817993164, + "learning_rate": 2.9028846153846155e-05, + "loss": 0.6384, + "step": 15267 + }, + { + "epoch": 41.94505494505494, + "grad_norm": 7.935842514038086, + "learning_rate": 2.9027472527472525e-05, + "loss": 0.2013, + "step": 15268 + }, + { + "epoch": 41.9478021978022, + "grad_norm": 18.483858108520508, + "learning_rate": 2.9026098901098902e-05, + "loss": 0.4198, + "step": 15269 + }, + { + "epoch": 41.95054945054945, + "grad_norm": 17.698396682739258, + "learning_rate": 2.902472527472528e-05, + "loss": 0.3653, + "step": 15270 + }, + { + "epoch": 41.9532967032967, + "grad_norm": 12.99085807800293, + "learning_rate": 2.902335164835165e-05, + "loss": 0.2468, + "step": 15271 + }, + { + "epoch": 41.956043956043956, + "grad_norm": 3.6490561962127686, + "learning_rate": 2.9021978021978026e-05, + "loss": 0.0627, + "step": 15272 + }, + { + "epoch": 41.95879120879121, + "grad_norm": 25.917016983032227, + "learning_rate": 2.9020604395604396e-05, + "loss": 0.6652, + "step": 15273 + }, + { + "epoch": 41.96153846153846, + "grad_norm": 4.277298927307129, + "learning_rate": 2.901923076923077e-05, + "loss": 0.0596, + "step": 15274 + }, + { + "epoch": 41.964285714285715, + "grad_norm": 12.373451232910156, + "learning_rate": 2.9017857142857146e-05, + "loss": 0.2617, + "step": 15275 + }, + { + "epoch": 41.967032967032964, + "grad_norm": 12.439701080322266, + "learning_rate": 2.9016483516483516e-05, + "loss": 0.3885, + "step": 15276 + }, + { + "epoch": 41.96978021978022, + "grad_norm": 11.138212203979492, + "learning_rate": 2.9015109890109893e-05, + "loss": 0.2229, + "step": 15277 + }, + { + "epoch": 41.972527472527474, + "grad_norm": 10.692595481872559, + "learning_rate": 2.9013736263736263e-05, + "loss": 0.1045, + "step": 15278 + }, + { + "epoch": 41.97527472527472, + "grad_norm": 7.4437031745910645, + "learning_rate": 2.901236263736264e-05, + "loss": 0.2351, + "step": 15279 + }, + { + "epoch": 41.97802197802198, + "grad_norm": 21.06342887878418, + "learning_rate": 2.9010989010989016e-05, + "loss": 0.5551, + "step": 15280 + }, + { + "epoch": 41.98076923076923, + "grad_norm": 3.85257887840271, + "learning_rate": 2.9009615384615386e-05, + "loss": 0.0616, + "step": 15281 + }, + { + "epoch": 41.98351648351648, + "grad_norm": 8.72183609008789, + "learning_rate": 2.900824175824176e-05, + "loss": 0.0826, + "step": 15282 + }, + { + "epoch": 41.98626373626374, + "grad_norm": 10.861320495605469, + "learning_rate": 2.900686813186813e-05, + "loss": 0.1844, + "step": 15283 + }, + { + "epoch": 41.98901098901099, + "grad_norm": 10.03989028930664, + "learning_rate": 2.9005494505494507e-05, + "loss": 0.1537, + "step": 15284 + }, + { + "epoch": 41.99175824175824, + "grad_norm": 15.778759002685547, + "learning_rate": 2.9004120879120883e-05, + "loss": 0.3565, + "step": 15285 + }, + { + "epoch": 41.994505494505496, + "grad_norm": 10.589092254638672, + "learning_rate": 2.9002747252747253e-05, + "loss": 0.3068, + "step": 15286 + }, + { + "epoch": 41.997252747252745, + "grad_norm": 4.010269641876221, + "learning_rate": 2.900137362637363e-05, + "loss": 0.0788, + "step": 15287 + }, + { + "epoch": 42.0, + "grad_norm": 74.99853515625, + "learning_rate": 2.9e-05, + "loss": 1.7808, + "step": 15288 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.8732782369146006, + "eval_f1": 0.8751172002011796, + "eval_f1_DuraRiadoRio_64x64": 0.8764044943820225, + "eval_f1_Mole_64x64": 0.9225352112676056, + "eval_f1_Quebrado_64x64": 0.8726114649681529, + "eval_f1_RiadoRio_64x64": 0.7831715210355987, + "eval_f1_RioFechado_64x64": 0.920863309352518, + "eval_loss": 0.442775160074234, + "eval_precision": 0.8809386516762989, + "eval_precision_DuraRiadoRio_64x64": 0.9512195121951219, + "eval_precision_Mole_64x64": 0.9357142857142857, + "eval_precision_Quebrado_64x64": 0.8058823529411765, + "eval_precision_RiadoRio_64x64": 0.7707006369426752, + "eval_precision_RioFechado_64x64": 0.9411764705882353, + "eval_recall": 0.8742144386788567, + "eval_recall_DuraRiadoRio_64x64": 0.8125, + "eval_recall_Mole_64x64": 0.9097222222222222, + "eval_recall_Quebrado_64x64": 0.9513888888888888, + "eval_recall_RiadoRio_64x64": 0.7960526315789473, + "eval_recall_RioFechado_64x64": 0.9014084507042254, + "eval_runtime": 1.7435, + "eval_samples_per_second": 416.403, + "eval_steps_per_second": 26.384, + "step": 15288 + }, + { + "epoch": 42.002747252747255, + "grad_norm": 17.862743377685547, + "learning_rate": 2.8998626373626374e-05, + "loss": 0.3539, + "step": 15289 + }, + { + "epoch": 42.005494505494504, + "grad_norm": 14.643366813659668, + "learning_rate": 2.899725274725275e-05, + "loss": 0.4988, + "step": 15290 + }, + { + "epoch": 42.00824175824176, + "grad_norm": 16.706844329833984, + "learning_rate": 2.899587912087912e-05, + "loss": 0.3601, + "step": 15291 + }, + { + "epoch": 42.010989010989015, + "grad_norm": 8.969159126281738, + "learning_rate": 2.8994505494505497e-05, + "loss": 0.2799, + "step": 15292 + }, + { + "epoch": 42.01373626373626, + "grad_norm": 10.96041202545166, + "learning_rate": 2.8993131868131867e-05, + "loss": 0.2979, + "step": 15293 + }, + { + "epoch": 42.01648351648352, + "grad_norm": 19.053688049316406, + "learning_rate": 2.8991758241758244e-05, + "loss": 0.7822, + "step": 15294 + }, + { + "epoch": 42.01923076923077, + "grad_norm": 13.217702865600586, + "learning_rate": 2.899038461538462e-05, + "loss": 0.2312, + "step": 15295 + }, + { + "epoch": 42.02197802197802, + "grad_norm": 3.5078980922698975, + "learning_rate": 2.898901098901099e-05, + "loss": 0.0553, + "step": 15296 + }, + { + "epoch": 42.02472527472528, + "grad_norm": 9.002620697021484, + "learning_rate": 2.8987637362637364e-05, + "loss": 0.2137, + "step": 15297 + }, + { + "epoch": 42.027472527472526, + "grad_norm": 5.697059154510498, + "learning_rate": 2.8986263736263734e-05, + "loss": 0.1149, + "step": 15298 + }, + { + "epoch": 42.03021978021978, + "grad_norm": 13.412933349609375, + "learning_rate": 2.898489010989011e-05, + "loss": 0.318, + "step": 15299 + }, + { + "epoch": 42.032967032967036, + "grad_norm": 14.145539283752441, + "learning_rate": 2.8983516483516488e-05, + "loss": 0.4127, + "step": 15300 + }, + { + "epoch": 42.035714285714285, + "grad_norm": 20.557668685913086, + "learning_rate": 2.8982142857142858e-05, + "loss": 0.4898, + "step": 15301 + }, + { + "epoch": 42.03846153846154, + "grad_norm": 9.55803394317627, + "learning_rate": 2.8980769230769235e-05, + "loss": 0.1556, + "step": 15302 + }, + { + "epoch": 42.04120879120879, + "grad_norm": 7.886651515960693, + "learning_rate": 2.8979395604395605e-05, + "loss": 0.2943, + "step": 15303 + }, + { + "epoch": 42.043956043956044, + "grad_norm": 9.909414291381836, + "learning_rate": 2.8978021978021978e-05, + "loss": 0.1853, + "step": 15304 + }, + { + "epoch": 42.0467032967033, + "grad_norm": 19.059083938598633, + "learning_rate": 2.8976648351648355e-05, + "loss": 0.5977, + "step": 15305 + }, + { + "epoch": 42.04945054945055, + "grad_norm": 21.369400024414062, + "learning_rate": 2.8975274725274725e-05, + "loss": 0.539, + "step": 15306 + }, + { + "epoch": 42.0521978021978, + "grad_norm": 14.70635986328125, + "learning_rate": 2.89739010989011e-05, + "loss": 0.5402, + "step": 15307 + }, + { + "epoch": 42.05494505494506, + "grad_norm": 15.653032302856445, + "learning_rate": 2.8972527472527472e-05, + "loss": 0.3757, + "step": 15308 + }, + { + "epoch": 42.05769230769231, + "grad_norm": 15.692090034484863, + "learning_rate": 2.897115384615385e-05, + "loss": 0.2747, + "step": 15309 + }, + { + "epoch": 42.06043956043956, + "grad_norm": 8.164202690124512, + "learning_rate": 2.8969780219780225e-05, + "loss": 0.2572, + "step": 15310 + }, + { + "epoch": 42.06318681318681, + "grad_norm": 15.575836181640625, + "learning_rate": 2.8968406593406595e-05, + "loss": 0.4661, + "step": 15311 + }, + { + "epoch": 42.065934065934066, + "grad_norm": 10.753738403320312, + "learning_rate": 2.896703296703297e-05, + "loss": 0.3865, + "step": 15312 + }, + { + "epoch": 42.06868131868132, + "grad_norm": 8.806951522827148, + "learning_rate": 2.896565934065934e-05, + "loss": 0.1241, + "step": 15313 + }, + { + "epoch": 42.07142857142857, + "grad_norm": 7.126244068145752, + "learning_rate": 2.8964285714285716e-05, + "loss": 0.0922, + "step": 15314 + }, + { + "epoch": 42.074175824175825, + "grad_norm": 2.6521999835968018, + "learning_rate": 2.8962912087912092e-05, + "loss": 0.0485, + "step": 15315 + }, + { + "epoch": 42.07692307692308, + "grad_norm": 9.50680160522461, + "learning_rate": 2.8961538461538462e-05, + "loss": 0.2345, + "step": 15316 + }, + { + "epoch": 42.07967032967033, + "grad_norm": 7.315471649169922, + "learning_rate": 2.896016483516484e-05, + "loss": 0.1398, + "step": 15317 + }, + { + "epoch": 42.082417582417584, + "grad_norm": 15.628920555114746, + "learning_rate": 2.895879120879121e-05, + "loss": 0.4078, + "step": 15318 + }, + { + "epoch": 42.08516483516483, + "grad_norm": 8.648494720458984, + "learning_rate": 2.8957417582417583e-05, + "loss": 0.1797, + "step": 15319 + }, + { + "epoch": 42.08791208791209, + "grad_norm": 20.551145553588867, + "learning_rate": 2.895604395604396e-05, + "loss": 0.3035, + "step": 15320 + }, + { + "epoch": 42.09065934065934, + "grad_norm": 13.399724006652832, + "learning_rate": 2.895467032967033e-05, + "loss": 0.4486, + "step": 15321 + }, + { + "epoch": 42.09340659340659, + "grad_norm": 13.404044151306152, + "learning_rate": 2.8953296703296706e-05, + "loss": 0.2168, + "step": 15322 + }, + { + "epoch": 42.09615384615385, + "grad_norm": 18.341094970703125, + "learning_rate": 2.8951923076923076e-05, + "loss": 0.3154, + "step": 15323 + }, + { + "epoch": 42.0989010989011, + "grad_norm": 13.903841972351074, + "learning_rate": 2.8950549450549453e-05, + "loss": 0.3861, + "step": 15324 + }, + { + "epoch": 42.10164835164835, + "grad_norm": 15.670957565307617, + "learning_rate": 2.894917582417583e-05, + "loss": 0.3702, + "step": 15325 + }, + { + "epoch": 42.104395604395606, + "grad_norm": 12.560482025146484, + "learning_rate": 2.89478021978022e-05, + "loss": 0.19, + "step": 15326 + }, + { + "epoch": 42.107142857142854, + "grad_norm": 6.463057518005371, + "learning_rate": 2.8946428571428573e-05, + "loss": 0.2312, + "step": 15327 + }, + { + "epoch": 42.10989010989011, + "grad_norm": 15.70278263092041, + "learning_rate": 2.8945054945054943e-05, + "loss": 0.396, + "step": 15328 + }, + { + "epoch": 42.112637362637365, + "grad_norm": 11.362874031066895, + "learning_rate": 2.894368131868132e-05, + "loss": 0.3816, + "step": 15329 + }, + { + "epoch": 42.11538461538461, + "grad_norm": 10.022958755493164, + "learning_rate": 2.8942307692307697e-05, + "loss": 0.2538, + "step": 15330 + }, + { + "epoch": 42.11813186813187, + "grad_norm": 1.8083722591400146, + "learning_rate": 2.8940934065934067e-05, + "loss": 0.0261, + "step": 15331 + }, + { + "epoch": 42.120879120879124, + "grad_norm": 10.873823165893555, + "learning_rate": 2.8939560439560444e-05, + "loss": 0.2108, + "step": 15332 + }, + { + "epoch": 42.12362637362637, + "grad_norm": 16.80512237548828, + "learning_rate": 2.8938186813186814e-05, + "loss": 0.3598, + "step": 15333 + }, + { + "epoch": 42.12637362637363, + "grad_norm": 6.8511061668396, + "learning_rate": 2.8936813186813187e-05, + "loss": 0.1486, + "step": 15334 + }, + { + "epoch": 42.129120879120876, + "grad_norm": 7.300039291381836, + "learning_rate": 2.8935439560439564e-05, + "loss": 0.0875, + "step": 15335 + }, + { + "epoch": 42.13186813186813, + "grad_norm": 11.447347640991211, + "learning_rate": 2.8934065934065934e-05, + "loss": 0.1672, + "step": 15336 + }, + { + "epoch": 42.13461538461539, + "grad_norm": 12.434172630310059, + "learning_rate": 2.893269230769231e-05, + "loss": 0.3246, + "step": 15337 + }, + { + "epoch": 42.137362637362635, + "grad_norm": 10.82182788848877, + "learning_rate": 2.893131868131868e-05, + "loss": 0.2203, + "step": 15338 + }, + { + "epoch": 42.14010989010989, + "grad_norm": 10.412701606750488, + "learning_rate": 2.8929945054945057e-05, + "loss": 0.2038, + "step": 15339 + }, + { + "epoch": 42.142857142857146, + "grad_norm": 3.931184768676758, + "learning_rate": 2.8928571428571434e-05, + "loss": 0.0839, + "step": 15340 + }, + { + "epoch": 42.145604395604394, + "grad_norm": 13.60643482208252, + "learning_rate": 2.8927197802197804e-05, + "loss": 0.4341, + "step": 15341 + }, + { + "epoch": 42.14835164835165, + "grad_norm": 13.609830856323242, + "learning_rate": 2.8925824175824178e-05, + "loss": 0.4738, + "step": 15342 + }, + { + "epoch": 42.1510989010989, + "grad_norm": 11.261580467224121, + "learning_rate": 2.8924450549450548e-05, + "loss": 0.1941, + "step": 15343 + }, + { + "epoch": 42.15384615384615, + "grad_norm": 10.469054222106934, + "learning_rate": 2.8923076923076925e-05, + "loss": 0.3042, + "step": 15344 + }, + { + "epoch": 42.15659340659341, + "grad_norm": 5.200736999511719, + "learning_rate": 2.89217032967033e-05, + "loss": 0.1354, + "step": 15345 + }, + { + "epoch": 42.15934065934066, + "grad_norm": 12.187819480895996, + "learning_rate": 2.892032967032967e-05, + "loss": 0.1889, + "step": 15346 + }, + { + "epoch": 42.16208791208791, + "grad_norm": 15.270601272583008, + "learning_rate": 2.8918956043956048e-05, + "loss": 0.2932, + "step": 15347 + }, + { + "epoch": 42.16483516483517, + "grad_norm": 10.202781677246094, + "learning_rate": 2.8917582417582418e-05, + "loss": 0.2549, + "step": 15348 + }, + { + "epoch": 42.167582417582416, + "grad_norm": 9.836142539978027, + "learning_rate": 2.891620879120879e-05, + "loss": 0.2717, + "step": 15349 + }, + { + "epoch": 42.17032967032967, + "grad_norm": 13.373960494995117, + "learning_rate": 2.891483516483517e-05, + "loss": 0.2884, + "step": 15350 + }, + { + "epoch": 42.17307692307692, + "grad_norm": 7.890649795532227, + "learning_rate": 2.891346153846154e-05, + "loss": 0.1236, + "step": 15351 + }, + { + "epoch": 42.175824175824175, + "grad_norm": 5.215603351593018, + "learning_rate": 2.8912087912087915e-05, + "loss": 0.1182, + "step": 15352 + }, + { + "epoch": 42.17857142857143, + "grad_norm": 6.930847644805908, + "learning_rate": 2.8910714285714285e-05, + "loss": 0.1339, + "step": 15353 + }, + { + "epoch": 42.18131868131868, + "grad_norm": 9.394644737243652, + "learning_rate": 2.8909340659340662e-05, + "loss": 0.2052, + "step": 15354 + }, + { + "epoch": 42.184065934065934, + "grad_norm": 13.967911720275879, + "learning_rate": 2.890796703296704e-05, + "loss": 0.3436, + "step": 15355 + }, + { + "epoch": 42.18681318681319, + "grad_norm": 12.231706619262695, + "learning_rate": 2.890659340659341e-05, + "loss": 0.1605, + "step": 15356 + }, + { + "epoch": 42.18956043956044, + "grad_norm": 10.60849666595459, + "learning_rate": 2.8905219780219782e-05, + "loss": 0.1395, + "step": 15357 + }, + { + "epoch": 42.19230769230769, + "grad_norm": 13.500875473022461, + "learning_rate": 2.8903846153846152e-05, + "loss": 0.3605, + "step": 15358 + }, + { + "epoch": 42.19505494505494, + "grad_norm": 14.815942764282227, + "learning_rate": 2.890247252747253e-05, + "loss": 0.2859, + "step": 15359 + }, + { + "epoch": 42.1978021978022, + "grad_norm": 6.1466064453125, + "learning_rate": 2.89010989010989e-05, + "loss": 0.0592, + "step": 15360 + }, + { + "epoch": 42.20054945054945, + "grad_norm": 6.426677227020264, + "learning_rate": 2.8899725274725276e-05, + "loss": 0.1667, + "step": 15361 + }, + { + "epoch": 42.2032967032967, + "grad_norm": 8.276533126831055, + "learning_rate": 2.8898351648351653e-05, + "loss": 0.2109, + "step": 15362 + }, + { + "epoch": 42.206043956043956, + "grad_norm": 8.681623458862305, + "learning_rate": 2.8896978021978023e-05, + "loss": 0.1126, + "step": 15363 + }, + { + "epoch": 42.20879120879121, + "grad_norm": 10.473971366882324, + "learning_rate": 2.8895604395604396e-05, + "loss": 0.2343, + "step": 15364 + }, + { + "epoch": 42.21153846153846, + "grad_norm": 11.29737663269043, + "learning_rate": 2.889423076923077e-05, + "loss": 0.2989, + "step": 15365 + }, + { + "epoch": 42.214285714285715, + "grad_norm": 17.744993209838867, + "learning_rate": 2.8892857142857143e-05, + "loss": 0.3532, + "step": 15366 + }, + { + "epoch": 42.217032967032964, + "grad_norm": 11.206491470336914, + "learning_rate": 2.889148351648352e-05, + "loss": 0.2502, + "step": 15367 + }, + { + "epoch": 42.21978021978022, + "grad_norm": 14.270639419555664, + "learning_rate": 2.889010989010989e-05, + "loss": 0.3855, + "step": 15368 + }, + { + "epoch": 42.222527472527474, + "grad_norm": 7.372495651245117, + "learning_rate": 2.8888736263736266e-05, + "loss": 0.12, + "step": 15369 + }, + { + "epoch": 42.22527472527472, + "grad_norm": 11.691737174987793, + "learning_rate": 2.8887362637362636e-05, + "loss": 0.1862, + "step": 15370 + }, + { + "epoch": 42.22802197802198, + "grad_norm": 3.0514509677886963, + "learning_rate": 2.8885989010989013e-05, + "loss": 0.0652, + "step": 15371 + }, + { + "epoch": 42.23076923076923, + "grad_norm": 6.19125509262085, + "learning_rate": 2.8884615384615387e-05, + "loss": 0.0611, + "step": 15372 + }, + { + "epoch": 42.23351648351648, + "grad_norm": 4.175676345825195, + "learning_rate": 2.8883241758241757e-05, + "loss": 0.0756, + "step": 15373 + }, + { + "epoch": 42.23626373626374, + "grad_norm": 10.924118995666504, + "learning_rate": 2.8881868131868133e-05, + "loss": 0.2712, + "step": 15374 + }, + { + "epoch": 42.239010989010985, + "grad_norm": 19.185420989990234, + "learning_rate": 2.8880494505494504e-05, + "loss": 0.4864, + "step": 15375 + }, + { + "epoch": 42.24175824175824, + "grad_norm": 13.747574806213379, + "learning_rate": 2.887912087912088e-05, + "loss": 0.3805, + "step": 15376 + }, + { + "epoch": 42.244505494505496, + "grad_norm": 15.627201080322266, + "learning_rate": 2.8877747252747257e-05, + "loss": 0.7085, + "step": 15377 + }, + { + "epoch": 42.247252747252745, + "grad_norm": 8.44785213470459, + "learning_rate": 2.8876373626373627e-05, + "loss": 0.1773, + "step": 15378 + }, + { + "epoch": 42.25, + "grad_norm": 12.108257293701172, + "learning_rate": 2.8875e-05, + "loss": 0.5349, + "step": 15379 + }, + { + "epoch": 42.252747252747255, + "grad_norm": 16.24403953552246, + "learning_rate": 2.8873626373626374e-05, + "loss": 0.7836, + "step": 15380 + }, + { + "epoch": 42.255494505494504, + "grad_norm": 7.370216369628906, + "learning_rate": 2.8872252747252747e-05, + "loss": 0.135, + "step": 15381 + }, + { + "epoch": 42.25824175824176, + "grad_norm": 9.907524108886719, + "learning_rate": 2.8870879120879124e-05, + "loss": 0.2099, + "step": 15382 + }, + { + "epoch": 42.260989010989015, + "grad_norm": 11.530097961425781, + "learning_rate": 2.8869505494505494e-05, + "loss": 0.3522, + "step": 15383 + }, + { + "epoch": 42.26373626373626, + "grad_norm": 20.50510025024414, + "learning_rate": 2.886813186813187e-05, + "loss": 0.6244, + "step": 15384 + }, + { + "epoch": 42.26648351648352, + "grad_norm": 6.43765115737915, + "learning_rate": 2.886675824175824e-05, + "loss": 0.1493, + "step": 15385 + }, + { + "epoch": 42.26923076923077, + "grad_norm": 8.316950798034668, + "learning_rate": 2.8865384615384618e-05, + "loss": 0.145, + "step": 15386 + }, + { + "epoch": 42.27197802197802, + "grad_norm": 11.993378639221191, + "learning_rate": 2.886401098901099e-05, + "loss": 0.3523, + "step": 15387 + }, + { + "epoch": 42.27472527472528, + "grad_norm": 6.4908013343811035, + "learning_rate": 2.886263736263736e-05, + "loss": 0.1983, + "step": 15388 + }, + { + "epoch": 42.277472527472526, + "grad_norm": 10.767754554748535, + "learning_rate": 2.8861263736263738e-05, + "loss": 0.2457, + "step": 15389 + }, + { + "epoch": 42.28021978021978, + "grad_norm": 3.837345600128174, + "learning_rate": 2.8859890109890108e-05, + "loss": 0.086, + "step": 15390 + }, + { + "epoch": 42.282967032967036, + "grad_norm": 14.80086612701416, + "learning_rate": 2.8858516483516485e-05, + "loss": 0.6067, + "step": 15391 + }, + { + "epoch": 42.285714285714285, + "grad_norm": 7.632920265197754, + "learning_rate": 2.885714285714286e-05, + "loss": 0.1648, + "step": 15392 + }, + { + "epoch": 42.28846153846154, + "grad_norm": 18.400733947753906, + "learning_rate": 2.885576923076923e-05, + "loss": 0.4708, + "step": 15393 + }, + { + "epoch": 42.29120879120879, + "grad_norm": 13.793708801269531, + "learning_rate": 2.8854395604395605e-05, + "loss": 0.3303, + "step": 15394 + }, + { + "epoch": 42.293956043956044, + "grad_norm": 19.401905059814453, + "learning_rate": 2.885302197802198e-05, + "loss": 0.7303, + "step": 15395 + }, + { + "epoch": 42.2967032967033, + "grad_norm": 6.8318376541137695, + "learning_rate": 2.8851648351648352e-05, + "loss": 0.1345, + "step": 15396 + }, + { + "epoch": 42.29945054945055, + "grad_norm": 13.574089050292969, + "learning_rate": 2.885027472527473e-05, + "loss": 0.2546, + "step": 15397 + }, + { + "epoch": 42.3021978021978, + "grad_norm": 6.293432235717773, + "learning_rate": 2.88489010989011e-05, + "loss": 0.0824, + "step": 15398 + }, + { + "epoch": 42.30494505494506, + "grad_norm": 19.41754913330078, + "learning_rate": 2.8847527472527475e-05, + "loss": 0.5433, + "step": 15399 + }, + { + "epoch": 42.30769230769231, + "grad_norm": 10.10014820098877, + "learning_rate": 2.8846153846153845e-05, + "loss": 0.2435, + "step": 15400 + }, + { + "epoch": 42.31043956043956, + "grad_norm": 2.0353708267211914, + "learning_rate": 2.8844780219780222e-05, + "loss": 0.0508, + "step": 15401 + }, + { + "epoch": 42.31318681318681, + "grad_norm": 5.65843391418457, + "learning_rate": 2.8843406593406596e-05, + "loss": 0.0923, + "step": 15402 + }, + { + "epoch": 42.315934065934066, + "grad_norm": 16.159330368041992, + "learning_rate": 2.8842032967032966e-05, + "loss": 0.5143, + "step": 15403 + }, + { + "epoch": 42.31868131868132, + "grad_norm": 11.253778457641602, + "learning_rate": 2.8840659340659342e-05, + "loss": 0.1885, + "step": 15404 + }, + { + "epoch": 42.32142857142857, + "grad_norm": 11.507369041442871, + "learning_rate": 2.8839285714285712e-05, + "loss": 0.3055, + "step": 15405 + }, + { + "epoch": 42.324175824175825, + "grad_norm": 12.010786056518555, + "learning_rate": 2.883791208791209e-05, + "loss": 0.2857, + "step": 15406 + }, + { + "epoch": 42.32692307692308, + "grad_norm": 16.298480987548828, + "learning_rate": 2.8836538461538466e-05, + "loss": 0.4645, + "step": 15407 + }, + { + "epoch": 42.32967032967033, + "grad_norm": 15.339211463928223, + "learning_rate": 2.8835164835164836e-05, + "loss": 0.3489, + "step": 15408 + }, + { + "epoch": 42.332417582417584, + "grad_norm": 8.919644355773926, + "learning_rate": 2.883379120879121e-05, + "loss": 0.2198, + "step": 15409 + }, + { + "epoch": 42.33516483516483, + "grad_norm": 7.308587551116943, + "learning_rate": 2.8832417582417583e-05, + "loss": 0.1358, + "step": 15410 + }, + { + "epoch": 42.33791208791209, + "grad_norm": 10.87548828125, + "learning_rate": 2.8831043956043956e-05, + "loss": 0.2737, + "step": 15411 + }, + { + "epoch": 42.34065934065934, + "grad_norm": 6.904055595397949, + "learning_rate": 2.8829670329670333e-05, + "loss": 0.2167, + "step": 15412 + }, + { + "epoch": 42.34340659340659, + "grad_norm": 17.560117721557617, + "learning_rate": 2.8828296703296703e-05, + "loss": 0.4305, + "step": 15413 + }, + { + "epoch": 42.34615384615385, + "grad_norm": 4.714691162109375, + "learning_rate": 2.882692307692308e-05, + "loss": 0.1015, + "step": 15414 + }, + { + "epoch": 42.3489010989011, + "grad_norm": 14.848610877990723, + "learning_rate": 2.882554945054945e-05, + "loss": 0.3997, + "step": 15415 + }, + { + "epoch": 42.35164835164835, + "grad_norm": 13.813095092773438, + "learning_rate": 2.8824175824175827e-05, + "loss": 0.2859, + "step": 15416 + }, + { + "epoch": 42.354395604395606, + "grad_norm": 13.912055015563965, + "learning_rate": 2.88228021978022e-05, + "loss": 0.2763, + "step": 15417 + }, + { + "epoch": 42.357142857142854, + "grad_norm": 13.522825241088867, + "learning_rate": 2.882142857142857e-05, + "loss": 0.2698, + "step": 15418 + }, + { + "epoch": 42.35989010989011, + "grad_norm": 14.26022720336914, + "learning_rate": 2.8820054945054947e-05, + "loss": 0.293, + "step": 15419 + }, + { + "epoch": 42.362637362637365, + "grad_norm": 19.456764221191406, + "learning_rate": 2.8818681318681317e-05, + "loss": 0.3593, + "step": 15420 + }, + { + "epoch": 42.36538461538461, + "grad_norm": 14.733363151550293, + "learning_rate": 2.8817307692307694e-05, + "loss": 0.2613, + "step": 15421 + }, + { + "epoch": 42.36813186813187, + "grad_norm": 8.23076057434082, + "learning_rate": 2.881593406593407e-05, + "loss": 0.3138, + "step": 15422 + }, + { + "epoch": 42.370879120879124, + "grad_norm": 5.638273239135742, + "learning_rate": 2.881456043956044e-05, + "loss": 0.126, + "step": 15423 + }, + { + "epoch": 42.37362637362637, + "grad_norm": 11.317731857299805, + "learning_rate": 2.8813186813186814e-05, + "loss": 0.2273, + "step": 15424 + }, + { + "epoch": 42.37637362637363, + "grad_norm": 15.753012657165527, + "learning_rate": 2.8811813186813187e-05, + "loss": 0.3718, + "step": 15425 + }, + { + "epoch": 42.379120879120876, + "grad_norm": 15.850873947143555, + "learning_rate": 2.881043956043956e-05, + "loss": 0.3066, + "step": 15426 + }, + { + "epoch": 42.38186813186813, + "grad_norm": 11.157604217529297, + "learning_rate": 2.8809065934065938e-05, + "loss": 0.1927, + "step": 15427 + }, + { + "epoch": 42.38461538461539, + "grad_norm": 10.583734512329102, + "learning_rate": 2.8807692307692308e-05, + "loss": 0.1733, + "step": 15428 + }, + { + "epoch": 42.387362637362635, + "grad_norm": 17.58600425720215, + "learning_rate": 2.8806318681318684e-05, + "loss": 0.4251, + "step": 15429 + }, + { + "epoch": 42.39010989010989, + "grad_norm": 6.117450714111328, + "learning_rate": 2.8804945054945054e-05, + "loss": 0.1033, + "step": 15430 + }, + { + "epoch": 42.392857142857146, + "grad_norm": 9.942527770996094, + "learning_rate": 2.880357142857143e-05, + "loss": 0.1507, + "step": 15431 + }, + { + "epoch": 42.395604395604394, + "grad_norm": 9.52158260345459, + "learning_rate": 2.8802197802197805e-05, + "loss": 0.1998, + "step": 15432 + }, + { + "epoch": 42.39835164835165, + "grad_norm": 10.285370826721191, + "learning_rate": 2.8800824175824175e-05, + "loss": 0.2001, + "step": 15433 + }, + { + "epoch": 42.4010989010989, + "grad_norm": 13.601672172546387, + "learning_rate": 2.879945054945055e-05, + "loss": 0.2391, + "step": 15434 + }, + { + "epoch": 42.40384615384615, + "grad_norm": 14.423365592956543, + "learning_rate": 2.879807692307692e-05, + "loss": 0.2524, + "step": 15435 + }, + { + "epoch": 42.40659340659341, + "grad_norm": 8.940773010253906, + "learning_rate": 2.8796703296703298e-05, + "loss": 0.1943, + "step": 15436 + }, + { + "epoch": 42.40934065934066, + "grad_norm": 11.090729713439941, + "learning_rate": 2.8795329670329675e-05, + "loss": 0.3593, + "step": 15437 + }, + { + "epoch": 42.41208791208791, + "grad_norm": 12.530749320983887, + "learning_rate": 2.8793956043956045e-05, + "loss": 0.2004, + "step": 15438 + }, + { + "epoch": 42.41483516483517, + "grad_norm": 15.07054615020752, + "learning_rate": 2.879258241758242e-05, + "loss": 0.3717, + "step": 15439 + }, + { + "epoch": 42.417582417582416, + "grad_norm": 6.052077293395996, + "learning_rate": 2.8791208791208792e-05, + "loss": 0.0987, + "step": 15440 + }, + { + "epoch": 42.42032967032967, + "grad_norm": 13.715940475463867, + "learning_rate": 2.8789835164835165e-05, + "loss": 0.2617, + "step": 15441 + }, + { + "epoch": 42.42307692307692, + "grad_norm": 4.1776957511901855, + "learning_rate": 2.8788461538461542e-05, + "loss": 0.0731, + "step": 15442 + }, + { + "epoch": 42.425824175824175, + "grad_norm": 12.377910614013672, + "learning_rate": 2.8787087912087912e-05, + "loss": 0.2633, + "step": 15443 + }, + { + "epoch": 42.42857142857143, + "grad_norm": 10.271471977233887, + "learning_rate": 2.878571428571429e-05, + "loss": 0.1915, + "step": 15444 + }, + { + "epoch": 42.43131868131868, + "grad_norm": 4.731315612792969, + "learning_rate": 2.878434065934066e-05, + "loss": 0.0919, + "step": 15445 + }, + { + "epoch": 42.434065934065934, + "grad_norm": 14.890254020690918, + "learning_rate": 2.8782967032967036e-05, + "loss": 0.5509, + "step": 15446 + }, + { + "epoch": 42.43681318681319, + "grad_norm": 4.809189319610596, + "learning_rate": 2.878159340659341e-05, + "loss": 0.0856, + "step": 15447 + }, + { + "epoch": 42.43956043956044, + "grad_norm": 14.214149475097656, + "learning_rate": 2.878021978021978e-05, + "loss": 0.3194, + "step": 15448 + }, + { + "epoch": 42.44230769230769, + "grad_norm": 3.0937042236328125, + "learning_rate": 2.8778846153846156e-05, + "loss": 0.0571, + "step": 15449 + }, + { + "epoch": 42.44505494505494, + "grad_norm": 12.353549003601074, + "learning_rate": 2.8777472527472526e-05, + "loss": 0.2652, + "step": 15450 + }, + { + "epoch": 42.4478021978022, + "grad_norm": 10.293824195861816, + "learning_rate": 2.8776098901098903e-05, + "loss": 0.219, + "step": 15451 + }, + { + "epoch": 42.45054945054945, + "grad_norm": 8.0030517578125, + "learning_rate": 2.877472527472528e-05, + "loss": 0.1667, + "step": 15452 + }, + { + "epoch": 42.4532967032967, + "grad_norm": 16.19643783569336, + "learning_rate": 2.877335164835165e-05, + "loss": 0.4013, + "step": 15453 + }, + { + "epoch": 42.456043956043956, + "grad_norm": 9.371113777160645, + "learning_rate": 2.8771978021978023e-05, + "loss": 0.2217, + "step": 15454 + }, + { + "epoch": 42.45879120879121, + "grad_norm": 11.173080444335938, + "learning_rate": 2.8770604395604396e-05, + "loss": 0.3418, + "step": 15455 + }, + { + "epoch": 42.46153846153846, + "grad_norm": 16.029844284057617, + "learning_rate": 2.876923076923077e-05, + "loss": 0.3573, + "step": 15456 + }, + { + "epoch": 42.464285714285715, + "grad_norm": 12.155592918395996, + "learning_rate": 2.8767857142857147e-05, + "loss": 0.2864, + "step": 15457 + }, + { + "epoch": 42.467032967032964, + "grad_norm": 12.86544132232666, + "learning_rate": 2.8766483516483517e-05, + "loss": 0.3104, + "step": 15458 + }, + { + "epoch": 42.46978021978022, + "grad_norm": 7.098296642303467, + "learning_rate": 2.8765109890109893e-05, + "loss": 0.166, + "step": 15459 + }, + { + "epoch": 42.472527472527474, + "grad_norm": 11.366024017333984, + "learning_rate": 2.8763736263736263e-05, + "loss": 0.1427, + "step": 15460 + }, + { + "epoch": 42.47527472527472, + "grad_norm": 16.014734268188477, + "learning_rate": 2.876236263736264e-05, + "loss": 0.3082, + "step": 15461 + }, + { + "epoch": 42.47802197802198, + "grad_norm": 12.511423110961914, + "learning_rate": 2.8760989010989014e-05, + "loss": 0.408, + "step": 15462 + }, + { + "epoch": 42.48076923076923, + "grad_norm": 14.495863914489746, + "learning_rate": 2.8759615384615384e-05, + "loss": 0.3054, + "step": 15463 + }, + { + "epoch": 42.48351648351648, + "grad_norm": 21.003135681152344, + "learning_rate": 2.875824175824176e-05, + "loss": 0.5818, + "step": 15464 + }, + { + "epoch": 42.48626373626374, + "grad_norm": 16.049467086791992, + "learning_rate": 2.875686813186813e-05, + "loss": 0.2839, + "step": 15465 + }, + { + "epoch": 42.489010989010985, + "grad_norm": 17.463254928588867, + "learning_rate": 2.8755494505494507e-05, + "loss": 0.3953, + "step": 15466 + }, + { + "epoch": 42.49175824175824, + "grad_norm": 15.070394515991211, + "learning_rate": 2.8754120879120884e-05, + "loss": 0.3511, + "step": 15467 + }, + { + "epoch": 42.494505494505496, + "grad_norm": 7.4863433837890625, + "learning_rate": 2.8752747252747254e-05, + "loss": 0.2179, + "step": 15468 + }, + { + "epoch": 42.497252747252745, + "grad_norm": 12.147073745727539, + "learning_rate": 2.8751373626373627e-05, + "loss": 0.1942, + "step": 15469 + }, + { + "epoch": 42.5, + "grad_norm": 4.045197486877441, + "learning_rate": 2.8749999999999997e-05, + "loss": 0.0441, + "step": 15470 + }, + { + "epoch": 42.502747252747255, + "grad_norm": 6.648130893707275, + "learning_rate": 2.8748626373626374e-05, + "loss": 0.1309, + "step": 15471 + }, + { + "epoch": 42.505494505494504, + "grad_norm": 17.190027236938477, + "learning_rate": 2.874725274725275e-05, + "loss": 0.4655, + "step": 15472 + }, + { + "epoch": 42.50824175824176, + "grad_norm": 13.721741676330566, + "learning_rate": 2.874587912087912e-05, + "loss": 0.3377, + "step": 15473 + }, + { + "epoch": 42.51098901098901, + "grad_norm": 12.783663749694824, + "learning_rate": 2.8744505494505498e-05, + "loss": 0.4459, + "step": 15474 + }, + { + "epoch": 42.51373626373626, + "grad_norm": 7.53205680847168, + "learning_rate": 2.8743131868131868e-05, + "loss": 0.1667, + "step": 15475 + }, + { + "epoch": 42.51648351648352, + "grad_norm": 6.66231107711792, + "learning_rate": 2.8741758241758245e-05, + "loss": 0.1303, + "step": 15476 + }, + { + "epoch": 42.51923076923077, + "grad_norm": 9.724176406860352, + "learning_rate": 2.8740384615384618e-05, + "loss": 0.1449, + "step": 15477 + }, + { + "epoch": 42.52197802197802, + "grad_norm": 14.283556938171387, + "learning_rate": 2.8739010989010988e-05, + "loss": 0.3307, + "step": 15478 + }, + { + "epoch": 42.52472527472528, + "grad_norm": 10.106154441833496, + "learning_rate": 2.8737637362637365e-05, + "loss": 0.2025, + "step": 15479 + }, + { + "epoch": 42.527472527472526, + "grad_norm": 11.897275924682617, + "learning_rate": 2.8736263736263735e-05, + "loss": 0.4255, + "step": 15480 + }, + { + "epoch": 42.53021978021978, + "grad_norm": 9.146525382995605, + "learning_rate": 2.8734890109890112e-05, + "loss": 0.208, + "step": 15481 + }, + { + "epoch": 42.532967032967036, + "grad_norm": 6.72991943359375, + "learning_rate": 2.873351648351649e-05, + "loss": 0.1487, + "step": 15482 + }, + { + "epoch": 42.535714285714285, + "grad_norm": 11.696871757507324, + "learning_rate": 2.873214285714286e-05, + "loss": 0.3303, + "step": 15483 + }, + { + "epoch": 42.53846153846154, + "grad_norm": 11.83433723449707, + "learning_rate": 2.8730769230769232e-05, + "loss": 0.2583, + "step": 15484 + }, + { + "epoch": 42.54120879120879, + "grad_norm": 10.251124382019043, + "learning_rate": 2.8729395604395602e-05, + "loss": 0.2129, + "step": 15485 + }, + { + "epoch": 42.543956043956044, + "grad_norm": 16.538328170776367, + "learning_rate": 2.872802197802198e-05, + "loss": 0.4242, + "step": 15486 + }, + { + "epoch": 42.5467032967033, + "grad_norm": 7.439411163330078, + "learning_rate": 2.8726648351648356e-05, + "loss": 0.1413, + "step": 15487 + }, + { + "epoch": 42.54945054945055, + "grad_norm": 12.274956703186035, + "learning_rate": 2.8725274725274726e-05, + "loss": 0.3988, + "step": 15488 + }, + { + "epoch": 42.5521978021978, + "grad_norm": 12.385931015014648, + "learning_rate": 2.8723901098901102e-05, + "loss": 0.4615, + "step": 15489 + }, + { + "epoch": 42.55494505494506, + "grad_norm": 16.05042266845703, + "learning_rate": 2.8722527472527472e-05, + "loss": 0.3914, + "step": 15490 + }, + { + "epoch": 42.55769230769231, + "grad_norm": 13.138219833374023, + "learning_rate": 2.872115384615385e-05, + "loss": 0.2298, + "step": 15491 + }, + { + "epoch": 42.56043956043956, + "grad_norm": 4.912968635559082, + "learning_rate": 2.8719780219780223e-05, + "loss": 0.1022, + "step": 15492 + }, + { + "epoch": 42.56318681318681, + "grad_norm": 13.424735069274902, + "learning_rate": 2.8718406593406593e-05, + "loss": 0.5587, + "step": 15493 + }, + { + "epoch": 42.565934065934066, + "grad_norm": 7.011356353759766, + "learning_rate": 2.871703296703297e-05, + "loss": 0.1225, + "step": 15494 + }, + { + "epoch": 42.56868131868132, + "grad_norm": 2.6392736434936523, + "learning_rate": 2.871565934065934e-05, + "loss": 0.055, + "step": 15495 + }, + { + "epoch": 42.57142857142857, + "grad_norm": 15.128449440002441, + "learning_rate": 2.8714285714285716e-05, + "loss": 0.356, + "step": 15496 + }, + { + "epoch": 42.574175824175825, + "grad_norm": 4.81362247467041, + "learning_rate": 2.8712912087912093e-05, + "loss": 0.1097, + "step": 15497 + }, + { + "epoch": 42.57692307692308, + "grad_norm": 15.67940902709961, + "learning_rate": 2.8711538461538463e-05, + "loss": 0.3804, + "step": 15498 + }, + { + "epoch": 42.57967032967033, + "grad_norm": 9.078889846801758, + "learning_rate": 2.8710164835164836e-05, + "loss": 0.3773, + "step": 15499 + }, + { + "epoch": 42.582417582417584, + "grad_norm": 2.3536429405212402, + "learning_rate": 2.8708791208791206e-05, + "loss": 0.0331, + "step": 15500 + }, + { + "epoch": 42.58516483516483, + "grad_norm": 11.423660278320312, + "learning_rate": 2.8707417582417583e-05, + "loss": 0.2086, + "step": 15501 + }, + { + "epoch": 42.58791208791209, + "grad_norm": 7.661891937255859, + "learning_rate": 2.870604395604396e-05, + "loss": 0.1108, + "step": 15502 + }, + { + "epoch": 42.59065934065934, + "grad_norm": 3.2098453044891357, + "learning_rate": 2.870467032967033e-05, + "loss": 0.0505, + "step": 15503 + }, + { + "epoch": 42.59340659340659, + "grad_norm": 14.480201721191406, + "learning_rate": 2.8703296703296707e-05, + "loss": 0.2951, + "step": 15504 + }, + { + "epoch": 42.59615384615385, + "grad_norm": 7.087050437927246, + "learning_rate": 2.8701923076923077e-05, + "loss": 0.2063, + "step": 15505 + }, + { + "epoch": 42.5989010989011, + "grad_norm": 7.256019115447998, + "learning_rate": 2.8700549450549454e-05, + "loss": 0.113, + "step": 15506 + }, + { + "epoch": 42.60164835164835, + "grad_norm": 9.528386116027832, + "learning_rate": 2.8699175824175827e-05, + "loss": 0.1285, + "step": 15507 + }, + { + "epoch": 42.604395604395606, + "grad_norm": 15.548068046569824, + "learning_rate": 2.8697802197802197e-05, + "loss": 0.3589, + "step": 15508 + }, + { + "epoch": 42.607142857142854, + "grad_norm": 18.37910270690918, + "learning_rate": 2.8696428571428574e-05, + "loss": 0.4743, + "step": 15509 + }, + { + "epoch": 42.60989010989011, + "grad_norm": 6.623161315917969, + "learning_rate": 2.8695054945054944e-05, + "loss": 0.1338, + "step": 15510 + }, + { + "epoch": 42.612637362637365, + "grad_norm": 10.707283020019531, + "learning_rate": 2.869368131868132e-05, + "loss": 0.1696, + "step": 15511 + }, + { + "epoch": 42.61538461538461, + "grad_norm": 14.474523544311523, + "learning_rate": 2.8692307692307698e-05, + "loss": 0.1619, + "step": 15512 + }, + { + "epoch": 42.61813186813187, + "grad_norm": 6.589081287384033, + "learning_rate": 2.8690934065934068e-05, + "loss": 0.2042, + "step": 15513 + }, + { + "epoch": 42.620879120879124, + "grad_norm": 7.909437656402588, + "learning_rate": 2.868956043956044e-05, + "loss": 0.1533, + "step": 15514 + }, + { + "epoch": 42.62362637362637, + "grad_norm": 15.052643775939941, + "learning_rate": 2.868818681318681e-05, + "loss": 0.4142, + "step": 15515 + }, + { + "epoch": 42.62637362637363, + "grad_norm": 13.017231941223145, + "learning_rate": 2.8686813186813188e-05, + "loss": 0.2475, + "step": 15516 + }, + { + "epoch": 42.629120879120876, + "grad_norm": 13.991372108459473, + "learning_rate": 2.8685439560439565e-05, + "loss": 0.2221, + "step": 15517 + }, + { + "epoch": 42.63186813186813, + "grad_norm": 14.344858169555664, + "learning_rate": 2.8684065934065935e-05, + "loss": 0.32, + "step": 15518 + }, + { + "epoch": 42.63461538461539, + "grad_norm": 8.35400676727295, + "learning_rate": 2.868269230769231e-05, + "loss": 0.1536, + "step": 15519 + }, + { + "epoch": 42.637362637362635, + "grad_norm": 10.766852378845215, + "learning_rate": 2.868131868131868e-05, + "loss": 0.0972, + "step": 15520 + }, + { + "epoch": 42.64010989010989, + "grad_norm": 6.183503150939941, + "learning_rate": 2.8679945054945058e-05, + "loss": 0.1189, + "step": 15521 + }, + { + "epoch": 42.642857142857146, + "grad_norm": 15.942299842834473, + "learning_rate": 2.867857142857143e-05, + "loss": 0.4677, + "step": 15522 + }, + { + "epoch": 42.645604395604394, + "grad_norm": 19.55794334411621, + "learning_rate": 2.86771978021978e-05, + "loss": 0.7893, + "step": 15523 + }, + { + "epoch": 42.64835164835165, + "grad_norm": 6.832330226898193, + "learning_rate": 2.867582417582418e-05, + "loss": 0.2504, + "step": 15524 + }, + { + "epoch": 42.6510989010989, + "grad_norm": 12.097590446472168, + "learning_rate": 2.867445054945055e-05, + "loss": 0.2571, + "step": 15525 + }, + { + "epoch": 42.65384615384615, + "grad_norm": 18.889484405517578, + "learning_rate": 2.8673076923076925e-05, + "loss": 0.2804, + "step": 15526 + }, + { + "epoch": 42.65659340659341, + "grad_norm": 3.557765245437622, + "learning_rate": 2.8671703296703302e-05, + "loss": 0.0455, + "step": 15527 + }, + { + "epoch": 42.65934065934066, + "grad_norm": 17.689502716064453, + "learning_rate": 2.8670329670329672e-05, + "loss": 0.6049, + "step": 15528 + }, + { + "epoch": 42.66208791208791, + "grad_norm": 17.754955291748047, + "learning_rate": 2.8668956043956045e-05, + "loss": 0.7779, + "step": 15529 + }, + { + "epoch": 42.66483516483517, + "grad_norm": 8.143850326538086, + "learning_rate": 2.8667582417582415e-05, + "loss": 0.1382, + "step": 15530 + }, + { + "epoch": 42.667582417582416, + "grad_norm": 11.163491249084473, + "learning_rate": 2.8666208791208792e-05, + "loss": 0.2638, + "step": 15531 + }, + { + "epoch": 42.67032967032967, + "grad_norm": 11.46519947052002, + "learning_rate": 2.866483516483517e-05, + "loss": 0.2847, + "step": 15532 + }, + { + "epoch": 42.67307692307692, + "grad_norm": 22.785049438476562, + "learning_rate": 2.866346153846154e-05, + "loss": 0.6621, + "step": 15533 + }, + { + "epoch": 42.675824175824175, + "grad_norm": 24.081356048583984, + "learning_rate": 2.8662087912087916e-05, + "loss": 0.4363, + "step": 15534 + }, + { + "epoch": 42.67857142857143, + "grad_norm": 10.175332069396973, + "learning_rate": 2.8660714285714286e-05, + "loss": 0.1081, + "step": 15535 + }, + { + "epoch": 42.68131868131868, + "grad_norm": 12.2495698928833, + "learning_rate": 2.8659340659340663e-05, + "loss": 0.1585, + "step": 15536 + }, + { + "epoch": 42.684065934065934, + "grad_norm": 14.972071647644043, + "learning_rate": 2.8657967032967036e-05, + "loss": 0.4941, + "step": 15537 + }, + { + "epoch": 42.68681318681319, + "grad_norm": 12.48819351196289, + "learning_rate": 2.8656593406593406e-05, + "loss": 0.2595, + "step": 15538 + }, + { + "epoch": 42.68956043956044, + "grad_norm": 16.99301528930664, + "learning_rate": 2.8655219780219783e-05, + "loss": 0.5637, + "step": 15539 + }, + { + "epoch": 42.69230769230769, + "grad_norm": 17.25996208190918, + "learning_rate": 2.8653846153846153e-05, + "loss": 0.5241, + "step": 15540 + }, + { + "epoch": 42.69505494505494, + "grad_norm": 13.806836128234863, + "learning_rate": 2.865247252747253e-05, + "loss": 0.2121, + "step": 15541 + }, + { + "epoch": 42.6978021978022, + "grad_norm": 15.286582946777344, + "learning_rate": 2.8651098901098906e-05, + "loss": 0.263, + "step": 15542 + }, + { + "epoch": 42.70054945054945, + "grad_norm": 14.813896179199219, + "learning_rate": 2.8649725274725277e-05, + "loss": 0.2794, + "step": 15543 + }, + { + "epoch": 42.7032967032967, + "grad_norm": 23.090717315673828, + "learning_rate": 2.864835164835165e-05, + "loss": 0.7828, + "step": 15544 + }, + { + "epoch": 42.706043956043956, + "grad_norm": 11.865713119506836, + "learning_rate": 2.864697802197802e-05, + "loss": 0.2273, + "step": 15545 + }, + { + "epoch": 42.70879120879121, + "grad_norm": 6.149807453155518, + "learning_rate": 2.8645604395604397e-05, + "loss": 0.1228, + "step": 15546 + }, + { + "epoch": 42.71153846153846, + "grad_norm": 10.323686599731445, + "learning_rate": 2.8644230769230774e-05, + "loss": 0.2363, + "step": 15547 + }, + { + "epoch": 42.714285714285715, + "grad_norm": 8.699928283691406, + "learning_rate": 2.8642857142857144e-05, + "loss": 0.147, + "step": 15548 + }, + { + "epoch": 42.717032967032964, + "grad_norm": 11.192429542541504, + "learning_rate": 2.864148351648352e-05, + "loss": 0.249, + "step": 15549 + }, + { + "epoch": 42.71978021978022, + "grad_norm": 11.862646102905273, + "learning_rate": 2.864010989010989e-05, + "loss": 0.2741, + "step": 15550 + }, + { + "epoch": 42.722527472527474, + "grad_norm": 10.836979866027832, + "learning_rate": 2.8638736263736267e-05, + "loss": 0.1706, + "step": 15551 + }, + { + "epoch": 42.72527472527472, + "grad_norm": 4.302961349487305, + "learning_rate": 2.863736263736264e-05, + "loss": 0.0697, + "step": 15552 + }, + { + "epoch": 42.72802197802198, + "grad_norm": 13.000520706176758, + "learning_rate": 2.863598901098901e-05, + "loss": 0.5328, + "step": 15553 + }, + { + "epoch": 42.73076923076923, + "grad_norm": 10.25277328491211, + "learning_rate": 2.8634615384615387e-05, + "loss": 0.2293, + "step": 15554 + }, + { + "epoch": 42.73351648351648, + "grad_norm": 6.661105632781982, + "learning_rate": 2.8633241758241757e-05, + "loss": 0.1307, + "step": 15555 + }, + { + "epoch": 42.73626373626374, + "grad_norm": 17.483013153076172, + "learning_rate": 2.8631868131868134e-05, + "loss": 0.3813, + "step": 15556 + }, + { + "epoch": 42.73901098901099, + "grad_norm": 19.699604034423828, + "learning_rate": 2.863049450549451e-05, + "loss": 0.6398, + "step": 15557 + }, + { + "epoch": 42.74175824175824, + "grad_norm": 11.22856330871582, + "learning_rate": 2.862912087912088e-05, + "loss": 0.3433, + "step": 15558 + }, + { + "epoch": 42.744505494505496, + "grad_norm": 12.197193145751953, + "learning_rate": 2.8627747252747254e-05, + "loss": 0.1762, + "step": 15559 + }, + { + "epoch": 42.747252747252745, + "grad_norm": 19.668331146240234, + "learning_rate": 2.8626373626373624e-05, + "loss": 0.5214, + "step": 15560 + }, + { + "epoch": 42.75, + "grad_norm": 14.815559387207031, + "learning_rate": 2.8625e-05, + "loss": 0.2506, + "step": 15561 + }, + { + "epoch": 42.752747252747255, + "grad_norm": 19.48729705810547, + "learning_rate": 2.8623626373626378e-05, + "loss": 0.4973, + "step": 15562 + }, + { + "epoch": 42.755494505494504, + "grad_norm": 6.265761375427246, + "learning_rate": 2.8622252747252748e-05, + "loss": 0.1255, + "step": 15563 + }, + { + "epoch": 42.75824175824176, + "grad_norm": 16.334142684936523, + "learning_rate": 2.8620879120879125e-05, + "loss": 0.4624, + "step": 15564 + }, + { + "epoch": 42.76098901098901, + "grad_norm": 7.945742130279541, + "learning_rate": 2.8619505494505495e-05, + "loss": 0.091, + "step": 15565 + }, + { + "epoch": 42.76373626373626, + "grad_norm": 15.656855583190918, + "learning_rate": 2.861813186813187e-05, + "loss": 0.4196, + "step": 15566 + }, + { + "epoch": 42.76648351648352, + "grad_norm": 10.399367332458496, + "learning_rate": 2.8616758241758245e-05, + "loss": 0.1832, + "step": 15567 + }, + { + "epoch": 42.76923076923077, + "grad_norm": 20.91321563720703, + "learning_rate": 2.8615384615384615e-05, + "loss": 0.3918, + "step": 15568 + }, + { + "epoch": 42.77197802197802, + "grad_norm": 12.499751091003418, + "learning_rate": 2.8614010989010992e-05, + "loss": 0.4336, + "step": 15569 + }, + { + "epoch": 42.77472527472528, + "grad_norm": 13.601685523986816, + "learning_rate": 2.8612637362637362e-05, + "loss": 0.4166, + "step": 15570 + }, + { + "epoch": 42.777472527472526, + "grad_norm": 10.268938064575195, + "learning_rate": 2.861126373626374e-05, + "loss": 0.3663, + "step": 15571 + }, + { + "epoch": 42.78021978021978, + "grad_norm": 3.5834269523620605, + "learning_rate": 2.8609890109890115e-05, + "loss": 0.0704, + "step": 15572 + }, + { + "epoch": 42.782967032967036, + "grad_norm": 14.761823654174805, + "learning_rate": 2.8608516483516485e-05, + "loss": 0.3542, + "step": 15573 + }, + { + "epoch": 42.785714285714285, + "grad_norm": 6.313930988311768, + "learning_rate": 2.860714285714286e-05, + "loss": 0.104, + "step": 15574 + }, + { + "epoch": 42.78846153846154, + "grad_norm": 4.10476541519165, + "learning_rate": 2.860576923076923e-05, + "loss": 0.1098, + "step": 15575 + }, + { + "epoch": 42.79120879120879, + "grad_norm": 17.69746208190918, + "learning_rate": 2.8604395604395606e-05, + "loss": 0.4177, + "step": 15576 + }, + { + "epoch": 42.793956043956044, + "grad_norm": 7.5234503746032715, + "learning_rate": 2.8603021978021983e-05, + "loss": 0.179, + "step": 15577 + }, + { + "epoch": 42.7967032967033, + "grad_norm": 18.372724533081055, + "learning_rate": 2.8601648351648353e-05, + "loss": 0.1447, + "step": 15578 + }, + { + "epoch": 42.79945054945055, + "grad_norm": 6.153684139251709, + "learning_rate": 2.860027472527473e-05, + "loss": 0.1416, + "step": 15579 + }, + { + "epoch": 42.8021978021978, + "grad_norm": 9.506453514099121, + "learning_rate": 2.85989010989011e-05, + "loss": 0.2125, + "step": 15580 + }, + { + "epoch": 42.80494505494506, + "grad_norm": 5.986804485321045, + "learning_rate": 2.8597527472527476e-05, + "loss": 0.1028, + "step": 15581 + }, + { + "epoch": 42.80769230769231, + "grad_norm": 13.90441608428955, + "learning_rate": 2.859615384615385e-05, + "loss": 0.408, + "step": 15582 + }, + { + "epoch": 42.81043956043956, + "grad_norm": 8.069644927978516, + "learning_rate": 2.859478021978022e-05, + "loss": 0.1901, + "step": 15583 + }, + { + "epoch": 42.81318681318681, + "grad_norm": 16.59360694885254, + "learning_rate": 2.8593406593406596e-05, + "loss": 0.5642, + "step": 15584 + }, + { + "epoch": 42.815934065934066, + "grad_norm": 6.8520708084106445, + "learning_rate": 2.8592032967032966e-05, + "loss": 0.1395, + "step": 15585 + }, + { + "epoch": 42.81868131868132, + "grad_norm": 6.2474236488342285, + "learning_rate": 2.8590659340659343e-05, + "loss": 0.052, + "step": 15586 + }, + { + "epoch": 42.82142857142857, + "grad_norm": 7.8848161697387695, + "learning_rate": 2.8589285714285713e-05, + "loss": 0.1983, + "step": 15587 + }, + { + "epoch": 42.824175824175825, + "grad_norm": 4.046441555023193, + "learning_rate": 2.858791208791209e-05, + "loss": 0.061, + "step": 15588 + }, + { + "epoch": 42.82692307692308, + "grad_norm": 13.587446212768555, + "learning_rate": 2.8586538461538463e-05, + "loss": 0.3081, + "step": 15589 + }, + { + "epoch": 42.82967032967033, + "grad_norm": 7.409442901611328, + "learning_rate": 2.8585164835164833e-05, + "loss": 0.0896, + "step": 15590 + }, + { + "epoch": 42.832417582417584, + "grad_norm": 16.048311233520508, + "learning_rate": 2.858379120879121e-05, + "loss": 0.4461, + "step": 15591 + }, + { + "epoch": 42.83516483516483, + "grad_norm": 9.167336463928223, + "learning_rate": 2.858241758241758e-05, + "loss": 0.1871, + "step": 15592 + }, + { + "epoch": 42.83791208791209, + "grad_norm": 19.59522819519043, + "learning_rate": 2.8581043956043957e-05, + "loss": 0.69, + "step": 15593 + }, + { + "epoch": 42.84065934065934, + "grad_norm": 12.22274398803711, + "learning_rate": 2.8579670329670334e-05, + "loss": 0.2917, + "step": 15594 + }, + { + "epoch": 42.84340659340659, + "grad_norm": 2.9652819633483887, + "learning_rate": 2.8578296703296704e-05, + "loss": 0.0413, + "step": 15595 + }, + { + "epoch": 42.84615384615385, + "grad_norm": 10.848246574401855, + "learning_rate": 2.8576923076923077e-05, + "loss": 0.3025, + "step": 15596 + }, + { + "epoch": 42.8489010989011, + "grad_norm": 16.911771774291992, + "learning_rate": 2.857554945054945e-05, + "loss": 0.377, + "step": 15597 + }, + { + "epoch": 42.85164835164835, + "grad_norm": 21.66309928894043, + "learning_rate": 2.8574175824175824e-05, + "loss": 0.4043, + "step": 15598 + }, + { + "epoch": 42.854395604395606, + "grad_norm": 18.98927116394043, + "learning_rate": 2.85728021978022e-05, + "loss": 0.6169, + "step": 15599 + }, + { + "epoch": 42.857142857142854, + "grad_norm": 11.242249488830566, + "learning_rate": 2.857142857142857e-05, + "loss": 0.2956, + "step": 15600 + }, + { + "epoch": 42.85989010989011, + "grad_norm": 8.062690734863281, + "learning_rate": 2.8570054945054948e-05, + "loss": 0.2379, + "step": 15601 + }, + { + "epoch": 42.862637362637365, + "grad_norm": 15.865232467651367, + "learning_rate": 2.8568681318681318e-05, + "loss": 0.4268, + "step": 15602 + }, + { + "epoch": 42.86538461538461, + "grad_norm": 11.542210578918457, + "learning_rate": 2.8567307692307694e-05, + "loss": 0.3, + "step": 15603 + }, + { + "epoch": 42.86813186813187, + "grad_norm": 16.42829704284668, + "learning_rate": 2.8565934065934068e-05, + "loss": 0.4908, + "step": 15604 + }, + { + "epoch": 42.870879120879124, + "grad_norm": 5.443167209625244, + "learning_rate": 2.8564560439560438e-05, + "loss": 0.0738, + "step": 15605 + }, + { + "epoch": 42.87362637362637, + "grad_norm": 18.86993980407715, + "learning_rate": 2.8563186813186815e-05, + "loss": 0.8322, + "step": 15606 + }, + { + "epoch": 42.87637362637363, + "grad_norm": 24.184011459350586, + "learning_rate": 2.8561813186813185e-05, + "loss": 0.7548, + "step": 15607 + }, + { + "epoch": 42.879120879120876, + "grad_norm": 15.049272537231445, + "learning_rate": 2.856043956043956e-05, + "loss": 0.4141, + "step": 15608 + }, + { + "epoch": 42.88186813186813, + "grad_norm": 11.043097496032715, + "learning_rate": 2.8559065934065938e-05, + "loss": 0.3115, + "step": 15609 + }, + { + "epoch": 42.88461538461539, + "grad_norm": 21.92937660217285, + "learning_rate": 2.855769230769231e-05, + "loss": 0.7487, + "step": 15610 + }, + { + "epoch": 42.887362637362635, + "grad_norm": 9.985843658447266, + "learning_rate": 2.8556318681318682e-05, + "loss": 0.1803, + "step": 15611 + }, + { + "epoch": 42.89010989010989, + "grad_norm": 4.91886043548584, + "learning_rate": 2.8554945054945055e-05, + "loss": 0.0639, + "step": 15612 + }, + { + "epoch": 42.892857142857146, + "grad_norm": 12.898669242858887, + "learning_rate": 2.855357142857143e-05, + "loss": 0.2501, + "step": 15613 + }, + { + "epoch": 42.895604395604394, + "grad_norm": 10.15183162689209, + "learning_rate": 2.8552197802197805e-05, + "loss": 0.1413, + "step": 15614 + }, + { + "epoch": 42.89835164835165, + "grad_norm": 12.027713775634766, + "learning_rate": 2.8550824175824175e-05, + "loss": 0.356, + "step": 15615 + }, + { + "epoch": 42.9010989010989, + "grad_norm": 11.571907043457031, + "learning_rate": 2.8549450549450552e-05, + "loss": 0.3435, + "step": 15616 + }, + { + "epoch": 42.90384615384615, + "grad_norm": 5.934432506561279, + "learning_rate": 2.8548076923076922e-05, + "loss": 0.1458, + "step": 15617 + }, + { + "epoch": 42.90659340659341, + "grad_norm": 8.132317543029785, + "learning_rate": 2.85467032967033e-05, + "loss": 0.155, + "step": 15618 + }, + { + "epoch": 42.90934065934066, + "grad_norm": 14.892951965332031, + "learning_rate": 2.8545329670329672e-05, + "loss": 0.3731, + "step": 15619 + }, + { + "epoch": 42.91208791208791, + "grad_norm": 11.51389217376709, + "learning_rate": 2.8543956043956042e-05, + "loss": 0.2161, + "step": 15620 + }, + { + "epoch": 42.91483516483517, + "grad_norm": 12.037538528442383, + "learning_rate": 2.854258241758242e-05, + "loss": 0.1345, + "step": 15621 + }, + { + "epoch": 42.917582417582416, + "grad_norm": 6.164199352264404, + "learning_rate": 2.854120879120879e-05, + "loss": 0.1529, + "step": 15622 + }, + { + "epoch": 42.92032967032967, + "grad_norm": 13.584129333496094, + "learning_rate": 2.8539835164835166e-05, + "loss": 0.3132, + "step": 15623 + }, + { + "epoch": 42.92307692307692, + "grad_norm": 17.3649845123291, + "learning_rate": 2.8538461538461543e-05, + "loss": 0.2765, + "step": 15624 + }, + { + "epoch": 42.925824175824175, + "grad_norm": 17.53666114807129, + "learning_rate": 2.8537087912087913e-05, + "loss": 0.6489, + "step": 15625 + }, + { + "epoch": 42.92857142857143, + "grad_norm": 13.524206161499023, + "learning_rate": 2.8535714285714286e-05, + "loss": 0.2159, + "step": 15626 + }, + { + "epoch": 42.93131868131868, + "grad_norm": 13.376176834106445, + "learning_rate": 2.853434065934066e-05, + "loss": 0.3304, + "step": 15627 + }, + { + "epoch": 42.934065934065934, + "grad_norm": 8.751999855041504, + "learning_rate": 2.8532967032967033e-05, + "loss": 0.1945, + "step": 15628 + }, + { + "epoch": 42.93681318681319, + "grad_norm": 11.634465217590332, + "learning_rate": 2.853159340659341e-05, + "loss": 0.246, + "step": 15629 + }, + { + "epoch": 42.93956043956044, + "grad_norm": 4.429220199584961, + "learning_rate": 2.853021978021978e-05, + "loss": 0.0826, + "step": 15630 + }, + { + "epoch": 42.94230769230769, + "grad_norm": 8.302128791809082, + "learning_rate": 2.8528846153846157e-05, + "loss": 0.1484, + "step": 15631 + }, + { + "epoch": 42.94505494505494, + "grad_norm": 7.794437408447266, + "learning_rate": 2.8527472527472527e-05, + "loss": 0.2147, + "step": 15632 + }, + { + "epoch": 42.9478021978022, + "grad_norm": 3.3092784881591797, + "learning_rate": 2.8526098901098903e-05, + "loss": 0.0607, + "step": 15633 + }, + { + "epoch": 42.95054945054945, + "grad_norm": 5.654313087463379, + "learning_rate": 2.8524725274725277e-05, + "loss": 0.1274, + "step": 15634 + }, + { + "epoch": 42.9532967032967, + "grad_norm": 13.069342613220215, + "learning_rate": 2.8523351648351647e-05, + "loss": 0.2315, + "step": 15635 + }, + { + "epoch": 42.956043956043956, + "grad_norm": 13.181724548339844, + "learning_rate": 2.8521978021978024e-05, + "loss": 0.2798, + "step": 15636 + }, + { + "epoch": 42.95879120879121, + "grad_norm": 12.181428909301758, + "learning_rate": 2.8520604395604394e-05, + "loss": 0.1836, + "step": 15637 + }, + { + "epoch": 42.96153846153846, + "grad_norm": 9.342206001281738, + "learning_rate": 2.851923076923077e-05, + "loss": 0.4478, + "step": 15638 + }, + { + "epoch": 42.964285714285715, + "grad_norm": 12.36636734008789, + "learning_rate": 2.8517857142857147e-05, + "loss": 0.2608, + "step": 15639 + }, + { + "epoch": 42.967032967032964, + "grad_norm": 6.730431079864502, + "learning_rate": 2.8516483516483517e-05, + "loss": 0.162, + "step": 15640 + }, + { + "epoch": 42.96978021978022, + "grad_norm": 9.040410041809082, + "learning_rate": 2.851510989010989e-05, + "loss": 0.1303, + "step": 15641 + }, + { + "epoch": 42.972527472527474, + "grad_norm": 12.068778038024902, + "learning_rate": 2.8513736263736264e-05, + "loss": 0.266, + "step": 15642 + }, + { + "epoch": 42.97527472527472, + "grad_norm": 21.217004776000977, + "learning_rate": 2.8512362637362638e-05, + "loss": 0.5392, + "step": 15643 + }, + { + "epoch": 42.97802197802198, + "grad_norm": 6.858759880065918, + "learning_rate": 2.8510989010989014e-05, + "loss": 0.2571, + "step": 15644 + }, + { + "epoch": 42.98076923076923, + "grad_norm": 8.034833908081055, + "learning_rate": 2.8509615384615384e-05, + "loss": 0.1957, + "step": 15645 + }, + { + "epoch": 42.98351648351648, + "grad_norm": 18.199487686157227, + "learning_rate": 2.850824175824176e-05, + "loss": 0.4244, + "step": 15646 + }, + { + "epoch": 42.98626373626374, + "grad_norm": 7.464906692504883, + "learning_rate": 2.850686813186813e-05, + "loss": 0.2015, + "step": 15647 + }, + { + "epoch": 42.98901098901099, + "grad_norm": 5.2563157081604, + "learning_rate": 2.8505494505494508e-05, + "loss": 0.0922, + "step": 15648 + }, + { + "epoch": 42.99175824175824, + "grad_norm": 15.478384017944336, + "learning_rate": 2.850412087912088e-05, + "loss": 0.5431, + "step": 15649 + }, + { + "epoch": 42.994505494505496, + "grad_norm": 12.73282527923584, + "learning_rate": 2.850274725274725e-05, + "loss": 0.2724, + "step": 15650 + }, + { + "epoch": 42.997252747252745, + "grad_norm": 14.60036563873291, + "learning_rate": 2.8501373626373628e-05, + "loss": 0.4272, + "step": 15651 + }, + { + "epoch": 43.0, + "grad_norm": 49.65138626098633, + "learning_rate": 2.8499999999999998e-05, + "loss": 1.2959, + "step": 15652 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.5385674931129476, + "eval_f1": 0.5194103238480412, + "eval_f1_DuraRiadoRio_64x64": 0.4230769230769231, + "eval_f1_Mole_64x64": 0.7766990291262136, + "eval_f1_Quebrado_64x64": 0.4512820512820513, + "eval_f1_RiadoRio_64x64": 0.5306930693069307, + "eval_f1_RioFechado_64x64": 0.41530054644808745, + "eval_loss": 2.824510335922241, + "eval_precision": 0.6741176848145073, + "eval_precision_DuraRiadoRio_64x64": 0.47413793103448276, + "eval_precision_Mole_64x64": 0.7272727272727273, + "eval_precision_Quebrado_64x64": 0.8627450980392157, + "eval_precision_RiadoRio_64x64": 0.37960339943342775, + "eval_precision_RioFechado_64x64": 0.926829268292683, + "eval_recall": 0.5340035829009142, + "eval_recall_DuraRiadoRio_64x64": 0.3819444444444444, + "eval_recall_Mole_64x64": 0.8333333333333334, + "eval_recall_Quebrado_64x64": 0.3055555555555556, + "eval_recall_RiadoRio_64x64": 0.881578947368421, + "eval_recall_RioFechado_64x64": 0.2676056338028169, + "eval_runtime": 1.7389, + "eval_samples_per_second": 417.498, + "eval_steps_per_second": 26.453, + "step": 15652 + }, + { + "epoch": 43.002747252747255, + "grad_norm": 11.97615909576416, + "learning_rate": 2.8498626373626375e-05, + "loss": 0.4232, + "step": 15653 + }, + { + "epoch": 43.005494505494504, + "grad_norm": 9.306790351867676, + "learning_rate": 2.8497252747252752e-05, + "loss": 0.2402, + "step": 15654 + }, + { + "epoch": 43.00824175824176, + "grad_norm": 21.509796142578125, + "learning_rate": 2.8495879120879122e-05, + "loss": 0.6351, + "step": 15655 + }, + { + "epoch": 43.010989010989015, + "grad_norm": 13.079379081726074, + "learning_rate": 2.8494505494505495e-05, + "loss": 0.3933, + "step": 15656 + }, + { + "epoch": 43.01373626373626, + "grad_norm": 10.851645469665527, + "learning_rate": 2.849313186813187e-05, + "loss": 0.3291, + "step": 15657 + }, + { + "epoch": 43.01648351648352, + "grad_norm": 5.062093734741211, + "learning_rate": 2.8491758241758242e-05, + "loss": 0.091, + "step": 15658 + }, + { + "epoch": 43.01923076923077, + "grad_norm": 17.676483154296875, + "learning_rate": 2.849038461538462e-05, + "loss": 0.6449, + "step": 15659 + }, + { + "epoch": 43.02197802197802, + "grad_norm": 13.417471885681152, + "learning_rate": 2.848901098901099e-05, + "loss": 0.3259, + "step": 15660 + }, + { + "epoch": 43.02472527472528, + "grad_norm": 8.774625778198242, + "learning_rate": 2.8487637362637366e-05, + "loss": 0.1466, + "step": 15661 + }, + { + "epoch": 43.027472527472526, + "grad_norm": 8.324092864990234, + "learning_rate": 2.8486263736263736e-05, + "loss": 0.228, + "step": 15662 + }, + { + "epoch": 43.03021978021978, + "grad_norm": 6.1074748039245605, + "learning_rate": 2.8484890109890112e-05, + "loss": 0.076, + "step": 15663 + }, + { + "epoch": 43.032967032967036, + "grad_norm": 8.931758880615234, + "learning_rate": 2.8483516483516486e-05, + "loss": 0.1332, + "step": 15664 + }, + { + "epoch": 43.035714285714285, + "grad_norm": 12.335609436035156, + "learning_rate": 2.8482142857142856e-05, + "loss": 0.2506, + "step": 15665 + }, + { + "epoch": 43.03846153846154, + "grad_norm": 7.629190921783447, + "learning_rate": 2.8480769230769233e-05, + "loss": 0.1323, + "step": 15666 + }, + { + "epoch": 43.04120879120879, + "grad_norm": 11.09797477722168, + "learning_rate": 2.8479395604395603e-05, + "loss": 0.3695, + "step": 15667 + }, + { + "epoch": 43.043956043956044, + "grad_norm": 19.6499080657959, + "learning_rate": 2.847802197802198e-05, + "loss": 0.5848, + "step": 15668 + }, + { + "epoch": 43.0467032967033, + "grad_norm": 17.690330505371094, + "learning_rate": 2.8476648351648356e-05, + "loss": 0.4516, + "step": 15669 + }, + { + "epoch": 43.04945054945055, + "grad_norm": 7.52191686630249, + "learning_rate": 2.8475274725274726e-05, + "loss": 0.1743, + "step": 15670 + }, + { + "epoch": 43.0521978021978, + "grad_norm": 13.379596710205078, + "learning_rate": 2.84739010989011e-05, + "loss": 0.3975, + "step": 15671 + }, + { + "epoch": 43.05494505494506, + "grad_norm": 13.573302268981934, + "learning_rate": 2.8472527472527473e-05, + "loss": 0.3225, + "step": 15672 + }, + { + "epoch": 43.05769230769231, + "grad_norm": 10.57541561126709, + "learning_rate": 2.8471153846153847e-05, + "loss": 0.2276, + "step": 15673 + }, + { + "epoch": 43.06043956043956, + "grad_norm": 5.888788223266602, + "learning_rate": 2.8469780219780223e-05, + "loss": 0.1391, + "step": 15674 + }, + { + "epoch": 43.06318681318681, + "grad_norm": 11.76690673828125, + "learning_rate": 2.8468406593406593e-05, + "loss": 0.4436, + "step": 15675 + }, + { + "epoch": 43.065934065934066, + "grad_norm": 24.458171844482422, + "learning_rate": 2.846703296703297e-05, + "loss": 0.739, + "step": 15676 + }, + { + "epoch": 43.06868131868132, + "grad_norm": 10.657132148742676, + "learning_rate": 2.846565934065934e-05, + "loss": 0.1731, + "step": 15677 + }, + { + "epoch": 43.07142857142857, + "grad_norm": 15.331832885742188, + "learning_rate": 2.8464285714285717e-05, + "loss": 0.4879, + "step": 15678 + }, + { + "epoch": 43.074175824175825, + "grad_norm": 19.07785987854004, + "learning_rate": 2.846291208791209e-05, + "loss": 0.5432, + "step": 15679 + }, + { + "epoch": 43.07692307692308, + "grad_norm": 17.273544311523438, + "learning_rate": 2.846153846153846e-05, + "loss": 0.445, + "step": 15680 + }, + { + "epoch": 43.07967032967033, + "grad_norm": 9.609770774841309, + "learning_rate": 2.8460164835164837e-05, + "loss": 0.2181, + "step": 15681 + }, + { + "epoch": 43.082417582417584, + "grad_norm": 15.306884765625, + "learning_rate": 2.8458791208791207e-05, + "loss": 0.2113, + "step": 15682 + }, + { + "epoch": 43.08516483516483, + "grad_norm": 15.119187355041504, + "learning_rate": 2.8457417582417584e-05, + "loss": 0.4741, + "step": 15683 + }, + { + "epoch": 43.08791208791209, + "grad_norm": 9.491082191467285, + "learning_rate": 2.845604395604396e-05, + "loss": 0.2717, + "step": 15684 + }, + { + "epoch": 43.09065934065934, + "grad_norm": 13.801095962524414, + "learning_rate": 2.845467032967033e-05, + "loss": 0.1745, + "step": 15685 + }, + { + "epoch": 43.09340659340659, + "grad_norm": 13.278120040893555, + "learning_rate": 2.8453296703296704e-05, + "loss": 0.31, + "step": 15686 + }, + { + "epoch": 43.09615384615385, + "grad_norm": 14.281805992126465, + "learning_rate": 2.8451923076923078e-05, + "loss": 0.3652, + "step": 15687 + }, + { + "epoch": 43.0989010989011, + "grad_norm": 13.707804679870605, + "learning_rate": 2.845054945054945e-05, + "loss": 0.381, + "step": 15688 + }, + { + "epoch": 43.10164835164835, + "grad_norm": 12.773289680480957, + "learning_rate": 2.8449175824175828e-05, + "loss": 0.2397, + "step": 15689 + }, + { + "epoch": 43.104395604395606, + "grad_norm": 13.856033325195312, + "learning_rate": 2.8447802197802198e-05, + "loss": 0.3991, + "step": 15690 + }, + { + "epoch": 43.107142857142854, + "grad_norm": 7.604176044464111, + "learning_rate": 2.8446428571428575e-05, + "loss": 0.2318, + "step": 15691 + }, + { + "epoch": 43.10989010989011, + "grad_norm": 10.190299987792969, + "learning_rate": 2.8445054945054945e-05, + "loss": 0.2347, + "step": 15692 + }, + { + "epoch": 43.112637362637365, + "grad_norm": 13.031450271606445, + "learning_rate": 2.844368131868132e-05, + "loss": 0.369, + "step": 15693 + }, + { + "epoch": 43.11538461538461, + "grad_norm": 11.8302640914917, + "learning_rate": 2.8442307692307695e-05, + "loss": 0.2582, + "step": 15694 + }, + { + "epoch": 43.11813186813187, + "grad_norm": 15.065571784973145, + "learning_rate": 2.8440934065934065e-05, + "loss": 0.3516, + "step": 15695 + }, + { + "epoch": 43.120879120879124, + "grad_norm": 16.531766891479492, + "learning_rate": 2.843956043956044e-05, + "loss": 0.6623, + "step": 15696 + }, + { + "epoch": 43.12362637362637, + "grad_norm": 9.364828109741211, + "learning_rate": 2.843818681318681e-05, + "loss": 0.1444, + "step": 15697 + }, + { + "epoch": 43.12637362637363, + "grad_norm": 15.796991348266602, + "learning_rate": 2.843681318681319e-05, + "loss": 0.2531, + "step": 15698 + }, + { + "epoch": 43.129120879120876, + "grad_norm": 14.450014114379883, + "learning_rate": 2.8435439560439565e-05, + "loss": 0.3195, + "step": 15699 + }, + { + "epoch": 43.13186813186813, + "grad_norm": 12.484622955322266, + "learning_rate": 2.8434065934065935e-05, + "loss": 0.233, + "step": 15700 + }, + { + "epoch": 43.13461538461539, + "grad_norm": 13.048107147216797, + "learning_rate": 2.843269230769231e-05, + "loss": 0.3393, + "step": 15701 + }, + { + "epoch": 43.137362637362635, + "grad_norm": 8.260123252868652, + "learning_rate": 2.8431318681318682e-05, + "loss": 0.1598, + "step": 15702 + }, + { + "epoch": 43.14010989010989, + "grad_norm": 17.108016967773438, + "learning_rate": 2.8429945054945055e-05, + "loss": 0.3316, + "step": 15703 + }, + { + "epoch": 43.142857142857146, + "grad_norm": 4.270163059234619, + "learning_rate": 2.8428571428571432e-05, + "loss": 0.0987, + "step": 15704 + }, + { + "epoch": 43.145604395604394, + "grad_norm": 11.566279411315918, + "learning_rate": 2.8427197802197802e-05, + "loss": 0.2757, + "step": 15705 + }, + { + "epoch": 43.14835164835165, + "grad_norm": 12.42977237701416, + "learning_rate": 2.842582417582418e-05, + "loss": 0.1929, + "step": 15706 + }, + { + "epoch": 43.1510989010989, + "grad_norm": 7.909284591674805, + "learning_rate": 2.842445054945055e-05, + "loss": 0.1125, + "step": 15707 + }, + { + "epoch": 43.15384615384615, + "grad_norm": 16.779264450073242, + "learning_rate": 2.8423076923076926e-05, + "loss": 0.346, + "step": 15708 + }, + { + "epoch": 43.15659340659341, + "grad_norm": 16.11995506286621, + "learning_rate": 2.84217032967033e-05, + "loss": 0.3966, + "step": 15709 + }, + { + "epoch": 43.15934065934066, + "grad_norm": 11.984297752380371, + "learning_rate": 2.842032967032967e-05, + "loss": 0.158, + "step": 15710 + }, + { + "epoch": 43.16208791208791, + "grad_norm": 4.951189041137695, + "learning_rate": 2.8418956043956046e-05, + "loss": 0.0835, + "step": 15711 + }, + { + "epoch": 43.16483516483517, + "grad_norm": 7.908842086791992, + "learning_rate": 2.8417582417582416e-05, + "loss": 0.1276, + "step": 15712 + }, + { + "epoch": 43.167582417582416, + "grad_norm": 6.264468193054199, + "learning_rate": 2.8416208791208793e-05, + "loss": 0.1645, + "step": 15713 + }, + { + "epoch": 43.17032967032967, + "grad_norm": 4.554255962371826, + "learning_rate": 2.841483516483517e-05, + "loss": 0.0689, + "step": 15714 + }, + { + "epoch": 43.17307692307692, + "grad_norm": 5.626347541809082, + "learning_rate": 2.841346153846154e-05, + "loss": 0.071, + "step": 15715 + }, + { + "epoch": 43.175824175824175, + "grad_norm": 14.50833797454834, + "learning_rate": 2.8412087912087913e-05, + "loss": 0.3778, + "step": 15716 + }, + { + "epoch": 43.17857142857143, + "grad_norm": 7.462001800537109, + "learning_rate": 2.8410714285714287e-05, + "loss": 0.2057, + "step": 15717 + }, + { + "epoch": 43.18131868131868, + "grad_norm": 10.47767448425293, + "learning_rate": 2.840934065934066e-05, + "loss": 0.2501, + "step": 15718 + }, + { + "epoch": 43.184065934065934, + "grad_norm": 14.939801216125488, + "learning_rate": 2.8407967032967037e-05, + "loss": 0.2908, + "step": 15719 + }, + { + "epoch": 43.18681318681319, + "grad_norm": 5.620344638824463, + "learning_rate": 2.8406593406593407e-05, + "loss": 0.1913, + "step": 15720 + }, + { + "epoch": 43.18956043956044, + "grad_norm": 26.896713256835938, + "learning_rate": 2.8405219780219784e-05, + "loss": 0.5473, + "step": 15721 + }, + { + "epoch": 43.19230769230769, + "grad_norm": 13.121971130371094, + "learning_rate": 2.8403846153846154e-05, + "loss": 0.407, + "step": 15722 + }, + { + "epoch": 43.19505494505494, + "grad_norm": 8.14909839630127, + "learning_rate": 2.840247252747253e-05, + "loss": 0.1335, + "step": 15723 + }, + { + "epoch": 43.1978021978022, + "grad_norm": 12.021839141845703, + "learning_rate": 2.8401098901098904e-05, + "loss": 0.3369, + "step": 15724 + }, + { + "epoch": 43.20054945054945, + "grad_norm": 4.685201644897461, + "learning_rate": 2.8399725274725274e-05, + "loss": 0.0744, + "step": 15725 + }, + { + "epoch": 43.2032967032967, + "grad_norm": 14.107707023620605, + "learning_rate": 2.839835164835165e-05, + "loss": 0.3464, + "step": 15726 + }, + { + "epoch": 43.206043956043956, + "grad_norm": 16.616809844970703, + "learning_rate": 2.839697802197802e-05, + "loss": 0.2782, + "step": 15727 + }, + { + "epoch": 43.20879120879121, + "grad_norm": 6.520915985107422, + "learning_rate": 2.8395604395604397e-05, + "loss": 0.0698, + "step": 15728 + }, + { + "epoch": 43.21153846153846, + "grad_norm": 15.729679107666016, + "learning_rate": 2.8394230769230774e-05, + "loss": 0.3324, + "step": 15729 + }, + { + "epoch": 43.214285714285715, + "grad_norm": 4.882887363433838, + "learning_rate": 2.8392857142857144e-05, + "loss": 0.1124, + "step": 15730 + }, + { + "epoch": 43.217032967032964, + "grad_norm": 8.225818634033203, + "learning_rate": 2.8391483516483518e-05, + "loss": 0.2261, + "step": 15731 + }, + { + "epoch": 43.21978021978022, + "grad_norm": 14.176290512084961, + "learning_rate": 2.839010989010989e-05, + "loss": 0.3706, + "step": 15732 + }, + { + "epoch": 43.222527472527474, + "grad_norm": 24.361806869506836, + "learning_rate": 2.8388736263736264e-05, + "loss": 0.4138, + "step": 15733 + }, + { + "epoch": 43.22527472527472, + "grad_norm": 9.380452156066895, + "learning_rate": 2.838736263736264e-05, + "loss": 0.1418, + "step": 15734 + }, + { + "epoch": 43.22802197802198, + "grad_norm": 20.458730697631836, + "learning_rate": 2.838598901098901e-05, + "loss": 0.6177, + "step": 15735 + }, + { + "epoch": 43.23076923076923, + "grad_norm": 6.247960090637207, + "learning_rate": 2.8384615384615388e-05, + "loss": 0.1031, + "step": 15736 + }, + { + "epoch": 43.23351648351648, + "grad_norm": 19.972625732421875, + "learning_rate": 2.8383241758241758e-05, + "loss": 0.5406, + "step": 15737 + }, + { + "epoch": 43.23626373626374, + "grad_norm": 18.741613388061523, + "learning_rate": 2.8381868131868135e-05, + "loss": 0.5178, + "step": 15738 + }, + { + "epoch": 43.239010989010985, + "grad_norm": 14.77592658996582, + "learning_rate": 2.8380494505494508e-05, + "loss": 0.2789, + "step": 15739 + }, + { + "epoch": 43.24175824175824, + "grad_norm": 2.496488332748413, + "learning_rate": 2.837912087912088e-05, + "loss": 0.0405, + "step": 15740 + }, + { + "epoch": 43.244505494505496, + "grad_norm": 14.362061500549316, + "learning_rate": 2.8377747252747255e-05, + "loss": 0.4733, + "step": 15741 + }, + { + "epoch": 43.247252747252745, + "grad_norm": 5.632801532745361, + "learning_rate": 2.8376373626373625e-05, + "loss": 0.1188, + "step": 15742 + }, + { + "epoch": 43.25, + "grad_norm": 11.05495834350586, + "learning_rate": 2.8375000000000002e-05, + "loss": 0.2488, + "step": 15743 + }, + { + "epoch": 43.252747252747255, + "grad_norm": 7.363119125366211, + "learning_rate": 2.837362637362638e-05, + "loss": 0.1223, + "step": 15744 + }, + { + "epoch": 43.255494505494504, + "grad_norm": 8.700264930725098, + "learning_rate": 2.837225274725275e-05, + "loss": 0.2327, + "step": 15745 + }, + { + "epoch": 43.25824175824176, + "grad_norm": 9.199353218078613, + "learning_rate": 2.8370879120879122e-05, + "loss": 0.1092, + "step": 15746 + }, + { + "epoch": 43.260989010989015, + "grad_norm": 21.683504104614258, + "learning_rate": 2.8369505494505496e-05, + "loss": 0.6647, + "step": 15747 + }, + { + "epoch": 43.26373626373626, + "grad_norm": 18.417613983154297, + "learning_rate": 2.836813186813187e-05, + "loss": 0.7243, + "step": 15748 + }, + { + "epoch": 43.26648351648352, + "grad_norm": 15.908529281616211, + "learning_rate": 2.8366758241758246e-05, + "loss": 0.347, + "step": 15749 + }, + { + "epoch": 43.26923076923077, + "grad_norm": 14.2278470993042, + "learning_rate": 2.8365384615384616e-05, + "loss": 0.4646, + "step": 15750 + }, + { + "epoch": 43.27197802197802, + "grad_norm": 7.63663911819458, + "learning_rate": 2.8364010989010993e-05, + "loss": 0.158, + "step": 15751 + }, + { + "epoch": 43.27472527472528, + "grad_norm": 24.055540084838867, + "learning_rate": 2.8362637362637363e-05, + "loss": 0.6269, + "step": 15752 + }, + { + "epoch": 43.277472527472526, + "grad_norm": 10.326667785644531, + "learning_rate": 2.836126373626374e-05, + "loss": 0.2934, + "step": 15753 + }, + { + "epoch": 43.28021978021978, + "grad_norm": 23.803537368774414, + "learning_rate": 2.8359890109890113e-05, + "loss": 0.6837, + "step": 15754 + }, + { + "epoch": 43.282967032967036, + "grad_norm": 20.57655143737793, + "learning_rate": 2.8358516483516483e-05, + "loss": 0.4389, + "step": 15755 + }, + { + "epoch": 43.285714285714285, + "grad_norm": 24.00255584716797, + "learning_rate": 2.835714285714286e-05, + "loss": 0.5664, + "step": 15756 + }, + { + "epoch": 43.28846153846154, + "grad_norm": 20.700597763061523, + "learning_rate": 2.835576923076923e-05, + "loss": 0.5836, + "step": 15757 + }, + { + "epoch": 43.29120879120879, + "grad_norm": 12.877959251403809, + "learning_rate": 2.8354395604395606e-05, + "loss": 0.2681, + "step": 15758 + }, + { + "epoch": 43.293956043956044, + "grad_norm": 10.065695762634277, + "learning_rate": 2.8353021978021983e-05, + "loss": 0.3689, + "step": 15759 + }, + { + "epoch": 43.2967032967033, + "grad_norm": 14.676717758178711, + "learning_rate": 2.8351648351648353e-05, + "loss": 0.4137, + "step": 15760 + }, + { + "epoch": 43.29945054945055, + "grad_norm": 13.867271423339844, + "learning_rate": 2.8350274725274727e-05, + "loss": 0.3758, + "step": 15761 + }, + { + "epoch": 43.3021978021978, + "grad_norm": 5.946803569793701, + "learning_rate": 2.83489010989011e-05, + "loss": 0.1206, + "step": 15762 + }, + { + "epoch": 43.30494505494506, + "grad_norm": 15.310823440551758, + "learning_rate": 2.8347527472527473e-05, + "loss": 0.4047, + "step": 15763 + }, + { + "epoch": 43.30769230769231, + "grad_norm": 3.93839693069458, + "learning_rate": 2.834615384615385e-05, + "loss": 0.0849, + "step": 15764 + }, + { + "epoch": 43.31043956043956, + "grad_norm": 21.23551368713379, + "learning_rate": 2.834478021978022e-05, + "loss": 0.5101, + "step": 15765 + }, + { + "epoch": 43.31318681318681, + "grad_norm": 7.227079391479492, + "learning_rate": 2.8343406593406597e-05, + "loss": 0.2447, + "step": 15766 + }, + { + "epoch": 43.315934065934066, + "grad_norm": 15.81092357635498, + "learning_rate": 2.8342032967032967e-05, + "loss": 0.5647, + "step": 15767 + }, + { + "epoch": 43.31868131868132, + "grad_norm": 16.038179397583008, + "learning_rate": 2.8340659340659344e-05, + "loss": 0.3334, + "step": 15768 + }, + { + "epoch": 43.32142857142857, + "grad_norm": 15.782697677612305, + "learning_rate": 2.8339285714285717e-05, + "loss": 0.2059, + "step": 15769 + }, + { + "epoch": 43.324175824175825, + "grad_norm": 3.8142898082733154, + "learning_rate": 2.8337912087912087e-05, + "loss": 0.0928, + "step": 15770 + }, + { + "epoch": 43.32692307692308, + "grad_norm": 8.592000961303711, + "learning_rate": 2.8336538461538464e-05, + "loss": 0.1389, + "step": 15771 + }, + { + "epoch": 43.32967032967033, + "grad_norm": 13.872303009033203, + "learning_rate": 2.8335164835164834e-05, + "loss": 0.3676, + "step": 15772 + }, + { + "epoch": 43.332417582417584, + "grad_norm": 8.789924621582031, + "learning_rate": 2.833379120879121e-05, + "loss": 0.1191, + "step": 15773 + }, + { + "epoch": 43.33516483516483, + "grad_norm": 20.48482894897461, + "learning_rate": 2.8332417582417588e-05, + "loss": 0.4851, + "step": 15774 + }, + { + "epoch": 43.33791208791209, + "grad_norm": 11.966695785522461, + "learning_rate": 2.8331043956043958e-05, + "loss": 0.2243, + "step": 15775 + }, + { + "epoch": 43.34065934065934, + "grad_norm": 8.407476425170898, + "learning_rate": 2.832967032967033e-05, + "loss": 0.2316, + "step": 15776 + }, + { + "epoch": 43.34340659340659, + "grad_norm": 9.939811706542969, + "learning_rate": 2.83282967032967e-05, + "loss": 0.1493, + "step": 15777 + }, + { + "epoch": 43.34615384615385, + "grad_norm": 8.448830604553223, + "learning_rate": 2.8326923076923078e-05, + "loss": 0.1011, + "step": 15778 + }, + { + "epoch": 43.3489010989011, + "grad_norm": 22.823034286499023, + "learning_rate": 2.8325549450549455e-05, + "loss": 0.3198, + "step": 15779 + }, + { + "epoch": 43.35164835164835, + "grad_norm": 15.900188446044922, + "learning_rate": 2.8324175824175825e-05, + "loss": 0.3672, + "step": 15780 + }, + { + "epoch": 43.354395604395606, + "grad_norm": 9.284428596496582, + "learning_rate": 2.83228021978022e-05, + "loss": 0.1407, + "step": 15781 + }, + { + "epoch": 43.357142857142854, + "grad_norm": 8.35898208618164, + "learning_rate": 2.832142857142857e-05, + "loss": 0.1788, + "step": 15782 + }, + { + "epoch": 43.35989010989011, + "grad_norm": 2.376934051513672, + "learning_rate": 2.832005494505495e-05, + "loss": 0.0486, + "step": 15783 + }, + { + "epoch": 43.362637362637365, + "grad_norm": 19.73619842529297, + "learning_rate": 2.8318681318681322e-05, + "loss": 0.3684, + "step": 15784 + }, + { + "epoch": 43.36538461538461, + "grad_norm": 19.7282657623291, + "learning_rate": 2.8317307692307692e-05, + "loss": 0.4977, + "step": 15785 + }, + { + "epoch": 43.36813186813187, + "grad_norm": 11.73641300201416, + "learning_rate": 2.831593406593407e-05, + "loss": 0.3129, + "step": 15786 + }, + { + "epoch": 43.370879120879124, + "grad_norm": 7.577051162719727, + "learning_rate": 2.831456043956044e-05, + "loss": 0.1037, + "step": 15787 + }, + { + "epoch": 43.37362637362637, + "grad_norm": 24.344921112060547, + "learning_rate": 2.8313186813186815e-05, + "loss": 0.9239, + "step": 15788 + }, + { + "epoch": 43.37637362637363, + "grad_norm": 7.5394368171691895, + "learning_rate": 2.8311813186813192e-05, + "loss": 0.1631, + "step": 15789 + }, + { + "epoch": 43.379120879120876, + "grad_norm": 6.590243339538574, + "learning_rate": 2.8310439560439562e-05, + "loss": 0.0979, + "step": 15790 + }, + { + "epoch": 43.38186813186813, + "grad_norm": 20.097488403320312, + "learning_rate": 2.8309065934065936e-05, + "loss": 0.4906, + "step": 15791 + }, + { + "epoch": 43.38461538461539, + "grad_norm": 8.327616691589355, + "learning_rate": 2.8307692307692306e-05, + "loss": 0.2448, + "step": 15792 + }, + { + "epoch": 43.387362637362635, + "grad_norm": 15.450665473937988, + "learning_rate": 2.8306318681318682e-05, + "loss": 0.3716, + "step": 15793 + }, + { + "epoch": 43.39010989010989, + "grad_norm": 8.07423210144043, + "learning_rate": 2.830494505494506e-05, + "loss": 0.1377, + "step": 15794 + }, + { + "epoch": 43.392857142857146, + "grad_norm": 10.804645538330078, + "learning_rate": 2.830357142857143e-05, + "loss": 0.2261, + "step": 15795 + }, + { + "epoch": 43.395604395604394, + "grad_norm": 3.2460083961486816, + "learning_rate": 2.8302197802197806e-05, + "loss": 0.0286, + "step": 15796 + }, + { + "epoch": 43.39835164835165, + "grad_norm": 14.651139259338379, + "learning_rate": 2.8300824175824176e-05, + "loss": 0.2416, + "step": 15797 + }, + { + "epoch": 43.4010989010989, + "grad_norm": 14.30010986328125, + "learning_rate": 2.8299450549450553e-05, + "loss": 0.3007, + "step": 15798 + }, + { + "epoch": 43.40384615384615, + "grad_norm": 6.600274562835693, + "learning_rate": 2.8298076923076926e-05, + "loss": 0.1868, + "step": 15799 + }, + { + "epoch": 43.40659340659341, + "grad_norm": 13.499977111816406, + "learning_rate": 2.8296703296703296e-05, + "loss": 0.4516, + "step": 15800 + }, + { + "epoch": 43.40934065934066, + "grad_norm": 8.601890563964844, + "learning_rate": 2.8295329670329673e-05, + "loss": 0.1128, + "step": 15801 + }, + { + "epoch": 43.41208791208791, + "grad_norm": 29.130828857421875, + "learning_rate": 2.8293956043956043e-05, + "loss": 0.7193, + "step": 15802 + }, + { + "epoch": 43.41483516483517, + "grad_norm": 21.433393478393555, + "learning_rate": 2.829258241758242e-05, + "loss": 0.5502, + "step": 15803 + }, + { + "epoch": 43.417582417582416, + "grad_norm": 10.292837142944336, + "learning_rate": 2.8291208791208797e-05, + "loss": 0.2221, + "step": 15804 + }, + { + "epoch": 43.42032967032967, + "grad_norm": 7.786239147186279, + "learning_rate": 2.8289835164835167e-05, + "loss": 0.1358, + "step": 15805 + }, + { + "epoch": 43.42307692307692, + "grad_norm": 3.3182666301727295, + "learning_rate": 2.828846153846154e-05, + "loss": 0.1923, + "step": 15806 + }, + { + "epoch": 43.425824175824175, + "grad_norm": 16.790945053100586, + "learning_rate": 2.828708791208791e-05, + "loss": 0.4528, + "step": 15807 + }, + { + "epoch": 43.42857142857143, + "grad_norm": 6.409149646759033, + "learning_rate": 2.8285714285714287e-05, + "loss": 0.162, + "step": 15808 + }, + { + "epoch": 43.43131868131868, + "grad_norm": 15.501188278198242, + "learning_rate": 2.8284340659340664e-05, + "loss": 0.4202, + "step": 15809 + }, + { + "epoch": 43.434065934065934, + "grad_norm": 9.602458000183105, + "learning_rate": 2.8282967032967034e-05, + "loss": 0.2254, + "step": 15810 + }, + { + "epoch": 43.43681318681319, + "grad_norm": 3.1153557300567627, + "learning_rate": 2.828159340659341e-05, + "loss": 0.0357, + "step": 15811 + }, + { + "epoch": 43.43956043956044, + "grad_norm": 11.199146270751953, + "learning_rate": 2.828021978021978e-05, + "loss": 0.1936, + "step": 15812 + }, + { + "epoch": 43.44230769230769, + "grad_norm": 13.06050968170166, + "learning_rate": 2.8278846153846157e-05, + "loss": 0.4169, + "step": 15813 + }, + { + "epoch": 43.44505494505494, + "grad_norm": 24.51812744140625, + "learning_rate": 2.8277472527472527e-05, + "loss": 0.6668, + "step": 15814 + }, + { + "epoch": 43.4478021978022, + "grad_norm": 9.487325668334961, + "learning_rate": 2.82760989010989e-05, + "loss": 0.1997, + "step": 15815 + }, + { + "epoch": 43.45054945054945, + "grad_norm": 8.46791934967041, + "learning_rate": 2.8274725274725278e-05, + "loss": 0.1669, + "step": 15816 + }, + { + "epoch": 43.4532967032967, + "grad_norm": 7.172150135040283, + "learning_rate": 2.8273351648351648e-05, + "loss": 0.1733, + "step": 15817 + }, + { + "epoch": 43.456043956043956, + "grad_norm": 17.42736053466797, + "learning_rate": 2.8271978021978024e-05, + "loss": 0.4291, + "step": 15818 + }, + { + "epoch": 43.45879120879121, + "grad_norm": 6.261040687561035, + "learning_rate": 2.8270604395604394e-05, + "loss": 0.109, + "step": 15819 + }, + { + "epoch": 43.46153846153846, + "grad_norm": 14.908571243286133, + "learning_rate": 2.826923076923077e-05, + "loss": 0.3658, + "step": 15820 + }, + { + "epoch": 43.464285714285715, + "grad_norm": 15.02393913269043, + "learning_rate": 2.8267857142857145e-05, + "loss": 0.5795, + "step": 15821 + }, + { + "epoch": 43.467032967032964, + "grad_norm": 24.840320587158203, + "learning_rate": 2.8266483516483515e-05, + "loss": 0.4648, + "step": 15822 + }, + { + "epoch": 43.46978021978022, + "grad_norm": 7.9883928298950195, + "learning_rate": 2.826510989010989e-05, + "loss": 0.1371, + "step": 15823 + }, + { + "epoch": 43.472527472527474, + "grad_norm": 9.233962059020996, + "learning_rate": 2.826373626373626e-05, + "loss": 0.2175, + "step": 15824 + }, + { + "epoch": 43.47527472527472, + "grad_norm": 8.597414016723633, + "learning_rate": 2.8262362637362638e-05, + "loss": 0.2208, + "step": 15825 + }, + { + "epoch": 43.47802197802198, + "grad_norm": 20.25734519958496, + "learning_rate": 2.8260989010989015e-05, + "loss": 0.5445, + "step": 15826 + }, + { + "epoch": 43.48076923076923, + "grad_norm": 11.898449897766113, + "learning_rate": 2.8259615384615385e-05, + "loss": 0.299, + "step": 15827 + }, + { + "epoch": 43.48351648351648, + "grad_norm": 6.733222961425781, + "learning_rate": 2.8258241758241762e-05, + "loss": 0.113, + "step": 15828 + }, + { + "epoch": 43.48626373626374, + "grad_norm": 10.910306930541992, + "learning_rate": 2.8256868131868132e-05, + "loss": 0.2726, + "step": 15829 + }, + { + "epoch": 43.489010989010985, + "grad_norm": 20.031784057617188, + "learning_rate": 2.8255494505494505e-05, + "loss": 0.6137, + "step": 15830 + }, + { + "epoch": 43.49175824175824, + "grad_norm": 23.080732345581055, + "learning_rate": 2.8254120879120882e-05, + "loss": 0.6025, + "step": 15831 + }, + { + "epoch": 43.494505494505496, + "grad_norm": 9.993955612182617, + "learning_rate": 2.8252747252747252e-05, + "loss": 0.2353, + "step": 15832 + }, + { + "epoch": 43.497252747252745, + "grad_norm": 7.0224690437316895, + "learning_rate": 2.825137362637363e-05, + "loss": 0.196, + "step": 15833 + }, + { + "epoch": 43.5, + "grad_norm": 20.369678497314453, + "learning_rate": 2.825e-05, + "loss": 0.6764, + "step": 15834 + }, + { + "epoch": 43.502747252747255, + "grad_norm": 22.00522804260254, + "learning_rate": 2.8248626373626376e-05, + "loss": 0.6695, + "step": 15835 + }, + { + "epoch": 43.505494505494504, + "grad_norm": 7.348420143127441, + "learning_rate": 2.824725274725275e-05, + "loss": 0.0896, + "step": 15836 + }, + { + "epoch": 43.50824175824176, + "grad_norm": 4.335371971130371, + "learning_rate": 2.824587912087912e-05, + "loss": 0.0735, + "step": 15837 + }, + { + "epoch": 43.51098901098901, + "grad_norm": 11.279828071594238, + "learning_rate": 2.8244505494505496e-05, + "loss": 0.2602, + "step": 15838 + }, + { + "epoch": 43.51373626373626, + "grad_norm": 34.24044418334961, + "learning_rate": 2.8243131868131866e-05, + "loss": 1.3658, + "step": 15839 + }, + { + "epoch": 43.51648351648352, + "grad_norm": 10.458977699279785, + "learning_rate": 2.8241758241758243e-05, + "loss": 0.1866, + "step": 15840 + }, + { + "epoch": 43.51923076923077, + "grad_norm": 6.106395721435547, + "learning_rate": 2.824038461538462e-05, + "loss": 0.0451, + "step": 15841 + }, + { + "epoch": 43.52197802197802, + "grad_norm": 13.971549034118652, + "learning_rate": 2.823901098901099e-05, + "loss": 0.2476, + "step": 15842 + }, + { + "epoch": 43.52472527472528, + "grad_norm": 14.66690731048584, + "learning_rate": 2.8237637362637366e-05, + "loss": 0.2862, + "step": 15843 + }, + { + "epoch": 43.527472527472526, + "grad_norm": 18.662099838256836, + "learning_rate": 2.8236263736263736e-05, + "loss": 0.4197, + "step": 15844 + }, + { + "epoch": 43.53021978021978, + "grad_norm": 15.604878425598145, + "learning_rate": 2.823489010989011e-05, + "loss": 0.1984, + "step": 15845 + }, + { + "epoch": 43.532967032967036, + "grad_norm": 16.82464027404785, + "learning_rate": 2.8233516483516487e-05, + "loss": 0.3093, + "step": 15846 + }, + { + "epoch": 43.535714285714285, + "grad_norm": 7.125633716583252, + "learning_rate": 2.8232142857142857e-05, + "loss": 0.1208, + "step": 15847 + }, + { + "epoch": 43.53846153846154, + "grad_norm": 6.683441638946533, + "learning_rate": 2.8230769230769233e-05, + "loss": 0.0952, + "step": 15848 + }, + { + "epoch": 43.54120879120879, + "grad_norm": 10.849855422973633, + "learning_rate": 2.8229395604395603e-05, + "loss": 0.2807, + "step": 15849 + }, + { + "epoch": 43.543956043956044, + "grad_norm": 17.661672592163086, + "learning_rate": 2.822802197802198e-05, + "loss": 0.5963, + "step": 15850 + }, + { + "epoch": 43.5467032967033, + "grad_norm": 15.916738510131836, + "learning_rate": 2.8226648351648354e-05, + "loss": 0.27, + "step": 15851 + }, + { + "epoch": 43.54945054945055, + "grad_norm": 20.140554428100586, + "learning_rate": 2.8225274725274724e-05, + "loss": 0.396, + "step": 15852 + }, + { + "epoch": 43.5521978021978, + "grad_norm": 18.799142837524414, + "learning_rate": 2.82239010989011e-05, + "loss": 0.5314, + "step": 15853 + }, + { + "epoch": 43.55494505494506, + "grad_norm": 21.073945999145508, + "learning_rate": 2.822252747252747e-05, + "loss": 0.3113, + "step": 15854 + }, + { + "epoch": 43.55769230769231, + "grad_norm": 15.163572311401367, + "learning_rate": 2.8221153846153847e-05, + "loss": 0.314, + "step": 15855 + }, + { + "epoch": 43.56043956043956, + "grad_norm": 13.705788612365723, + "learning_rate": 2.8219780219780224e-05, + "loss": 0.4451, + "step": 15856 + }, + { + "epoch": 43.56318681318681, + "grad_norm": 2.9726297855377197, + "learning_rate": 2.8218406593406594e-05, + "loss": 0.0408, + "step": 15857 + }, + { + "epoch": 43.565934065934066, + "grad_norm": 17.53761863708496, + "learning_rate": 2.821703296703297e-05, + "loss": 0.3298, + "step": 15858 + }, + { + "epoch": 43.56868131868132, + "grad_norm": 5.706195831298828, + "learning_rate": 2.821565934065934e-05, + "loss": 0.0772, + "step": 15859 + }, + { + "epoch": 43.57142857142857, + "grad_norm": 16.389402389526367, + "learning_rate": 2.8214285714285714e-05, + "loss": 0.2897, + "step": 15860 + }, + { + "epoch": 43.574175824175825, + "grad_norm": 21.818632125854492, + "learning_rate": 2.821291208791209e-05, + "loss": 0.403, + "step": 15861 + }, + { + "epoch": 43.57692307692308, + "grad_norm": 12.646642684936523, + "learning_rate": 2.821153846153846e-05, + "loss": 0.3411, + "step": 15862 + }, + { + "epoch": 43.57967032967033, + "grad_norm": 9.462908744812012, + "learning_rate": 2.8210164835164838e-05, + "loss": 0.2103, + "step": 15863 + }, + { + "epoch": 43.582417582417584, + "grad_norm": 12.539497375488281, + "learning_rate": 2.8208791208791208e-05, + "loss": 0.1218, + "step": 15864 + }, + { + "epoch": 43.58516483516483, + "grad_norm": 17.416702270507812, + "learning_rate": 2.8207417582417585e-05, + "loss": 0.3014, + "step": 15865 + }, + { + "epoch": 43.58791208791209, + "grad_norm": 15.611762046813965, + "learning_rate": 2.8206043956043958e-05, + "loss": 0.3673, + "step": 15866 + }, + { + "epoch": 43.59065934065934, + "grad_norm": 6.6314215660095215, + "learning_rate": 2.8204670329670328e-05, + "loss": 0.1702, + "step": 15867 + }, + { + "epoch": 43.59340659340659, + "grad_norm": 5.796635627746582, + "learning_rate": 2.8203296703296705e-05, + "loss": 0.0945, + "step": 15868 + }, + { + "epoch": 43.59615384615385, + "grad_norm": 9.69038200378418, + "learning_rate": 2.8201923076923075e-05, + "loss": 0.1325, + "step": 15869 + }, + { + "epoch": 43.5989010989011, + "grad_norm": 13.946229934692383, + "learning_rate": 2.820054945054945e-05, + "loss": 0.3362, + "step": 15870 + }, + { + "epoch": 43.60164835164835, + "grad_norm": 20.719968795776367, + "learning_rate": 2.819917582417583e-05, + "loss": 0.5069, + "step": 15871 + }, + { + "epoch": 43.604395604395606, + "grad_norm": 2.557903289794922, + "learning_rate": 2.81978021978022e-05, + "loss": 0.0472, + "step": 15872 + }, + { + "epoch": 43.607142857142854, + "grad_norm": 6.141751766204834, + "learning_rate": 2.8196428571428575e-05, + "loss": 0.1345, + "step": 15873 + }, + { + "epoch": 43.60989010989011, + "grad_norm": 11.405735969543457, + "learning_rate": 2.8195054945054945e-05, + "loss": 0.2085, + "step": 15874 + }, + { + "epoch": 43.612637362637365, + "grad_norm": 2.643280029296875, + "learning_rate": 2.819368131868132e-05, + "loss": 0.0465, + "step": 15875 + }, + { + "epoch": 43.61538461538461, + "grad_norm": 16.833948135375977, + "learning_rate": 2.8192307692307696e-05, + "loss": 0.5214, + "step": 15876 + }, + { + "epoch": 43.61813186813187, + "grad_norm": 14.832662582397461, + "learning_rate": 2.8190934065934066e-05, + "loss": 0.3057, + "step": 15877 + }, + { + "epoch": 43.620879120879124, + "grad_norm": 8.762312889099121, + "learning_rate": 2.8189560439560442e-05, + "loss": 0.2541, + "step": 15878 + }, + { + "epoch": 43.62362637362637, + "grad_norm": 15.583823204040527, + "learning_rate": 2.8188186813186812e-05, + "loss": 0.4782, + "step": 15879 + }, + { + "epoch": 43.62637362637363, + "grad_norm": 10.261174201965332, + "learning_rate": 2.818681318681319e-05, + "loss": 0.185, + "step": 15880 + }, + { + "epoch": 43.629120879120876, + "grad_norm": 3.277012825012207, + "learning_rate": 2.8185439560439563e-05, + "loss": 0.0422, + "step": 15881 + }, + { + "epoch": 43.63186813186813, + "grad_norm": 11.059642791748047, + "learning_rate": 2.8184065934065933e-05, + "loss": 0.2329, + "step": 15882 + }, + { + "epoch": 43.63461538461539, + "grad_norm": 3.4103662967681885, + "learning_rate": 2.818269230769231e-05, + "loss": 0.0447, + "step": 15883 + }, + { + "epoch": 43.637362637362635, + "grad_norm": 9.263941764831543, + "learning_rate": 2.818131868131868e-05, + "loss": 0.2619, + "step": 15884 + }, + { + "epoch": 43.64010989010989, + "grad_norm": 5.880610942840576, + "learning_rate": 2.8179945054945056e-05, + "loss": 0.1283, + "step": 15885 + }, + { + "epoch": 43.642857142857146, + "grad_norm": 13.418915748596191, + "learning_rate": 2.8178571428571433e-05, + "loss": 0.307, + "step": 15886 + }, + { + "epoch": 43.645604395604394, + "grad_norm": 23.259767532348633, + "learning_rate": 2.8177197802197803e-05, + "loss": 0.4954, + "step": 15887 + }, + { + "epoch": 43.64835164835165, + "grad_norm": 19.106088638305664, + "learning_rate": 2.817582417582418e-05, + "loss": 0.663, + "step": 15888 + }, + { + "epoch": 43.6510989010989, + "grad_norm": 9.5476655960083, + "learning_rate": 2.817445054945055e-05, + "loss": 0.1967, + "step": 15889 + }, + { + "epoch": 43.65384615384615, + "grad_norm": 13.568849563598633, + "learning_rate": 2.8173076923076923e-05, + "loss": 0.3162, + "step": 15890 + }, + { + "epoch": 43.65659340659341, + "grad_norm": 13.945202827453613, + "learning_rate": 2.81717032967033e-05, + "loss": 0.3442, + "step": 15891 + }, + { + "epoch": 43.65934065934066, + "grad_norm": 10.646668434143066, + "learning_rate": 2.817032967032967e-05, + "loss": 0.2394, + "step": 15892 + }, + { + "epoch": 43.66208791208791, + "grad_norm": 6.791810512542725, + "learning_rate": 2.8168956043956047e-05, + "loss": 0.137, + "step": 15893 + }, + { + "epoch": 43.66483516483517, + "grad_norm": 11.118069648742676, + "learning_rate": 2.8167582417582417e-05, + "loss": 0.3559, + "step": 15894 + }, + { + "epoch": 43.667582417582416, + "grad_norm": 5.075682640075684, + "learning_rate": 2.8166208791208794e-05, + "loss": 0.1348, + "step": 15895 + }, + { + "epoch": 43.67032967032967, + "grad_norm": 14.589560508728027, + "learning_rate": 2.8164835164835167e-05, + "loss": 0.4472, + "step": 15896 + }, + { + "epoch": 43.67307692307692, + "grad_norm": 12.452080726623535, + "learning_rate": 2.8163461538461537e-05, + "loss": 0.2627, + "step": 15897 + }, + { + "epoch": 43.675824175824175, + "grad_norm": 14.60717487335205, + "learning_rate": 2.8162087912087914e-05, + "loss": 0.4489, + "step": 15898 + }, + { + "epoch": 43.67857142857143, + "grad_norm": 15.188528060913086, + "learning_rate": 2.8160714285714284e-05, + "loss": 0.4527, + "step": 15899 + }, + { + "epoch": 43.68131868131868, + "grad_norm": 6.1196608543396, + "learning_rate": 2.815934065934066e-05, + "loss": 0.0845, + "step": 15900 + }, + { + "epoch": 43.684065934065934, + "grad_norm": 21.22536277770996, + "learning_rate": 2.8157967032967037e-05, + "loss": 0.8386, + "step": 15901 + }, + { + "epoch": 43.68681318681319, + "grad_norm": 2.8556480407714844, + "learning_rate": 2.8156593406593407e-05, + "loss": 0.0314, + "step": 15902 + }, + { + "epoch": 43.68956043956044, + "grad_norm": 22.573562622070312, + "learning_rate": 2.815521978021978e-05, + "loss": 0.9885, + "step": 15903 + }, + { + "epoch": 43.69230769230769, + "grad_norm": 2.118576765060425, + "learning_rate": 2.8153846153846154e-05, + "loss": 0.0359, + "step": 15904 + }, + { + "epoch": 43.69505494505494, + "grad_norm": 6.0936102867126465, + "learning_rate": 2.8152472527472528e-05, + "loss": 0.1371, + "step": 15905 + }, + { + "epoch": 43.6978021978022, + "grad_norm": 15.276037216186523, + "learning_rate": 2.8151098901098905e-05, + "loss": 0.4536, + "step": 15906 + }, + { + "epoch": 43.70054945054945, + "grad_norm": 16.575416564941406, + "learning_rate": 2.8149725274725275e-05, + "loss": 0.2569, + "step": 15907 + }, + { + "epoch": 43.7032967032967, + "grad_norm": 8.390676498413086, + "learning_rate": 2.814835164835165e-05, + "loss": 0.1597, + "step": 15908 + }, + { + "epoch": 43.706043956043956, + "grad_norm": 16.846065521240234, + "learning_rate": 2.814697802197802e-05, + "loss": 0.5505, + "step": 15909 + }, + { + "epoch": 43.70879120879121, + "grad_norm": 13.551044464111328, + "learning_rate": 2.8145604395604398e-05, + "loss": 0.3075, + "step": 15910 + }, + { + "epoch": 43.71153846153846, + "grad_norm": 12.198280334472656, + "learning_rate": 2.814423076923077e-05, + "loss": 0.3178, + "step": 15911 + }, + { + "epoch": 43.714285714285715, + "grad_norm": 7.525563716888428, + "learning_rate": 2.814285714285714e-05, + "loss": 0.1577, + "step": 15912 + }, + { + "epoch": 43.717032967032964, + "grad_norm": 8.477606773376465, + "learning_rate": 2.814148351648352e-05, + "loss": 0.2016, + "step": 15913 + }, + { + "epoch": 43.71978021978022, + "grad_norm": 15.496764183044434, + "learning_rate": 2.814010989010989e-05, + "loss": 0.37, + "step": 15914 + }, + { + "epoch": 43.722527472527474, + "grad_norm": 13.922111511230469, + "learning_rate": 2.8138736263736265e-05, + "loss": 0.3598, + "step": 15915 + }, + { + "epoch": 43.72527472527472, + "grad_norm": 17.255725860595703, + "learning_rate": 2.8137362637362642e-05, + "loss": 0.4792, + "step": 15916 + }, + { + "epoch": 43.72802197802198, + "grad_norm": 11.047384262084961, + "learning_rate": 2.8135989010989012e-05, + "loss": 0.2423, + "step": 15917 + }, + { + "epoch": 43.73076923076923, + "grad_norm": 12.016846656799316, + "learning_rate": 2.8134615384615385e-05, + "loss": 0.3589, + "step": 15918 + }, + { + "epoch": 43.73351648351648, + "grad_norm": 8.466581344604492, + "learning_rate": 2.813324175824176e-05, + "loss": 0.2047, + "step": 15919 + }, + { + "epoch": 43.73626373626374, + "grad_norm": 10.18250846862793, + "learning_rate": 2.8131868131868132e-05, + "loss": 0.2634, + "step": 15920 + }, + { + "epoch": 43.73901098901099, + "grad_norm": 26.829879760742188, + "learning_rate": 2.813049450549451e-05, + "loss": 0.9145, + "step": 15921 + }, + { + "epoch": 43.74175824175824, + "grad_norm": 15.075981140136719, + "learning_rate": 2.812912087912088e-05, + "loss": 0.3325, + "step": 15922 + }, + { + "epoch": 43.744505494505496, + "grad_norm": 9.80953598022461, + "learning_rate": 2.8127747252747256e-05, + "loss": 0.1197, + "step": 15923 + }, + { + "epoch": 43.747252747252745, + "grad_norm": 14.452617645263672, + "learning_rate": 2.8126373626373626e-05, + "loss": 0.272, + "step": 15924 + }, + { + "epoch": 43.75, + "grad_norm": 13.129714965820312, + "learning_rate": 2.8125000000000003e-05, + "loss": 0.3456, + "step": 15925 + }, + { + "epoch": 43.752747252747255, + "grad_norm": 21.867900848388672, + "learning_rate": 2.8123626373626376e-05, + "loss": 0.3277, + "step": 15926 + }, + { + "epoch": 43.755494505494504, + "grad_norm": 6.85116720199585, + "learning_rate": 2.8122252747252746e-05, + "loss": 0.1072, + "step": 15927 + }, + { + "epoch": 43.75824175824176, + "grad_norm": 6.613266468048096, + "learning_rate": 2.8120879120879123e-05, + "loss": 0.143, + "step": 15928 + }, + { + "epoch": 43.76098901098901, + "grad_norm": 10.582091331481934, + "learning_rate": 2.8119505494505493e-05, + "loss": 0.1241, + "step": 15929 + }, + { + "epoch": 43.76373626373626, + "grad_norm": 21.692657470703125, + "learning_rate": 2.811813186813187e-05, + "loss": 0.6114, + "step": 15930 + }, + { + "epoch": 43.76648351648352, + "grad_norm": 7.8460845947265625, + "learning_rate": 2.8116758241758246e-05, + "loss": 0.2627, + "step": 15931 + }, + { + "epoch": 43.76923076923077, + "grad_norm": 18.553775787353516, + "learning_rate": 2.8115384615384616e-05, + "loss": 0.569, + "step": 15932 + }, + { + "epoch": 43.77197802197802, + "grad_norm": 9.122403144836426, + "learning_rate": 2.811401098901099e-05, + "loss": 0.1774, + "step": 15933 + }, + { + "epoch": 43.77472527472528, + "grad_norm": 16.922657012939453, + "learning_rate": 2.8112637362637363e-05, + "loss": 0.4478, + "step": 15934 + }, + { + "epoch": 43.777472527472526, + "grad_norm": 5.8385162353515625, + "learning_rate": 2.8111263736263737e-05, + "loss": 0.0765, + "step": 15935 + }, + { + "epoch": 43.78021978021978, + "grad_norm": 13.251432418823242, + "learning_rate": 2.8109890109890113e-05, + "loss": 0.2744, + "step": 15936 + }, + { + "epoch": 43.782967032967036, + "grad_norm": 12.103002548217773, + "learning_rate": 2.8108516483516484e-05, + "loss": 0.1474, + "step": 15937 + }, + { + "epoch": 43.785714285714285, + "grad_norm": 10.084260940551758, + "learning_rate": 2.810714285714286e-05, + "loss": 0.2094, + "step": 15938 + }, + { + "epoch": 43.78846153846154, + "grad_norm": 11.95087718963623, + "learning_rate": 2.810576923076923e-05, + "loss": 0.3456, + "step": 15939 + }, + { + "epoch": 43.79120879120879, + "grad_norm": 13.388632774353027, + "learning_rate": 2.8104395604395607e-05, + "loss": 0.1863, + "step": 15940 + }, + { + "epoch": 43.793956043956044, + "grad_norm": 11.844374656677246, + "learning_rate": 2.810302197802198e-05, + "loss": 0.2316, + "step": 15941 + }, + { + "epoch": 43.7967032967033, + "grad_norm": 5.234866142272949, + "learning_rate": 2.810164835164835e-05, + "loss": 0.1003, + "step": 15942 + }, + { + "epoch": 43.79945054945055, + "grad_norm": 7.218478679656982, + "learning_rate": 2.8100274725274727e-05, + "loss": 0.1594, + "step": 15943 + }, + { + "epoch": 43.8021978021978, + "grad_norm": 14.532306671142578, + "learning_rate": 2.8098901098901097e-05, + "loss": 0.3646, + "step": 15944 + }, + { + "epoch": 43.80494505494506, + "grad_norm": 16.665090560913086, + "learning_rate": 2.8097527472527474e-05, + "loss": 0.5172, + "step": 15945 + }, + { + "epoch": 43.80769230769231, + "grad_norm": 12.14478874206543, + "learning_rate": 2.809615384615385e-05, + "loss": 0.37, + "step": 15946 + }, + { + "epoch": 43.81043956043956, + "grad_norm": 7.156891345977783, + "learning_rate": 2.809478021978022e-05, + "loss": 0.1048, + "step": 15947 + }, + { + "epoch": 43.81318681318681, + "grad_norm": 9.907654762268066, + "learning_rate": 2.8093406593406594e-05, + "loss": 0.1275, + "step": 15948 + }, + { + "epoch": 43.815934065934066, + "grad_norm": 16.828706741333008, + "learning_rate": 2.8092032967032968e-05, + "loss": 0.4077, + "step": 15949 + }, + { + "epoch": 43.81868131868132, + "grad_norm": 17.51654815673828, + "learning_rate": 2.809065934065934e-05, + "loss": 0.3542, + "step": 15950 + }, + { + "epoch": 43.82142857142857, + "grad_norm": 13.71141242980957, + "learning_rate": 2.8089285714285718e-05, + "loss": 0.1774, + "step": 15951 + }, + { + "epoch": 43.824175824175825, + "grad_norm": 10.822617530822754, + "learning_rate": 2.8087912087912088e-05, + "loss": 0.1753, + "step": 15952 + }, + { + "epoch": 43.82692307692308, + "grad_norm": 11.336322784423828, + "learning_rate": 2.8086538461538465e-05, + "loss": 0.2058, + "step": 15953 + }, + { + "epoch": 43.82967032967033, + "grad_norm": 11.345124244689941, + "learning_rate": 2.8085164835164835e-05, + "loss": 0.1226, + "step": 15954 + }, + { + "epoch": 43.832417582417584, + "grad_norm": 8.47508716583252, + "learning_rate": 2.808379120879121e-05, + "loss": 0.1735, + "step": 15955 + }, + { + "epoch": 43.83516483516483, + "grad_norm": 12.904300689697266, + "learning_rate": 2.8082417582417585e-05, + "loss": 0.2773, + "step": 15956 + }, + { + "epoch": 43.83791208791209, + "grad_norm": 13.683331489562988, + "learning_rate": 2.8081043956043955e-05, + "loss": 0.4119, + "step": 15957 + }, + { + "epoch": 43.84065934065934, + "grad_norm": 15.594359397888184, + "learning_rate": 2.8079670329670332e-05, + "loss": 0.5276, + "step": 15958 + }, + { + "epoch": 43.84340659340659, + "grad_norm": 16.497669219970703, + "learning_rate": 2.8078296703296702e-05, + "loss": 0.1571, + "step": 15959 + }, + { + "epoch": 43.84615384615385, + "grad_norm": 5.343010902404785, + "learning_rate": 2.807692307692308e-05, + "loss": 0.0891, + "step": 15960 + }, + { + "epoch": 43.8489010989011, + "grad_norm": 1.9429576396942139, + "learning_rate": 2.8075549450549455e-05, + "loss": 0.0262, + "step": 15961 + }, + { + "epoch": 43.85164835164835, + "grad_norm": 10.684432029724121, + "learning_rate": 2.8074175824175825e-05, + "loss": 0.3182, + "step": 15962 + }, + { + "epoch": 43.854395604395606, + "grad_norm": 10.16934585571289, + "learning_rate": 2.80728021978022e-05, + "loss": 0.2377, + "step": 15963 + }, + { + "epoch": 43.857142857142854, + "grad_norm": 19.939266204833984, + "learning_rate": 2.8071428571428572e-05, + "loss": 0.6654, + "step": 15964 + }, + { + "epoch": 43.85989010989011, + "grad_norm": 3.6160757541656494, + "learning_rate": 2.8070054945054946e-05, + "loss": 0.0859, + "step": 15965 + }, + { + "epoch": 43.862637362637365, + "grad_norm": 24.443052291870117, + "learning_rate": 2.8068681318681322e-05, + "loss": 1.1378, + "step": 15966 + }, + { + "epoch": 43.86538461538461, + "grad_norm": 9.685699462890625, + "learning_rate": 2.8067307692307692e-05, + "loss": 0.1514, + "step": 15967 + }, + { + "epoch": 43.86813186813187, + "grad_norm": 3.6114542484283447, + "learning_rate": 2.806593406593407e-05, + "loss": 0.0653, + "step": 15968 + }, + { + "epoch": 43.870879120879124, + "grad_norm": 14.05138111114502, + "learning_rate": 2.806456043956044e-05, + "loss": 0.3972, + "step": 15969 + }, + { + "epoch": 43.87362637362637, + "grad_norm": 17.568744659423828, + "learning_rate": 2.8063186813186816e-05, + "loss": 0.4113, + "step": 15970 + }, + { + "epoch": 43.87637362637363, + "grad_norm": 15.647191047668457, + "learning_rate": 2.806181318681319e-05, + "loss": 0.4924, + "step": 15971 + }, + { + "epoch": 43.879120879120876, + "grad_norm": 12.498278617858887, + "learning_rate": 2.806043956043956e-05, + "loss": 0.2799, + "step": 15972 + }, + { + "epoch": 43.88186813186813, + "grad_norm": 12.343295097351074, + "learning_rate": 2.8059065934065936e-05, + "loss": 0.2784, + "step": 15973 + }, + { + "epoch": 43.88461538461539, + "grad_norm": 6.549098491668701, + "learning_rate": 2.8057692307692306e-05, + "loss": 0.1213, + "step": 15974 + }, + { + "epoch": 43.887362637362635, + "grad_norm": 10.775385856628418, + "learning_rate": 2.8056318681318683e-05, + "loss": 0.1868, + "step": 15975 + }, + { + "epoch": 43.89010989010989, + "grad_norm": 14.013678550720215, + "learning_rate": 2.805494505494506e-05, + "loss": 0.3341, + "step": 15976 + }, + { + "epoch": 43.892857142857146, + "grad_norm": 12.1305570602417, + "learning_rate": 2.805357142857143e-05, + "loss": 0.2612, + "step": 15977 + }, + { + "epoch": 43.895604395604394, + "grad_norm": 8.351272583007812, + "learning_rate": 2.8052197802197803e-05, + "loss": 0.1077, + "step": 15978 + }, + { + "epoch": 43.89835164835165, + "grad_norm": 5.573801517486572, + "learning_rate": 2.8050824175824177e-05, + "loss": 0.0828, + "step": 15979 + }, + { + "epoch": 43.9010989010989, + "grad_norm": 6.5023603439331055, + "learning_rate": 2.804945054945055e-05, + "loss": 0.1827, + "step": 15980 + }, + { + "epoch": 43.90384615384615, + "grad_norm": 8.62619686126709, + "learning_rate": 2.8048076923076927e-05, + "loss": 0.1251, + "step": 15981 + }, + { + "epoch": 43.90659340659341, + "grad_norm": 8.380780220031738, + "learning_rate": 2.8046703296703297e-05, + "loss": 0.2151, + "step": 15982 + }, + { + "epoch": 43.90934065934066, + "grad_norm": 6.761242389678955, + "learning_rate": 2.8045329670329674e-05, + "loss": 0.0861, + "step": 15983 + }, + { + "epoch": 43.91208791208791, + "grad_norm": 6.957091331481934, + "learning_rate": 2.8043956043956044e-05, + "loss": 0.1717, + "step": 15984 + }, + { + "epoch": 43.91483516483517, + "grad_norm": 14.017561912536621, + "learning_rate": 2.804258241758242e-05, + "loss": 0.4398, + "step": 15985 + }, + { + "epoch": 43.917582417582416, + "grad_norm": 20.566967010498047, + "learning_rate": 2.8041208791208794e-05, + "loss": 0.457, + "step": 15986 + }, + { + "epoch": 43.92032967032967, + "grad_norm": 9.520088195800781, + "learning_rate": 2.8039835164835164e-05, + "loss": 0.3129, + "step": 15987 + }, + { + "epoch": 43.92307692307692, + "grad_norm": 6.749226093292236, + "learning_rate": 2.803846153846154e-05, + "loss": 0.1537, + "step": 15988 + }, + { + "epoch": 43.925824175824175, + "grad_norm": 17.22075080871582, + "learning_rate": 2.803708791208791e-05, + "loss": 0.6298, + "step": 15989 + }, + { + "epoch": 43.92857142857143, + "grad_norm": 3.810267448425293, + "learning_rate": 2.8035714285714288e-05, + "loss": 0.0649, + "step": 15990 + }, + { + "epoch": 43.93131868131868, + "grad_norm": 20.554006576538086, + "learning_rate": 2.8034340659340664e-05, + "loss": 0.7455, + "step": 15991 + }, + { + "epoch": 43.934065934065934, + "grad_norm": 10.957173347473145, + "learning_rate": 2.8032967032967034e-05, + "loss": 0.219, + "step": 15992 + }, + { + "epoch": 43.93681318681319, + "grad_norm": 7.9663920402526855, + "learning_rate": 2.8031593406593408e-05, + "loss": 0.1039, + "step": 15993 + }, + { + "epoch": 43.93956043956044, + "grad_norm": 14.36160945892334, + "learning_rate": 2.803021978021978e-05, + "loss": 0.364, + "step": 15994 + }, + { + "epoch": 43.94230769230769, + "grad_norm": 5.283764362335205, + "learning_rate": 2.8028846153846155e-05, + "loss": 0.0669, + "step": 15995 + }, + { + "epoch": 43.94505494505494, + "grad_norm": 4.083611488342285, + "learning_rate": 2.802747252747253e-05, + "loss": 0.0957, + "step": 15996 + }, + { + "epoch": 43.9478021978022, + "grad_norm": 9.295612335205078, + "learning_rate": 2.80260989010989e-05, + "loss": 0.1135, + "step": 15997 + }, + { + "epoch": 43.95054945054945, + "grad_norm": 5.652164459228516, + "learning_rate": 2.8024725274725278e-05, + "loss": 0.0961, + "step": 15998 + }, + { + "epoch": 43.9532967032967, + "grad_norm": 10.122340202331543, + "learning_rate": 2.8023351648351648e-05, + "loss": 0.1682, + "step": 15999 + }, + { + "epoch": 43.956043956043956, + "grad_norm": 20.373180389404297, + "learning_rate": 2.8021978021978025e-05, + "loss": 0.3114, + "step": 16000 + }, + { + "epoch": 43.95879120879121, + "grad_norm": 12.350204467773438, + "learning_rate": 2.80206043956044e-05, + "loss": 0.3196, + "step": 16001 + }, + { + "epoch": 43.96153846153846, + "grad_norm": 12.642938613891602, + "learning_rate": 2.801923076923077e-05, + "loss": 0.4533, + "step": 16002 + }, + { + "epoch": 43.964285714285715, + "grad_norm": 10.286128997802734, + "learning_rate": 2.8017857142857145e-05, + "loss": 0.1628, + "step": 16003 + }, + { + "epoch": 43.967032967032964, + "grad_norm": 16.205183029174805, + "learning_rate": 2.8016483516483515e-05, + "loss": 0.4071, + "step": 16004 + }, + { + "epoch": 43.96978021978022, + "grad_norm": 11.792009353637695, + "learning_rate": 2.8015109890109892e-05, + "loss": 0.2723, + "step": 16005 + }, + { + "epoch": 43.972527472527474, + "grad_norm": 4.194671630859375, + "learning_rate": 2.801373626373627e-05, + "loss": 0.0485, + "step": 16006 + }, + { + "epoch": 43.97527472527472, + "grad_norm": 13.576417922973633, + "learning_rate": 2.801236263736264e-05, + "loss": 0.4313, + "step": 16007 + }, + { + "epoch": 43.97802197802198, + "grad_norm": 19.127639770507812, + "learning_rate": 2.8010989010989012e-05, + "loss": 0.9721, + "step": 16008 + }, + { + "epoch": 43.98076923076923, + "grad_norm": 12.611116409301758, + "learning_rate": 2.8009615384615386e-05, + "loss": 0.4544, + "step": 16009 + }, + { + "epoch": 43.98351648351648, + "grad_norm": 12.314667701721191, + "learning_rate": 2.800824175824176e-05, + "loss": 0.2349, + "step": 16010 + }, + { + "epoch": 43.98626373626374, + "grad_norm": 13.810630798339844, + "learning_rate": 2.8006868131868136e-05, + "loss": 0.3885, + "step": 16011 + }, + { + "epoch": 43.98901098901099, + "grad_norm": 19.988054275512695, + "learning_rate": 2.8005494505494506e-05, + "loss": 0.4556, + "step": 16012 + }, + { + "epoch": 43.99175824175824, + "grad_norm": 7.006148338317871, + "learning_rate": 2.8004120879120883e-05, + "loss": 0.2696, + "step": 16013 + }, + { + "epoch": 43.994505494505496, + "grad_norm": 16.520889282226562, + "learning_rate": 2.8002747252747253e-05, + "loss": 0.3354, + "step": 16014 + }, + { + "epoch": 43.997252747252745, + "grad_norm": 7.008114337921143, + "learning_rate": 2.800137362637363e-05, + "loss": 0.1617, + "step": 16015 + }, + { + "epoch": 44.0, + "grad_norm": 88.60670471191406, + "learning_rate": 2.8000000000000003e-05, + "loss": 1.0648, + "step": 16016 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.6418732782369146, + "eval_f1": 0.616838797187407, + "eval_f1_DuraRiadoRio_64x64": 0.37988826815642457, + "eval_f1_Mole_64x64": 0.5686274509803921, + "eval_f1_Quebrado_64x64": 0.8153310104529616, + "eval_f1_RiadoRio_64x64": 0.5896805896805897, + "eval_f1_RioFechado_64x64": 0.7306666666666667, + "eval_loss": 2.248492479324341, + "eval_precision": 0.7629696248378399, + "eval_precision_DuraRiadoRio_64x64": 0.9714285714285714, + "eval_precision_Mole_64x64": 0.9666666666666667, + "eval_precision_Quebrado_64x64": 0.8181818181818182, + "eval_precision_RiadoRio_64x64": 0.47058823529411764, + "eval_precision_RioFechado_64x64": 0.5879828326180258, + "eval_recall": 0.6411302610987561, + "eval_recall_DuraRiadoRio_64x64": 0.2361111111111111, + "eval_recall_Mole_64x64": 0.4027777777777778, + "eval_recall_Quebrado_64x64": 0.8125, + "eval_recall_RiadoRio_64x64": 0.7894736842105263, + "eval_recall_RioFechado_64x64": 0.9647887323943662, + "eval_runtime": 1.8209, + "eval_samples_per_second": 398.693, + "eval_steps_per_second": 25.262, + "step": 16016 + }, + { + "epoch": 44.002747252747255, + "grad_norm": 9.451665878295898, + "learning_rate": 2.7998626373626373e-05, + "loss": 0.1091, + "step": 16017 + }, + { + "epoch": 44.005494505494504, + "grad_norm": 5.698117733001709, + "learning_rate": 2.799725274725275e-05, + "loss": 0.1654, + "step": 16018 + }, + { + "epoch": 44.00824175824176, + "grad_norm": 17.977048873901367, + "learning_rate": 2.799587912087912e-05, + "loss": 0.625, + "step": 16019 + }, + { + "epoch": 44.010989010989015, + "grad_norm": 11.575783729553223, + "learning_rate": 2.7994505494505497e-05, + "loss": 0.3082, + "step": 16020 + }, + { + "epoch": 44.01373626373626, + "grad_norm": 19.70444679260254, + "learning_rate": 2.7993131868131873e-05, + "loss": 0.4326, + "step": 16021 + }, + { + "epoch": 44.01648351648352, + "grad_norm": 10.546784400939941, + "learning_rate": 2.7991758241758243e-05, + "loss": 0.427, + "step": 16022 + }, + { + "epoch": 44.01923076923077, + "grad_norm": 13.95429515838623, + "learning_rate": 2.7990384615384617e-05, + "loss": 0.4058, + "step": 16023 + }, + { + "epoch": 44.02197802197802, + "grad_norm": 14.992243766784668, + "learning_rate": 2.798901098901099e-05, + "loss": 0.3835, + "step": 16024 + }, + { + "epoch": 44.02472527472528, + "grad_norm": 8.355257034301758, + "learning_rate": 2.7987637362637364e-05, + "loss": 0.2893, + "step": 16025 + }, + { + "epoch": 44.027472527472526, + "grad_norm": 16.117429733276367, + "learning_rate": 2.798626373626374e-05, + "loss": 0.3984, + "step": 16026 + }, + { + "epoch": 44.03021978021978, + "grad_norm": 12.272193908691406, + "learning_rate": 2.798489010989011e-05, + "loss": 0.2563, + "step": 16027 + }, + { + "epoch": 44.032967032967036, + "grad_norm": 9.83017349243164, + "learning_rate": 2.7983516483516487e-05, + "loss": 0.2541, + "step": 16028 + }, + { + "epoch": 44.035714285714285, + "grad_norm": 11.419838905334473, + "learning_rate": 2.7982142857142857e-05, + "loss": 0.1745, + "step": 16029 + }, + { + "epoch": 44.03846153846154, + "grad_norm": 8.616361618041992, + "learning_rate": 2.7980769230769234e-05, + "loss": 0.1847, + "step": 16030 + }, + { + "epoch": 44.04120879120879, + "grad_norm": 23.146366119384766, + "learning_rate": 2.7979395604395607e-05, + "loss": 0.6952, + "step": 16031 + }, + { + "epoch": 44.043956043956044, + "grad_norm": 11.264192581176758, + "learning_rate": 2.7978021978021977e-05, + "loss": 0.2476, + "step": 16032 + }, + { + "epoch": 44.0467032967033, + "grad_norm": 10.190991401672363, + "learning_rate": 2.7976648351648354e-05, + "loss": 0.142, + "step": 16033 + }, + { + "epoch": 44.04945054945055, + "grad_norm": 8.488512992858887, + "learning_rate": 2.7975274725274724e-05, + "loss": 0.1263, + "step": 16034 + }, + { + "epoch": 44.0521978021978, + "grad_norm": 4.306058406829834, + "learning_rate": 2.79739010989011e-05, + "loss": 0.0837, + "step": 16035 + }, + { + "epoch": 44.05494505494506, + "grad_norm": 19.786073684692383, + "learning_rate": 2.7972527472527478e-05, + "loss": 0.7598, + "step": 16036 + }, + { + "epoch": 44.05769230769231, + "grad_norm": 6.385810375213623, + "learning_rate": 2.7971153846153848e-05, + "loss": 0.1236, + "step": 16037 + }, + { + "epoch": 44.06043956043956, + "grad_norm": 16.205013275146484, + "learning_rate": 2.796978021978022e-05, + "loss": 0.4367, + "step": 16038 + }, + { + "epoch": 44.06318681318681, + "grad_norm": 5.671924591064453, + "learning_rate": 2.7968406593406595e-05, + "loss": 0.0736, + "step": 16039 + }, + { + "epoch": 44.065934065934066, + "grad_norm": 7.14484977722168, + "learning_rate": 2.7967032967032968e-05, + "loss": 0.1432, + "step": 16040 + }, + { + "epoch": 44.06868131868132, + "grad_norm": 11.91579532623291, + "learning_rate": 2.7965659340659338e-05, + "loss": 0.2376, + "step": 16041 + }, + { + "epoch": 44.07142857142857, + "grad_norm": 9.27576732635498, + "learning_rate": 2.7964285714285715e-05, + "loss": 0.2112, + "step": 16042 + }, + { + "epoch": 44.074175824175825, + "grad_norm": 8.518413543701172, + "learning_rate": 2.7962912087912092e-05, + "loss": 0.1232, + "step": 16043 + }, + { + "epoch": 44.07692307692308, + "grad_norm": 21.85494041442871, + "learning_rate": 2.7961538461538462e-05, + "loss": 0.921, + "step": 16044 + }, + { + "epoch": 44.07967032967033, + "grad_norm": 24.1586856842041, + "learning_rate": 2.796016483516484e-05, + "loss": 0.6832, + "step": 16045 + }, + { + "epoch": 44.082417582417584, + "grad_norm": 10.972951889038086, + "learning_rate": 2.795879120879121e-05, + "loss": 0.1332, + "step": 16046 + }, + { + "epoch": 44.08516483516483, + "grad_norm": 4.416203022003174, + "learning_rate": 2.7957417582417582e-05, + "loss": 0.0713, + "step": 16047 + }, + { + "epoch": 44.08791208791209, + "grad_norm": 12.373506546020508, + "learning_rate": 2.795604395604396e-05, + "loss": 0.2478, + "step": 16048 + }, + { + "epoch": 44.09065934065934, + "grad_norm": 1.4810657501220703, + "learning_rate": 2.795467032967033e-05, + "loss": 0.0319, + "step": 16049 + }, + { + "epoch": 44.09340659340659, + "grad_norm": 9.860380172729492, + "learning_rate": 2.7953296703296706e-05, + "loss": 0.1653, + "step": 16050 + }, + { + "epoch": 44.09615384615385, + "grad_norm": 13.47705364227295, + "learning_rate": 2.7951923076923076e-05, + "loss": 0.4452, + "step": 16051 + }, + { + "epoch": 44.0989010989011, + "grad_norm": 7.184254169464111, + "learning_rate": 2.7950549450549452e-05, + "loss": 0.2256, + "step": 16052 + }, + { + "epoch": 44.10164835164835, + "grad_norm": 11.282333374023438, + "learning_rate": 2.7949175824175826e-05, + "loss": 0.2035, + "step": 16053 + }, + { + "epoch": 44.104395604395606, + "grad_norm": 12.028433799743652, + "learning_rate": 2.79478021978022e-05, + "loss": 0.3686, + "step": 16054 + }, + { + "epoch": 44.107142857142854, + "grad_norm": 16.423397064208984, + "learning_rate": 2.7946428571428573e-05, + "loss": 0.3067, + "step": 16055 + }, + { + "epoch": 44.10989010989011, + "grad_norm": 12.015990257263184, + "learning_rate": 2.7945054945054943e-05, + "loss": 0.2354, + "step": 16056 + }, + { + "epoch": 44.112637362637365, + "grad_norm": 12.252962112426758, + "learning_rate": 2.794368131868132e-05, + "loss": 0.2462, + "step": 16057 + }, + { + "epoch": 44.11538461538461, + "grad_norm": 5.2733259201049805, + "learning_rate": 2.7942307692307696e-05, + "loss": 0.1507, + "step": 16058 + }, + { + "epoch": 44.11813186813187, + "grad_norm": 11.005428314208984, + "learning_rate": 2.7940934065934066e-05, + "loss": 0.3147, + "step": 16059 + }, + { + "epoch": 44.120879120879124, + "grad_norm": 16.907955169677734, + "learning_rate": 2.7939560439560443e-05, + "loss": 0.273, + "step": 16060 + }, + { + "epoch": 44.12362637362637, + "grad_norm": 5.219566822052002, + "learning_rate": 2.7938186813186813e-05, + "loss": 0.0697, + "step": 16061 + }, + { + "epoch": 44.12637362637363, + "grad_norm": 13.498678207397461, + "learning_rate": 2.7936813186813186e-05, + "loss": 0.2791, + "step": 16062 + }, + { + "epoch": 44.129120879120876, + "grad_norm": 12.192178726196289, + "learning_rate": 2.7935439560439563e-05, + "loss": 0.227, + "step": 16063 + }, + { + "epoch": 44.13186813186813, + "grad_norm": 8.195597648620605, + "learning_rate": 2.7934065934065933e-05, + "loss": 0.1497, + "step": 16064 + }, + { + "epoch": 44.13461538461539, + "grad_norm": 7.823512554168701, + "learning_rate": 2.793269230769231e-05, + "loss": 0.2077, + "step": 16065 + }, + { + "epoch": 44.137362637362635, + "grad_norm": 3.5001578330993652, + "learning_rate": 2.793131868131868e-05, + "loss": 0.0786, + "step": 16066 + }, + { + "epoch": 44.14010989010989, + "grad_norm": 8.794120788574219, + "learning_rate": 2.7929945054945057e-05, + "loss": 0.1726, + "step": 16067 + }, + { + "epoch": 44.142857142857146, + "grad_norm": 13.11182689666748, + "learning_rate": 2.792857142857143e-05, + "loss": 0.3129, + "step": 16068 + }, + { + "epoch": 44.145604395604394, + "grad_norm": 2.5933263301849365, + "learning_rate": 2.7927197802197804e-05, + "loss": 0.0401, + "step": 16069 + }, + { + "epoch": 44.14835164835165, + "grad_norm": 12.438628196716309, + "learning_rate": 2.7925824175824177e-05, + "loss": 0.3512, + "step": 16070 + }, + { + "epoch": 44.1510989010989, + "grad_norm": 5.731524467468262, + "learning_rate": 2.7924450549450547e-05, + "loss": 0.1583, + "step": 16071 + }, + { + "epoch": 44.15384615384615, + "grad_norm": 16.640905380249023, + "learning_rate": 2.7923076923076924e-05, + "loss": 0.5041, + "step": 16072 + }, + { + "epoch": 44.15659340659341, + "grad_norm": 7.292604446411133, + "learning_rate": 2.79217032967033e-05, + "loss": 0.1085, + "step": 16073 + }, + { + "epoch": 44.15934065934066, + "grad_norm": 12.185132026672363, + "learning_rate": 2.792032967032967e-05, + "loss": 0.3016, + "step": 16074 + }, + { + "epoch": 44.16208791208791, + "grad_norm": 11.605195999145508, + "learning_rate": 2.7918956043956048e-05, + "loss": 0.3748, + "step": 16075 + }, + { + "epoch": 44.16483516483517, + "grad_norm": 13.543437004089355, + "learning_rate": 2.7917582417582418e-05, + "loss": 0.2722, + "step": 16076 + }, + { + "epoch": 44.167582417582416, + "grad_norm": 10.243730545043945, + "learning_rate": 2.791620879120879e-05, + "loss": 0.2673, + "step": 16077 + }, + { + "epoch": 44.17032967032967, + "grad_norm": 10.304828643798828, + "learning_rate": 2.7914835164835168e-05, + "loss": 0.238, + "step": 16078 + }, + { + "epoch": 44.17307692307692, + "grad_norm": 12.035022735595703, + "learning_rate": 2.7913461538461538e-05, + "loss": 0.4238, + "step": 16079 + }, + { + "epoch": 44.175824175824175, + "grad_norm": 13.079094886779785, + "learning_rate": 2.7912087912087915e-05, + "loss": 0.3942, + "step": 16080 + }, + { + "epoch": 44.17857142857143, + "grad_norm": 12.252134323120117, + "learning_rate": 2.7910714285714285e-05, + "loss": 0.1855, + "step": 16081 + }, + { + "epoch": 44.18131868131868, + "grad_norm": 10.130105972290039, + "learning_rate": 2.790934065934066e-05, + "loss": 0.2456, + "step": 16082 + }, + { + "epoch": 44.184065934065934, + "grad_norm": 10.535056114196777, + "learning_rate": 2.7907967032967035e-05, + "loss": 0.1838, + "step": 16083 + }, + { + "epoch": 44.18681318681319, + "grad_norm": 7.115178108215332, + "learning_rate": 2.7906593406593405e-05, + "loss": 0.1389, + "step": 16084 + }, + { + "epoch": 44.18956043956044, + "grad_norm": 15.068340301513672, + "learning_rate": 2.790521978021978e-05, + "loss": 0.3943, + "step": 16085 + }, + { + "epoch": 44.19230769230769, + "grad_norm": 8.592716217041016, + "learning_rate": 2.790384615384615e-05, + "loss": 0.1349, + "step": 16086 + }, + { + "epoch": 44.19505494505494, + "grad_norm": 9.036055564880371, + "learning_rate": 2.790247252747253e-05, + "loss": 0.1899, + "step": 16087 + }, + { + "epoch": 44.1978021978022, + "grad_norm": 8.443902015686035, + "learning_rate": 2.7901098901098905e-05, + "loss": 0.2087, + "step": 16088 + }, + { + "epoch": 44.20054945054945, + "grad_norm": 9.163528442382812, + "learning_rate": 2.7899725274725275e-05, + "loss": 0.2117, + "step": 16089 + }, + { + "epoch": 44.2032967032967, + "grad_norm": 12.735993385314941, + "learning_rate": 2.7898351648351652e-05, + "loss": 0.3813, + "step": 16090 + }, + { + "epoch": 44.206043956043956, + "grad_norm": 14.626733779907227, + "learning_rate": 2.7896978021978022e-05, + "loss": 0.3227, + "step": 16091 + }, + { + "epoch": 44.20879120879121, + "grad_norm": 7.917704105377197, + "learning_rate": 2.7895604395604395e-05, + "loss": 0.0931, + "step": 16092 + }, + { + "epoch": 44.21153846153846, + "grad_norm": 17.589685440063477, + "learning_rate": 2.7894230769230772e-05, + "loss": 0.309, + "step": 16093 + }, + { + "epoch": 44.214285714285715, + "grad_norm": 7.784170150756836, + "learning_rate": 2.7892857142857142e-05, + "loss": 0.1039, + "step": 16094 + }, + { + "epoch": 44.217032967032964, + "grad_norm": 7.4269609451293945, + "learning_rate": 2.789148351648352e-05, + "loss": 0.1498, + "step": 16095 + }, + { + "epoch": 44.21978021978022, + "grad_norm": 13.823862075805664, + "learning_rate": 2.789010989010989e-05, + "loss": 0.2488, + "step": 16096 + }, + { + "epoch": 44.222527472527474, + "grad_norm": 15.259801864624023, + "learning_rate": 2.7888736263736266e-05, + "loss": 0.2414, + "step": 16097 + }, + { + "epoch": 44.22527472527472, + "grad_norm": 7.9590654373168945, + "learning_rate": 2.788736263736264e-05, + "loss": 0.1631, + "step": 16098 + }, + { + "epoch": 44.22802197802198, + "grad_norm": 27.447301864624023, + "learning_rate": 2.788598901098901e-05, + "loss": 0.6775, + "step": 16099 + }, + { + "epoch": 44.23076923076923, + "grad_norm": 5.940738201141357, + "learning_rate": 2.7884615384615386e-05, + "loss": 0.1042, + "step": 16100 + }, + { + "epoch": 44.23351648351648, + "grad_norm": 14.093040466308594, + "learning_rate": 2.7883241758241756e-05, + "loss": 0.2713, + "step": 16101 + }, + { + "epoch": 44.23626373626374, + "grad_norm": 2.5343027114868164, + "learning_rate": 2.7881868131868133e-05, + "loss": 0.0371, + "step": 16102 + }, + { + "epoch": 44.239010989010985, + "grad_norm": 11.502099990844727, + "learning_rate": 2.788049450549451e-05, + "loss": 0.4221, + "step": 16103 + }, + { + "epoch": 44.24175824175824, + "grad_norm": 13.159942626953125, + "learning_rate": 2.787912087912088e-05, + "loss": 0.3394, + "step": 16104 + }, + { + "epoch": 44.244505494505496, + "grad_norm": 14.818196296691895, + "learning_rate": 2.7877747252747257e-05, + "loss": 0.2506, + "step": 16105 + }, + { + "epoch": 44.247252747252745, + "grad_norm": 10.37000560760498, + "learning_rate": 2.7876373626373627e-05, + "loss": 0.1289, + "step": 16106 + }, + { + "epoch": 44.25, + "grad_norm": 18.786849975585938, + "learning_rate": 2.7875e-05, + "loss": 0.4332, + "step": 16107 + }, + { + "epoch": 44.252747252747255, + "grad_norm": 15.067177772521973, + "learning_rate": 2.7873626373626377e-05, + "loss": 0.2683, + "step": 16108 + }, + { + "epoch": 44.255494505494504, + "grad_norm": 14.166260719299316, + "learning_rate": 2.7872252747252747e-05, + "loss": 0.3171, + "step": 16109 + }, + { + "epoch": 44.25824175824176, + "grad_norm": 13.092336654663086, + "learning_rate": 2.7870879120879124e-05, + "loss": 0.4251, + "step": 16110 + }, + { + "epoch": 44.260989010989015, + "grad_norm": 7.815051078796387, + "learning_rate": 2.7869505494505494e-05, + "loss": 0.1758, + "step": 16111 + }, + { + "epoch": 44.26373626373626, + "grad_norm": 12.808262825012207, + "learning_rate": 2.786813186813187e-05, + "loss": 0.3285, + "step": 16112 + }, + { + "epoch": 44.26648351648352, + "grad_norm": 8.073309898376465, + "learning_rate": 2.7866758241758244e-05, + "loss": 0.27, + "step": 16113 + }, + { + "epoch": 44.26923076923077, + "grad_norm": 13.955554008483887, + "learning_rate": 2.7865384615384614e-05, + "loss": 0.2106, + "step": 16114 + }, + { + "epoch": 44.27197802197802, + "grad_norm": 11.480331420898438, + "learning_rate": 2.786401098901099e-05, + "loss": 0.1728, + "step": 16115 + }, + { + "epoch": 44.27472527472528, + "grad_norm": 12.105233192443848, + "learning_rate": 2.786263736263736e-05, + "loss": 0.3183, + "step": 16116 + }, + { + "epoch": 44.277472527472526, + "grad_norm": 15.003759384155273, + "learning_rate": 2.7861263736263737e-05, + "loss": 0.1725, + "step": 16117 + }, + { + "epoch": 44.28021978021978, + "grad_norm": 7.127226829528809, + "learning_rate": 2.7859890109890114e-05, + "loss": 0.1674, + "step": 16118 + }, + { + "epoch": 44.282967032967036, + "grad_norm": 15.00745964050293, + "learning_rate": 2.7858516483516484e-05, + "loss": 0.5066, + "step": 16119 + }, + { + "epoch": 44.285714285714285, + "grad_norm": 14.145362854003906, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2678, + "step": 16120 + }, + { + "epoch": 44.28846153846154, + "grad_norm": 14.931425094604492, + "learning_rate": 2.785576923076923e-05, + "loss": 0.3787, + "step": 16121 + }, + { + "epoch": 44.29120879120879, + "grad_norm": 13.225860595703125, + "learning_rate": 2.7854395604395604e-05, + "loss": 0.2046, + "step": 16122 + }, + { + "epoch": 44.293956043956044, + "grad_norm": 15.549297332763672, + "learning_rate": 2.785302197802198e-05, + "loss": 0.3845, + "step": 16123 + }, + { + "epoch": 44.2967032967033, + "grad_norm": 8.163783073425293, + "learning_rate": 2.785164835164835e-05, + "loss": 0.1698, + "step": 16124 + }, + { + "epoch": 44.29945054945055, + "grad_norm": 6.042950630187988, + "learning_rate": 2.7850274725274728e-05, + "loss": 0.1055, + "step": 16125 + }, + { + "epoch": 44.3021978021978, + "grad_norm": 5.521322727203369, + "learning_rate": 2.7848901098901098e-05, + "loss": 0.1059, + "step": 16126 + }, + { + "epoch": 44.30494505494506, + "grad_norm": 7.375574111938477, + "learning_rate": 2.7847527472527475e-05, + "loss": 0.3175, + "step": 16127 + }, + { + "epoch": 44.30769230769231, + "grad_norm": 17.20154571533203, + "learning_rate": 2.7846153846153848e-05, + "loss": 0.5313, + "step": 16128 + }, + { + "epoch": 44.31043956043956, + "grad_norm": 14.68285846710205, + "learning_rate": 2.7844780219780218e-05, + "loss": 0.3756, + "step": 16129 + }, + { + "epoch": 44.31318681318681, + "grad_norm": 8.694334030151367, + "learning_rate": 2.7843406593406595e-05, + "loss": 0.1267, + "step": 16130 + }, + { + "epoch": 44.315934065934066, + "grad_norm": 19.31410789489746, + "learning_rate": 2.7842032967032965e-05, + "loss": 0.6058, + "step": 16131 + }, + { + "epoch": 44.31868131868132, + "grad_norm": 13.27900505065918, + "learning_rate": 2.7840659340659342e-05, + "loss": 0.2222, + "step": 16132 + }, + { + "epoch": 44.32142857142857, + "grad_norm": 9.84508228302002, + "learning_rate": 2.783928571428572e-05, + "loss": 0.2301, + "step": 16133 + }, + { + "epoch": 44.324175824175825, + "grad_norm": 8.575608253479004, + "learning_rate": 2.783791208791209e-05, + "loss": 0.1192, + "step": 16134 + }, + { + "epoch": 44.32692307692308, + "grad_norm": 14.555813789367676, + "learning_rate": 2.7836538461538465e-05, + "loss": 0.4156, + "step": 16135 + }, + { + "epoch": 44.32967032967033, + "grad_norm": 11.573906898498535, + "learning_rate": 2.7835164835164836e-05, + "loss": 0.2213, + "step": 16136 + }, + { + "epoch": 44.332417582417584, + "grad_norm": 8.460472106933594, + "learning_rate": 2.783379120879121e-05, + "loss": 0.1238, + "step": 16137 + }, + { + "epoch": 44.33516483516483, + "grad_norm": 9.05539608001709, + "learning_rate": 2.7832417582417586e-05, + "loss": 0.1344, + "step": 16138 + }, + { + "epoch": 44.33791208791209, + "grad_norm": 21.118728637695312, + "learning_rate": 2.7831043956043956e-05, + "loss": 0.4536, + "step": 16139 + }, + { + "epoch": 44.34065934065934, + "grad_norm": 9.784076690673828, + "learning_rate": 2.7829670329670333e-05, + "loss": 0.2116, + "step": 16140 + }, + { + "epoch": 44.34340659340659, + "grad_norm": 10.278277397155762, + "learning_rate": 2.7828296703296703e-05, + "loss": 0.2357, + "step": 16141 + }, + { + "epoch": 44.34615384615385, + "grad_norm": 4.685070514678955, + "learning_rate": 2.782692307692308e-05, + "loss": 0.1521, + "step": 16142 + }, + { + "epoch": 44.3489010989011, + "grad_norm": 18.44233512878418, + "learning_rate": 2.7825549450549453e-05, + "loss": 0.64, + "step": 16143 + }, + { + "epoch": 44.35164835164835, + "grad_norm": 16.525680541992188, + "learning_rate": 2.7824175824175823e-05, + "loss": 0.2484, + "step": 16144 + }, + { + "epoch": 44.354395604395606, + "grad_norm": 11.889708518981934, + "learning_rate": 2.78228021978022e-05, + "loss": 0.323, + "step": 16145 + }, + { + "epoch": 44.357142857142854, + "grad_norm": 5.3883280754089355, + "learning_rate": 2.782142857142857e-05, + "loss": 0.0842, + "step": 16146 + }, + { + "epoch": 44.35989010989011, + "grad_norm": 8.202067375183105, + "learning_rate": 2.7820054945054946e-05, + "loss": 0.1269, + "step": 16147 + }, + { + "epoch": 44.362637362637365, + "grad_norm": 12.613618850708008, + "learning_rate": 2.7818681318681323e-05, + "loss": 0.1742, + "step": 16148 + }, + { + "epoch": 44.36538461538461, + "grad_norm": 17.458946228027344, + "learning_rate": 2.7817307692307693e-05, + "loss": 0.4309, + "step": 16149 + }, + { + "epoch": 44.36813186813187, + "grad_norm": 8.339698791503906, + "learning_rate": 2.781593406593407e-05, + "loss": 0.1734, + "step": 16150 + }, + { + "epoch": 44.370879120879124, + "grad_norm": 9.705493927001953, + "learning_rate": 2.781456043956044e-05, + "loss": 0.2394, + "step": 16151 + }, + { + "epoch": 44.37362637362637, + "grad_norm": 9.321565628051758, + "learning_rate": 2.7813186813186813e-05, + "loss": 0.2917, + "step": 16152 + }, + { + "epoch": 44.37637362637363, + "grad_norm": 10.433466911315918, + "learning_rate": 2.781181318681319e-05, + "loss": 0.333, + "step": 16153 + }, + { + "epoch": 44.379120879120876, + "grad_norm": 19.027816772460938, + "learning_rate": 2.781043956043956e-05, + "loss": 0.389, + "step": 16154 + }, + { + "epoch": 44.38186813186813, + "grad_norm": 11.220173835754395, + "learning_rate": 2.7809065934065937e-05, + "loss": 0.2245, + "step": 16155 + }, + { + "epoch": 44.38461538461539, + "grad_norm": 12.127345085144043, + "learning_rate": 2.7807692307692307e-05, + "loss": 0.2673, + "step": 16156 + }, + { + "epoch": 44.387362637362635, + "grad_norm": 10.77370834350586, + "learning_rate": 2.7806318681318684e-05, + "loss": 0.2978, + "step": 16157 + }, + { + "epoch": 44.39010989010989, + "grad_norm": 6.684722423553467, + "learning_rate": 2.7804945054945057e-05, + "loss": 0.0731, + "step": 16158 + }, + { + "epoch": 44.392857142857146, + "grad_norm": 14.683267593383789, + "learning_rate": 2.7803571428571427e-05, + "loss": 0.3369, + "step": 16159 + }, + { + "epoch": 44.395604395604394, + "grad_norm": 12.153749465942383, + "learning_rate": 2.7802197802197804e-05, + "loss": 0.2572, + "step": 16160 + }, + { + "epoch": 44.39835164835165, + "grad_norm": 10.96805191040039, + "learning_rate": 2.7800824175824174e-05, + "loss": 0.0977, + "step": 16161 + }, + { + "epoch": 44.4010989010989, + "grad_norm": 9.96298885345459, + "learning_rate": 2.779945054945055e-05, + "loss": 0.2365, + "step": 16162 + }, + { + "epoch": 44.40384615384615, + "grad_norm": 12.022788047790527, + "learning_rate": 2.7798076923076928e-05, + "loss": 0.2631, + "step": 16163 + }, + { + "epoch": 44.40659340659341, + "grad_norm": 6.179070949554443, + "learning_rate": 2.7796703296703298e-05, + "loss": 0.1043, + "step": 16164 + }, + { + "epoch": 44.40934065934066, + "grad_norm": 15.229414939880371, + "learning_rate": 2.7795329670329674e-05, + "loss": 0.25, + "step": 16165 + }, + { + "epoch": 44.41208791208791, + "grad_norm": 10.282756805419922, + "learning_rate": 2.7793956043956044e-05, + "loss": 0.3991, + "step": 16166 + }, + { + "epoch": 44.41483516483517, + "grad_norm": 16.474645614624023, + "learning_rate": 2.7792582417582418e-05, + "loss": 0.3296, + "step": 16167 + }, + { + "epoch": 44.417582417582416, + "grad_norm": 9.822844505310059, + "learning_rate": 2.7791208791208795e-05, + "loss": 0.2258, + "step": 16168 + }, + { + "epoch": 44.42032967032967, + "grad_norm": 7.993399620056152, + "learning_rate": 2.7789835164835165e-05, + "loss": 0.0923, + "step": 16169 + }, + { + "epoch": 44.42307692307692, + "grad_norm": 13.334114074707031, + "learning_rate": 2.778846153846154e-05, + "loss": 0.5779, + "step": 16170 + }, + { + "epoch": 44.425824175824175, + "grad_norm": 9.490669250488281, + "learning_rate": 2.778708791208791e-05, + "loss": 0.2307, + "step": 16171 + }, + { + "epoch": 44.42857142857143, + "grad_norm": 23.096803665161133, + "learning_rate": 2.778571428571429e-05, + "loss": 0.7563, + "step": 16172 + }, + { + "epoch": 44.43131868131868, + "grad_norm": 8.2496919631958, + "learning_rate": 2.7784340659340662e-05, + "loss": 0.0925, + "step": 16173 + }, + { + "epoch": 44.434065934065934, + "grad_norm": 16.9781494140625, + "learning_rate": 2.7782967032967032e-05, + "loss": 0.3591, + "step": 16174 + }, + { + "epoch": 44.43681318681319, + "grad_norm": 7.430582046508789, + "learning_rate": 2.778159340659341e-05, + "loss": 0.1224, + "step": 16175 + }, + { + "epoch": 44.43956043956044, + "grad_norm": 17.278318405151367, + "learning_rate": 2.778021978021978e-05, + "loss": 0.4563, + "step": 16176 + }, + { + "epoch": 44.44230769230769, + "grad_norm": 19.056100845336914, + "learning_rate": 2.7778846153846155e-05, + "loss": 0.9635, + "step": 16177 + }, + { + "epoch": 44.44505494505494, + "grad_norm": 13.051070213317871, + "learning_rate": 2.7777472527472532e-05, + "loss": 0.4075, + "step": 16178 + }, + { + "epoch": 44.4478021978022, + "grad_norm": 18.196393966674805, + "learning_rate": 2.7776098901098902e-05, + "loss": 0.4355, + "step": 16179 + }, + { + "epoch": 44.45054945054945, + "grad_norm": 18.00712013244629, + "learning_rate": 2.777472527472528e-05, + "loss": 0.356, + "step": 16180 + }, + { + "epoch": 44.4532967032967, + "grad_norm": 14.212667465209961, + "learning_rate": 2.777335164835165e-05, + "loss": 0.3447, + "step": 16181 + }, + { + "epoch": 44.456043956043956, + "grad_norm": 8.22965145111084, + "learning_rate": 2.7771978021978022e-05, + "loss": 0.1527, + "step": 16182 + }, + { + "epoch": 44.45879120879121, + "grad_norm": 9.125936508178711, + "learning_rate": 2.77706043956044e-05, + "loss": 0.1884, + "step": 16183 + }, + { + "epoch": 44.46153846153846, + "grad_norm": 10.560012817382812, + "learning_rate": 2.776923076923077e-05, + "loss": 0.2897, + "step": 16184 + }, + { + "epoch": 44.464285714285715, + "grad_norm": 10.903709411621094, + "learning_rate": 2.7767857142857146e-05, + "loss": 0.2305, + "step": 16185 + }, + { + "epoch": 44.467032967032964, + "grad_norm": 21.72652816772461, + "learning_rate": 2.7766483516483516e-05, + "loss": 0.5927, + "step": 16186 + }, + { + "epoch": 44.46978021978022, + "grad_norm": 6.0609259605407715, + "learning_rate": 2.7765109890109893e-05, + "loss": 0.0941, + "step": 16187 + }, + { + "epoch": 44.472527472527474, + "grad_norm": 4.804350852966309, + "learning_rate": 2.7763736263736266e-05, + "loss": 0.0712, + "step": 16188 + }, + { + "epoch": 44.47527472527472, + "grad_norm": 19.694814682006836, + "learning_rate": 2.7762362637362636e-05, + "loss": 0.4172, + "step": 16189 + }, + { + "epoch": 44.47802197802198, + "grad_norm": 7.184197902679443, + "learning_rate": 2.7760989010989013e-05, + "loss": 0.1381, + "step": 16190 + }, + { + "epoch": 44.48076923076923, + "grad_norm": 17.362600326538086, + "learning_rate": 2.7759615384615383e-05, + "loss": 0.4055, + "step": 16191 + }, + { + "epoch": 44.48351648351648, + "grad_norm": 7.933069229125977, + "learning_rate": 2.775824175824176e-05, + "loss": 0.1664, + "step": 16192 + }, + { + "epoch": 44.48626373626374, + "grad_norm": 13.676082611083984, + "learning_rate": 2.7756868131868137e-05, + "loss": 0.3823, + "step": 16193 + }, + { + "epoch": 44.489010989010985, + "grad_norm": 22.38827133178711, + "learning_rate": 2.7755494505494507e-05, + "loss": 0.4416, + "step": 16194 + }, + { + "epoch": 44.49175824175824, + "grad_norm": 12.843589782714844, + "learning_rate": 2.7754120879120883e-05, + "loss": 0.2604, + "step": 16195 + }, + { + "epoch": 44.494505494505496, + "grad_norm": 19.950088500976562, + "learning_rate": 2.7752747252747253e-05, + "loss": 0.6754, + "step": 16196 + }, + { + "epoch": 44.497252747252745, + "grad_norm": 12.298808097839355, + "learning_rate": 2.7751373626373627e-05, + "loss": 0.2161, + "step": 16197 + }, + { + "epoch": 44.5, + "grad_norm": 7.921743869781494, + "learning_rate": 2.7750000000000004e-05, + "loss": 0.0711, + "step": 16198 + }, + { + "epoch": 44.502747252747255, + "grad_norm": 14.222504615783691, + "learning_rate": 2.7748626373626374e-05, + "loss": 0.2814, + "step": 16199 + }, + { + "epoch": 44.505494505494504, + "grad_norm": 12.415838241577148, + "learning_rate": 2.774725274725275e-05, + "loss": 0.3405, + "step": 16200 + }, + { + "epoch": 44.50824175824176, + "grad_norm": 4.174254894256592, + "learning_rate": 2.774587912087912e-05, + "loss": 0.1953, + "step": 16201 + }, + { + "epoch": 44.51098901098901, + "grad_norm": 25.071535110473633, + "learning_rate": 2.7744505494505497e-05, + "loss": 0.5937, + "step": 16202 + }, + { + "epoch": 44.51373626373626, + "grad_norm": 25.854360580444336, + "learning_rate": 2.774313186813187e-05, + "loss": 0.6433, + "step": 16203 + }, + { + "epoch": 44.51648351648352, + "grad_norm": 10.00607967376709, + "learning_rate": 2.774175824175824e-05, + "loss": 0.1477, + "step": 16204 + }, + { + "epoch": 44.51923076923077, + "grad_norm": 11.53541374206543, + "learning_rate": 2.7740384615384618e-05, + "loss": 0.1996, + "step": 16205 + }, + { + "epoch": 44.52197802197802, + "grad_norm": 8.611018180847168, + "learning_rate": 2.7739010989010988e-05, + "loss": 0.1622, + "step": 16206 + }, + { + "epoch": 44.52472527472528, + "grad_norm": 12.928763389587402, + "learning_rate": 2.7737637362637364e-05, + "loss": 0.4185, + "step": 16207 + }, + { + "epoch": 44.527472527472526, + "grad_norm": 13.614546775817871, + "learning_rate": 2.773626373626374e-05, + "loss": 0.3186, + "step": 16208 + }, + { + "epoch": 44.53021978021978, + "grad_norm": 2.1534597873687744, + "learning_rate": 2.773489010989011e-05, + "loss": 0.03, + "step": 16209 + }, + { + "epoch": 44.532967032967036, + "grad_norm": 13.278227806091309, + "learning_rate": 2.7733516483516485e-05, + "loss": 0.3118, + "step": 16210 + }, + { + "epoch": 44.535714285714285, + "grad_norm": 9.457128524780273, + "learning_rate": 2.7732142857142858e-05, + "loss": 0.1244, + "step": 16211 + }, + { + "epoch": 44.53846153846154, + "grad_norm": 16.04960823059082, + "learning_rate": 2.773076923076923e-05, + "loss": 0.3798, + "step": 16212 + }, + { + "epoch": 44.54120879120879, + "grad_norm": 12.749353408813477, + "learning_rate": 2.7729395604395608e-05, + "loss": 0.2773, + "step": 16213 + }, + { + "epoch": 44.543956043956044, + "grad_norm": 13.05144214630127, + "learning_rate": 2.7728021978021978e-05, + "loss": 0.4983, + "step": 16214 + }, + { + "epoch": 44.5467032967033, + "grad_norm": 7.23689079284668, + "learning_rate": 2.7726648351648355e-05, + "loss": 0.1275, + "step": 16215 + }, + { + "epoch": 44.54945054945055, + "grad_norm": 12.598428726196289, + "learning_rate": 2.7725274725274725e-05, + "loss": 0.5462, + "step": 16216 + }, + { + "epoch": 44.5521978021978, + "grad_norm": 3.6409904956817627, + "learning_rate": 2.7723901098901102e-05, + "loss": 0.0611, + "step": 16217 + }, + { + "epoch": 44.55494505494506, + "grad_norm": 14.123305320739746, + "learning_rate": 2.7722527472527475e-05, + "loss": 0.3301, + "step": 16218 + }, + { + "epoch": 44.55769230769231, + "grad_norm": 16.345693588256836, + "learning_rate": 2.7721153846153845e-05, + "loss": 0.3483, + "step": 16219 + }, + { + "epoch": 44.56043956043956, + "grad_norm": 14.795746803283691, + "learning_rate": 2.7719780219780222e-05, + "loss": 0.3556, + "step": 16220 + }, + { + "epoch": 44.56318681318681, + "grad_norm": 7.087062358856201, + "learning_rate": 2.7718406593406592e-05, + "loss": 0.0962, + "step": 16221 + }, + { + "epoch": 44.565934065934066, + "grad_norm": 11.856751441955566, + "learning_rate": 2.771703296703297e-05, + "loss": 0.2054, + "step": 16222 + }, + { + "epoch": 44.56868131868132, + "grad_norm": 15.00857162475586, + "learning_rate": 2.7715659340659346e-05, + "loss": 0.3593, + "step": 16223 + }, + { + "epoch": 44.57142857142857, + "grad_norm": 14.427299499511719, + "learning_rate": 2.7714285714285716e-05, + "loss": 0.1287, + "step": 16224 + }, + { + "epoch": 44.574175824175825, + "grad_norm": 3.8930420875549316, + "learning_rate": 2.771291208791209e-05, + "loss": 0.0506, + "step": 16225 + }, + { + "epoch": 44.57692307692308, + "grad_norm": 8.595462799072266, + "learning_rate": 2.7711538461538462e-05, + "loss": 0.1529, + "step": 16226 + }, + { + "epoch": 44.57967032967033, + "grad_norm": 14.170008659362793, + "learning_rate": 2.7710164835164836e-05, + "loss": 0.2459, + "step": 16227 + }, + { + "epoch": 44.582417582417584, + "grad_norm": 20.818313598632812, + "learning_rate": 2.7708791208791213e-05, + "loss": 0.6317, + "step": 16228 + }, + { + "epoch": 44.58516483516483, + "grad_norm": 10.069090843200684, + "learning_rate": 2.7707417582417583e-05, + "loss": 0.1839, + "step": 16229 + }, + { + "epoch": 44.58791208791209, + "grad_norm": 7.512587547302246, + "learning_rate": 2.770604395604396e-05, + "loss": 0.1324, + "step": 16230 + }, + { + "epoch": 44.59065934065934, + "grad_norm": 10.869203567504883, + "learning_rate": 2.770467032967033e-05, + "loss": 0.3438, + "step": 16231 + }, + { + "epoch": 44.59340659340659, + "grad_norm": 11.772021293640137, + "learning_rate": 2.7703296703296706e-05, + "loss": 0.2915, + "step": 16232 + }, + { + "epoch": 44.59615384615385, + "grad_norm": 5.200539588928223, + "learning_rate": 2.770192307692308e-05, + "loss": 0.1241, + "step": 16233 + }, + { + "epoch": 44.5989010989011, + "grad_norm": 13.22293758392334, + "learning_rate": 2.770054945054945e-05, + "loss": 0.1828, + "step": 16234 + }, + { + "epoch": 44.60164835164835, + "grad_norm": 18.205549240112305, + "learning_rate": 2.7699175824175826e-05, + "loss": 0.3384, + "step": 16235 + }, + { + "epoch": 44.604395604395606, + "grad_norm": 7.93039608001709, + "learning_rate": 2.7697802197802197e-05, + "loss": 0.327, + "step": 16236 + }, + { + "epoch": 44.607142857142854, + "grad_norm": 24.15182113647461, + "learning_rate": 2.7696428571428573e-05, + "loss": 0.6786, + "step": 16237 + }, + { + "epoch": 44.60989010989011, + "grad_norm": 9.352304458618164, + "learning_rate": 2.769505494505495e-05, + "loss": 0.3069, + "step": 16238 + }, + { + "epoch": 44.612637362637365, + "grad_norm": 20.501792907714844, + "learning_rate": 2.769368131868132e-05, + "loss": 0.4522, + "step": 16239 + }, + { + "epoch": 44.61538461538461, + "grad_norm": 12.334023475646973, + "learning_rate": 2.7692307692307694e-05, + "loss": 0.1846, + "step": 16240 + }, + { + "epoch": 44.61813186813187, + "grad_norm": 14.668120384216309, + "learning_rate": 2.7690934065934067e-05, + "loss": 0.2719, + "step": 16241 + }, + { + "epoch": 44.620879120879124, + "grad_norm": 9.347362518310547, + "learning_rate": 2.768956043956044e-05, + "loss": 0.0768, + "step": 16242 + }, + { + "epoch": 44.62362637362637, + "grad_norm": 6.3569536209106445, + "learning_rate": 2.7688186813186817e-05, + "loss": 0.0728, + "step": 16243 + }, + { + "epoch": 44.62637362637363, + "grad_norm": 17.51766586303711, + "learning_rate": 2.7686813186813187e-05, + "loss": 0.3875, + "step": 16244 + }, + { + "epoch": 44.629120879120876, + "grad_norm": 9.786550521850586, + "learning_rate": 2.7685439560439564e-05, + "loss": 0.1671, + "step": 16245 + }, + { + "epoch": 44.63186813186813, + "grad_norm": 5.065805912017822, + "learning_rate": 2.7684065934065934e-05, + "loss": 0.0592, + "step": 16246 + }, + { + "epoch": 44.63461538461539, + "grad_norm": 19.782453536987305, + "learning_rate": 2.768269230769231e-05, + "loss": 0.506, + "step": 16247 + }, + { + "epoch": 44.637362637362635, + "grad_norm": 14.731271743774414, + "learning_rate": 2.7681318681318684e-05, + "loss": 0.242, + "step": 16248 + }, + { + "epoch": 44.64010989010989, + "grad_norm": 6.27602481842041, + "learning_rate": 2.7679945054945054e-05, + "loss": 0.1122, + "step": 16249 + }, + { + "epoch": 44.642857142857146, + "grad_norm": 11.24021053314209, + "learning_rate": 2.767857142857143e-05, + "loss": 0.2773, + "step": 16250 + }, + { + "epoch": 44.645604395604394, + "grad_norm": 6.856316566467285, + "learning_rate": 2.76771978021978e-05, + "loss": 0.0852, + "step": 16251 + }, + { + "epoch": 44.64835164835165, + "grad_norm": 10.65339469909668, + "learning_rate": 2.7675824175824178e-05, + "loss": 0.2484, + "step": 16252 + }, + { + "epoch": 44.6510989010989, + "grad_norm": 17.173418045043945, + "learning_rate": 2.7674450549450555e-05, + "loss": 0.5154, + "step": 16253 + }, + { + "epoch": 44.65384615384615, + "grad_norm": 11.535968780517578, + "learning_rate": 2.7673076923076925e-05, + "loss": 0.2282, + "step": 16254 + }, + { + "epoch": 44.65659340659341, + "grad_norm": 21.29249382019043, + "learning_rate": 2.7671703296703298e-05, + "loss": 0.3232, + "step": 16255 + }, + { + "epoch": 44.65934065934066, + "grad_norm": 6.765261173248291, + "learning_rate": 2.767032967032967e-05, + "loss": 0.1862, + "step": 16256 + }, + { + "epoch": 44.66208791208791, + "grad_norm": 8.060086250305176, + "learning_rate": 2.7668956043956045e-05, + "loss": 0.1336, + "step": 16257 + }, + { + "epoch": 44.66483516483517, + "grad_norm": 8.77187442779541, + "learning_rate": 2.766758241758242e-05, + "loss": 0.174, + "step": 16258 + }, + { + "epoch": 44.667582417582416, + "grad_norm": 20.158578872680664, + "learning_rate": 2.766620879120879e-05, + "loss": 0.4863, + "step": 16259 + }, + { + "epoch": 44.67032967032967, + "grad_norm": 14.512181282043457, + "learning_rate": 2.766483516483517e-05, + "loss": 0.2873, + "step": 16260 + }, + { + "epoch": 44.67307692307692, + "grad_norm": 12.319293975830078, + "learning_rate": 2.766346153846154e-05, + "loss": 0.505, + "step": 16261 + }, + { + "epoch": 44.675824175824175, + "grad_norm": 9.45378589630127, + "learning_rate": 2.7662087912087915e-05, + "loss": 0.2161, + "step": 16262 + }, + { + "epoch": 44.67857142857143, + "grad_norm": 18.125164031982422, + "learning_rate": 2.766071428571429e-05, + "loss": 0.4117, + "step": 16263 + }, + { + "epoch": 44.68131868131868, + "grad_norm": 2.2286314964294434, + "learning_rate": 2.765934065934066e-05, + "loss": 0.0346, + "step": 16264 + }, + { + "epoch": 44.684065934065934, + "grad_norm": 5.218389511108398, + "learning_rate": 2.7657967032967035e-05, + "loss": 0.1389, + "step": 16265 + }, + { + "epoch": 44.68681318681319, + "grad_norm": 18.14516830444336, + "learning_rate": 2.7656593406593405e-05, + "loss": 0.5144, + "step": 16266 + }, + { + "epoch": 44.68956043956044, + "grad_norm": 26.365049362182617, + "learning_rate": 2.7655219780219782e-05, + "loss": 0.6301, + "step": 16267 + }, + { + "epoch": 44.69230769230769, + "grad_norm": 10.641374588012695, + "learning_rate": 2.7653846153846152e-05, + "loss": 0.2385, + "step": 16268 + }, + { + "epoch": 44.69505494505494, + "grad_norm": 22.674840927124023, + "learning_rate": 2.765247252747253e-05, + "loss": 0.8261, + "step": 16269 + }, + { + "epoch": 44.6978021978022, + "grad_norm": 16.130599975585938, + "learning_rate": 2.7651098901098903e-05, + "loss": 0.5319, + "step": 16270 + }, + { + "epoch": 44.70054945054945, + "grad_norm": 12.096152305603027, + "learning_rate": 2.7649725274725276e-05, + "loss": 0.3083, + "step": 16271 + }, + { + "epoch": 44.7032967032967, + "grad_norm": 9.853597640991211, + "learning_rate": 2.764835164835165e-05, + "loss": 0.24, + "step": 16272 + }, + { + "epoch": 44.706043956043956, + "grad_norm": 5.790520668029785, + "learning_rate": 2.764697802197802e-05, + "loss": 0.1313, + "step": 16273 + }, + { + "epoch": 44.70879120879121, + "grad_norm": 13.259023666381836, + "learning_rate": 2.7645604395604396e-05, + "loss": 0.2468, + "step": 16274 + }, + { + "epoch": 44.71153846153846, + "grad_norm": 11.699324607849121, + "learning_rate": 2.7644230769230773e-05, + "loss": 0.5063, + "step": 16275 + }, + { + "epoch": 44.714285714285715, + "grad_norm": 5.483785629272461, + "learning_rate": 2.7642857142857143e-05, + "loss": 0.0966, + "step": 16276 + }, + { + "epoch": 44.717032967032964, + "grad_norm": 11.669867515563965, + "learning_rate": 2.764148351648352e-05, + "loss": 0.2966, + "step": 16277 + }, + { + "epoch": 44.71978021978022, + "grad_norm": 6.760678291320801, + "learning_rate": 2.764010989010989e-05, + "loss": 0.1635, + "step": 16278 + }, + { + "epoch": 44.722527472527474, + "grad_norm": 8.261175155639648, + "learning_rate": 2.7638736263736263e-05, + "loss": 0.1514, + "step": 16279 + }, + { + "epoch": 44.72527472527472, + "grad_norm": 12.61336898803711, + "learning_rate": 2.763736263736264e-05, + "loss": 0.4797, + "step": 16280 + }, + { + "epoch": 44.72802197802198, + "grad_norm": 11.478049278259277, + "learning_rate": 2.763598901098901e-05, + "loss": 0.3754, + "step": 16281 + }, + { + "epoch": 44.73076923076923, + "grad_norm": 6.683297157287598, + "learning_rate": 2.7634615384615387e-05, + "loss": 0.1213, + "step": 16282 + }, + { + "epoch": 44.73351648351648, + "grad_norm": 10.21506118774414, + "learning_rate": 2.7633241758241757e-05, + "loss": 0.2576, + "step": 16283 + }, + { + "epoch": 44.73626373626374, + "grad_norm": 13.93339729309082, + "learning_rate": 2.7631868131868134e-05, + "loss": 0.3426, + "step": 16284 + }, + { + "epoch": 44.73901098901099, + "grad_norm": 7.749085426330566, + "learning_rate": 2.7630494505494507e-05, + "loss": 0.1551, + "step": 16285 + }, + { + "epoch": 44.74175824175824, + "grad_norm": 18.284074783325195, + "learning_rate": 2.762912087912088e-05, + "loss": 0.3773, + "step": 16286 + }, + { + "epoch": 44.744505494505496, + "grad_norm": 12.603867530822754, + "learning_rate": 2.7627747252747254e-05, + "loss": 0.4918, + "step": 16287 + }, + { + "epoch": 44.747252747252745, + "grad_norm": 15.033258438110352, + "learning_rate": 2.7626373626373624e-05, + "loss": 0.3876, + "step": 16288 + }, + { + "epoch": 44.75, + "grad_norm": 16.32088279724121, + "learning_rate": 2.7625e-05, + "loss": 0.4383, + "step": 16289 + }, + { + "epoch": 44.752747252747255, + "grad_norm": 6.924007892608643, + "learning_rate": 2.7623626373626377e-05, + "loss": 0.1422, + "step": 16290 + }, + { + "epoch": 44.755494505494504, + "grad_norm": 8.574299812316895, + "learning_rate": 2.7622252747252747e-05, + "loss": 0.2194, + "step": 16291 + }, + { + "epoch": 44.75824175824176, + "grad_norm": 10.233756065368652, + "learning_rate": 2.7620879120879124e-05, + "loss": 0.1534, + "step": 16292 + }, + { + "epoch": 44.76098901098901, + "grad_norm": 14.600768089294434, + "learning_rate": 2.7619505494505494e-05, + "loss": 0.3463, + "step": 16293 + }, + { + "epoch": 44.76373626373626, + "grad_norm": 12.092083930969238, + "learning_rate": 2.7618131868131868e-05, + "loss": 0.1979, + "step": 16294 + }, + { + "epoch": 44.76648351648352, + "grad_norm": 13.91090202331543, + "learning_rate": 2.7616758241758244e-05, + "loss": 0.553, + "step": 16295 + }, + { + "epoch": 44.76923076923077, + "grad_norm": 5.451493263244629, + "learning_rate": 2.7615384615384614e-05, + "loss": 0.1032, + "step": 16296 + }, + { + "epoch": 44.77197802197802, + "grad_norm": 5.1810407638549805, + "learning_rate": 2.761401098901099e-05, + "loss": 0.1248, + "step": 16297 + }, + { + "epoch": 44.77472527472528, + "grad_norm": 8.632659912109375, + "learning_rate": 2.761263736263736e-05, + "loss": 0.2063, + "step": 16298 + }, + { + "epoch": 44.777472527472526, + "grad_norm": 9.42891788482666, + "learning_rate": 2.7611263736263738e-05, + "loss": 0.1725, + "step": 16299 + }, + { + "epoch": 44.78021978021978, + "grad_norm": 17.821897506713867, + "learning_rate": 2.760989010989011e-05, + "loss": 0.3277, + "step": 16300 + }, + { + "epoch": 44.782967032967036, + "grad_norm": 17.52439308166504, + "learning_rate": 2.7608516483516485e-05, + "loss": 0.5769, + "step": 16301 + }, + { + "epoch": 44.785714285714285, + "grad_norm": 19.134952545166016, + "learning_rate": 2.760714285714286e-05, + "loss": 0.3879, + "step": 16302 + }, + { + "epoch": 44.78846153846154, + "grad_norm": 6.276153087615967, + "learning_rate": 2.760576923076923e-05, + "loss": 0.144, + "step": 16303 + }, + { + "epoch": 44.79120879120879, + "grad_norm": 20.33472442626953, + "learning_rate": 2.7604395604395605e-05, + "loss": 0.467, + "step": 16304 + }, + { + "epoch": 44.793956043956044, + "grad_norm": 8.375494956970215, + "learning_rate": 2.7603021978021982e-05, + "loss": 0.1612, + "step": 16305 + }, + { + "epoch": 44.7967032967033, + "grad_norm": 16.531824111938477, + "learning_rate": 2.7601648351648352e-05, + "loss": 0.4164, + "step": 16306 + }, + { + "epoch": 44.79945054945055, + "grad_norm": 6.561689376831055, + "learning_rate": 2.760027472527473e-05, + "loss": 0.1138, + "step": 16307 + }, + { + "epoch": 44.8021978021978, + "grad_norm": 11.755992889404297, + "learning_rate": 2.75989010989011e-05, + "loss": 0.3304, + "step": 16308 + }, + { + "epoch": 44.80494505494506, + "grad_norm": 10.185770988464355, + "learning_rate": 2.7597527472527472e-05, + "loss": 0.306, + "step": 16309 + }, + { + "epoch": 44.80769230769231, + "grad_norm": 10.039775848388672, + "learning_rate": 2.759615384615385e-05, + "loss": 0.1367, + "step": 16310 + }, + { + "epoch": 44.81043956043956, + "grad_norm": 21.106618881225586, + "learning_rate": 2.759478021978022e-05, + "loss": 0.4634, + "step": 16311 + }, + { + "epoch": 44.81318681318681, + "grad_norm": 8.08415699005127, + "learning_rate": 2.7593406593406596e-05, + "loss": 0.1195, + "step": 16312 + }, + { + "epoch": 44.815934065934066, + "grad_norm": 9.753188133239746, + "learning_rate": 2.7592032967032966e-05, + "loss": 0.1701, + "step": 16313 + }, + { + "epoch": 44.81868131868132, + "grad_norm": 9.97262954711914, + "learning_rate": 2.7590659340659343e-05, + "loss": 0.2326, + "step": 16314 + }, + { + "epoch": 44.82142857142857, + "grad_norm": 12.049066543579102, + "learning_rate": 2.7589285714285716e-05, + "loss": 0.3231, + "step": 16315 + }, + { + "epoch": 44.824175824175825, + "grad_norm": 14.384961128234863, + "learning_rate": 2.758791208791209e-05, + "loss": 0.2408, + "step": 16316 + }, + { + "epoch": 44.82692307692308, + "grad_norm": 17.9307918548584, + "learning_rate": 2.7586538461538463e-05, + "loss": 0.7102, + "step": 16317 + }, + { + "epoch": 44.82967032967033, + "grad_norm": 16.413801193237305, + "learning_rate": 2.7585164835164833e-05, + "loss": 0.3545, + "step": 16318 + }, + { + "epoch": 44.832417582417584, + "grad_norm": 4.633411884307861, + "learning_rate": 2.758379120879121e-05, + "loss": 0.0871, + "step": 16319 + }, + { + "epoch": 44.83516483516483, + "grad_norm": 4.980630874633789, + "learning_rate": 2.7582417582417586e-05, + "loss": 0.0624, + "step": 16320 + }, + { + "epoch": 44.83791208791209, + "grad_norm": 10.083041191101074, + "learning_rate": 2.7581043956043956e-05, + "loss": 0.2739, + "step": 16321 + }, + { + "epoch": 44.84065934065934, + "grad_norm": 19.176288604736328, + "learning_rate": 2.7579670329670333e-05, + "loss": 0.381, + "step": 16322 + }, + { + "epoch": 44.84340659340659, + "grad_norm": 13.954021453857422, + "learning_rate": 2.7578296703296703e-05, + "loss": 0.3087, + "step": 16323 + }, + { + "epoch": 44.84615384615385, + "grad_norm": 9.815675735473633, + "learning_rate": 2.7576923076923077e-05, + "loss": 0.1328, + "step": 16324 + }, + { + "epoch": 44.8489010989011, + "grad_norm": 14.803587913513184, + "learning_rate": 2.7575549450549453e-05, + "loss": 0.2724, + "step": 16325 + }, + { + "epoch": 44.85164835164835, + "grad_norm": 5.152420997619629, + "learning_rate": 2.7574175824175823e-05, + "loss": 0.0685, + "step": 16326 + }, + { + "epoch": 44.854395604395606, + "grad_norm": 23.82599639892578, + "learning_rate": 2.75728021978022e-05, + "loss": 0.5453, + "step": 16327 + }, + { + "epoch": 44.857142857142854, + "grad_norm": 19.124731063842773, + "learning_rate": 2.757142857142857e-05, + "loss": 0.404, + "step": 16328 + }, + { + "epoch": 44.85989010989011, + "grad_norm": 2.8187029361724854, + "learning_rate": 2.7570054945054947e-05, + "loss": 0.0333, + "step": 16329 + }, + { + "epoch": 44.862637362637365, + "grad_norm": 7.446107387542725, + "learning_rate": 2.756868131868132e-05, + "loss": 0.1649, + "step": 16330 + }, + { + "epoch": 44.86538461538461, + "grad_norm": 16.853622436523438, + "learning_rate": 2.7567307692307694e-05, + "loss": 0.4127, + "step": 16331 + }, + { + "epoch": 44.86813186813187, + "grad_norm": 14.598433494567871, + "learning_rate": 2.7565934065934067e-05, + "loss": 0.5797, + "step": 16332 + }, + { + "epoch": 44.870879120879124, + "grad_norm": 11.240250587463379, + "learning_rate": 2.7564560439560437e-05, + "loss": 0.1847, + "step": 16333 + }, + { + "epoch": 44.87362637362637, + "grad_norm": 6.033176898956299, + "learning_rate": 2.7563186813186814e-05, + "loss": 0.132, + "step": 16334 + }, + { + "epoch": 44.87637362637363, + "grad_norm": 22.159912109375, + "learning_rate": 2.756181318681319e-05, + "loss": 0.4296, + "step": 16335 + }, + { + "epoch": 44.879120879120876, + "grad_norm": 8.000210762023926, + "learning_rate": 2.756043956043956e-05, + "loss": 0.2139, + "step": 16336 + }, + { + "epoch": 44.88186813186813, + "grad_norm": 3.190774917602539, + "learning_rate": 2.7559065934065938e-05, + "loss": 0.0618, + "step": 16337 + }, + { + "epoch": 44.88461538461539, + "grad_norm": 8.597938537597656, + "learning_rate": 2.7557692307692308e-05, + "loss": 0.114, + "step": 16338 + }, + { + "epoch": 44.887362637362635, + "grad_norm": 8.270703315734863, + "learning_rate": 2.755631868131868e-05, + "loss": 0.2495, + "step": 16339 + }, + { + "epoch": 44.89010989010989, + "grad_norm": 13.14427661895752, + "learning_rate": 2.7554945054945058e-05, + "loss": 0.3513, + "step": 16340 + }, + { + "epoch": 44.892857142857146, + "grad_norm": 13.453310012817383, + "learning_rate": 2.7553571428571428e-05, + "loss": 0.2051, + "step": 16341 + }, + { + "epoch": 44.895604395604394, + "grad_norm": 9.55972671508789, + "learning_rate": 2.7552197802197805e-05, + "loss": 0.2011, + "step": 16342 + }, + { + "epoch": 44.89835164835165, + "grad_norm": 14.617124557495117, + "learning_rate": 2.7550824175824175e-05, + "loss": 0.2071, + "step": 16343 + }, + { + "epoch": 44.9010989010989, + "grad_norm": 11.126286506652832, + "learning_rate": 2.754945054945055e-05, + "loss": 0.1976, + "step": 16344 + }, + { + "epoch": 44.90384615384615, + "grad_norm": 11.445080757141113, + "learning_rate": 2.7548076923076925e-05, + "loss": 0.3482, + "step": 16345 + }, + { + "epoch": 44.90659340659341, + "grad_norm": 11.469874382019043, + "learning_rate": 2.75467032967033e-05, + "loss": 0.3695, + "step": 16346 + }, + { + "epoch": 44.90934065934066, + "grad_norm": 4.122318267822266, + "learning_rate": 2.7545329670329672e-05, + "loss": 0.0634, + "step": 16347 + }, + { + "epoch": 44.91208791208791, + "grad_norm": 18.65763282775879, + "learning_rate": 2.7543956043956042e-05, + "loss": 0.3017, + "step": 16348 + }, + { + "epoch": 44.91483516483517, + "grad_norm": 3.1213715076446533, + "learning_rate": 2.754258241758242e-05, + "loss": 0.0392, + "step": 16349 + }, + { + "epoch": 44.917582417582416, + "grad_norm": 7.690974712371826, + "learning_rate": 2.7541208791208795e-05, + "loss": 0.1667, + "step": 16350 + }, + { + "epoch": 44.92032967032967, + "grad_norm": 11.853496551513672, + "learning_rate": 2.7539835164835165e-05, + "loss": 0.2631, + "step": 16351 + }, + { + "epoch": 44.92307692307692, + "grad_norm": 8.0759916305542, + "learning_rate": 2.7538461538461542e-05, + "loss": 0.2264, + "step": 16352 + }, + { + "epoch": 44.925824175824175, + "grad_norm": 8.499150276184082, + "learning_rate": 2.7537087912087912e-05, + "loss": 0.1258, + "step": 16353 + }, + { + "epoch": 44.92857142857143, + "grad_norm": 11.98570728302002, + "learning_rate": 2.7535714285714286e-05, + "loss": 0.1355, + "step": 16354 + }, + { + "epoch": 44.93131868131868, + "grad_norm": 10.823890686035156, + "learning_rate": 2.7534340659340662e-05, + "loss": 0.1866, + "step": 16355 + }, + { + "epoch": 44.934065934065934, + "grad_norm": 14.016520500183105, + "learning_rate": 2.7532967032967032e-05, + "loss": 0.48, + "step": 16356 + }, + { + "epoch": 44.93681318681319, + "grad_norm": 6.240230560302734, + "learning_rate": 2.753159340659341e-05, + "loss": 0.1524, + "step": 16357 + }, + { + "epoch": 44.93956043956044, + "grad_norm": 12.948141098022461, + "learning_rate": 2.753021978021978e-05, + "loss": 0.4229, + "step": 16358 + }, + { + "epoch": 44.94230769230769, + "grad_norm": 10.169549942016602, + "learning_rate": 2.7528846153846156e-05, + "loss": 0.2652, + "step": 16359 + }, + { + "epoch": 44.94505494505494, + "grad_norm": 8.48612117767334, + "learning_rate": 2.752747252747253e-05, + "loss": 0.206, + "step": 16360 + }, + { + "epoch": 44.9478021978022, + "grad_norm": 17.628334045410156, + "learning_rate": 2.7526098901098903e-05, + "loss": 0.5292, + "step": 16361 + }, + { + "epoch": 44.95054945054945, + "grad_norm": 16.453025817871094, + "learning_rate": 2.7524725274725276e-05, + "loss": 0.3386, + "step": 16362 + }, + { + "epoch": 44.9532967032967, + "grad_norm": 15.926980018615723, + "learning_rate": 2.7523351648351646e-05, + "loss": 0.1841, + "step": 16363 + }, + { + "epoch": 44.956043956043956, + "grad_norm": 9.115092277526855, + "learning_rate": 2.7521978021978023e-05, + "loss": 0.1193, + "step": 16364 + }, + { + "epoch": 44.95879120879121, + "grad_norm": 10.334813117980957, + "learning_rate": 2.75206043956044e-05, + "loss": 0.1255, + "step": 16365 + }, + { + "epoch": 44.96153846153846, + "grad_norm": 24.73044776916504, + "learning_rate": 2.751923076923077e-05, + "loss": 0.8666, + "step": 16366 + }, + { + "epoch": 44.964285714285715, + "grad_norm": 11.356854438781738, + "learning_rate": 2.7517857142857147e-05, + "loss": 0.156, + "step": 16367 + }, + { + "epoch": 44.967032967032964, + "grad_norm": 13.966785430908203, + "learning_rate": 2.7516483516483517e-05, + "loss": 0.5012, + "step": 16368 + }, + { + "epoch": 44.96978021978022, + "grad_norm": 14.547978401184082, + "learning_rate": 2.751510989010989e-05, + "loss": 0.3293, + "step": 16369 + }, + { + "epoch": 44.972527472527474, + "grad_norm": 8.236713409423828, + "learning_rate": 2.7513736263736267e-05, + "loss": 0.1306, + "step": 16370 + }, + { + "epoch": 44.97527472527472, + "grad_norm": 9.690834045410156, + "learning_rate": 2.7512362637362637e-05, + "loss": 0.1814, + "step": 16371 + }, + { + "epoch": 44.97802197802198, + "grad_norm": 30.266447067260742, + "learning_rate": 2.7510989010989014e-05, + "loss": 0.5422, + "step": 16372 + }, + { + "epoch": 44.98076923076923, + "grad_norm": 13.57616901397705, + "learning_rate": 2.7509615384615384e-05, + "loss": 0.3258, + "step": 16373 + }, + { + "epoch": 44.98351648351648, + "grad_norm": 6.8341569900512695, + "learning_rate": 2.750824175824176e-05, + "loss": 0.2504, + "step": 16374 + }, + { + "epoch": 44.98626373626374, + "grad_norm": 0.8944143652915955, + "learning_rate": 2.7506868131868134e-05, + "loss": 0.0123, + "step": 16375 + }, + { + "epoch": 44.98901098901099, + "grad_norm": 8.799300193786621, + "learning_rate": 2.7505494505494507e-05, + "loss": 0.1744, + "step": 16376 + }, + { + "epoch": 44.99175824175824, + "grad_norm": 22.160085678100586, + "learning_rate": 2.750412087912088e-05, + "loss": 0.4406, + "step": 16377 + }, + { + "epoch": 44.994505494505496, + "grad_norm": 23.2850399017334, + "learning_rate": 2.750274725274725e-05, + "loss": 0.3911, + "step": 16378 + }, + { + "epoch": 44.997252747252745, + "grad_norm": 16.953367233276367, + "learning_rate": 2.7501373626373628e-05, + "loss": 0.5523, + "step": 16379 + }, + { + "epoch": 45.0, + "grad_norm": 53.4002571105957, + "learning_rate": 2.7500000000000004e-05, + "loss": 1.6683, + "step": 16380 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.7575757575757576, + "eval_f1": 0.7487906295912194, + "eval_f1_DuraRiadoRio_64x64": 0.5517241379310345, + "eval_f1_Mole_64x64": 0.8701754385964913, + "eval_f1_Quebrado_64x64": 0.8714285714285714, + "eval_f1_RiadoRio_64x64": 0.7266666666666667, + "eval_f1_RioFechado_64x64": 0.7239583333333334, + "eval_loss": 1.3285672664642334, + "eval_precision": 0.8073021283583021, + "eval_precision_DuraRiadoRio_64x64": 0.9491525423728814, + "eval_precision_Mole_64x64": 0.8794326241134752, + "eval_precision_Quebrado_64x64": 0.8970588235294118, + "eval_precision_RiadoRio_64x64": 0.7364864864864865, + "eval_precision_RioFechado_64x64": 0.5743801652892562, + "eval_recall": 0.7586401449633474, + "eval_recall_DuraRiadoRio_64x64": 0.3888888888888889, + "eval_recall_Mole_64x64": 0.8611111111111112, + "eval_recall_Quebrado_64x64": 0.8472222222222222, + "eval_recall_RiadoRio_64x64": 0.7171052631578947, + "eval_recall_RioFechado_64x64": 0.9788732394366197, + "eval_runtime": 1.7608, + "eval_samples_per_second": 412.314, + "eval_steps_per_second": 26.125, + "step": 16380 + }, + { + "epoch": 45.002747252747255, + "grad_norm": 15.10816478729248, + "learning_rate": 2.7498626373626374e-05, + "loss": 0.4916, + "step": 16381 + }, + { + "epoch": 45.005494505494504, + "grad_norm": 12.00133228302002, + "learning_rate": 2.749725274725275e-05, + "loss": 0.254, + "step": 16382 + }, + { + "epoch": 45.00824175824176, + "grad_norm": 9.877009391784668, + "learning_rate": 2.749587912087912e-05, + "loss": 0.1796, + "step": 16383 + }, + { + "epoch": 45.010989010989015, + "grad_norm": 6.8330559730529785, + "learning_rate": 2.7494505494505495e-05, + "loss": 0.1686, + "step": 16384 + }, + { + "epoch": 45.01373626373626, + "grad_norm": 9.218469619750977, + "learning_rate": 2.749313186813187e-05, + "loss": 0.2645, + "step": 16385 + }, + { + "epoch": 45.01648351648352, + "grad_norm": 1.346415638923645, + "learning_rate": 2.749175824175824e-05, + "loss": 0.021, + "step": 16386 + }, + { + "epoch": 45.01923076923077, + "grad_norm": 11.176919937133789, + "learning_rate": 2.7490384615384618e-05, + "loss": 0.183, + "step": 16387 + }, + { + "epoch": 45.02197802197802, + "grad_norm": 11.695694923400879, + "learning_rate": 2.7489010989010988e-05, + "loss": 0.2348, + "step": 16388 + }, + { + "epoch": 45.02472527472528, + "grad_norm": 18.68592643737793, + "learning_rate": 2.7487637362637365e-05, + "loss": 0.432, + "step": 16389 + }, + { + "epoch": 45.027472527472526, + "grad_norm": 18.59162139892578, + "learning_rate": 2.748626373626374e-05, + "loss": 0.5729, + "step": 16390 + }, + { + "epoch": 45.03021978021978, + "grad_norm": 17.405441284179688, + "learning_rate": 2.7484890109890112e-05, + "loss": 0.302, + "step": 16391 + }, + { + "epoch": 45.032967032967036, + "grad_norm": 2.4548087120056152, + "learning_rate": 2.7483516483516485e-05, + "loss": 0.0391, + "step": 16392 + }, + { + "epoch": 45.035714285714285, + "grad_norm": 14.851311683654785, + "learning_rate": 2.7482142857142855e-05, + "loss": 0.3027, + "step": 16393 + }, + { + "epoch": 45.03846153846154, + "grad_norm": 8.572622299194336, + "learning_rate": 2.7480769230769232e-05, + "loss": 0.2075, + "step": 16394 + }, + { + "epoch": 45.04120879120879, + "grad_norm": 14.534687042236328, + "learning_rate": 2.747939560439561e-05, + "loss": 0.3992, + "step": 16395 + }, + { + "epoch": 45.043956043956044, + "grad_norm": 13.786152839660645, + "learning_rate": 2.747802197802198e-05, + "loss": 0.1511, + "step": 16396 + }, + { + "epoch": 45.0467032967033, + "grad_norm": 15.853812217712402, + "learning_rate": 2.7476648351648356e-05, + "loss": 0.3188, + "step": 16397 + }, + { + "epoch": 45.04945054945055, + "grad_norm": 12.092182159423828, + "learning_rate": 2.7475274725274726e-05, + "loss": 0.1742, + "step": 16398 + }, + { + "epoch": 45.0521978021978, + "grad_norm": 8.748893737792969, + "learning_rate": 2.74739010989011e-05, + "loss": 0.2097, + "step": 16399 + }, + { + "epoch": 45.05494505494506, + "grad_norm": 15.43234634399414, + "learning_rate": 2.7472527472527476e-05, + "loss": 0.3039, + "step": 16400 + }, + { + "epoch": 45.05769230769231, + "grad_norm": 14.03597640991211, + "learning_rate": 2.7471153846153846e-05, + "loss": 0.2918, + "step": 16401 + }, + { + "epoch": 45.06043956043956, + "grad_norm": 23.172847747802734, + "learning_rate": 2.7469780219780223e-05, + "loss": 0.5243, + "step": 16402 + }, + { + "epoch": 45.06318681318681, + "grad_norm": 10.668278694152832, + "learning_rate": 2.7468406593406593e-05, + "loss": 0.2656, + "step": 16403 + }, + { + "epoch": 45.065934065934066, + "grad_norm": 13.018433570861816, + "learning_rate": 2.746703296703297e-05, + "loss": 0.5899, + "step": 16404 + }, + { + "epoch": 45.06868131868132, + "grad_norm": 12.24028491973877, + "learning_rate": 2.7465659340659343e-05, + "loss": 0.2207, + "step": 16405 + }, + { + "epoch": 45.07142857142857, + "grad_norm": 13.33354663848877, + "learning_rate": 2.7464285714285713e-05, + "loss": 0.2982, + "step": 16406 + }, + { + "epoch": 45.074175824175825, + "grad_norm": 17.71793556213379, + "learning_rate": 2.746291208791209e-05, + "loss": 0.2549, + "step": 16407 + }, + { + "epoch": 45.07692307692308, + "grad_norm": 12.910266876220703, + "learning_rate": 2.746153846153846e-05, + "loss": 0.2494, + "step": 16408 + }, + { + "epoch": 45.07967032967033, + "grad_norm": 24.174949645996094, + "learning_rate": 2.7460164835164837e-05, + "loss": 0.4853, + "step": 16409 + }, + { + "epoch": 45.082417582417584, + "grad_norm": 8.359413146972656, + "learning_rate": 2.7458791208791213e-05, + "loss": 0.2406, + "step": 16410 + }, + { + "epoch": 45.08516483516483, + "grad_norm": 13.136395454406738, + "learning_rate": 2.7457417582417583e-05, + "loss": 0.3502, + "step": 16411 + }, + { + "epoch": 45.08791208791209, + "grad_norm": 7.754383087158203, + "learning_rate": 2.745604395604396e-05, + "loss": 0.1289, + "step": 16412 + }, + { + "epoch": 45.09065934065934, + "grad_norm": 2.0220787525177, + "learning_rate": 2.745467032967033e-05, + "loss": 0.0342, + "step": 16413 + }, + { + "epoch": 45.09340659340659, + "grad_norm": 18.305217742919922, + "learning_rate": 2.7453296703296704e-05, + "loss": 0.5895, + "step": 16414 + }, + { + "epoch": 45.09615384615385, + "grad_norm": 3.5188188552856445, + "learning_rate": 2.745192307692308e-05, + "loss": 0.0523, + "step": 16415 + }, + { + "epoch": 45.0989010989011, + "grad_norm": 11.864378929138184, + "learning_rate": 2.745054945054945e-05, + "loss": 0.5047, + "step": 16416 + }, + { + "epoch": 45.10164835164835, + "grad_norm": 1.615087628364563, + "learning_rate": 2.7449175824175827e-05, + "loss": 0.0244, + "step": 16417 + }, + { + "epoch": 45.104395604395606, + "grad_norm": 17.72319221496582, + "learning_rate": 2.7447802197802197e-05, + "loss": 0.3812, + "step": 16418 + }, + { + "epoch": 45.107142857142854, + "grad_norm": 19.19476318359375, + "learning_rate": 2.7446428571428574e-05, + "loss": 0.8085, + "step": 16419 + }, + { + "epoch": 45.10989010989011, + "grad_norm": 8.338905334472656, + "learning_rate": 2.7445054945054947e-05, + "loss": 0.1641, + "step": 16420 + }, + { + "epoch": 45.112637362637365, + "grad_norm": 11.001947402954102, + "learning_rate": 2.7443681318681317e-05, + "loss": 0.2199, + "step": 16421 + }, + { + "epoch": 45.11538461538461, + "grad_norm": 15.349496841430664, + "learning_rate": 2.7442307692307694e-05, + "loss": 0.5876, + "step": 16422 + }, + { + "epoch": 45.11813186813187, + "grad_norm": 24.38431167602539, + "learning_rate": 2.7440934065934064e-05, + "loss": 0.652, + "step": 16423 + }, + { + "epoch": 45.120879120879124, + "grad_norm": 6.096091270446777, + "learning_rate": 2.743956043956044e-05, + "loss": 0.1148, + "step": 16424 + }, + { + "epoch": 45.12362637362637, + "grad_norm": 4.560073375701904, + "learning_rate": 2.7438186813186818e-05, + "loss": 0.0804, + "step": 16425 + }, + { + "epoch": 45.12637362637363, + "grad_norm": 4.7581353187561035, + "learning_rate": 2.7436813186813188e-05, + "loss": 0.1139, + "step": 16426 + }, + { + "epoch": 45.129120879120876, + "grad_norm": 11.025965690612793, + "learning_rate": 2.7435439560439565e-05, + "loss": 0.2376, + "step": 16427 + }, + { + "epoch": 45.13186813186813, + "grad_norm": 11.63103199005127, + "learning_rate": 2.7434065934065935e-05, + "loss": 0.295, + "step": 16428 + }, + { + "epoch": 45.13461538461539, + "grad_norm": 11.3065767288208, + "learning_rate": 2.7432692307692308e-05, + "loss": 0.2359, + "step": 16429 + }, + { + "epoch": 45.137362637362635, + "grad_norm": 12.512605667114258, + "learning_rate": 2.7431318681318685e-05, + "loss": 0.2707, + "step": 16430 + }, + { + "epoch": 45.14010989010989, + "grad_norm": 9.856180191040039, + "learning_rate": 2.7429945054945055e-05, + "loss": 0.1678, + "step": 16431 + }, + { + "epoch": 45.142857142857146, + "grad_norm": 23.161657333374023, + "learning_rate": 2.742857142857143e-05, + "loss": 0.4306, + "step": 16432 + }, + { + "epoch": 45.145604395604394, + "grad_norm": 19.356489181518555, + "learning_rate": 2.7427197802197802e-05, + "loss": 0.5087, + "step": 16433 + }, + { + "epoch": 45.14835164835165, + "grad_norm": 11.600841522216797, + "learning_rate": 2.742582417582418e-05, + "loss": 0.3079, + "step": 16434 + }, + { + "epoch": 45.1510989010989, + "grad_norm": 8.861642837524414, + "learning_rate": 2.7424450549450552e-05, + "loss": 0.213, + "step": 16435 + }, + { + "epoch": 45.15384615384615, + "grad_norm": 20.88428497314453, + "learning_rate": 2.7423076923076922e-05, + "loss": 0.6041, + "step": 16436 + }, + { + "epoch": 45.15659340659341, + "grad_norm": 12.031037330627441, + "learning_rate": 2.74217032967033e-05, + "loss": 0.2551, + "step": 16437 + }, + { + "epoch": 45.15934065934066, + "grad_norm": 11.338590621948242, + "learning_rate": 2.742032967032967e-05, + "loss": 0.3219, + "step": 16438 + }, + { + "epoch": 45.16208791208791, + "grad_norm": 15.068341255187988, + "learning_rate": 2.7418956043956046e-05, + "loss": 0.3382, + "step": 16439 + }, + { + "epoch": 45.16483516483517, + "grad_norm": 3.8746635913848877, + "learning_rate": 2.7417582417582422e-05, + "loss": 0.0646, + "step": 16440 + }, + { + "epoch": 45.167582417582416, + "grad_norm": 8.30562973022461, + "learning_rate": 2.7416208791208792e-05, + "loss": 0.1092, + "step": 16441 + }, + { + "epoch": 45.17032967032967, + "grad_norm": 6.656029224395752, + "learning_rate": 2.741483516483517e-05, + "loss": 0.0852, + "step": 16442 + }, + { + "epoch": 45.17307692307692, + "grad_norm": 15.151284217834473, + "learning_rate": 2.741346153846154e-05, + "loss": 0.4091, + "step": 16443 + }, + { + "epoch": 45.175824175824175, + "grad_norm": 6.893497467041016, + "learning_rate": 2.7412087912087913e-05, + "loss": 0.1179, + "step": 16444 + }, + { + "epoch": 45.17857142857143, + "grad_norm": 3.7062175273895264, + "learning_rate": 2.741071428571429e-05, + "loss": 0.0799, + "step": 16445 + }, + { + "epoch": 45.18131868131868, + "grad_norm": 15.05126953125, + "learning_rate": 2.740934065934066e-05, + "loss": 0.194, + "step": 16446 + }, + { + "epoch": 45.184065934065934, + "grad_norm": 14.470425605773926, + "learning_rate": 2.7407967032967036e-05, + "loss": 0.3472, + "step": 16447 + }, + { + "epoch": 45.18681318681319, + "grad_norm": 4.872298240661621, + "learning_rate": 2.7406593406593406e-05, + "loss": 0.0657, + "step": 16448 + }, + { + "epoch": 45.18956043956044, + "grad_norm": 5.720584869384766, + "learning_rate": 2.7405219780219783e-05, + "loss": 0.0985, + "step": 16449 + }, + { + "epoch": 45.19230769230769, + "grad_norm": 7.0923333168029785, + "learning_rate": 2.7403846153846156e-05, + "loss": 0.1224, + "step": 16450 + }, + { + "epoch": 45.19505494505494, + "grad_norm": 4.498831748962402, + "learning_rate": 2.7402472527472526e-05, + "loss": 0.0883, + "step": 16451 + }, + { + "epoch": 45.1978021978022, + "grad_norm": 20.595399856567383, + "learning_rate": 2.7401098901098903e-05, + "loss": 0.6807, + "step": 16452 + }, + { + "epoch": 45.20054945054945, + "grad_norm": 12.97756290435791, + "learning_rate": 2.7399725274725273e-05, + "loss": 0.2484, + "step": 16453 + }, + { + "epoch": 45.2032967032967, + "grad_norm": 15.288012504577637, + "learning_rate": 2.739835164835165e-05, + "loss": 0.2238, + "step": 16454 + }, + { + "epoch": 45.206043956043956, + "grad_norm": 19.27302360534668, + "learning_rate": 2.7396978021978027e-05, + "loss": 0.3988, + "step": 16455 + }, + { + "epoch": 45.20879120879121, + "grad_norm": 9.040999412536621, + "learning_rate": 2.7395604395604397e-05, + "loss": 0.1409, + "step": 16456 + }, + { + "epoch": 45.21153846153846, + "grad_norm": 8.82153034210205, + "learning_rate": 2.7394230769230774e-05, + "loss": 0.1759, + "step": 16457 + }, + { + "epoch": 45.214285714285715, + "grad_norm": 7.434335708618164, + "learning_rate": 2.7392857142857144e-05, + "loss": 0.0872, + "step": 16458 + }, + { + "epoch": 45.217032967032964, + "grad_norm": 10.198216438293457, + "learning_rate": 2.7391483516483517e-05, + "loss": 0.2052, + "step": 16459 + }, + { + "epoch": 45.21978021978022, + "grad_norm": 14.634302139282227, + "learning_rate": 2.7390109890109894e-05, + "loss": 0.2582, + "step": 16460 + }, + { + "epoch": 45.222527472527474, + "grad_norm": 22.44148063659668, + "learning_rate": 2.7388736263736264e-05, + "loss": 0.6784, + "step": 16461 + }, + { + "epoch": 45.22527472527472, + "grad_norm": 12.171012878417969, + "learning_rate": 2.738736263736264e-05, + "loss": 0.2795, + "step": 16462 + }, + { + "epoch": 45.22802197802198, + "grad_norm": 8.52856159210205, + "learning_rate": 2.738598901098901e-05, + "loss": 0.0953, + "step": 16463 + }, + { + "epoch": 45.23076923076923, + "grad_norm": 10.771673202514648, + "learning_rate": 2.7384615384615387e-05, + "loss": 0.1905, + "step": 16464 + }, + { + "epoch": 45.23351648351648, + "grad_norm": 19.051681518554688, + "learning_rate": 2.738324175824176e-05, + "loss": 0.7137, + "step": 16465 + }, + { + "epoch": 45.23626373626374, + "grad_norm": 2.220761299133301, + "learning_rate": 2.738186813186813e-05, + "loss": 0.0392, + "step": 16466 + }, + { + "epoch": 45.239010989010985, + "grad_norm": 9.034170150756836, + "learning_rate": 2.7380494505494508e-05, + "loss": 0.2237, + "step": 16467 + }, + { + "epoch": 45.24175824175824, + "grad_norm": 9.380980491638184, + "learning_rate": 2.7379120879120878e-05, + "loss": 0.3299, + "step": 16468 + }, + { + "epoch": 45.244505494505496, + "grad_norm": 1.9306254386901855, + "learning_rate": 2.7377747252747255e-05, + "loss": 0.0223, + "step": 16469 + }, + { + "epoch": 45.247252747252745, + "grad_norm": 10.933039665222168, + "learning_rate": 2.737637362637363e-05, + "loss": 0.3925, + "step": 16470 + }, + { + "epoch": 45.25, + "grad_norm": 18.463932037353516, + "learning_rate": 2.7375e-05, + "loss": 0.7329, + "step": 16471 + }, + { + "epoch": 45.252747252747255, + "grad_norm": 15.286763191223145, + "learning_rate": 2.7373626373626378e-05, + "loss": 0.3533, + "step": 16472 + }, + { + "epoch": 45.255494505494504, + "grad_norm": 2.5223381519317627, + "learning_rate": 2.7372252747252748e-05, + "loss": 0.0494, + "step": 16473 + }, + { + "epoch": 45.25824175824176, + "grad_norm": 5.701233386993408, + "learning_rate": 2.737087912087912e-05, + "loss": 0.1042, + "step": 16474 + }, + { + "epoch": 45.260989010989015, + "grad_norm": 5.103464603424072, + "learning_rate": 2.73695054945055e-05, + "loss": 0.0837, + "step": 16475 + }, + { + "epoch": 45.26373626373626, + "grad_norm": 5.391756534576416, + "learning_rate": 2.736813186813187e-05, + "loss": 0.1038, + "step": 16476 + }, + { + "epoch": 45.26648351648352, + "grad_norm": 5.595009803771973, + "learning_rate": 2.7366758241758245e-05, + "loss": 0.1035, + "step": 16477 + }, + { + "epoch": 45.26923076923077, + "grad_norm": 14.258834838867188, + "learning_rate": 2.7365384615384615e-05, + "loss": 0.2796, + "step": 16478 + }, + { + "epoch": 45.27197802197802, + "grad_norm": 11.188193321228027, + "learning_rate": 2.7364010989010992e-05, + "loss": 0.2072, + "step": 16479 + }, + { + "epoch": 45.27472527472528, + "grad_norm": 21.362951278686523, + "learning_rate": 2.7362637362637365e-05, + "loss": 0.5441, + "step": 16480 + }, + { + "epoch": 45.277472527472526, + "grad_norm": 4.588320255279541, + "learning_rate": 2.7361263736263735e-05, + "loss": 0.0801, + "step": 16481 + }, + { + "epoch": 45.28021978021978, + "grad_norm": 9.369990348815918, + "learning_rate": 2.7359890109890112e-05, + "loss": 0.401, + "step": 16482 + }, + { + "epoch": 45.282967032967036, + "grad_norm": 16.0467529296875, + "learning_rate": 2.7358516483516482e-05, + "loss": 0.4425, + "step": 16483 + }, + { + "epoch": 45.285714285714285, + "grad_norm": 9.933137893676758, + "learning_rate": 2.735714285714286e-05, + "loss": 0.248, + "step": 16484 + }, + { + "epoch": 45.28846153846154, + "grad_norm": 22.844011306762695, + "learning_rate": 2.7355769230769236e-05, + "loss": 0.6515, + "step": 16485 + }, + { + "epoch": 45.29120879120879, + "grad_norm": 9.793500900268555, + "learning_rate": 2.7354395604395606e-05, + "loss": 0.1497, + "step": 16486 + }, + { + "epoch": 45.293956043956044, + "grad_norm": 14.265609741210938, + "learning_rate": 2.7353021978021983e-05, + "loss": 0.2108, + "step": 16487 + }, + { + "epoch": 45.2967032967033, + "grad_norm": 11.204522132873535, + "learning_rate": 2.7351648351648353e-05, + "loss": 0.2788, + "step": 16488 + }, + { + "epoch": 45.29945054945055, + "grad_norm": 20.684040069580078, + "learning_rate": 2.7350274725274726e-05, + "loss": 0.7613, + "step": 16489 + }, + { + "epoch": 45.3021978021978, + "grad_norm": 10.517494201660156, + "learning_rate": 2.7348901098901103e-05, + "loss": 0.2365, + "step": 16490 + }, + { + "epoch": 45.30494505494506, + "grad_norm": 14.429422378540039, + "learning_rate": 2.7347527472527473e-05, + "loss": 0.1881, + "step": 16491 + }, + { + "epoch": 45.30769230769231, + "grad_norm": 21.745559692382812, + "learning_rate": 2.734615384615385e-05, + "loss": 0.497, + "step": 16492 + }, + { + "epoch": 45.31043956043956, + "grad_norm": 17.405418395996094, + "learning_rate": 2.734478021978022e-05, + "loss": 0.3826, + "step": 16493 + }, + { + "epoch": 45.31318681318681, + "grad_norm": 12.433853149414062, + "learning_rate": 2.7343406593406596e-05, + "loss": 0.1623, + "step": 16494 + }, + { + "epoch": 45.315934065934066, + "grad_norm": 4.634425640106201, + "learning_rate": 2.7342032967032966e-05, + "loss": 0.1228, + "step": 16495 + }, + { + "epoch": 45.31868131868132, + "grad_norm": 12.630337715148926, + "learning_rate": 2.734065934065934e-05, + "loss": 0.2172, + "step": 16496 + }, + { + "epoch": 45.32142857142857, + "grad_norm": 4.5160627365112305, + "learning_rate": 2.7339285714285717e-05, + "loss": 0.0878, + "step": 16497 + }, + { + "epoch": 45.324175824175825, + "grad_norm": 10.514334678649902, + "learning_rate": 2.7337912087912087e-05, + "loss": 0.2633, + "step": 16498 + }, + { + "epoch": 45.32692307692308, + "grad_norm": 12.251008987426758, + "learning_rate": 2.7336538461538463e-05, + "loss": 0.321, + "step": 16499 + }, + { + "epoch": 45.32967032967033, + "grad_norm": 13.50131607055664, + "learning_rate": 2.7335164835164834e-05, + "loss": 0.3519, + "step": 16500 + }, + { + "epoch": 45.332417582417584, + "grad_norm": 7.898532867431641, + "learning_rate": 2.733379120879121e-05, + "loss": 0.2429, + "step": 16501 + }, + { + "epoch": 45.33516483516483, + "grad_norm": 7.697335720062256, + "learning_rate": 2.7332417582417587e-05, + "loss": 0.2158, + "step": 16502 + }, + { + "epoch": 45.33791208791209, + "grad_norm": 11.770254135131836, + "learning_rate": 2.7331043956043957e-05, + "loss": 0.2495, + "step": 16503 + }, + { + "epoch": 45.34065934065934, + "grad_norm": 14.576192855834961, + "learning_rate": 2.732967032967033e-05, + "loss": 0.3994, + "step": 16504 + }, + { + "epoch": 45.34340659340659, + "grad_norm": 9.830738067626953, + "learning_rate": 2.73282967032967e-05, + "loss": 0.1801, + "step": 16505 + }, + { + "epoch": 45.34615384615385, + "grad_norm": 19.256967544555664, + "learning_rate": 2.7326923076923077e-05, + "loss": 0.3454, + "step": 16506 + }, + { + "epoch": 45.3489010989011, + "grad_norm": 12.559123992919922, + "learning_rate": 2.7325549450549454e-05, + "loss": 0.2796, + "step": 16507 + }, + { + "epoch": 45.35164835164835, + "grad_norm": 12.529492378234863, + "learning_rate": 2.7324175824175824e-05, + "loss": 0.2104, + "step": 16508 + }, + { + "epoch": 45.354395604395606, + "grad_norm": 5.1312737464904785, + "learning_rate": 2.73228021978022e-05, + "loss": 0.0891, + "step": 16509 + }, + { + "epoch": 45.357142857142854, + "grad_norm": 7.6860246658325195, + "learning_rate": 2.732142857142857e-05, + "loss": 0.181, + "step": 16510 + }, + { + "epoch": 45.35989010989011, + "grad_norm": 11.268532752990723, + "learning_rate": 2.7320054945054944e-05, + "loss": 0.2467, + "step": 16511 + }, + { + "epoch": 45.362637362637365, + "grad_norm": 13.16435718536377, + "learning_rate": 2.731868131868132e-05, + "loss": 0.2535, + "step": 16512 + }, + { + "epoch": 45.36538461538461, + "grad_norm": 12.06294059753418, + "learning_rate": 2.731730769230769e-05, + "loss": 0.8133, + "step": 16513 + }, + { + "epoch": 45.36813186813187, + "grad_norm": 7.551521301269531, + "learning_rate": 2.7315934065934068e-05, + "loss": 0.1215, + "step": 16514 + }, + { + "epoch": 45.370879120879124, + "grad_norm": 13.938547134399414, + "learning_rate": 2.7314560439560438e-05, + "loss": 0.2408, + "step": 16515 + }, + { + "epoch": 45.37362637362637, + "grad_norm": 7.882559299468994, + "learning_rate": 2.7313186813186815e-05, + "loss": 0.1466, + "step": 16516 + }, + { + "epoch": 45.37637362637363, + "grad_norm": 17.10875129699707, + "learning_rate": 2.7311813186813188e-05, + "loss": 0.5882, + "step": 16517 + }, + { + "epoch": 45.379120879120876, + "grad_norm": 19.850818634033203, + "learning_rate": 2.731043956043956e-05, + "loss": 0.5918, + "step": 16518 + }, + { + "epoch": 45.38186813186813, + "grad_norm": 11.433328628540039, + "learning_rate": 2.7309065934065935e-05, + "loss": 0.3438, + "step": 16519 + }, + { + "epoch": 45.38461538461539, + "grad_norm": 6.475882053375244, + "learning_rate": 2.7307692307692305e-05, + "loss": 0.2125, + "step": 16520 + }, + { + "epoch": 45.387362637362635, + "grad_norm": 11.958027839660645, + "learning_rate": 2.7306318681318682e-05, + "loss": 0.2523, + "step": 16521 + }, + { + "epoch": 45.39010989010989, + "grad_norm": 17.98605728149414, + "learning_rate": 2.730494505494506e-05, + "loss": 0.4692, + "step": 16522 + }, + { + "epoch": 45.392857142857146, + "grad_norm": 5.0607123374938965, + "learning_rate": 2.730357142857143e-05, + "loss": 0.055, + "step": 16523 + }, + { + "epoch": 45.395604395604394, + "grad_norm": 13.09063720703125, + "learning_rate": 2.7302197802197805e-05, + "loss": 0.4741, + "step": 16524 + }, + { + "epoch": 45.39835164835165, + "grad_norm": 12.236993789672852, + "learning_rate": 2.7300824175824175e-05, + "loss": 0.2194, + "step": 16525 + }, + { + "epoch": 45.4010989010989, + "grad_norm": 10.275652885437012, + "learning_rate": 2.729945054945055e-05, + "loss": 0.3016, + "step": 16526 + }, + { + "epoch": 45.40384615384615, + "grad_norm": 8.829995155334473, + "learning_rate": 2.7298076923076926e-05, + "loss": 0.1887, + "step": 16527 + }, + { + "epoch": 45.40659340659341, + "grad_norm": 8.050972938537598, + "learning_rate": 2.7296703296703296e-05, + "loss": 0.2689, + "step": 16528 + }, + { + "epoch": 45.40934065934066, + "grad_norm": 8.755331993103027, + "learning_rate": 2.7295329670329672e-05, + "loss": 0.2172, + "step": 16529 + }, + { + "epoch": 45.41208791208791, + "grad_norm": 12.336555480957031, + "learning_rate": 2.7293956043956042e-05, + "loss": 0.4883, + "step": 16530 + }, + { + "epoch": 45.41483516483517, + "grad_norm": 5.087710857391357, + "learning_rate": 2.729258241758242e-05, + "loss": 0.0725, + "step": 16531 + }, + { + "epoch": 45.417582417582416, + "grad_norm": 10.058428764343262, + "learning_rate": 2.7291208791208793e-05, + "loss": 0.3149, + "step": 16532 + }, + { + "epoch": 45.42032967032967, + "grad_norm": 10.902612686157227, + "learning_rate": 2.7289835164835166e-05, + "loss": 0.1839, + "step": 16533 + }, + { + "epoch": 45.42307692307692, + "grad_norm": 14.198450088500977, + "learning_rate": 2.728846153846154e-05, + "loss": 0.3012, + "step": 16534 + }, + { + "epoch": 45.425824175824175, + "grad_norm": 3.6729989051818848, + "learning_rate": 2.728708791208791e-05, + "loss": 0.0508, + "step": 16535 + }, + { + "epoch": 45.42857142857143, + "grad_norm": 7.2945661544799805, + "learning_rate": 2.7285714285714286e-05, + "loss": 0.2014, + "step": 16536 + }, + { + "epoch": 45.43131868131868, + "grad_norm": 5.89376163482666, + "learning_rate": 2.7284340659340663e-05, + "loss": 0.1125, + "step": 16537 + }, + { + "epoch": 45.434065934065934, + "grad_norm": 17.55332374572754, + "learning_rate": 2.7282967032967033e-05, + "loss": 0.3451, + "step": 16538 + }, + { + "epoch": 45.43681318681319, + "grad_norm": 6.804434776306152, + "learning_rate": 2.728159340659341e-05, + "loss": 0.2084, + "step": 16539 + }, + { + "epoch": 45.43956043956044, + "grad_norm": 21.836851119995117, + "learning_rate": 2.728021978021978e-05, + "loss": 0.6337, + "step": 16540 + }, + { + "epoch": 45.44230769230769, + "grad_norm": 5.192014217376709, + "learning_rate": 2.7278846153846153e-05, + "loss": 0.0723, + "step": 16541 + }, + { + "epoch": 45.44505494505494, + "grad_norm": 6.050711154937744, + "learning_rate": 2.727747252747253e-05, + "loss": 0.1313, + "step": 16542 + }, + { + "epoch": 45.4478021978022, + "grad_norm": 15.937301635742188, + "learning_rate": 2.72760989010989e-05, + "loss": 0.5094, + "step": 16543 + }, + { + "epoch": 45.45054945054945, + "grad_norm": 25.00142478942871, + "learning_rate": 2.7274725274725277e-05, + "loss": 0.6886, + "step": 16544 + }, + { + "epoch": 45.4532967032967, + "grad_norm": 11.698342323303223, + "learning_rate": 2.7273351648351647e-05, + "loss": 0.3547, + "step": 16545 + }, + { + "epoch": 45.456043956043956, + "grad_norm": 13.234496116638184, + "learning_rate": 2.7271978021978024e-05, + "loss": 0.3781, + "step": 16546 + }, + { + "epoch": 45.45879120879121, + "grad_norm": 14.251191139221191, + "learning_rate": 2.7270604395604397e-05, + "loss": 0.3073, + "step": 16547 + }, + { + "epoch": 45.46153846153846, + "grad_norm": 11.213120460510254, + "learning_rate": 2.726923076923077e-05, + "loss": 0.1372, + "step": 16548 + }, + { + "epoch": 45.464285714285715, + "grad_norm": 9.659965515136719, + "learning_rate": 2.7267857142857144e-05, + "loss": 0.3514, + "step": 16549 + }, + { + "epoch": 45.467032967032964, + "grad_norm": 11.48454761505127, + "learning_rate": 2.7266483516483514e-05, + "loss": 0.3521, + "step": 16550 + }, + { + "epoch": 45.46978021978022, + "grad_norm": 10.978582382202148, + "learning_rate": 2.726510989010989e-05, + "loss": 0.1573, + "step": 16551 + }, + { + "epoch": 45.472527472527474, + "grad_norm": 5.818161964416504, + "learning_rate": 2.7263736263736268e-05, + "loss": 0.0657, + "step": 16552 + }, + { + "epoch": 45.47527472527472, + "grad_norm": 8.355310440063477, + "learning_rate": 2.7262362637362638e-05, + "loss": 0.2612, + "step": 16553 + }, + { + "epoch": 45.47802197802198, + "grad_norm": 13.03822135925293, + "learning_rate": 2.7260989010989014e-05, + "loss": 0.227, + "step": 16554 + }, + { + "epoch": 45.48076923076923, + "grad_norm": 16.65780258178711, + "learning_rate": 2.7259615384615384e-05, + "loss": 0.3239, + "step": 16555 + }, + { + "epoch": 45.48351648351648, + "grad_norm": 3.5482983589172363, + "learning_rate": 2.7258241758241758e-05, + "loss": 0.0502, + "step": 16556 + }, + { + "epoch": 45.48626373626374, + "grad_norm": 14.020415306091309, + "learning_rate": 2.7256868131868135e-05, + "loss": 0.3852, + "step": 16557 + }, + { + "epoch": 45.489010989010985, + "grad_norm": 14.166363716125488, + "learning_rate": 2.7255494505494505e-05, + "loss": 0.4539, + "step": 16558 + }, + { + "epoch": 45.49175824175824, + "grad_norm": 11.800177574157715, + "learning_rate": 2.725412087912088e-05, + "loss": 0.2656, + "step": 16559 + }, + { + "epoch": 45.494505494505496, + "grad_norm": 25.703622817993164, + "learning_rate": 2.725274725274725e-05, + "loss": 0.9653, + "step": 16560 + }, + { + "epoch": 45.497252747252745, + "grad_norm": 16.18138885498047, + "learning_rate": 2.7251373626373628e-05, + "loss": 0.5091, + "step": 16561 + }, + { + "epoch": 45.5, + "grad_norm": 9.367990493774414, + "learning_rate": 2.725e-05, + "loss": 0.3295, + "step": 16562 + }, + { + "epoch": 45.502747252747255, + "grad_norm": 5.028893947601318, + "learning_rate": 2.7248626373626375e-05, + "loss": 0.1189, + "step": 16563 + }, + { + "epoch": 45.505494505494504, + "grad_norm": 15.237418174743652, + "learning_rate": 2.724725274725275e-05, + "loss": 0.3781, + "step": 16564 + }, + { + "epoch": 45.50824175824176, + "grad_norm": 5.589926719665527, + "learning_rate": 2.724587912087912e-05, + "loss": 0.0808, + "step": 16565 + }, + { + "epoch": 45.51098901098901, + "grad_norm": 4.6932783126831055, + "learning_rate": 2.7244505494505495e-05, + "loss": 0.0891, + "step": 16566 + }, + { + "epoch": 45.51373626373626, + "grad_norm": 7.902852535247803, + "learning_rate": 2.7243131868131872e-05, + "loss": 0.1558, + "step": 16567 + }, + { + "epoch": 45.51648351648352, + "grad_norm": 12.07844066619873, + "learning_rate": 2.7241758241758242e-05, + "loss": 0.1706, + "step": 16568 + }, + { + "epoch": 45.51923076923077, + "grad_norm": 15.283358573913574, + "learning_rate": 2.724038461538462e-05, + "loss": 0.5208, + "step": 16569 + }, + { + "epoch": 45.52197802197802, + "grad_norm": 8.374464988708496, + "learning_rate": 2.723901098901099e-05, + "loss": 0.1762, + "step": 16570 + }, + { + "epoch": 45.52472527472528, + "grad_norm": 12.260522842407227, + "learning_rate": 2.7237637362637362e-05, + "loss": 0.2654, + "step": 16571 + }, + { + "epoch": 45.527472527472526, + "grad_norm": 12.699161529541016, + "learning_rate": 2.723626373626374e-05, + "loss": 0.324, + "step": 16572 + }, + { + "epoch": 45.53021978021978, + "grad_norm": 14.461427688598633, + "learning_rate": 2.723489010989011e-05, + "loss": 0.4664, + "step": 16573 + }, + { + "epoch": 45.532967032967036, + "grad_norm": 8.56316089630127, + "learning_rate": 2.7233516483516486e-05, + "loss": 0.2174, + "step": 16574 + }, + { + "epoch": 45.535714285714285, + "grad_norm": 25.480356216430664, + "learning_rate": 2.7232142857142856e-05, + "loss": 0.9139, + "step": 16575 + }, + { + "epoch": 45.53846153846154, + "grad_norm": 4.706369876861572, + "learning_rate": 2.7230769230769233e-05, + "loss": 0.0782, + "step": 16576 + }, + { + "epoch": 45.54120879120879, + "grad_norm": 23.43101692199707, + "learning_rate": 2.7229395604395606e-05, + "loss": 0.6948, + "step": 16577 + }, + { + "epoch": 45.543956043956044, + "grad_norm": 18.30377197265625, + "learning_rate": 2.722802197802198e-05, + "loss": 0.5048, + "step": 16578 + }, + { + "epoch": 45.5467032967033, + "grad_norm": 12.735559463500977, + "learning_rate": 2.7226648351648353e-05, + "loss": 0.2876, + "step": 16579 + }, + { + "epoch": 45.54945054945055, + "grad_norm": 12.449736595153809, + "learning_rate": 2.7225274725274723e-05, + "loss": 0.4805, + "step": 16580 + }, + { + "epoch": 45.5521978021978, + "grad_norm": 22.34673309326172, + "learning_rate": 2.72239010989011e-05, + "loss": 0.7798, + "step": 16581 + }, + { + "epoch": 45.55494505494506, + "grad_norm": 22.374940872192383, + "learning_rate": 2.7222527472527477e-05, + "loss": 0.8082, + "step": 16582 + }, + { + "epoch": 45.55769230769231, + "grad_norm": 21.12441062927246, + "learning_rate": 2.7221153846153847e-05, + "loss": 0.5449, + "step": 16583 + }, + { + "epoch": 45.56043956043956, + "grad_norm": 15.580131530761719, + "learning_rate": 2.7219780219780223e-05, + "loss": 0.3912, + "step": 16584 + }, + { + "epoch": 45.56318681318681, + "grad_norm": 16.44266128540039, + "learning_rate": 2.7218406593406593e-05, + "loss": 0.3723, + "step": 16585 + }, + { + "epoch": 45.565934065934066, + "grad_norm": 11.632165908813477, + "learning_rate": 2.7217032967032967e-05, + "loss": 0.4841, + "step": 16586 + }, + { + "epoch": 45.56868131868132, + "grad_norm": 8.046585083007812, + "learning_rate": 2.7215659340659344e-05, + "loss": 0.1295, + "step": 16587 + }, + { + "epoch": 45.57142857142857, + "grad_norm": 7.371860980987549, + "learning_rate": 2.7214285714285714e-05, + "loss": 0.0907, + "step": 16588 + }, + { + "epoch": 45.574175824175825, + "grad_norm": 10.336833953857422, + "learning_rate": 2.721291208791209e-05, + "loss": 0.1228, + "step": 16589 + }, + { + "epoch": 45.57692307692308, + "grad_norm": 5.670066833496094, + "learning_rate": 2.721153846153846e-05, + "loss": 0.0776, + "step": 16590 + }, + { + "epoch": 45.57967032967033, + "grad_norm": 10.5892972946167, + "learning_rate": 2.7210164835164837e-05, + "loss": 0.2173, + "step": 16591 + }, + { + "epoch": 45.582417582417584, + "grad_norm": 9.711411476135254, + "learning_rate": 2.720879120879121e-05, + "loss": 0.2894, + "step": 16592 + }, + { + "epoch": 45.58516483516483, + "grad_norm": 7.086904048919678, + "learning_rate": 2.7207417582417584e-05, + "loss": 0.1083, + "step": 16593 + }, + { + "epoch": 45.58791208791209, + "grad_norm": 10.85147476196289, + "learning_rate": 2.7206043956043957e-05, + "loss": 0.222, + "step": 16594 + }, + { + "epoch": 45.59065934065934, + "grad_norm": 13.713204383850098, + "learning_rate": 2.7204670329670327e-05, + "loss": 0.4692, + "step": 16595 + }, + { + "epoch": 45.59340659340659, + "grad_norm": 14.596221923828125, + "learning_rate": 2.7203296703296704e-05, + "loss": 0.2153, + "step": 16596 + }, + { + "epoch": 45.59615384615385, + "grad_norm": 11.570091247558594, + "learning_rate": 2.720192307692308e-05, + "loss": 0.4708, + "step": 16597 + }, + { + "epoch": 45.5989010989011, + "grad_norm": 7.658239841461182, + "learning_rate": 2.720054945054945e-05, + "loss": 0.1498, + "step": 16598 + }, + { + "epoch": 45.60164835164835, + "grad_norm": 16.703588485717773, + "learning_rate": 2.7199175824175828e-05, + "loss": 0.5575, + "step": 16599 + }, + { + "epoch": 45.604395604395606, + "grad_norm": 13.208378791809082, + "learning_rate": 2.7197802197802198e-05, + "loss": 0.3194, + "step": 16600 + }, + { + "epoch": 45.607142857142854, + "grad_norm": 6.2308573722839355, + "learning_rate": 2.719642857142857e-05, + "loss": 0.1872, + "step": 16601 + }, + { + "epoch": 45.60989010989011, + "grad_norm": 19.90608787536621, + "learning_rate": 2.7195054945054948e-05, + "loss": 0.6897, + "step": 16602 + }, + { + "epoch": 45.612637362637365, + "grad_norm": 14.407461166381836, + "learning_rate": 2.7193681318681318e-05, + "loss": 0.279, + "step": 16603 + }, + { + "epoch": 45.61538461538461, + "grad_norm": 11.65424919128418, + "learning_rate": 2.7192307692307695e-05, + "loss": 0.1449, + "step": 16604 + }, + { + "epoch": 45.61813186813187, + "grad_norm": 25.898223876953125, + "learning_rate": 2.7190934065934065e-05, + "loss": 0.7906, + "step": 16605 + }, + { + "epoch": 45.620879120879124, + "grad_norm": 0.9182599186897278, + "learning_rate": 2.7189560439560442e-05, + "loss": 0.014, + "step": 16606 + }, + { + "epoch": 45.62362637362637, + "grad_norm": 4.081959247589111, + "learning_rate": 2.7188186813186815e-05, + "loss": 0.061, + "step": 16607 + }, + { + "epoch": 45.62637362637363, + "grad_norm": 3.5123133659362793, + "learning_rate": 2.718681318681319e-05, + "loss": 0.0588, + "step": 16608 + }, + { + "epoch": 45.629120879120876, + "grad_norm": 15.546542167663574, + "learning_rate": 2.7185439560439562e-05, + "loss": 0.2625, + "step": 16609 + }, + { + "epoch": 45.63186813186813, + "grad_norm": 11.78036880493164, + "learning_rate": 2.7184065934065932e-05, + "loss": 0.2248, + "step": 16610 + }, + { + "epoch": 45.63461538461539, + "grad_norm": 10.88530445098877, + "learning_rate": 2.718269230769231e-05, + "loss": 0.1878, + "step": 16611 + }, + { + "epoch": 45.637362637362635, + "grad_norm": 9.93009090423584, + "learning_rate": 2.7181318681318686e-05, + "loss": 0.4613, + "step": 16612 + }, + { + "epoch": 45.64010989010989, + "grad_norm": 16.747365951538086, + "learning_rate": 2.7179945054945056e-05, + "loss": 0.4599, + "step": 16613 + }, + { + "epoch": 45.642857142857146, + "grad_norm": 15.642192840576172, + "learning_rate": 2.7178571428571432e-05, + "loss": 0.3224, + "step": 16614 + }, + { + "epoch": 45.645604395604394, + "grad_norm": 17.3204402923584, + "learning_rate": 2.7177197802197802e-05, + "loss": 0.4031, + "step": 16615 + }, + { + "epoch": 45.64835164835165, + "grad_norm": 8.401459693908691, + "learning_rate": 2.7175824175824176e-05, + "loss": 0.2089, + "step": 16616 + }, + { + "epoch": 45.6510989010989, + "grad_norm": 15.03040599822998, + "learning_rate": 2.7174450549450553e-05, + "loss": 0.4227, + "step": 16617 + }, + { + "epoch": 45.65384615384615, + "grad_norm": 6.404319763183594, + "learning_rate": 2.7173076923076923e-05, + "loss": 0.0899, + "step": 16618 + }, + { + "epoch": 45.65659340659341, + "grad_norm": 4.561349868774414, + "learning_rate": 2.71717032967033e-05, + "loss": 0.1225, + "step": 16619 + }, + { + "epoch": 45.65934065934066, + "grad_norm": 8.674609184265137, + "learning_rate": 2.717032967032967e-05, + "loss": 0.1687, + "step": 16620 + }, + { + "epoch": 45.66208791208791, + "grad_norm": 5.0070013999938965, + "learning_rate": 2.7168956043956046e-05, + "loss": 0.1029, + "step": 16621 + }, + { + "epoch": 45.66483516483517, + "grad_norm": 5.030767440795898, + "learning_rate": 2.716758241758242e-05, + "loss": 0.0832, + "step": 16622 + }, + { + "epoch": 45.667582417582416, + "grad_norm": 4.416872978210449, + "learning_rate": 2.7166208791208793e-05, + "loss": 0.0585, + "step": 16623 + }, + { + "epoch": 45.67032967032967, + "grad_norm": 12.798380851745605, + "learning_rate": 2.7164835164835166e-05, + "loss": 0.3138, + "step": 16624 + }, + { + "epoch": 45.67307692307692, + "grad_norm": 7.967178821563721, + "learning_rate": 2.7163461538461536e-05, + "loss": 0.1619, + "step": 16625 + }, + { + "epoch": 45.675824175824175, + "grad_norm": 16.285221099853516, + "learning_rate": 2.7162087912087913e-05, + "loss": 0.4359, + "step": 16626 + }, + { + "epoch": 45.67857142857143, + "grad_norm": 27.39604377746582, + "learning_rate": 2.716071428571429e-05, + "loss": 0.7527, + "step": 16627 + }, + { + "epoch": 45.68131868131868, + "grad_norm": 13.954607009887695, + "learning_rate": 2.715934065934066e-05, + "loss": 0.4324, + "step": 16628 + }, + { + "epoch": 45.684065934065934, + "grad_norm": 19.202285766601562, + "learning_rate": 2.7157967032967037e-05, + "loss": 0.6198, + "step": 16629 + }, + { + "epoch": 45.68681318681319, + "grad_norm": 12.608819007873535, + "learning_rate": 2.7156593406593407e-05, + "loss": 0.2925, + "step": 16630 + }, + { + "epoch": 45.68956043956044, + "grad_norm": 11.492644309997559, + "learning_rate": 2.715521978021978e-05, + "loss": 0.2535, + "step": 16631 + }, + { + "epoch": 45.69230769230769, + "grad_norm": 15.342822074890137, + "learning_rate": 2.7153846153846157e-05, + "loss": 0.3159, + "step": 16632 + }, + { + "epoch": 45.69505494505494, + "grad_norm": 18.01177215576172, + "learning_rate": 2.7152472527472527e-05, + "loss": 0.4891, + "step": 16633 + }, + { + "epoch": 45.6978021978022, + "grad_norm": 11.209961891174316, + "learning_rate": 2.7151098901098904e-05, + "loss": 0.1902, + "step": 16634 + }, + { + "epoch": 45.70054945054945, + "grad_norm": 13.708060264587402, + "learning_rate": 2.7149725274725274e-05, + "loss": 0.3523, + "step": 16635 + }, + { + "epoch": 45.7032967032967, + "grad_norm": 4.608619689941406, + "learning_rate": 2.714835164835165e-05, + "loss": 0.0806, + "step": 16636 + }, + { + "epoch": 45.706043956043956, + "grad_norm": 12.183107376098633, + "learning_rate": 2.7146978021978024e-05, + "loss": 0.199, + "step": 16637 + }, + { + "epoch": 45.70879120879121, + "grad_norm": 6.261659145355225, + "learning_rate": 2.7145604395604398e-05, + "loss": 0.077, + "step": 16638 + }, + { + "epoch": 45.71153846153846, + "grad_norm": 6.897027492523193, + "learning_rate": 2.714423076923077e-05, + "loss": 0.2249, + "step": 16639 + }, + { + "epoch": 45.714285714285715, + "grad_norm": 7.364860534667969, + "learning_rate": 2.714285714285714e-05, + "loss": 0.1387, + "step": 16640 + }, + { + "epoch": 45.717032967032964, + "grad_norm": 12.994629859924316, + "learning_rate": 2.7141483516483518e-05, + "loss": 0.4741, + "step": 16641 + }, + { + "epoch": 45.71978021978022, + "grad_norm": 10.808168411254883, + "learning_rate": 2.7140109890109895e-05, + "loss": 0.3124, + "step": 16642 + }, + { + "epoch": 45.722527472527474, + "grad_norm": 15.277810096740723, + "learning_rate": 2.7138736263736265e-05, + "loss": 0.4852, + "step": 16643 + }, + { + "epoch": 45.72527472527472, + "grad_norm": 10.67697811126709, + "learning_rate": 2.713736263736264e-05, + "loss": 0.2842, + "step": 16644 + }, + { + "epoch": 45.72802197802198, + "grad_norm": 4.65373420715332, + "learning_rate": 2.713598901098901e-05, + "loss": 0.1028, + "step": 16645 + }, + { + "epoch": 45.73076923076923, + "grad_norm": 5.8148345947265625, + "learning_rate": 2.7134615384615385e-05, + "loss": 0.0597, + "step": 16646 + }, + { + "epoch": 45.73351648351648, + "grad_norm": 6.979625701904297, + "learning_rate": 2.713324175824176e-05, + "loss": 0.2188, + "step": 16647 + }, + { + "epoch": 45.73626373626374, + "grad_norm": 15.394408226013184, + "learning_rate": 2.713186813186813e-05, + "loss": 0.2818, + "step": 16648 + }, + { + "epoch": 45.73901098901099, + "grad_norm": 7.786677360534668, + "learning_rate": 2.713049450549451e-05, + "loss": 0.2985, + "step": 16649 + }, + { + "epoch": 45.74175824175824, + "grad_norm": 17.66900062561035, + "learning_rate": 2.712912087912088e-05, + "loss": 0.4058, + "step": 16650 + }, + { + "epoch": 45.744505494505496, + "grad_norm": 8.657093048095703, + "learning_rate": 2.7127747252747255e-05, + "loss": 0.1419, + "step": 16651 + }, + { + "epoch": 45.747252747252745, + "grad_norm": 7.191888809204102, + "learning_rate": 2.712637362637363e-05, + "loss": 0.1874, + "step": 16652 + }, + { + "epoch": 45.75, + "grad_norm": 8.297320365905762, + "learning_rate": 2.7125000000000002e-05, + "loss": 0.1347, + "step": 16653 + }, + { + "epoch": 45.752747252747255, + "grad_norm": 9.514182090759277, + "learning_rate": 2.7123626373626375e-05, + "loss": 0.2259, + "step": 16654 + }, + { + "epoch": 45.755494505494504, + "grad_norm": 14.892386436462402, + "learning_rate": 2.7122252747252745e-05, + "loss": 0.217, + "step": 16655 + }, + { + "epoch": 45.75824175824176, + "grad_norm": 17.387537002563477, + "learning_rate": 2.7120879120879122e-05, + "loss": 0.3872, + "step": 16656 + }, + { + "epoch": 45.76098901098901, + "grad_norm": 10.157768249511719, + "learning_rate": 2.71195054945055e-05, + "loss": 0.2285, + "step": 16657 + }, + { + "epoch": 45.76373626373626, + "grad_norm": 13.446407318115234, + "learning_rate": 2.711813186813187e-05, + "loss": 0.3207, + "step": 16658 + }, + { + "epoch": 45.76648351648352, + "grad_norm": 7.314265727996826, + "learning_rate": 2.7116758241758246e-05, + "loss": 0.175, + "step": 16659 + }, + { + "epoch": 45.76923076923077, + "grad_norm": 5.424487590789795, + "learning_rate": 2.7115384615384616e-05, + "loss": 0.0685, + "step": 16660 + }, + { + "epoch": 45.77197802197802, + "grad_norm": 20.29616355895996, + "learning_rate": 2.711401098901099e-05, + "loss": 0.3814, + "step": 16661 + }, + { + "epoch": 45.77472527472528, + "grad_norm": 15.628583908081055, + "learning_rate": 2.7112637362637366e-05, + "loss": 0.3998, + "step": 16662 + }, + { + "epoch": 45.777472527472526, + "grad_norm": 13.53589916229248, + "learning_rate": 2.7111263736263736e-05, + "loss": 0.2385, + "step": 16663 + }, + { + "epoch": 45.78021978021978, + "grad_norm": 14.339548110961914, + "learning_rate": 2.7109890109890113e-05, + "loss": 0.4376, + "step": 16664 + }, + { + "epoch": 45.782967032967036, + "grad_norm": 12.344467163085938, + "learning_rate": 2.7108516483516483e-05, + "loss": 0.295, + "step": 16665 + }, + { + "epoch": 45.785714285714285, + "grad_norm": 17.62237548828125, + "learning_rate": 2.710714285714286e-05, + "loss": 0.8317, + "step": 16666 + }, + { + "epoch": 45.78846153846154, + "grad_norm": 20.73914337158203, + "learning_rate": 2.7105769230769233e-05, + "loss": 0.3868, + "step": 16667 + }, + { + "epoch": 45.79120879120879, + "grad_norm": 15.143594741821289, + "learning_rate": 2.7104395604395607e-05, + "loss": 0.4172, + "step": 16668 + }, + { + "epoch": 45.793956043956044, + "grad_norm": 14.64088249206543, + "learning_rate": 2.710302197802198e-05, + "loss": 0.3713, + "step": 16669 + }, + { + "epoch": 45.7967032967033, + "grad_norm": 6.8478193283081055, + "learning_rate": 2.710164835164835e-05, + "loss": 0.1511, + "step": 16670 + }, + { + "epoch": 45.79945054945055, + "grad_norm": 20.1259822845459, + "learning_rate": 2.7100274725274727e-05, + "loss": 0.3289, + "step": 16671 + }, + { + "epoch": 45.8021978021978, + "grad_norm": 13.013279914855957, + "learning_rate": 2.7098901098901104e-05, + "loss": 0.2554, + "step": 16672 + }, + { + "epoch": 45.80494505494506, + "grad_norm": 8.267706871032715, + "learning_rate": 2.7097527472527474e-05, + "loss": 0.1074, + "step": 16673 + }, + { + "epoch": 45.80769230769231, + "grad_norm": 16.056522369384766, + "learning_rate": 2.709615384615385e-05, + "loss": 0.6702, + "step": 16674 + }, + { + "epoch": 45.81043956043956, + "grad_norm": 14.552391052246094, + "learning_rate": 2.709478021978022e-05, + "loss": 0.4454, + "step": 16675 + }, + { + "epoch": 45.81318681318681, + "grad_norm": 19.301015853881836, + "learning_rate": 2.7093406593406594e-05, + "loss": 0.5651, + "step": 16676 + }, + { + "epoch": 45.815934065934066, + "grad_norm": 12.851885795593262, + "learning_rate": 2.709203296703297e-05, + "loss": 0.336, + "step": 16677 + }, + { + "epoch": 45.81868131868132, + "grad_norm": 15.069330215454102, + "learning_rate": 2.709065934065934e-05, + "loss": 0.4102, + "step": 16678 + }, + { + "epoch": 45.82142857142857, + "grad_norm": 10.016376495361328, + "learning_rate": 2.7089285714285717e-05, + "loss": 0.3225, + "step": 16679 + }, + { + "epoch": 45.824175824175825, + "grad_norm": 22.19622039794922, + "learning_rate": 2.7087912087912087e-05, + "loss": 0.5646, + "step": 16680 + }, + { + "epoch": 45.82692307692308, + "grad_norm": 23.430830001831055, + "learning_rate": 2.7086538461538464e-05, + "loss": 0.668, + "step": 16681 + }, + { + "epoch": 45.82967032967033, + "grad_norm": 5.77452278137207, + "learning_rate": 2.7085164835164838e-05, + "loss": 0.1333, + "step": 16682 + }, + { + "epoch": 45.832417582417584, + "grad_norm": 20.417081832885742, + "learning_rate": 2.708379120879121e-05, + "loss": 0.4252, + "step": 16683 + }, + { + "epoch": 45.83516483516483, + "grad_norm": 15.287939071655273, + "learning_rate": 2.7082417582417584e-05, + "loss": 0.6265, + "step": 16684 + }, + { + "epoch": 45.83791208791209, + "grad_norm": 8.107386589050293, + "learning_rate": 2.7081043956043954e-05, + "loss": 0.2945, + "step": 16685 + }, + { + "epoch": 45.84065934065934, + "grad_norm": 10.863726615905762, + "learning_rate": 2.707967032967033e-05, + "loss": 0.1791, + "step": 16686 + }, + { + "epoch": 45.84340659340659, + "grad_norm": 5.182093143463135, + "learning_rate": 2.7078296703296708e-05, + "loss": 0.0938, + "step": 16687 + }, + { + "epoch": 45.84615384615385, + "grad_norm": 17.67214012145996, + "learning_rate": 2.7076923076923078e-05, + "loss": 0.453, + "step": 16688 + }, + { + "epoch": 45.8489010989011, + "grad_norm": 11.721064567565918, + "learning_rate": 2.7075549450549455e-05, + "loss": 0.3143, + "step": 16689 + }, + { + "epoch": 45.85164835164835, + "grad_norm": 23.343175888061523, + "learning_rate": 2.7074175824175825e-05, + "loss": 0.3527, + "step": 16690 + }, + { + "epoch": 45.854395604395606, + "grad_norm": 2.2275960445404053, + "learning_rate": 2.7072802197802198e-05, + "loss": 0.0338, + "step": 16691 + }, + { + "epoch": 45.857142857142854, + "grad_norm": 6.262772560119629, + "learning_rate": 2.7071428571428575e-05, + "loss": 0.0917, + "step": 16692 + }, + { + "epoch": 45.85989010989011, + "grad_norm": 10.424203872680664, + "learning_rate": 2.7070054945054945e-05, + "loss": 0.2182, + "step": 16693 + }, + { + "epoch": 45.862637362637365, + "grad_norm": 5.513480186462402, + "learning_rate": 2.7068681318681322e-05, + "loss": 0.0938, + "step": 16694 + }, + { + "epoch": 45.86538461538461, + "grad_norm": 10.353946685791016, + "learning_rate": 2.7067307692307692e-05, + "loss": 0.2394, + "step": 16695 + }, + { + "epoch": 45.86813186813187, + "grad_norm": 13.94019889831543, + "learning_rate": 2.706593406593407e-05, + "loss": 0.4819, + "step": 16696 + }, + { + "epoch": 45.870879120879124, + "grad_norm": 7.772610664367676, + "learning_rate": 2.7064560439560442e-05, + "loss": 0.1687, + "step": 16697 + }, + { + "epoch": 45.87362637362637, + "grad_norm": 7.4281768798828125, + "learning_rate": 2.7063186813186815e-05, + "loss": 0.1726, + "step": 16698 + }, + { + "epoch": 45.87637362637363, + "grad_norm": 8.550127029418945, + "learning_rate": 2.706181318681319e-05, + "loss": 0.1957, + "step": 16699 + }, + { + "epoch": 45.879120879120876, + "grad_norm": 5.788720607757568, + "learning_rate": 2.706043956043956e-05, + "loss": 0.1055, + "step": 16700 + }, + { + "epoch": 45.88186813186813, + "grad_norm": 9.396692276000977, + "learning_rate": 2.7059065934065936e-05, + "loss": 0.2578, + "step": 16701 + }, + { + "epoch": 45.88461538461539, + "grad_norm": 9.672204971313477, + "learning_rate": 2.7057692307692313e-05, + "loss": 0.341, + "step": 16702 + }, + { + "epoch": 45.887362637362635, + "grad_norm": 7.447322845458984, + "learning_rate": 2.7056318681318683e-05, + "loss": 0.1375, + "step": 16703 + }, + { + "epoch": 45.89010989010989, + "grad_norm": 6.961292743682861, + "learning_rate": 2.705494505494506e-05, + "loss": 0.0884, + "step": 16704 + }, + { + "epoch": 45.892857142857146, + "grad_norm": 9.668048858642578, + "learning_rate": 2.705357142857143e-05, + "loss": 0.2314, + "step": 16705 + }, + { + "epoch": 45.895604395604394, + "grad_norm": 9.178070068359375, + "learning_rate": 2.7052197802197803e-05, + "loss": 0.2548, + "step": 16706 + }, + { + "epoch": 45.89835164835165, + "grad_norm": 6.123378753662109, + "learning_rate": 2.705082417582418e-05, + "loss": 0.1319, + "step": 16707 + }, + { + "epoch": 45.9010989010989, + "grad_norm": 17.342191696166992, + "learning_rate": 2.704945054945055e-05, + "loss": 0.5063, + "step": 16708 + }, + { + "epoch": 45.90384615384615, + "grad_norm": 11.215499877929688, + "learning_rate": 2.7048076923076926e-05, + "loss": 0.2442, + "step": 16709 + }, + { + "epoch": 45.90659340659341, + "grad_norm": 6.97644567489624, + "learning_rate": 2.7046703296703296e-05, + "loss": 0.1378, + "step": 16710 + }, + { + "epoch": 45.90934065934066, + "grad_norm": 11.493948936462402, + "learning_rate": 2.7045329670329673e-05, + "loss": 0.1611, + "step": 16711 + }, + { + "epoch": 45.91208791208791, + "grad_norm": 20.386505126953125, + "learning_rate": 2.7043956043956047e-05, + "loss": 0.305, + "step": 16712 + }, + { + "epoch": 45.91483516483517, + "grad_norm": 15.433052062988281, + "learning_rate": 2.7042582417582417e-05, + "loss": 0.287, + "step": 16713 + }, + { + "epoch": 45.917582417582416, + "grad_norm": 11.825308799743652, + "learning_rate": 2.7041208791208793e-05, + "loss": 0.4243, + "step": 16714 + }, + { + "epoch": 45.92032967032967, + "grad_norm": 17.803688049316406, + "learning_rate": 2.7039835164835163e-05, + "loss": 0.4956, + "step": 16715 + }, + { + "epoch": 45.92307692307692, + "grad_norm": 5.226690769195557, + "learning_rate": 2.703846153846154e-05, + "loss": 0.1127, + "step": 16716 + }, + { + "epoch": 45.925824175824175, + "grad_norm": 8.125946044921875, + "learning_rate": 2.7037087912087917e-05, + "loss": 0.263, + "step": 16717 + }, + { + "epoch": 45.92857142857143, + "grad_norm": 4.298019886016846, + "learning_rate": 2.7035714285714287e-05, + "loss": 0.0833, + "step": 16718 + }, + { + "epoch": 45.93131868131868, + "grad_norm": 13.043207168579102, + "learning_rate": 2.7034340659340664e-05, + "loss": 0.2335, + "step": 16719 + }, + { + "epoch": 45.934065934065934, + "grad_norm": 11.602137565612793, + "learning_rate": 2.7032967032967034e-05, + "loss": 0.3534, + "step": 16720 + }, + { + "epoch": 45.93681318681319, + "grad_norm": 4.007894039154053, + "learning_rate": 2.7031593406593407e-05, + "loss": 0.0728, + "step": 16721 + }, + { + "epoch": 45.93956043956044, + "grad_norm": 21.834835052490234, + "learning_rate": 2.7030219780219777e-05, + "loss": 0.7703, + "step": 16722 + }, + { + "epoch": 45.94230769230769, + "grad_norm": 19.58079719543457, + "learning_rate": 2.7028846153846154e-05, + "loss": 0.4386, + "step": 16723 + }, + { + "epoch": 45.94505494505494, + "grad_norm": 4.317696571350098, + "learning_rate": 2.702747252747253e-05, + "loss": 0.1076, + "step": 16724 + }, + { + "epoch": 45.9478021978022, + "grad_norm": 14.345244407653809, + "learning_rate": 2.70260989010989e-05, + "loss": 0.2027, + "step": 16725 + }, + { + "epoch": 45.95054945054945, + "grad_norm": 5.949600696563721, + "learning_rate": 2.7024725274725278e-05, + "loss": 0.1518, + "step": 16726 + }, + { + "epoch": 45.9532967032967, + "grad_norm": 19.42307472229004, + "learning_rate": 2.7023351648351648e-05, + "loss": 0.501, + "step": 16727 + }, + { + "epoch": 45.956043956043956, + "grad_norm": 6.803420543670654, + "learning_rate": 2.702197802197802e-05, + "loss": 0.1326, + "step": 16728 + }, + { + "epoch": 45.95879120879121, + "grad_norm": 21.530275344848633, + "learning_rate": 2.7020604395604398e-05, + "loss": 0.7611, + "step": 16729 + }, + { + "epoch": 45.96153846153846, + "grad_norm": 12.687899589538574, + "learning_rate": 2.7019230769230768e-05, + "loss": 0.4326, + "step": 16730 + }, + { + "epoch": 45.964285714285715, + "grad_norm": 12.689726829528809, + "learning_rate": 2.7017857142857145e-05, + "loss": 0.2238, + "step": 16731 + }, + { + "epoch": 45.967032967032964, + "grad_norm": 8.799552917480469, + "learning_rate": 2.7016483516483515e-05, + "loss": 0.1154, + "step": 16732 + }, + { + "epoch": 45.96978021978022, + "grad_norm": 7.2816901206970215, + "learning_rate": 2.701510989010989e-05, + "loss": 0.1436, + "step": 16733 + }, + { + "epoch": 45.972527472527474, + "grad_norm": 12.806685447692871, + "learning_rate": 2.701373626373627e-05, + "loss": 0.3492, + "step": 16734 + }, + { + "epoch": 45.97527472527472, + "grad_norm": 4.654186725616455, + "learning_rate": 2.701236263736264e-05, + "loss": 0.051, + "step": 16735 + }, + { + "epoch": 45.97802197802198, + "grad_norm": 10.722845077514648, + "learning_rate": 2.7010989010989012e-05, + "loss": 0.2402, + "step": 16736 + }, + { + "epoch": 45.98076923076923, + "grad_norm": 9.19813060760498, + "learning_rate": 2.7009615384615382e-05, + "loss": 0.2463, + "step": 16737 + }, + { + "epoch": 45.98351648351648, + "grad_norm": 12.771120071411133, + "learning_rate": 2.700824175824176e-05, + "loss": 0.8669, + "step": 16738 + }, + { + "epoch": 45.98626373626374, + "grad_norm": 8.348508834838867, + "learning_rate": 2.7006868131868135e-05, + "loss": 0.2443, + "step": 16739 + }, + { + "epoch": 45.98901098901099, + "grad_norm": 9.639698028564453, + "learning_rate": 2.7005494505494505e-05, + "loss": 0.1732, + "step": 16740 + }, + { + "epoch": 45.99175824175824, + "grad_norm": 15.228493690490723, + "learning_rate": 2.7004120879120882e-05, + "loss": 0.6071, + "step": 16741 + }, + { + "epoch": 45.994505494505496, + "grad_norm": 12.03900146484375, + "learning_rate": 2.7002747252747252e-05, + "loss": 0.3564, + "step": 16742 + }, + { + "epoch": 45.997252747252745, + "grad_norm": 13.925264358520508, + "learning_rate": 2.7001373626373626e-05, + "loss": 0.5969, + "step": 16743 + }, + { + "epoch": 46.0, + "grad_norm": 66.99674224853516, + "learning_rate": 2.7000000000000002e-05, + "loss": 1.4987, + "step": 16744 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.6914600550964187, + "eval_f1": 0.6876525927730747, + "eval_f1_DuraRiadoRio_64x64": 0.7333333333333333, + "eval_f1_Mole_64x64": 0.8433734939759037, + "eval_f1_Quebrado_64x64": 0.7902097902097902, + "eval_f1_RiadoRio_64x64": 0.6064814814814815, + "eval_f1_RioFechado_64x64": 0.4648648648648649, + "eval_loss": 1.4055602550506592, + "eval_precision": 0.7937519991745344, + "eval_precision_DuraRiadoRio_64x64": 0.7051282051282052, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.795774647887324, + "eval_precision_RiadoRio_64x64": 0.46785714285714286, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.6884873568898773, + "eval_recall_DuraRiadoRio_64x64": 0.7638888888888888, + "eval_recall_Mole_64x64": 0.7291666666666666, + "eval_recall_Quebrado_64x64": 0.7847222222222222, + "eval_recall_RiadoRio_64x64": 0.8618421052631579, + "eval_recall_RioFechado_64x64": 0.3028169014084507, + "eval_runtime": 1.696, + "eval_samples_per_second": 428.076, + "eval_steps_per_second": 27.123, + "step": 16744 + }, + { + "epoch": 46.002747252747255, + "grad_norm": 7.6277947425842285, + "learning_rate": 2.6998626373626372e-05, + "loss": 0.1696, + "step": 16745 + }, + { + "epoch": 46.005494505494504, + "grad_norm": 10.100150108337402, + "learning_rate": 2.699725274725275e-05, + "loss": 0.3059, + "step": 16746 + }, + { + "epoch": 46.00824175824176, + "grad_norm": 9.247543334960938, + "learning_rate": 2.699587912087912e-05, + "loss": 0.2916, + "step": 16747 + }, + { + "epoch": 46.010989010989015, + "grad_norm": 12.053858757019043, + "learning_rate": 2.6994505494505496e-05, + "loss": 0.2375, + "step": 16748 + }, + { + "epoch": 46.01373626373626, + "grad_norm": 8.63136100769043, + "learning_rate": 2.6993131868131873e-05, + "loss": 0.1675, + "step": 16749 + }, + { + "epoch": 46.01648351648352, + "grad_norm": 9.719589233398438, + "learning_rate": 2.6991758241758243e-05, + "loss": 0.3553, + "step": 16750 + }, + { + "epoch": 46.01923076923077, + "grad_norm": 11.347174644470215, + "learning_rate": 2.6990384615384616e-05, + "loss": 0.3067, + "step": 16751 + }, + { + "epoch": 46.02197802197802, + "grad_norm": 9.77111530303955, + "learning_rate": 2.6989010989010986e-05, + "loss": 0.3029, + "step": 16752 + }, + { + "epoch": 46.02472527472528, + "grad_norm": 14.806114196777344, + "learning_rate": 2.6987637362637363e-05, + "loss": 0.1738, + "step": 16753 + }, + { + "epoch": 46.027472527472526, + "grad_norm": 7.537672519683838, + "learning_rate": 2.698626373626374e-05, + "loss": 0.1121, + "step": 16754 + }, + { + "epoch": 46.03021978021978, + "grad_norm": 7.127208232879639, + "learning_rate": 2.698489010989011e-05, + "loss": 0.1355, + "step": 16755 + }, + { + "epoch": 46.032967032967036, + "grad_norm": 1.6903272867202759, + "learning_rate": 2.6983516483516487e-05, + "loss": 0.0333, + "step": 16756 + }, + { + "epoch": 46.035714285714285, + "grad_norm": 10.743465423583984, + "learning_rate": 2.6982142857142857e-05, + "loss": 0.2645, + "step": 16757 + }, + { + "epoch": 46.03846153846154, + "grad_norm": 8.7451171875, + "learning_rate": 2.698076923076923e-05, + "loss": 0.2754, + "step": 16758 + }, + { + "epoch": 46.04120879120879, + "grad_norm": 6.057008266448975, + "learning_rate": 2.6979395604395607e-05, + "loss": 0.0992, + "step": 16759 + }, + { + "epoch": 46.043956043956044, + "grad_norm": 12.706199645996094, + "learning_rate": 2.6978021978021977e-05, + "loss": 0.3224, + "step": 16760 + }, + { + "epoch": 46.0467032967033, + "grad_norm": 10.352566719055176, + "learning_rate": 2.6976648351648354e-05, + "loss": 0.183, + "step": 16761 + }, + { + "epoch": 46.04945054945055, + "grad_norm": 15.428298950195312, + "learning_rate": 2.6975274725274724e-05, + "loss": 0.4605, + "step": 16762 + }, + { + "epoch": 46.0521978021978, + "grad_norm": 9.001070976257324, + "learning_rate": 2.69739010989011e-05, + "loss": 0.1749, + "step": 16763 + }, + { + "epoch": 46.05494505494506, + "grad_norm": 18.57709503173828, + "learning_rate": 2.6972527472527477e-05, + "loss": 0.3003, + "step": 16764 + }, + { + "epoch": 46.05769230769231, + "grad_norm": 6.219459533691406, + "learning_rate": 2.6971153846153847e-05, + "loss": 0.1089, + "step": 16765 + }, + { + "epoch": 46.06043956043956, + "grad_norm": 13.434171676635742, + "learning_rate": 2.696978021978022e-05, + "loss": 0.3874, + "step": 16766 + }, + { + "epoch": 46.06318681318681, + "grad_norm": 7.253554821014404, + "learning_rate": 2.696840659340659e-05, + "loss": 0.0917, + "step": 16767 + }, + { + "epoch": 46.065934065934066, + "grad_norm": 10.393275260925293, + "learning_rate": 2.6967032967032968e-05, + "loss": 0.1829, + "step": 16768 + }, + { + "epoch": 46.06868131868132, + "grad_norm": 8.200196266174316, + "learning_rate": 2.6965659340659344e-05, + "loss": 0.1368, + "step": 16769 + }, + { + "epoch": 46.07142857142857, + "grad_norm": 10.36529541015625, + "learning_rate": 2.6964285714285714e-05, + "loss": 0.1922, + "step": 16770 + }, + { + "epoch": 46.074175824175825, + "grad_norm": 20.716936111450195, + "learning_rate": 2.696291208791209e-05, + "loss": 0.5779, + "step": 16771 + }, + { + "epoch": 46.07692307692308, + "grad_norm": 8.973428726196289, + "learning_rate": 2.696153846153846e-05, + "loss": 0.1627, + "step": 16772 + }, + { + "epoch": 46.07967032967033, + "grad_norm": 14.065114974975586, + "learning_rate": 2.6960164835164835e-05, + "loss": 0.5499, + "step": 16773 + }, + { + "epoch": 46.082417582417584, + "grad_norm": 10.127226829528809, + "learning_rate": 2.695879120879121e-05, + "loss": 0.4659, + "step": 16774 + }, + { + "epoch": 46.08516483516483, + "grad_norm": 11.417067527770996, + "learning_rate": 2.695741758241758e-05, + "loss": 0.3752, + "step": 16775 + }, + { + "epoch": 46.08791208791209, + "grad_norm": 4.560007095336914, + "learning_rate": 2.6956043956043958e-05, + "loss": 0.0684, + "step": 16776 + }, + { + "epoch": 46.09065934065934, + "grad_norm": 8.327869415283203, + "learning_rate": 2.6954670329670328e-05, + "loss": 0.138, + "step": 16777 + }, + { + "epoch": 46.09340659340659, + "grad_norm": 19.93785858154297, + "learning_rate": 2.6953296703296705e-05, + "loss": 0.4762, + "step": 16778 + }, + { + "epoch": 46.09615384615385, + "grad_norm": 9.340025901794434, + "learning_rate": 2.6951923076923082e-05, + "loss": 0.1676, + "step": 16779 + }, + { + "epoch": 46.0989010989011, + "grad_norm": 7.88911247253418, + "learning_rate": 2.6950549450549452e-05, + "loss": 0.2474, + "step": 16780 + }, + { + "epoch": 46.10164835164835, + "grad_norm": 14.924449920654297, + "learning_rate": 2.6949175824175825e-05, + "loss": 0.322, + "step": 16781 + }, + { + "epoch": 46.104395604395606, + "grad_norm": 10.953150749206543, + "learning_rate": 2.6947802197802195e-05, + "loss": 0.266, + "step": 16782 + }, + { + "epoch": 46.107142857142854, + "grad_norm": 8.393452644348145, + "learning_rate": 2.6946428571428572e-05, + "loss": 0.1697, + "step": 16783 + }, + { + "epoch": 46.10989010989011, + "grad_norm": 9.317512512207031, + "learning_rate": 2.694505494505495e-05, + "loss": 0.1208, + "step": 16784 + }, + { + "epoch": 46.112637362637365, + "grad_norm": 7.128317832946777, + "learning_rate": 2.694368131868132e-05, + "loss": 0.1047, + "step": 16785 + }, + { + "epoch": 46.11538461538461, + "grad_norm": 14.024206161499023, + "learning_rate": 2.6942307692307696e-05, + "loss": 0.2324, + "step": 16786 + }, + { + "epoch": 46.11813186813187, + "grad_norm": 15.603557586669922, + "learning_rate": 2.6940934065934066e-05, + "loss": 0.4778, + "step": 16787 + }, + { + "epoch": 46.120879120879124, + "grad_norm": 4.659139156341553, + "learning_rate": 2.693956043956044e-05, + "loss": 0.0634, + "step": 16788 + }, + { + "epoch": 46.12362637362637, + "grad_norm": 12.802501678466797, + "learning_rate": 2.6938186813186816e-05, + "loss": 0.1969, + "step": 16789 + }, + { + "epoch": 46.12637362637363, + "grad_norm": 5.934314250946045, + "learning_rate": 2.6936813186813186e-05, + "loss": 0.0771, + "step": 16790 + }, + { + "epoch": 46.129120879120876, + "grad_norm": 12.805814743041992, + "learning_rate": 2.6935439560439563e-05, + "loss": 0.3511, + "step": 16791 + }, + { + "epoch": 46.13186813186813, + "grad_norm": 3.685894727706909, + "learning_rate": 2.6934065934065933e-05, + "loss": 0.0655, + "step": 16792 + }, + { + "epoch": 46.13461538461539, + "grad_norm": 10.330484390258789, + "learning_rate": 2.693269230769231e-05, + "loss": 0.2001, + "step": 16793 + }, + { + "epoch": 46.137362637362635, + "grad_norm": 9.084391593933105, + "learning_rate": 2.6931318681318686e-05, + "loss": 0.3315, + "step": 16794 + }, + { + "epoch": 46.14010989010989, + "grad_norm": 12.661054611206055, + "learning_rate": 2.6929945054945056e-05, + "loss": 0.2431, + "step": 16795 + }, + { + "epoch": 46.142857142857146, + "grad_norm": 12.147514343261719, + "learning_rate": 2.692857142857143e-05, + "loss": 0.235, + "step": 16796 + }, + { + "epoch": 46.145604395604394, + "grad_norm": 11.870528221130371, + "learning_rate": 2.69271978021978e-05, + "loss": 0.3609, + "step": 16797 + }, + { + "epoch": 46.14835164835165, + "grad_norm": 8.089487075805664, + "learning_rate": 2.6925824175824177e-05, + "loss": 0.1505, + "step": 16798 + }, + { + "epoch": 46.1510989010989, + "grad_norm": 5.355933666229248, + "learning_rate": 2.6924450549450553e-05, + "loss": 0.0484, + "step": 16799 + }, + { + "epoch": 46.15384615384615, + "grad_norm": 11.018021583557129, + "learning_rate": 2.6923076923076923e-05, + "loss": 0.1747, + "step": 16800 + }, + { + "epoch": 46.15659340659341, + "grad_norm": 17.038225173950195, + "learning_rate": 2.69217032967033e-05, + "loss": 0.4347, + "step": 16801 + }, + { + "epoch": 46.15934065934066, + "grad_norm": 8.41877555847168, + "learning_rate": 2.692032967032967e-05, + "loss": 0.1725, + "step": 16802 + }, + { + "epoch": 46.16208791208791, + "grad_norm": 13.961080551147461, + "learning_rate": 2.6918956043956044e-05, + "loss": 0.3218, + "step": 16803 + }, + { + "epoch": 46.16483516483517, + "grad_norm": 12.465716361999512, + "learning_rate": 2.691758241758242e-05, + "loss": 0.1676, + "step": 16804 + }, + { + "epoch": 46.167582417582416, + "grad_norm": 13.063015937805176, + "learning_rate": 2.691620879120879e-05, + "loss": 0.2184, + "step": 16805 + }, + { + "epoch": 46.17032967032967, + "grad_norm": 4.583030700683594, + "learning_rate": 2.6914835164835167e-05, + "loss": 0.1219, + "step": 16806 + }, + { + "epoch": 46.17307692307692, + "grad_norm": 7.35659646987915, + "learning_rate": 2.6913461538461537e-05, + "loss": 0.0829, + "step": 16807 + }, + { + "epoch": 46.175824175824175, + "grad_norm": 4.0404534339904785, + "learning_rate": 2.6912087912087914e-05, + "loss": 0.1282, + "step": 16808 + }, + { + "epoch": 46.17857142857143, + "grad_norm": 12.933829307556152, + "learning_rate": 2.691071428571429e-05, + "loss": 0.3515, + "step": 16809 + }, + { + "epoch": 46.18131868131868, + "grad_norm": 18.46422576904297, + "learning_rate": 2.690934065934066e-05, + "loss": 0.3167, + "step": 16810 + }, + { + "epoch": 46.184065934065934, + "grad_norm": 9.557899475097656, + "learning_rate": 2.6907967032967034e-05, + "loss": 0.3323, + "step": 16811 + }, + { + "epoch": 46.18681318681319, + "grad_norm": 11.480186462402344, + "learning_rate": 2.6906593406593404e-05, + "loss": 0.3968, + "step": 16812 + }, + { + "epoch": 46.18956043956044, + "grad_norm": 18.801250457763672, + "learning_rate": 2.690521978021978e-05, + "loss": 0.5434, + "step": 16813 + }, + { + "epoch": 46.19230769230769, + "grad_norm": 6.646800518035889, + "learning_rate": 2.6903846153846158e-05, + "loss": 0.0959, + "step": 16814 + }, + { + "epoch": 46.19505494505494, + "grad_norm": 11.14172649383545, + "learning_rate": 2.6902472527472528e-05, + "loss": 0.1405, + "step": 16815 + }, + { + "epoch": 46.1978021978022, + "grad_norm": 17.208276748657227, + "learning_rate": 2.6901098901098905e-05, + "loss": 0.3097, + "step": 16816 + }, + { + "epoch": 46.20054945054945, + "grad_norm": 9.954442024230957, + "learning_rate": 2.6899725274725275e-05, + "loss": 0.1979, + "step": 16817 + }, + { + "epoch": 46.2032967032967, + "grad_norm": 17.303874969482422, + "learning_rate": 2.6898351648351648e-05, + "loss": 0.3584, + "step": 16818 + }, + { + "epoch": 46.206043956043956, + "grad_norm": 13.183987617492676, + "learning_rate": 2.6896978021978025e-05, + "loss": 0.2156, + "step": 16819 + }, + { + "epoch": 46.20879120879121, + "grad_norm": 8.848766326904297, + "learning_rate": 2.6895604395604395e-05, + "loss": 0.1853, + "step": 16820 + }, + { + "epoch": 46.21153846153846, + "grad_norm": 13.113615036010742, + "learning_rate": 2.689423076923077e-05, + "loss": 0.2294, + "step": 16821 + }, + { + "epoch": 46.214285714285715, + "grad_norm": 15.173210144042969, + "learning_rate": 2.689285714285714e-05, + "loss": 0.2849, + "step": 16822 + }, + { + "epoch": 46.217032967032964, + "grad_norm": 2.899785280227661, + "learning_rate": 2.689148351648352e-05, + "loss": 0.0832, + "step": 16823 + }, + { + "epoch": 46.21978021978022, + "grad_norm": 8.874139785766602, + "learning_rate": 2.6890109890109895e-05, + "loss": 0.112, + "step": 16824 + }, + { + "epoch": 46.222527472527474, + "grad_norm": 7.981083393096924, + "learning_rate": 2.6888736263736265e-05, + "loss": 0.1817, + "step": 16825 + }, + { + "epoch": 46.22527472527472, + "grad_norm": 17.5482120513916, + "learning_rate": 2.688736263736264e-05, + "loss": 0.6753, + "step": 16826 + }, + { + "epoch": 46.22802197802198, + "grad_norm": 10.059844017028809, + "learning_rate": 2.688598901098901e-05, + "loss": 0.239, + "step": 16827 + }, + { + "epoch": 46.23076923076923, + "grad_norm": 12.526973724365234, + "learning_rate": 2.6884615384615385e-05, + "loss": 0.1829, + "step": 16828 + }, + { + "epoch": 46.23351648351648, + "grad_norm": 18.313312530517578, + "learning_rate": 2.6883241758241762e-05, + "loss": 0.4532, + "step": 16829 + }, + { + "epoch": 46.23626373626374, + "grad_norm": 10.620637893676758, + "learning_rate": 2.6881868131868132e-05, + "loss": 0.2496, + "step": 16830 + }, + { + "epoch": 46.239010989010985, + "grad_norm": 17.466571807861328, + "learning_rate": 2.688049450549451e-05, + "loss": 0.3618, + "step": 16831 + }, + { + "epoch": 46.24175824175824, + "grad_norm": 18.72297477722168, + "learning_rate": 2.687912087912088e-05, + "loss": 0.449, + "step": 16832 + }, + { + "epoch": 46.244505494505496, + "grad_norm": 16.489761352539062, + "learning_rate": 2.6877747252747253e-05, + "loss": 0.6232, + "step": 16833 + }, + { + "epoch": 46.247252747252745, + "grad_norm": 16.67675018310547, + "learning_rate": 2.687637362637363e-05, + "loss": 0.4293, + "step": 16834 + }, + { + "epoch": 46.25, + "grad_norm": 19.426923751831055, + "learning_rate": 2.6875e-05, + "loss": 0.3717, + "step": 16835 + }, + { + "epoch": 46.252747252747255, + "grad_norm": 3.394312620162964, + "learning_rate": 2.6873626373626376e-05, + "loss": 0.0649, + "step": 16836 + }, + { + "epoch": 46.255494505494504, + "grad_norm": 13.018110275268555, + "learning_rate": 2.6872252747252746e-05, + "loss": 0.3169, + "step": 16837 + }, + { + "epoch": 46.25824175824176, + "grad_norm": 7.688633918762207, + "learning_rate": 2.6870879120879123e-05, + "loss": 0.1839, + "step": 16838 + }, + { + "epoch": 46.260989010989015, + "grad_norm": 3.7045044898986816, + "learning_rate": 2.6869505494505496e-05, + "loss": 0.0552, + "step": 16839 + }, + { + "epoch": 46.26373626373626, + "grad_norm": 8.384742736816406, + "learning_rate": 2.686813186813187e-05, + "loss": 0.2722, + "step": 16840 + }, + { + "epoch": 46.26648351648352, + "grad_norm": 3.2324202060699463, + "learning_rate": 2.6866758241758243e-05, + "loss": 0.0649, + "step": 16841 + }, + { + "epoch": 46.26923076923077, + "grad_norm": 7.819655895233154, + "learning_rate": 2.6865384615384613e-05, + "loss": 0.0974, + "step": 16842 + }, + { + "epoch": 46.27197802197802, + "grad_norm": 17.34023666381836, + "learning_rate": 2.686401098901099e-05, + "loss": 0.5799, + "step": 16843 + }, + { + "epoch": 46.27472527472528, + "grad_norm": 12.77344799041748, + "learning_rate": 2.6862637362637367e-05, + "loss": 0.3294, + "step": 16844 + }, + { + "epoch": 46.277472527472526, + "grad_norm": 23.23251724243164, + "learning_rate": 2.6861263736263737e-05, + "loss": 0.3819, + "step": 16845 + }, + { + "epoch": 46.28021978021978, + "grad_norm": 9.520791053771973, + "learning_rate": 2.6859890109890114e-05, + "loss": 0.3495, + "step": 16846 + }, + { + "epoch": 46.282967032967036, + "grad_norm": 8.551628112792969, + "learning_rate": 2.6858516483516484e-05, + "loss": 0.1424, + "step": 16847 + }, + { + "epoch": 46.285714285714285, + "grad_norm": 15.389778137207031, + "learning_rate": 2.6857142857142857e-05, + "loss": 0.3683, + "step": 16848 + }, + { + "epoch": 46.28846153846154, + "grad_norm": 16.32468032836914, + "learning_rate": 2.6855769230769234e-05, + "loss": 0.4569, + "step": 16849 + }, + { + "epoch": 46.29120879120879, + "grad_norm": 5.602548599243164, + "learning_rate": 2.6854395604395604e-05, + "loss": 0.1692, + "step": 16850 + }, + { + "epoch": 46.293956043956044, + "grad_norm": 18.34750747680664, + "learning_rate": 2.685302197802198e-05, + "loss": 0.302, + "step": 16851 + }, + { + "epoch": 46.2967032967033, + "grad_norm": 12.233973503112793, + "learning_rate": 2.685164835164835e-05, + "loss": 0.2316, + "step": 16852 + }, + { + "epoch": 46.29945054945055, + "grad_norm": 19.30760383605957, + "learning_rate": 2.6850274725274727e-05, + "loss": 0.725, + "step": 16853 + }, + { + "epoch": 46.3021978021978, + "grad_norm": 11.379138946533203, + "learning_rate": 2.68489010989011e-05, + "loss": 0.211, + "step": 16854 + }, + { + "epoch": 46.30494505494506, + "grad_norm": 19.372535705566406, + "learning_rate": 2.6847527472527474e-05, + "loss": 0.3395, + "step": 16855 + }, + { + "epoch": 46.30769230769231, + "grad_norm": 20.343605041503906, + "learning_rate": 2.6846153846153848e-05, + "loss": 0.4496, + "step": 16856 + }, + { + "epoch": 46.31043956043956, + "grad_norm": 7.722204208374023, + "learning_rate": 2.6844780219780218e-05, + "loss": 0.1779, + "step": 16857 + }, + { + "epoch": 46.31318681318681, + "grad_norm": 6.9506001472473145, + "learning_rate": 2.6843406593406594e-05, + "loss": 0.1527, + "step": 16858 + }, + { + "epoch": 46.315934065934066, + "grad_norm": 16.128820419311523, + "learning_rate": 2.684203296703297e-05, + "loss": 0.2524, + "step": 16859 + }, + { + "epoch": 46.31868131868132, + "grad_norm": 19.027070999145508, + "learning_rate": 2.684065934065934e-05, + "loss": 0.4027, + "step": 16860 + }, + { + "epoch": 46.32142857142857, + "grad_norm": 20.57721519470215, + "learning_rate": 2.6839285714285718e-05, + "loss": 0.5498, + "step": 16861 + }, + { + "epoch": 46.324175824175825, + "grad_norm": 5.382396697998047, + "learning_rate": 2.6837912087912088e-05, + "loss": 0.1282, + "step": 16862 + }, + { + "epoch": 46.32692307692308, + "grad_norm": 8.276043891906738, + "learning_rate": 2.683653846153846e-05, + "loss": 0.2464, + "step": 16863 + }, + { + "epoch": 46.32967032967033, + "grad_norm": 6.38380241394043, + "learning_rate": 2.6835164835164838e-05, + "loss": 0.1313, + "step": 16864 + }, + { + "epoch": 46.332417582417584, + "grad_norm": 18.9993953704834, + "learning_rate": 2.683379120879121e-05, + "loss": 0.5401, + "step": 16865 + }, + { + "epoch": 46.33516483516483, + "grad_norm": 20.034257888793945, + "learning_rate": 2.6832417582417585e-05, + "loss": 0.3772, + "step": 16866 + }, + { + "epoch": 46.33791208791209, + "grad_norm": 18.291122436523438, + "learning_rate": 2.6831043956043955e-05, + "loss": 0.4861, + "step": 16867 + }, + { + "epoch": 46.34065934065934, + "grad_norm": 11.260671615600586, + "learning_rate": 2.6829670329670332e-05, + "loss": 0.1876, + "step": 16868 + }, + { + "epoch": 46.34340659340659, + "grad_norm": 9.965863227844238, + "learning_rate": 2.6828296703296705e-05, + "loss": 0.3449, + "step": 16869 + }, + { + "epoch": 46.34615384615385, + "grad_norm": 9.594759941101074, + "learning_rate": 2.682692307692308e-05, + "loss": 0.1583, + "step": 16870 + }, + { + "epoch": 46.3489010989011, + "grad_norm": 22.89779281616211, + "learning_rate": 2.6825549450549452e-05, + "loss": 0.5136, + "step": 16871 + }, + { + "epoch": 46.35164835164835, + "grad_norm": 11.887510299682617, + "learning_rate": 2.6824175824175822e-05, + "loss": 0.2367, + "step": 16872 + }, + { + "epoch": 46.354395604395606, + "grad_norm": 15.267447471618652, + "learning_rate": 2.68228021978022e-05, + "loss": 0.2749, + "step": 16873 + }, + { + "epoch": 46.357142857142854, + "grad_norm": 7.6714091300964355, + "learning_rate": 2.6821428571428576e-05, + "loss": 0.122, + "step": 16874 + }, + { + "epoch": 46.35989010989011, + "grad_norm": 23.484949111938477, + "learning_rate": 2.6820054945054946e-05, + "loss": 0.6804, + "step": 16875 + }, + { + "epoch": 46.362637362637365, + "grad_norm": 13.745709419250488, + "learning_rate": 2.6818681318681323e-05, + "loss": 0.1659, + "step": 16876 + }, + { + "epoch": 46.36538461538461, + "grad_norm": 9.153287887573242, + "learning_rate": 2.6817307692307693e-05, + "loss": 0.1314, + "step": 16877 + }, + { + "epoch": 46.36813186813187, + "grad_norm": 12.018424034118652, + "learning_rate": 2.6815934065934066e-05, + "loss": 0.2304, + "step": 16878 + }, + { + "epoch": 46.370879120879124, + "grad_norm": 10.821022033691406, + "learning_rate": 2.6814560439560443e-05, + "loss": 0.1741, + "step": 16879 + }, + { + "epoch": 46.37362637362637, + "grad_norm": 11.647834777832031, + "learning_rate": 2.6813186813186813e-05, + "loss": 0.2779, + "step": 16880 + }, + { + "epoch": 46.37637362637363, + "grad_norm": 8.5911226272583, + "learning_rate": 2.681181318681319e-05, + "loss": 0.1671, + "step": 16881 + }, + { + "epoch": 46.379120879120876, + "grad_norm": 18.005783081054688, + "learning_rate": 2.681043956043956e-05, + "loss": 0.5214, + "step": 16882 + }, + { + "epoch": 46.38186813186813, + "grad_norm": 6.365168571472168, + "learning_rate": 2.6809065934065936e-05, + "loss": 0.1198, + "step": 16883 + }, + { + "epoch": 46.38461538461539, + "grad_norm": 7.833474636077881, + "learning_rate": 2.680769230769231e-05, + "loss": 0.1281, + "step": 16884 + }, + { + "epoch": 46.387362637362635, + "grad_norm": 7.9203386306762695, + "learning_rate": 2.6806318681318683e-05, + "loss": 0.1922, + "step": 16885 + }, + { + "epoch": 46.39010989010989, + "grad_norm": 10.147530555725098, + "learning_rate": 2.6804945054945057e-05, + "loss": 0.3719, + "step": 16886 + }, + { + "epoch": 46.392857142857146, + "grad_norm": 16.810009002685547, + "learning_rate": 2.6803571428571427e-05, + "loss": 0.2961, + "step": 16887 + }, + { + "epoch": 46.395604395604394, + "grad_norm": 15.74610710144043, + "learning_rate": 2.6802197802197803e-05, + "loss": 0.2367, + "step": 16888 + }, + { + "epoch": 46.39835164835165, + "grad_norm": 19.339893341064453, + "learning_rate": 2.680082417582418e-05, + "loss": 0.4293, + "step": 16889 + }, + { + "epoch": 46.4010989010989, + "grad_norm": 7.503775119781494, + "learning_rate": 2.679945054945055e-05, + "loss": 0.0843, + "step": 16890 + }, + { + "epoch": 46.40384615384615, + "grad_norm": 3.202312469482422, + "learning_rate": 2.6798076923076927e-05, + "loss": 0.0483, + "step": 16891 + }, + { + "epoch": 46.40659340659341, + "grad_norm": 13.013725280761719, + "learning_rate": 2.6796703296703297e-05, + "loss": 0.3027, + "step": 16892 + }, + { + "epoch": 46.40934065934066, + "grad_norm": 14.623112678527832, + "learning_rate": 2.679532967032967e-05, + "loss": 0.4184, + "step": 16893 + }, + { + "epoch": 46.41208791208791, + "grad_norm": 8.809220314025879, + "learning_rate": 2.6793956043956047e-05, + "loss": 0.1758, + "step": 16894 + }, + { + "epoch": 46.41483516483517, + "grad_norm": 21.99510383605957, + "learning_rate": 2.6792582417582417e-05, + "loss": 0.6609, + "step": 16895 + }, + { + "epoch": 46.417582417582416, + "grad_norm": 18.01515769958496, + "learning_rate": 2.6791208791208794e-05, + "loss": 0.3634, + "step": 16896 + }, + { + "epoch": 46.42032967032967, + "grad_norm": 10.44369125366211, + "learning_rate": 2.6789835164835164e-05, + "loss": 0.1102, + "step": 16897 + }, + { + "epoch": 46.42307692307692, + "grad_norm": 20.28680992126465, + "learning_rate": 2.678846153846154e-05, + "loss": 0.3191, + "step": 16898 + }, + { + "epoch": 46.425824175824175, + "grad_norm": 9.28948974609375, + "learning_rate": 2.6787087912087914e-05, + "loss": 0.168, + "step": 16899 + }, + { + "epoch": 46.42857142857143, + "grad_norm": 8.745770454406738, + "learning_rate": 2.6785714285714288e-05, + "loss": 0.1667, + "step": 16900 + }, + { + "epoch": 46.43131868131868, + "grad_norm": 11.591073989868164, + "learning_rate": 2.678434065934066e-05, + "loss": 0.2638, + "step": 16901 + }, + { + "epoch": 46.434065934065934, + "grad_norm": 12.72492790222168, + "learning_rate": 2.678296703296703e-05, + "loss": 0.2943, + "step": 16902 + }, + { + "epoch": 46.43681318681319, + "grad_norm": 10.684366226196289, + "learning_rate": 2.6781593406593408e-05, + "loss": 0.1577, + "step": 16903 + }, + { + "epoch": 46.43956043956044, + "grad_norm": 13.603301048278809, + "learning_rate": 2.6780219780219785e-05, + "loss": 0.2426, + "step": 16904 + }, + { + "epoch": 46.44230769230769, + "grad_norm": 14.33255386352539, + "learning_rate": 2.6778846153846155e-05, + "loss": 0.2653, + "step": 16905 + }, + { + "epoch": 46.44505494505494, + "grad_norm": 12.996025085449219, + "learning_rate": 2.677747252747253e-05, + "loss": 0.2581, + "step": 16906 + }, + { + "epoch": 46.4478021978022, + "grad_norm": 16.32575225830078, + "learning_rate": 2.67760989010989e-05, + "loss": 0.4451, + "step": 16907 + }, + { + "epoch": 46.45054945054945, + "grad_norm": 20.325260162353516, + "learning_rate": 2.6774725274725275e-05, + "loss": 0.5445, + "step": 16908 + }, + { + "epoch": 46.4532967032967, + "grad_norm": 14.862030982971191, + "learning_rate": 2.6773351648351652e-05, + "loss": 0.5726, + "step": 16909 + }, + { + "epoch": 46.456043956043956, + "grad_norm": 5.424529552459717, + "learning_rate": 2.6771978021978022e-05, + "loss": 0.1506, + "step": 16910 + }, + { + "epoch": 46.45879120879121, + "grad_norm": 8.288224220275879, + "learning_rate": 2.67706043956044e-05, + "loss": 0.2538, + "step": 16911 + }, + { + "epoch": 46.46153846153846, + "grad_norm": 10.441864967346191, + "learning_rate": 2.676923076923077e-05, + "loss": 0.1769, + "step": 16912 + }, + { + "epoch": 46.464285714285715, + "grad_norm": 14.712727546691895, + "learning_rate": 2.6767857142857145e-05, + "loss": 0.4095, + "step": 16913 + }, + { + "epoch": 46.467032967032964, + "grad_norm": 7.274421215057373, + "learning_rate": 2.676648351648352e-05, + "loss": 0.089, + "step": 16914 + }, + { + "epoch": 46.46978021978022, + "grad_norm": 16.548542022705078, + "learning_rate": 2.6765109890109892e-05, + "loss": 0.2385, + "step": 16915 + }, + { + "epoch": 46.472527472527474, + "grad_norm": 8.189208030700684, + "learning_rate": 2.6763736263736266e-05, + "loss": 0.1161, + "step": 16916 + }, + { + "epoch": 46.47527472527472, + "grad_norm": 14.51323413848877, + "learning_rate": 2.6762362637362636e-05, + "loss": 0.3216, + "step": 16917 + }, + { + "epoch": 46.47802197802198, + "grad_norm": 3.7715847492218018, + "learning_rate": 2.6760989010989012e-05, + "loss": 0.125, + "step": 16918 + }, + { + "epoch": 46.48076923076923, + "grad_norm": 16.446916580200195, + "learning_rate": 2.675961538461539e-05, + "loss": 0.4046, + "step": 16919 + }, + { + "epoch": 46.48351648351648, + "grad_norm": 11.052696228027344, + "learning_rate": 2.675824175824176e-05, + "loss": 0.3947, + "step": 16920 + }, + { + "epoch": 46.48626373626374, + "grad_norm": 12.973705291748047, + "learning_rate": 2.6756868131868136e-05, + "loss": 0.1955, + "step": 16921 + }, + { + "epoch": 46.489010989010985, + "grad_norm": 9.706465721130371, + "learning_rate": 2.6755494505494506e-05, + "loss": 0.2463, + "step": 16922 + }, + { + "epoch": 46.49175824175824, + "grad_norm": 8.946295738220215, + "learning_rate": 2.675412087912088e-05, + "loss": 0.2642, + "step": 16923 + }, + { + "epoch": 46.494505494505496, + "grad_norm": 3.2086217403411865, + "learning_rate": 2.6752747252747256e-05, + "loss": 0.0545, + "step": 16924 + }, + { + "epoch": 46.497252747252745, + "grad_norm": 13.884536743164062, + "learning_rate": 2.6751373626373626e-05, + "loss": 0.1874, + "step": 16925 + }, + { + "epoch": 46.5, + "grad_norm": 9.895081520080566, + "learning_rate": 2.6750000000000003e-05, + "loss": 0.1359, + "step": 16926 + }, + { + "epoch": 46.502747252747255, + "grad_norm": 10.55978012084961, + "learning_rate": 2.6748626373626373e-05, + "loss": 0.2534, + "step": 16927 + }, + { + "epoch": 46.505494505494504, + "grad_norm": 13.109460830688477, + "learning_rate": 2.674725274725275e-05, + "loss": 0.232, + "step": 16928 + }, + { + "epoch": 46.50824175824176, + "grad_norm": 11.962017059326172, + "learning_rate": 2.6745879120879123e-05, + "loss": 0.2119, + "step": 16929 + }, + { + "epoch": 46.51098901098901, + "grad_norm": 11.17820930480957, + "learning_rate": 2.6744505494505497e-05, + "loss": 0.2049, + "step": 16930 + }, + { + "epoch": 46.51373626373626, + "grad_norm": 13.276304244995117, + "learning_rate": 2.674313186813187e-05, + "loss": 0.3875, + "step": 16931 + }, + { + "epoch": 46.51648351648352, + "grad_norm": 10.566064834594727, + "learning_rate": 2.674175824175824e-05, + "loss": 0.231, + "step": 16932 + }, + { + "epoch": 46.51923076923077, + "grad_norm": 4.7567620277404785, + "learning_rate": 2.6740384615384617e-05, + "loss": 0.0684, + "step": 16933 + }, + { + "epoch": 46.52197802197802, + "grad_norm": 21.3419132232666, + "learning_rate": 2.6739010989010994e-05, + "loss": 0.4293, + "step": 16934 + }, + { + "epoch": 46.52472527472528, + "grad_norm": 7.35630464553833, + "learning_rate": 2.6737637362637364e-05, + "loss": 0.1207, + "step": 16935 + }, + { + "epoch": 46.527472527472526, + "grad_norm": 14.932747840881348, + "learning_rate": 2.673626373626374e-05, + "loss": 0.3975, + "step": 16936 + }, + { + "epoch": 46.53021978021978, + "grad_norm": 16.343734741210938, + "learning_rate": 2.673489010989011e-05, + "loss": 0.3066, + "step": 16937 + }, + { + "epoch": 46.532967032967036, + "grad_norm": 8.322590827941895, + "learning_rate": 2.6733516483516484e-05, + "loss": 0.1063, + "step": 16938 + }, + { + "epoch": 46.535714285714285, + "grad_norm": 16.703292846679688, + "learning_rate": 2.673214285714286e-05, + "loss": 0.3814, + "step": 16939 + }, + { + "epoch": 46.53846153846154, + "grad_norm": 9.132804870605469, + "learning_rate": 2.673076923076923e-05, + "loss": 0.2298, + "step": 16940 + }, + { + "epoch": 46.54120879120879, + "grad_norm": 4.5922441482543945, + "learning_rate": 2.6729395604395608e-05, + "loss": 0.0884, + "step": 16941 + }, + { + "epoch": 46.543956043956044, + "grad_norm": 9.935491561889648, + "learning_rate": 2.6728021978021978e-05, + "loss": 0.1684, + "step": 16942 + }, + { + "epoch": 46.5467032967033, + "grad_norm": 11.069436073303223, + "learning_rate": 2.6726648351648354e-05, + "loss": 0.1891, + "step": 16943 + }, + { + "epoch": 46.54945054945055, + "grad_norm": 11.726786613464355, + "learning_rate": 2.6725274725274728e-05, + "loss": 0.2008, + "step": 16944 + }, + { + "epoch": 46.5521978021978, + "grad_norm": 10.888734817504883, + "learning_rate": 2.67239010989011e-05, + "loss": 0.1741, + "step": 16945 + }, + { + "epoch": 46.55494505494506, + "grad_norm": 16.93570899963379, + "learning_rate": 2.6722527472527475e-05, + "loss": 0.2704, + "step": 16946 + }, + { + "epoch": 46.55769230769231, + "grad_norm": 17.17978286743164, + "learning_rate": 2.6721153846153845e-05, + "loss": 0.3086, + "step": 16947 + }, + { + "epoch": 46.56043956043956, + "grad_norm": 21.204917907714844, + "learning_rate": 2.671978021978022e-05, + "loss": 0.5655, + "step": 16948 + }, + { + "epoch": 46.56318681318681, + "grad_norm": 8.5320405960083, + "learning_rate": 2.671840659340659e-05, + "loss": 0.2539, + "step": 16949 + }, + { + "epoch": 46.565934065934066, + "grad_norm": 15.280695915222168, + "learning_rate": 2.6717032967032968e-05, + "loss": 0.2153, + "step": 16950 + }, + { + "epoch": 46.56868131868132, + "grad_norm": 10.544750213623047, + "learning_rate": 2.6715659340659345e-05, + "loss": 0.236, + "step": 16951 + }, + { + "epoch": 46.57142857142857, + "grad_norm": 26.40995216369629, + "learning_rate": 2.6714285714285715e-05, + "loss": 0.5095, + "step": 16952 + }, + { + "epoch": 46.574175824175825, + "grad_norm": 10.213204383850098, + "learning_rate": 2.671291208791209e-05, + "loss": 0.1596, + "step": 16953 + }, + { + "epoch": 46.57692307692308, + "grad_norm": 7.071646213531494, + "learning_rate": 2.671153846153846e-05, + "loss": 0.266, + "step": 16954 + }, + { + "epoch": 46.57967032967033, + "grad_norm": 21.589641571044922, + "learning_rate": 2.6710164835164835e-05, + "loss": 0.3475, + "step": 16955 + }, + { + "epoch": 46.582417582417584, + "grad_norm": 12.88742733001709, + "learning_rate": 2.6708791208791212e-05, + "loss": 0.364, + "step": 16956 + }, + { + "epoch": 46.58516483516483, + "grad_norm": 15.560098648071289, + "learning_rate": 2.6707417582417582e-05, + "loss": 0.4126, + "step": 16957 + }, + { + "epoch": 46.58791208791209, + "grad_norm": 3.877269744873047, + "learning_rate": 2.670604395604396e-05, + "loss": 0.0918, + "step": 16958 + }, + { + "epoch": 46.59065934065934, + "grad_norm": 20.032073974609375, + "learning_rate": 2.670467032967033e-05, + "loss": 0.4857, + "step": 16959 + }, + { + "epoch": 46.59340659340659, + "grad_norm": 17.633960723876953, + "learning_rate": 2.6703296703296706e-05, + "loss": 0.3948, + "step": 16960 + }, + { + "epoch": 46.59615384615385, + "grad_norm": 4.741770267486572, + "learning_rate": 2.670192307692308e-05, + "loss": 0.0717, + "step": 16961 + }, + { + "epoch": 46.5989010989011, + "grad_norm": 10.773530960083008, + "learning_rate": 2.670054945054945e-05, + "loss": 0.3533, + "step": 16962 + }, + { + "epoch": 46.60164835164835, + "grad_norm": 13.770939826965332, + "learning_rate": 2.6699175824175826e-05, + "loss": 0.3505, + "step": 16963 + }, + { + "epoch": 46.604395604395606, + "grad_norm": 7.471609592437744, + "learning_rate": 2.6697802197802196e-05, + "loss": 0.2385, + "step": 16964 + }, + { + "epoch": 46.607142857142854, + "grad_norm": 9.609323501586914, + "learning_rate": 2.6696428571428573e-05, + "loss": 0.2324, + "step": 16965 + }, + { + "epoch": 46.60989010989011, + "grad_norm": 7.378693580627441, + "learning_rate": 2.669505494505495e-05, + "loss": 0.0956, + "step": 16966 + }, + { + "epoch": 46.612637362637365, + "grad_norm": 9.57571029663086, + "learning_rate": 2.669368131868132e-05, + "loss": 0.1707, + "step": 16967 + }, + { + "epoch": 46.61538461538461, + "grad_norm": 15.221997261047363, + "learning_rate": 2.6692307692307693e-05, + "loss": 0.4185, + "step": 16968 + }, + { + "epoch": 46.61813186813187, + "grad_norm": 10.70628547668457, + "learning_rate": 2.6690934065934063e-05, + "loss": 0.1304, + "step": 16969 + }, + { + "epoch": 46.620879120879124, + "grad_norm": 15.532630920410156, + "learning_rate": 2.668956043956044e-05, + "loss": 0.4991, + "step": 16970 + }, + { + "epoch": 46.62362637362637, + "grad_norm": 7.082924842834473, + "learning_rate": 2.6688186813186817e-05, + "loss": 0.1487, + "step": 16971 + }, + { + "epoch": 46.62637362637363, + "grad_norm": 9.960737228393555, + "learning_rate": 2.6686813186813187e-05, + "loss": 0.3407, + "step": 16972 + }, + { + "epoch": 46.629120879120876, + "grad_norm": 14.162611961364746, + "learning_rate": 2.6685439560439563e-05, + "loss": 0.3183, + "step": 16973 + }, + { + "epoch": 46.63186813186813, + "grad_norm": 6.686069965362549, + "learning_rate": 2.6684065934065933e-05, + "loss": 0.1287, + "step": 16974 + }, + { + "epoch": 46.63461538461539, + "grad_norm": 15.108827590942383, + "learning_rate": 2.668269230769231e-05, + "loss": 0.5259, + "step": 16975 + }, + { + "epoch": 46.637362637362635, + "grad_norm": 14.656362533569336, + "learning_rate": 2.6681318681318684e-05, + "loss": 0.1759, + "step": 16976 + }, + { + "epoch": 46.64010989010989, + "grad_norm": 13.321246147155762, + "learning_rate": 2.6679945054945054e-05, + "loss": 0.382, + "step": 16977 + }, + { + "epoch": 46.642857142857146, + "grad_norm": 10.34376049041748, + "learning_rate": 2.667857142857143e-05, + "loss": 0.1927, + "step": 16978 + }, + { + "epoch": 46.645604395604394, + "grad_norm": 9.743504524230957, + "learning_rate": 2.66771978021978e-05, + "loss": 0.1308, + "step": 16979 + }, + { + "epoch": 46.64835164835165, + "grad_norm": 8.615843772888184, + "learning_rate": 2.6675824175824177e-05, + "loss": 0.1918, + "step": 16980 + }, + { + "epoch": 46.6510989010989, + "grad_norm": 18.07268714904785, + "learning_rate": 2.6674450549450554e-05, + "loss": 0.5565, + "step": 16981 + }, + { + "epoch": 46.65384615384615, + "grad_norm": 16.048084259033203, + "learning_rate": 2.6673076923076924e-05, + "loss": 0.4083, + "step": 16982 + }, + { + "epoch": 46.65659340659341, + "grad_norm": 10.219449996948242, + "learning_rate": 2.6671703296703297e-05, + "loss": 0.1104, + "step": 16983 + }, + { + "epoch": 46.65934065934066, + "grad_norm": 17.284818649291992, + "learning_rate": 2.6670329670329667e-05, + "loss": 0.2026, + "step": 16984 + }, + { + "epoch": 46.66208791208791, + "grad_norm": 7.577291011810303, + "learning_rate": 2.6668956043956044e-05, + "loss": 0.1623, + "step": 16985 + }, + { + "epoch": 46.66483516483517, + "grad_norm": 17.315364837646484, + "learning_rate": 2.666758241758242e-05, + "loss": 0.2521, + "step": 16986 + }, + { + "epoch": 46.667582417582416, + "grad_norm": 12.32079029083252, + "learning_rate": 2.666620879120879e-05, + "loss": 0.3286, + "step": 16987 + }, + { + "epoch": 46.67032967032967, + "grad_norm": 3.05511474609375, + "learning_rate": 2.6664835164835168e-05, + "loss": 0.0633, + "step": 16988 + }, + { + "epoch": 46.67307692307692, + "grad_norm": 17.794292449951172, + "learning_rate": 2.6663461538461538e-05, + "loss": 0.5269, + "step": 16989 + }, + { + "epoch": 46.675824175824175, + "grad_norm": 24.136075973510742, + "learning_rate": 2.6662087912087915e-05, + "loss": 0.5242, + "step": 16990 + }, + { + "epoch": 46.67857142857143, + "grad_norm": 5.157872676849365, + "learning_rate": 2.6660714285714288e-05, + "loss": 0.0697, + "step": 16991 + }, + { + "epoch": 46.68131868131868, + "grad_norm": 5.870591640472412, + "learning_rate": 2.6659340659340658e-05, + "loss": 0.0661, + "step": 16992 + }, + { + "epoch": 46.684065934065934, + "grad_norm": 4.202634334564209, + "learning_rate": 2.6657967032967035e-05, + "loss": 0.087, + "step": 16993 + }, + { + "epoch": 46.68681318681319, + "grad_norm": 11.795302391052246, + "learning_rate": 2.6656593406593405e-05, + "loss": 0.1994, + "step": 16994 + }, + { + "epoch": 46.68956043956044, + "grad_norm": 12.395830154418945, + "learning_rate": 2.6655219780219782e-05, + "loss": 0.3865, + "step": 16995 + }, + { + "epoch": 46.69230769230769, + "grad_norm": 14.538599967956543, + "learning_rate": 2.665384615384616e-05, + "loss": 0.5758, + "step": 16996 + }, + { + "epoch": 46.69505494505494, + "grad_norm": 16.84617805480957, + "learning_rate": 2.665247252747253e-05, + "loss": 0.5036, + "step": 16997 + }, + { + "epoch": 46.6978021978022, + "grad_norm": 14.33422565460205, + "learning_rate": 2.6651098901098902e-05, + "loss": 0.6406, + "step": 16998 + }, + { + "epoch": 46.70054945054945, + "grad_norm": 30.753992080688477, + "learning_rate": 2.6649725274725272e-05, + "loss": 1.2878, + "step": 16999 + }, + { + "epoch": 46.7032967032967, + "grad_norm": 11.896255493164062, + "learning_rate": 2.664835164835165e-05, + "loss": 0.2703, + "step": 17000 + }, + { + "epoch": 46.706043956043956, + "grad_norm": 15.53536319732666, + "learning_rate": 2.6646978021978026e-05, + "loss": 0.3746, + "step": 17001 + }, + { + "epoch": 46.70879120879121, + "grad_norm": 13.939057350158691, + "learning_rate": 2.6645604395604396e-05, + "loss": 0.2342, + "step": 17002 + }, + { + "epoch": 46.71153846153846, + "grad_norm": 4.280613899230957, + "learning_rate": 2.6644230769230772e-05, + "loss": 0.0965, + "step": 17003 + }, + { + "epoch": 46.714285714285715, + "grad_norm": 14.090689659118652, + "learning_rate": 2.6642857142857142e-05, + "loss": 0.2131, + "step": 17004 + }, + { + "epoch": 46.717032967032964, + "grad_norm": 4.445944309234619, + "learning_rate": 2.664148351648352e-05, + "loss": 0.0873, + "step": 17005 + }, + { + "epoch": 46.71978021978022, + "grad_norm": 9.43537425994873, + "learning_rate": 2.6640109890109893e-05, + "loss": 0.1082, + "step": 17006 + }, + { + "epoch": 46.722527472527474, + "grad_norm": 4.293985366821289, + "learning_rate": 2.6638736263736263e-05, + "loss": 0.0683, + "step": 17007 + }, + { + "epoch": 46.72527472527472, + "grad_norm": 12.56916618347168, + "learning_rate": 2.663736263736264e-05, + "loss": 0.1589, + "step": 17008 + }, + { + "epoch": 46.72802197802198, + "grad_norm": 24.66110610961914, + "learning_rate": 2.663598901098901e-05, + "loss": 0.6497, + "step": 17009 + }, + { + "epoch": 46.73076923076923, + "grad_norm": 7.57127046585083, + "learning_rate": 2.6634615384615386e-05, + "loss": 0.1567, + "step": 17010 + }, + { + "epoch": 46.73351648351648, + "grad_norm": 19.461841583251953, + "learning_rate": 2.6633241758241763e-05, + "loss": 0.3725, + "step": 17011 + }, + { + "epoch": 46.73626373626374, + "grad_norm": 9.533763885498047, + "learning_rate": 2.6631868131868133e-05, + "loss": 0.3184, + "step": 17012 + }, + { + "epoch": 46.73901098901099, + "grad_norm": 6.166184902191162, + "learning_rate": 2.6630494505494506e-05, + "loss": 0.1199, + "step": 17013 + }, + { + "epoch": 46.74175824175824, + "grad_norm": 6.50484561920166, + "learning_rate": 2.6629120879120876e-05, + "loss": 0.0948, + "step": 17014 + }, + { + "epoch": 46.744505494505496, + "grad_norm": 8.743546485900879, + "learning_rate": 2.6627747252747253e-05, + "loss": 0.2187, + "step": 17015 + }, + { + "epoch": 46.747252747252745, + "grad_norm": 7.884067535400391, + "learning_rate": 2.662637362637363e-05, + "loss": 0.1455, + "step": 17016 + }, + { + "epoch": 46.75, + "grad_norm": 9.431434631347656, + "learning_rate": 2.6625e-05, + "loss": 0.2902, + "step": 17017 + }, + { + "epoch": 46.752747252747255, + "grad_norm": 7.042226791381836, + "learning_rate": 2.6623626373626377e-05, + "loss": 0.1065, + "step": 17018 + }, + { + "epoch": 46.755494505494504, + "grad_norm": 10.72852611541748, + "learning_rate": 2.6622252747252747e-05, + "loss": 0.217, + "step": 17019 + }, + { + "epoch": 46.75824175824176, + "grad_norm": 3.6944210529327393, + "learning_rate": 2.662087912087912e-05, + "loss": 0.0465, + "step": 17020 + }, + { + "epoch": 46.76098901098901, + "grad_norm": 16.152488708496094, + "learning_rate": 2.6619505494505497e-05, + "loss": 0.3708, + "step": 17021 + }, + { + "epoch": 46.76373626373626, + "grad_norm": 9.913494110107422, + "learning_rate": 2.6618131868131867e-05, + "loss": 0.374, + "step": 17022 + }, + { + "epoch": 46.76648351648352, + "grad_norm": 16.315858840942383, + "learning_rate": 2.6616758241758244e-05, + "loss": 0.4392, + "step": 17023 + }, + { + "epoch": 46.76923076923077, + "grad_norm": 9.649744033813477, + "learning_rate": 2.6615384615384614e-05, + "loss": 0.1747, + "step": 17024 + }, + { + "epoch": 46.77197802197802, + "grad_norm": 9.94169807434082, + "learning_rate": 2.661401098901099e-05, + "loss": 0.1574, + "step": 17025 + }, + { + "epoch": 46.77472527472528, + "grad_norm": 3.4787211418151855, + "learning_rate": 2.6612637362637367e-05, + "loss": 0.0505, + "step": 17026 + }, + { + "epoch": 46.777472527472526, + "grad_norm": 8.9515962600708, + "learning_rate": 2.6611263736263737e-05, + "loss": 0.2541, + "step": 17027 + }, + { + "epoch": 46.78021978021978, + "grad_norm": 14.690851211547852, + "learning_rate": 2.660989010989011e-05, + "loss": 0.3241, + "step": 17028 + }, + { + "epoch": 46.782967032967036, + "grad_norm": 7.760059356689453, + "learning_rate": 2.660851648351648e-05, + "loss": 0.2906, + "step": 17029 + }, + { + "epoch": 46.785714285714285, + "grad_norm": 6.8074822425842285, + "learning_rate": 2.6607142857142858e-05, + "loss": 0.2185, + "step": 17030 + }, + { + "epoch": 46.78846153846154, + "grad_norm": 14.318604469299316, + "learning_rate": 2.6605769230769235e-05, + "loss": 0.4828, + "step": 17031 + }, + { + "epoch": 46.79120879120879, + "grad_norm": 6.681386470794678, + "learning_rate": 2.6604395604395605e-05, + "loss": 0.1025, + "step": 17032 + }, + { + "epoch": 46.793956043956044, + "grad_norm": 19.34852409362793, + "learning_rate": 2.660302197802198e-05, + "loss": 0.7108, + "step": 17033 + }, + { + "epoch": 46.7967032967033, + "grad_norm": 10.978768348693848, + "learning_rate": 2.660164835164835e-05, + "loss": 0.2648, + "step": 17034 + }, + { + "epoch": 46.79945054945055, + "grad_norm": 20.547840118408203, + "learning_rate": 2.6600274725274725e-05, + "loss": 0.571, + "step": 17035 + }, + { + "epoch": 46.8021978021978, + "grad_norm": 14.04208755493164, + "learning_rate": 2.65989010989011e-05, + "loss": 0.2477, + "step": 17036 + }, + { + "epoch": 46.80494505494506, + "grad_norm": 12.14185619354248, + "learning_rate": 2.659752747252747e-05, + "loss": 0.1723, + "step": 17037 + }, + { + "epoch": 46.80769230769231, + "grad_norm": 4.261749267578125, + "learning_rate": 2.659615384615385e-05, + "loss": 0.0843, + "step": 17038 + }, + { + "epoch": 46.81043956043956, + "grad_norm": 19.247926712036133, + "learning_rate": 2.659478021978022e-05, + "loss": 0.407, + "step": 17039 + }, + { + "epoch": 46.81318681318681, + "grad_norm": 14.56160831451416, + "learning_rate": 2.6593406593406595e-05, + "loss": 0.2915, + "step": 17040 + }, + { + "epoch": 46.815934065934066, + "grad_norm": 3.345635175704956, + "learning_rate": 2.6592032967032972e-05, + "loss": 0.0674, + "step": 17041 + }, + { + "epoch": 46.81868131868132, + "grad_norm": 6.472519397735596, + "learning_rate": 2.6590659340659342e-05, + "loss": 0.1284, + "step": 17042 + }, + { + "epoch": 46.82142857142857, + "grad_norm": 12.83879566192627, + "learning_rate": 2.6589285714285715e-05, + "loss": 0.2362, + "step": 17043 + }, + { + "epoch": 46.824175824175825, + "grad_norm": 7.623875141143799, + "learning_rate": 2.6587912087912085e-05, + "loss": 0.141, + "step": 17044 + }, + { + "epoch": 46.82692307692308, + "grad_norm": 28.156333923339844, + "learning_rate": 2.6586538461538462e-05, + "loss": 1.115, + "step": 17045 + }, + { + "epoch": 46.82967032967033, + "grad_norm": 6.9097089767456055, + "learning_rate": 2.658516483516484e-05, + "loss": 0.1236, + "step": 17046 + }, + { + "epoch": 46.832417582417584, + "grad_norm": 10.8482084274292, + "learning_rate": 2.658379120879121e-05, + "loss": 0.3755, + "step": 17047 + }, + { + "epoch": 46.83516483516483, + "grad_norm": 3.7288360595703125, + "learning_rate": 2.6582417582417586e-05, + "loss": 0.11, + "step": 17048 + }, + { + "epoch": 46.83791208791209, + "grad_norm": 7.773338794708252, + "learning_rate": 2.6581043956043956e-05, + "loss": 0.2612, + "step": 17049 + }, + { + "epoch": 46.84065934065934, + "grad_norm": 10.962444305419922, + "learning_rate": 2.657967032967033e-05, + "loss": 0.3359, + "step": 17050 + }, + { + "epoch": 46.84340659340659, + "grad_norm": 9.446890830993652, + "learning_rate": 2.6578296703296706e-05, + "loss": 0.1985, + "step": 17051 + }, + { + "epoch": 46.84615384615385, + "grad_norm": 20.23959732055664, + "learning_rate": 2.6576923076923076e-05, + "loss": 0.3353, + "step": 17052 + }, + { + "epoch": 46.8489010989011, + "grad_norm": 15.183892250061035, + "learning_rate": 2.6575549450549453e-05, + "loss": 0.437, + "step": 17053 + }, + { + "epoch": 46.85164835164835, + "grad_norm": 12.511157989501953, + "learning_rate": 2.6574175824175823e-05, + "loss": 0.243, + "step": 17054 + }, + { + "epoch": 46.854395604395606, + "grad_norm": 14.79592227935791, + "learning_rate": 2.65728021978022e-05, + "loss": 0.3874, + "step": 17055 + }, + { + "epoch": 46.857142857142854, + "grad_norm": 11.108198165893555, + "learning_rate": 2.6571428571428576e-05, + "loss": 0.2312, + "step": 17056 + }, + { + "epoch": 46.85989010989011, + "grad_norm": 16.55498695373535, + "learning_rate": 2.6570054945054946e-05, + "loss": 0.4768, + "step": 17057 + }, + { + "epoch": 46.862637362637365, + "grad_norm": 10.370219230651855, + "learning_rate": 2.656868131868132e-05, + "loss": 0.2845, + "step": 17058 + }, + { + "epoch": 46.86538461538461, + "grad_norm": 9.93709659576416, + "learning_rate": 2.656730769230769e-05, + "loss": 0.2152, + "step": 17059 + }, + { + "epoch": 46.86813186813187, + "grad_norm": 13.19886589050293, + "learning_rate": 2.6565934065934067e-05, + "loss": 0.2145, + "step": 17060 + }, + { + "epoch": 46.870879120879124, + "grad_norm": 9.42692756652832, + "learning_rate": 2.6564560439560443e-05, + "loss": 0.1962, + "step": 17061 + }, + { + "epoch": 46.87362637362637, + "grad_norm": 2.6720755100250244, + "learning_rate": 2.6563186813186814e-05, + "loss": 0.045, + "step": 17062 + }, + { + "epoch": 46.87637362637363, + "grad_norm": 15.404671669006348, + "learning_rate": 2.656181318681319e-05, + "loss": 0.4298, + "step": 17063 + }, + { + "epoch": 46.879120879120876, + "grad_norm": 15.355828285217285, + "learning_rate": 2.656043956043956e-05, + "loss": 0.3816, + "step": 17064 + }, + { + "epoch": 46.88186813186813, + "grad_norm": 12.215899467468262, + "learning_rate": 2.6559065934065934e-05, + "loss": 0.4392, + "step": 17065 + }, + { + "epoch": 46.88461538461539, + "grad_norm": 14.304661750793457, + "learning_rate": 2.655769230769231e-05, + "loss": 0.4632, + "step": 17066 + }, + { + "epoch": 46.887362637362635, + "grad_norm": 10.378796577453613, + "learning_rate": 2.655631868131868e-05, + "loss": 0.245, + "step": 17067 + }, + { + "epoch": 46.89010989010989, + "grad_norm": 16.41862678527832, + "learning_rate": 2.6554945054945057e-05, + "loss": 0.5712, + "step": 17068 + }, + { + "epoch": 46.892857142857146, + "grad_norm": 15.956104278564453, + "learning_rate": 2.6553571428571427e-05, + "loss": 0.4149, + "step": 17069 + }, + { + "epoch": 46.895604395604394, + "grad_norm": 16.21163558959961, + "learning_rate": 2.6552197802197804e-05, + "loss": 0.4829, + "step": 17070 + }, + { + "epoch": 46.89835164835165, + "grad_norm": 6.957067012786865, + "learning_rate": 2.655082417582418e-05, + "loss": 0.0933, + "step": 17071 + }, + { + "epoch": 46.9010989010989, + "grad_norm": 7.218100070953369, + "learning_rate": 2.654945054945055e-05, + "loss": 0.1126, + "step": 17072 + }, + { + "epoch": 46.90384615384615, + "grad_norm": 10.46230411529541, + "learning_rate": 2.6548076923076924e-05, + "loss": 0.1594, + "step": 17073 + }, + { + "epoch": 46.90659340659341, + "grad_norm": 23.255420684814453, + "learning_rate": 2.6546703296703294e-05, + "loss": 0.5353, + "step": 17074 + }, + { + "epoch": 46.90934065934066, + "grad_norm": 12.391380310058594, + "learning_rate": 2.654532967032967e-05, + "loss": 0.2771, + "step": 17075 + }, + { + "epoch": 46.91208791208791, + "grad_norm": 3.341378688812256, + "learning_rate": 2.6543956043956048e-05, + "loss": 0.0797, + "step": 17076 + }, + { + "epoch": 46.91483516483517, + "grad_norm": 16.452241897583008, + "learning_rate": 2.6542582417582418e-05, + "loss": 0.5677, + "step": 17077 + }, + { + "epoch": 46.917582417582416, + "grad_norm": 7.443065166473389, + "learning_rate": 2.6541208791208795e-05, + "loss": 0.1263, + "step": 17078 + }, + { + "epoch": 46.92032967032967, + "grad_norm": 13.099522590637207, + "learning_rate": 2.6539835164835165e-05, + "loss": 0.2516, + "step": 17079 + }, + { + "epoch": 46.92307692307692, + "grad_norm": 10.051538467407227, + "learning_rate": 2.6538461538461538e-05, + "loss": 0.2882, + "step": 17080 + }, + { + "epoch": 46.925824175824175, + "grad_norm": 20.18876838684082, + "learning_rate": 2.6537087912087915e-05, + "loss": 0.6118, + "step": 17081 + }, + { + "epoch": 46.92857142857143, + "grad_norm": 17.430532455444336, + "learning_rate": 2.6535714285714285e-05, + "loss": 0.33, + "step": 17082 + }, + { + "epoch": 46.93131868131868, + "grad_norm": 4.358668804168701, + "learning_rate": 2.6534340659340662e-05, + "loss": 0.0564, + "step": 17083 + }, + { + "epoch": 46.934065934065934, + "grad_norm": 5.383837699890137, + "learning_rate": 2.6532967032967032e-05, + "loss": 0.1204, + "step": 17084 + }, + { + "epoch": 46.93681318681319, + "grad_norm": 14.913843154907227, + "learning_rate": 2.653159340659341e-05, + "loss": 0.2125, + "step": 17085 + }, + { + "epoch": 46.93956043956044, + "grad_norm": 7.756620407104492, + "learning_rate": 2.6530219780219785e-05, + "loss": 0.12, + "step": 17086 + }, + { + "epoch": 46.94230769230769, + "grad_norm": 18.818695068359375, + "learning_rate": 2.6528846153846155e-05, + "loss": 0.6584, + "step": 17087 + }, + { + "epoch": 46.94505494505494, + "grad_norm": 10.184017181396484, + "learning_rate": 2.652747252747253e-05, + "loss": 0.1509, + "step": 17088 + }, + { + "epoch": 46.9478021978022, + "grad_norm": 11.574830055236816, + "learning_rate": 2.65260989010989e-05, + "loss": 0.1493, + "step": 17089 + }, + { + "epoch": 46.95054945054945, + "grad_norm": 13.937495231628418, + "learning_rate": 2.6524725274725276e-05, + "loss": 0.2775, + "step": 17090 + }, + { + "epoch": 46.9532967032967, + "grad_norm": 12.199507713317871, + "learning_rate": 2.6523351648351652e-05, + "loss": 0.4382, + "step": 17091 + }, + { + "epoch": 46.956043956043956, + "grad_norm": 18.275632858276367, + "learning_rate": 2.6521978021978022e-05, + "loss": 0.5997, + "step": 17092 + }, + { + "epoch": 46.95879120879121, + "grad_norm": 3.4698331356048584, + "learning_rate": 2.65206043956044e-05, + "loss": 0.0618, + "step": 17093 + }, + { + "epoch": 46.96153846153846, + "grad_norm": 7.342044830322266, + "learning_rate": 2.651923076923077e-05, + "loss": 0.1068, + "step": 17094 + }, + { + "epoch": 46.964285714285715, + "grad_norm": 17.860759735107422, + "learning_rate": 2.6517857142857143e-05, + "loss": 0.4165, + "step": 17095 + }, + { + "epoch": 46.967032967032964, + "grad_norm": 9.074539184570312, + "learning_rate": 2.651648351648352e-05, + "loss": 0.1866, + "step": 17096 + }, + { + "epoch": 46.96978021978022, + "grad_norm": 19.334108352661133, + "learning_rate": 2.651510989010989e-05, + "loss": 0.4829, + "step": 17097 + }, + { + "epoch": 46.972527472527474, + "grad_norm": 12.4268217086792, + "learning_rate": 2.6513736263736266e-05, + "loss": 0.3291, + "step": 17098 + }, + { + "epoch": 46.97527472527472, + "grad_norm": 8.833392143249512, + "learning_rate": 2.6512362637362636e-05, + "loss": 0.1831, + "step": 17099 + }, + { + "epoch": 46.97802197802198, + "grad_norm": 20.310386657714844, + "learning_rate": 2.6510989010989013e-05, + "loss": 0.5679, + "step": 17100 + }, + { + "epoch": 46.98076923076923, + "grad_norm": 11.228721618652344, + "learning_rate": 2.650961538461539e-05, + "loss": 0.243, + "step": 17101 + }, + { + "epoch": 46.98351648351648, + "grad_norm": 10.615389823913574, + "learning_rate": 2.650824175824176e-05, + "loss": 0.3104, + "step": 17102 + }, + { + "epoch": 46.98626373626374, + "grad_norm": 11.080363273620605, + "learning_rate": 2.6506868131868133e-05, + "loss": 0.2681, + "step": 17103 + }, + { + "epoch": 46.98901098901099, + "grad_norm": 6.19834041595459, + "learning_rate": 2.6505494505494503e-05, + "loss": 0.0895, + "step": 17104 + }, + { + "epoch": 46.99175824175824, + "grad_norm": 12.669477462768555, + "learning_rate": 2.650412087912088e-05, + "loss": 0.2689, + "step": 17105 + }, + { + "epoch": 46.994505494505496, + "grad_norm": 6.602015972137451, + "learning_rate": 2.6502747252747257e-05, + "loss": 0.1711, + "step": 17106 + }, + { + "epoch": 46.997252747252745, + "grad_norm": 7.679996967315674, + "learning_rate": 2.6501373626373627e-05, + "loss": 0.1062, + "step": 17107 + }, + { + "epoch": 47.0, + "grad_norm": 13.337787628173828, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.1716, + "step": 17108 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.7644628099173554, + "eval_f1": 0.7660557140801278, + "eval_f1_DuraRiadoRio_64x64": 0.7012987012987013, + "eval_f1_Mole_64x64": 0.7768595041322314, + "eval_f1_Quebrado_64x64": 0.8409893992932862, + "eval_f1_RiadoRio_64x64": 0.6846846846846847, + "eval_f1_RioFechado_64x64": 0.8264462809917356, + "eval_loss": 0.9572784304618835, + "eval_precision": 0.8121638922508584, + "eval_precision_DuraRiadoRio_64x64": 0.9310344827586207, + "eval_precision_Mole_64x64": 0.6438356164383562, + "eval_precision_Quebrado_64x64": 0.8561151079136691, + "eval_precision_RiadoRio_64x64": 0.6298342541436464, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.7644561815336463, + "eval_recall_DuraRiadoRio_64x64": 0.5625, + "eval_recall_Mole_64x64": 0.9791666666666666, + "eval_recall_Quebrado_64x64": 0.8263888888888888, + "eval_recall_RiadoRio_64x64": 0.75, + "eval_recall_RioFechado_64x64": 0.704225352112676, + "eval_runtime": 1.8406, + "eval_samples_per_second": 394.439, + "eval_steps_per_second": 24.992, + "step": 17108 + }, + { + "epoch": 47.002747252747255, + "grad_norm": 14.2421236038208, + "learning_rate": 2.6498626373626374e-05, + "loss": 0.2368, + "step": 17109 + }, + { + "epoch": 47.005494505494504, + "grad_norm": 13.458246231079102, + "learning_rate": 2.6497252747252747e-05, + "loss": 0.296, + "step": 17110 + }, + { + "epoch": 47.00824175824176, + "grad_norm": 12.206469535827637, + "learning_rate": 2.6495879120879124e-05, + "loss": 0.2894, + "step": 17111 + }, + { + "epoch": 47.010989010989015, + "grad_norm": 17.111862182617188, + "learning_rate": 2.6494505494505494e-05, + "loss": 0.4879, + "step": 17112 + }, + { + "epoch": 47.01373626373626, + "grad_norm": 12.446081161499023, + "learning_rate": 2.649313186813187e-05, + "loss": 0.3108, + "step": 17113 + }, + { + "epoch": 47.01648351648352, + "grad_norm": 9.715473175048828, + "learning_rate": 2.649175824175824e-05, + "loss": 0.277, + "step": 17114 + }, + { + "epoch": 47.01923076923077, + "grad_norm": 10.351278305053711, + "learning_rate": 2.6490384615384618e-05, + "loss": 0.1958, + "step": 17115 + }, + { + "epoch": 47.02197802197802, + "grad_norm": 4.740372180938721, + "learning_rate": 2.6489010989010994e-05, + "loss": 0.0516, + "step": 17116 + }, + { + "epoch": 47.02472527472528, + "grad_norm": 18.56142234802246, + "learning_rate": 2.6487637362637364e-05, + "loss": 0.5348, + "step": 17117 + }, + { + "epoch": 47.027472527472526, + "grad_norm": 6.330165386199951, + "learning_rate": 2.6486263736263738e-05, + "loss": 0.15, + "step": 17118 + }, + { + "epoch": 47.03021978021978, + "grad_norm": 2.951885223388672, + "learning_rate": 2.6484890109890108e-05, + "loss": 0.0599, + "step": 17119 + }, + { + "epoch": 47.032967032967036, + "grad_norm": 12.374666213989258, + "learning_rate": 2.6483516483516485e-05, + "loss": 0.2168, + "step": 17120 + }, + { + "epoch": 47.035714285714285, + "grad_norm": 10.106657028198242, + "learning_rate": 2.648214285714286e-05, + "loss": 0.2868, + "step": 17121 + }, + { + "epoch": 47.03846153846154, + "grad_norm": 11.302122116088867, + "learning_rate": 2.648076923076923e-05, + "loss": 0.1748, + "step": 17122 + }, + { + "epoch": 47.04120879120879, + "grad_norm": 10.400955200195312, + "learning_rate": 2.6479395604395608e-05, + "loss": 0.2475, + "step": 17123 + }, + { + "epoch": 47.043956043956044, + "grad_norm": 14.017531394958496, + "learning_rate": 2.6478021978021978e-05, + "loss": 0.3202, + "step": 17124 + }, + { + "epoch": 47.0467032967033, + "grad_norm": 10.811749458312988, + "learning_rate": 2.647664835164835e-05, + "loss": 0.1982, + "step": 17125 + }, + { + "epoch": 47.04945054945055, + "grad_norm": 15.69522476196289, + "learning_rate": 2.647527472527473e-05, + "loss": 0.4819, + "step": 17126 + }, + { + "epoch": 47.0521978021978, + "grad_norm": 23.0186767578125, + "learning_rate": 2.64739010989011e-05, + "loss": 0.8105, + "step": 17127 + }, + { + "epoch": 47.05494505494506, + "grad_norm": 15.741670608520508, + "learning_rate": 2.6472527472527475e-05, + "loss": 0.4084, + "step": 17128 + }, + { + "epoch": 47.05769230769231, + "grad_norm": 13.523933410644531, + "learning_rate": 2.6471153846153845e-05, + "loss": 0.3288, + "step": 17129 + }, + { + "epoch": 47.06043956043956, + "grad_norm": 5.6883392333984375, + "learning_rate": 2.6469780219780222e-05, + "loss": 0.1277, + "step": 17130 + }, + { + "epoch": 47.06318681318681, + "grad_norm": 4.425714015960693, + "learning_rate": 2.64684065934066e-05, + "loss": 0.1078, + "step": 17131 + }, + { + "epoch": 47.065934065934066, + "grad_norm": 11.47581958770752, + "learning_rate": 2.646703296703297e-05, + "loss": 0.1383, + "step": 17132 + }, + { + "epoch": 47.06868131868132, + "grad_norm": 15.37021255493164, + "learning_rate": 2.6465659340659342e-05, + "loss": 0.4129, + "step": 17133 + }, + { + "epoch": 47.07142857142857, + "grad_norm": 5.244199275970459, + "learning_rate": 2.6464285714285712e-05, + "loss": 0.0761, + "step": 17134 + }, + { + "epoch": 47.074175824175825, + "grad_norm": 8.254598617553711, + "learning_rate": 2.646291208791209e-05, + "loss": 0.2636, + "step": 17135 + }, + { + "epoch": 47.07692307692308, + "grad_norm": 17.127084732055664, + "learning_rate": 2.6461538461538466e-05, + "loss": 0.3461, + "step": 17136 + }, + { + "epoch": 47.07967032967033, + "grad_norm": 16.265775680541992, + "learning_rate": 2.6460164835164836e-05, + "loss": 0.474, + "step": 17137 + }, + { + "epoch": 47.082417582417584, + "grad_norm": 11.14221477508545, + "learning_rate": 2.6458791208791213e-05, + "loss": 0.3755, + "step": 17138 + }, + { + "epoch": 47.08516483516483, + "grad_norm": 5.824831008911133, + "learning_rate": 2.6457417582417583e-05, + "loss": 0.0296, + "step": 17139 + }, + { + "epoch": 47.08791208791209, + "grad_norm": 9.501020431518555, + "learning_rate": 2.6456043956043956e-05, + "loss": 0.13, + "step": 17140 + }, + { + "epoch": 47.09065934065934, + "grad_norm": 8.697870254516602, + "learning_rate": 2.6454670329670333e-05, + "loss": 0.2204, + "step": 17141 + }, + { + "epoch": 47.09340659340659, + "grad_norm": 2.0050649642944336, + "learning_rate": 2.6453296703296703e-05, + "loss": 0.0428, + "step": 17142 + }, + { + "epoch": 47.09615384615385, + "grad_norm": 12.613531112670898, + "learning_rate": 2.645192307692308e-05, + "loss": 0.5069, + "step": 17143 + }, + { + "epoch": 47.0989010989011, + "grad_norm": 10.676132202148438, + "learning_rate": 2.645054945054945e-05, + "loss": 0.1776, + "step": 17144 + }, + { + "epoch": 47.10164835164835, + "grad_norm": 13.587632179260254, + "learning_rate": 2.6449175824175827e-05, + "loss": 0.3761, + "step": 17145 + }, + { + "epoch": 47.104395604395606, + "grad_norm": 6.393051624298096, + "learning_rate": 2.64478021978022e-05, + "loss": 0.1145, + "step": 17146 + }, + { + "epoch": 47.107142857142854, + "grad_norm": 12.499663352966309, + "learning_rate": 2.6446428571428573e-05, + "loss": 0.2962, + "step": 17147 + }, + { + "epoch": 47.10989010989011, + "grad_norm": 12.642260551452637, + "learning_rate": 2.6445054945054947e-05, + "loss": 0.2937, + "step": 17148 + }, + { + "epoch": 47.112637362637365, + "grad_norm": 4.48829984664917, + "learning_rate": 2.6443681318681317e-05, + "loss": 0.0852, + "step": 17149 + }, + { + "epoch": 47.11538461538461, + "grad_norm": 15.138615608215332, + "learning_rate": 2.6442307692307694e-05, + "loss": 0.5087, + "step": 17150 + }, + { + "epoch": 47.11813186813187, + "grad_norm": 20.83669662475586, + "learning_rate": 2.644093406593407e-05, + "loss": 0.6168, + "step": 17151 + }, + { + "epoch": 47.120879120879124, + "grad_norm": 12.461776733398438, + "learning_rate": 2.643956043956044e-05, + "loss": 0.3857, + "step": 17152 + }, + { + "epoch": 47.12362637362637, + "grad_norm": 11.799545288085938, + "learning_rate": 2.6438186813186817e-05, + "loss": 0.2593, + "step": 17153 + }, + { + "epoch": 47.12637362637363, + "grad_norm": 12.686844825744629, + "learning_rate": 2.6436813186813187e-05, + "loss": 0.483, + "step": 17154 + }, + { + "epoch": 47.129120879120876, + "grad_norm": 12.267522811889648, + "learning_rate": 2.643543956043956e-05, + "loss": 0.3103, + "step": 17155 + }, + { + "epoch": 47.13186813186813, + "grad_norm": 19.661258697509766, + "learning_rate": 2.6434065934065937e-05, + "loss": 0.4367, + "step": 17156 + }, + { + "epoch": 47.13461538461539, + "grad_norm": 5.9384307861328125, + "learning_rate": 2.6432692307692307e-05, + "loss": 0.0617, + "step": 17157 + }, + { + "epoch": 47.137362637362635, + "grad_norm": 12.336174011230469, + "learning_rate": 2.6431318681318684e-05, + "loss": 0.2971, + "step": 17158 + }, + { + "epoch": 47.14010989010989, + "grad_norm": 12.054838180541992, + "learning_rate": 2.6429945054945054e-05, + "loss": 0.2865, + "step": 17159 + }, + { + "epoch": 47.142857142857146, + "grad_norm": 11.922188758850098, + "learning_rate": 2.642857142857143e-05, + "loss": 0.364, + "step": 17160 + }, + { + "epoch": 47.145604395604394, + "grad_norm": 8.116414070129395, + "learning_rate": 2.6427197802197805e-05, + "loss": 0.2693, + "step": 17161 + }, + { + "epoch": 47.14835164835165, + "grad_norm": 4.047406196594238, + "learning_rate": 2.6425824175824178e-05, + "loss": 0.0796, + "step": 17162 + }, + { + "epoch": 47.1510989010989, + "grad_norm": 9.20903491973877, + "learning_rate": 2.642445054945055e-05, + "loss": 0.1919, + "step": 17163 + }, + { + "epoch": 47.15384615384615, + "grad_norm": 14.365049362182617, + "learning_rate": 2.642307692307692e-05, + "loss": 0.3318, + "step": 17164 + }, + { + "epoch": 47.15659340659341, + "grad_norm": 6.878326892852783, + "learning_rate": 2.6421703296703298e-05, + "loss": 0.2092, + "step": 17165 + }, + { + "epoch": 47.15934065934066, + "grad_norm": 10.78748893737793, + "learning_rate": 2.6420329670329675e-05, + "loss": 0.2435, + "step": 17166 + }, + { + "epoch": 47.16208791208791, + "grad_norm": 9.942546844482422, + "learning_rate": 2.6418956043956045e-05, + "loss": 0.1893, + "step": 17167 + }, + { + "epoch": 47.16483516483517, + "grad_norm": 2.6048548221588135, + "learning_rate": 2.6417582417582422e-05, + "loss": 0.0467, + "step": 17168 + }, + { + "epoch": 47.167582417582416, + "grad_norm": 17.621623992919922, + "learning_rate": 2.6416208791208792e-05, + "loss": 0.4246, + "step": 17169 + }, + { + "epoch": 47.17032967032967, + "grad_norm": 8.12430477142334, + "learning_rate": 2.6414835164835165e-05, + "loss": 0.2171, + "step": 17170 + }, + { + "epoch": 47.17307692307692, + "grad_norm": 11.330317497253418, + "learning_rate": 2.6413461538461542e-05, + "loss": 0.1915, + "step": 17171 + }, + { + "epoch": 47.175824175824175, + "grad_norm": 3.0070278644561768, + "learning_rate": 2.6412087912087912e-05, + "loss": 0.0429, + "step": 17172 + }, + { + "epoch": 47.17857142857143, + "grad_norm": 16.38026237487793, + "learning_rate": 2.641071428571429e-05, + "loss": 0.3422, + "step": 17173 + }, + { + "epoch": 47.18131868131868, + "grad_norm": 8.772428512573242, + "learning_rate": 2.640934065934066e-05, + "loss": 0.2496, + "step": 17174 + }, + { + "epoch": 47.184065934065934, + "grad_norm": 3.51271653175354, + "learning_rate": 2.6407967032967036e-05, + "loss": 0.056, + "step": 17175 + }, + { + "epoch": 47.18681318681319, + "grad_norm": 9.86409854888916, + "learning_rate": 2.640659340659341e-05, + "loss": 0.1919, + "step": 17176 + }, + { + "epoch": 47.18956043956044, + "grad_norm": 5.3896164894104, + "learning_rate": 2.6405219780219782e-05, + "loss": 0.1001, + "step": 17177 + }, + { + "epoch": 47.19230769230769, + "grad_norm": 12.167989730834961, + "learning_rate": 2.6403846153846156e-05, + "loss": 0.2069, + "step": 17178 + }, + { + "epoch": 47.19505494505494, + "grad_norm": 6.1022539138793945, + "learning_rate": 2.6402472527472526e-05, + "loss": 0.0885, + "step": 17179 + }, + { + "epoch": 47.1978021978022, + "grad_norm": 5.184519290924072, + "learning_rate": 2.6401098901098903e-05, + "loss": 0.0765, + "step": 17180 + }, + { + "epoch": 47.20054945054945, + "grad_norm": 12.634685516357422, + "learning_rate": 2.6399725274725273e-05, + "loss": 0.3539, + "step": 17181 + }, + { + "epoch": 47.2032967032967, + "grad_norm": 15.786320686340332, + "learning_rate": 2.639835164835165e-05, + "loss": 0.4674, + "step": 17182 + }, + { + "epoch": 47.206043956043956, + "grad_norm": 14.285063743591309, + "learning_rate": 2.6396978021978026e-05, + "loss": 0.2589, + "step": 17183 + }, + { + "epoch": 47.20879120879121, + "grad_norm": 22.054306030273438, + "learning_rate": 2.6395604395604396e-05, + "loss": 0.7581, + "step": 17184 + }, + { + "epoch": 47.21153846153846, + "grad_norm": 2.467790365219116, + "learning_rate": 2.639423076923077e-05, + "loss": 0.0418, + "step": 17185 + }, + { + "epoch": 47.214285714285715, + "grad_norm": 8.052628517150879, + "learning_rate": 2.6392857142857143e-05, + "loss": 0.1646, + "step": 17186 + }, + { + "epoch": 47.217032967032964, + "grad_norm": 11.757567405700684, + "learning_rate": 2.6391483516483516e-05, + "loss": 0.3654, + "step": 17187 + }, + { + "epoch": 47.21978021978022, + "grad_norm": 9.253372192382812, + "learning_rate": 2.6390109890109893e-05, + "loss": 0.1878, + "step": 17188 + }, + { + "epoch": 47.222527472527474, + "grad_norm": 16.939599990844727, + "learning_rate": 2.6388736263736263e-05, + "loss": 0.4149, + "step": 17189 + }, + { + "epoch": 47.22527472527472, + "grad_norm": 15.700945854187012, + "learning_rate": 2.638736263736264e-05, + "loss": 0.4796, + "step": 17190 + }, + { + "epoch": 47.22802197802198, + "grad_norm": 16.852174758911133, + "learning_rate": 2.638598901098901e-05, + "loss": 0.4617, + "step": 17191 + }, + { + "epoch": 47.23076923076923, + "grad_norm": 13.527801513671875, + "learning_rate": 2.6384615384615387e-05, + "loss": 0.2012, + "step": 17192 + }, + { + "epoch": 47.23351648351648, + "grad_norm": 6.8405070304870605, + "learning_rate": 2.638324175824176e-05, + "loss": 0.0951, + "step": 17193 + }, + { + "epoch": 47.23626373626374, + "grad_norm": 14.02316665649414, + "learning_rate": 2.638186813186813e-05, + "loss": 0.2699, + "step": 17194 + }, + { + "epoch": 47.239010989010985, + "grad_norm": 8.215437889099121, + "learning_rate": 2.6380494505494507e-05, + "loss": 0.2213, + "step": 17195 + }, + { + "epoch": 47.24175824175824, + "grad_norm": 4.088120460510254, + "learning_rate": 2.6379120879120877e-05, + "loss": 0.0631, + "step": 17196 + }, + { + "epoch": 47.244505494505496, + "grad_norm": 14.32767391204834, + "learning_rate": 2.6377747252747254e-05, + "loss": 0.3033, + "step": 17197 + }, + { + "epoch": 47.247252747252745, + "grad_norm": 14.492696762084961, + "learning_rate": 2.637637362637363e-05, + "loss": 0.3961, + "step": 17198 + }, + { + "epoch": 47.25, + "grad_norm": 23.206186294555664, + "learning_rate": 2.6375e-05, + "loss": 0.62, + "step": 17199 + }, + { + "epoch": 47.252747252747255, + "grad_norm": 9.573745727539062, + "learning_rate": 2.6373626373626374e-05, + "loss": 0.2677, + "step": 17200 + }, + { + "epoch": 47.255494505494504, + "grad_norm": 9.281007766723633, + "learning_rate": 2.6372252747252748e-05, + "loss": 0.1474, + "step": 17201 + }, + { + "epoch": 47.25824175824176, + "grad_norm": 8.756848335266113, + "learning_rate": 2.637087912087912e-05, + "loss": 0.1588, + "step": 17202 + }, + { + "epoch": 47.260989010989015, + "grad_norm": 18.154211044311523, + "learning_rate": 2.6369505494505498e-05, + "loss": 0.3467, + "step": 17203 + }, + { + "epoch": 47.26373626373626, + "grad_norm": 4.375400066375732, + "learning_rate": 2.6368131868131868e-05, + "loss": 0.0884, + "step": 17204 + }, + { + "epoch": 47.26648351648352, + "grad_norm": 11.807211875915527, + "learning_rate": 2.6366758241758245e-05, + "loss": 0.2605, + "step": 17205 + }, + { + "epoch": 47.26923076923077, + "grad_norm": 14.437731742858887, + "learning_rate": 2.6365384615384615e-05, + "loss": 0.1792, + "step": 17206 + }, + { + "epoch": 47.27197802197802, + "grad_norm": 15.947653770446777, + "learning_rate": 2.636401098901099e-05, + "loss": 0.5675, + "step": 17207 + }, + { + "epoch": 47.27472527472528, + "grad_norm": 10.00573444366455, + "learning_rate": 2.6362637362637365e-05, + "loss": 0.2444, + "step": 17208 + }, + { + "epoch": 47.277472527472526, + "grad_norm": 9.913686752319336, + "learning_rate": 2.6361263736263735e-05, + "loss": 0.1265, + "step": 17209 + }, + { + "epoch": 47.28021978021978, + "grad_norm": 7.713098526000977, + "learning_rate": 2.635989010989011e-05, + "loss": 0.248, + "step": 17210 + }, + { + "epoch": 47.282967032967036, + "grad_norm": 13.00743293762207, + "learning_rate": 2.635851648351648e-05, + "loss": 0.1193, + "step": 17211 + }, + { + "epoch": 47.285714285714285, + "grad_norm": 15.059185028076172, + "learning_rate": 2.635714285714286e-05, + "loss": 0.4269, + "step": 17212 + }, + { + "epoch": 47.28846153846154, + "grad_norm": 10.245871543884277, + "learning_rate": 2.6355769230769235e-05, + "loss": 0.2256, + "step": 17213 + }, + { + "epoch": 47.29120879120879, + "grad_norm": 8.376002311706543, + "learning_rate": 2.6354395604395605e-05, + "loss": 0.1627, + "step": 17214 + }, + { + "epoch": 47.293956043956044, + "grad_norm": 26.044200897216797, + "learning_rate": 2.635302197802198e-05, + "loss": 0.8515, + "step": 17215 + }, + { + "epoch": 47.2967032967033, + "grad_norm": 3.20959734916687, + "learning_rate": 2.635164835164835e-05, + "loss": 0.0513, + "step": 17216 + }, + { + "epoch": 47.29945054945055, + "grad_norm": 10.306631088256836, + "learning_rate": 2.6350274725274725e-05, + "loss": 0.1495, + "step": 17217 + }, + { + "epoch": 47.3021978021978, + "grad_norm": 11.296248435974121, + "learning_rate": 2.6348901098901102e-05, + "loss": 0.2025, + "step": 17218 + }, + { + "epoch": 47.30494505494506, + "grad_norm": 9.125865936279297, + "learning_rate": 2.6347527472527472e-05, + "loss": 0.4098, + "step": 17219 + }, + { + "epoch": 47.30769230769231, + "grad_norm": 9.768671035766602, + "learning_rate": 2.634615384615385e-05, + "loss": 0.354, + "step": 17220 + }, + { + "epoch": 47.31043956043956, + "grad_norm": 14.94801139831543, + "learning_rate": 2.634478021978022e-05, + "loss": 0.4169, + "step": 17221 + }, + { + "epoch": 47.31318681318681, + "grad_norm": 21.393226623535156, + "learning_rate": 2.6343406593406596e-05, + "loss": 0.5551, + "step": 17222 + }, + { + "epoch": 47.315934065934066, + "grad_norm": 8.995244979858398, + "learning_rate": 2.634203296703297e-05, + "loss": 0.3154, + "step": 17223 + }, + { + "epoch": 47.31868131868132, + "grad_norm": 5.865374565124512, + "learning_rate": 2.634065934065934e-05, + "loss": 0.1191, + "step": 17224 + }, + { + "epoch": 47.32142857142857, + "grad_norm": 11.155729293823242, + "learning_rate": 2.6339285714285716e-05, + "loss": 0.2732, + "step": 17225 + }, + { + "epoch": 47.324175824175825, + "grad_norm": 11.79899787902832, + "learning_rate": 2.6337912087912086e-05, + "loss": 0.543, + "step": 17226 + }, + { + "epoch": 47.32692307692308, + "grad_norm": 4.9680914878845215, + "learning_rate": 2.6336538461538463e-05, + "loss": 0.0865, + "step": 17227 + }, + { + "epoch": 47.32967032967033, + "grad_norm": 3.8358359336853027, + "learning_rate": 2.633516483516484e-05, + "loss": 0.0676, + "step": 17228 + }, + { + "epoch": 47.332417582417584, + "grad_norm": 9.245885848999023, + "learning_rate": 2.633379120879121e-05, + "loss": 0.1638, + "step": 17229 + }, + { + "epoch": 47.33516483516483, + "grad_norm": 13.845090866088867, + "learning_rate": 2.6332417582417583e-05, + "loss": 0.3411, + "step": 17230 + }, + { + "epoch": 47.33791208791209, + "grad_norm": 8.697348594665527, + "learning_rate": 2.6331043956043953e-05, + "loss": 0.341, + "step": 17231 + }, + { + "epoch": 47.34065934065934, + "grad_norm": 9.570517539978027, + "learning_rate": 2.632967032967033e-05, + "loss": 0.2431, + "step": 17232 + }, + { + "epoch": 47.34340659340659, + "grad_norm": 11.575982093811035, + "learning_rate": 2.6328296703296707e-05, + "loss": 0.2052, + "step": 17233 + }, + { + "epoch": 47.34615384615385, + "grad_norm": 13.359431266784668, + "learning_rate": 2.6326923076923077e-05, + "loss": 0.2643, + "step": 17234 + }, + { + "epoch": 47.3489010989011, + "grad_norm": 13.059806823730469, + "learning_rate": 2.6325549450549454e-05, + "loss": 0.2548, + "step": 17235 + }, + { + "epoch": 47.35164835164835, + "grad_norm": 20.189184188842773, + "learning_rate": 2.6324175824175824e-05, + "loss": 0.5048, + "step": 17236 + }, + { + "epoch": 47.354395604395606, + "grad_norm": 11.51082992553711, + "learning_rate": 2.63228021978022e-05, + "loss": 0.3587, + "step": 17237 + }, + { + "epoch": 47.357142857142854, + "grad_norm": 10.080635070800781, + "learning_rate": 2.6321428571428574e-05, + "loss": 0.5167, + "step": 17238 + }, + { + "epoch": 47.35989010989011, + "grad_norm": 11.56709098815918, + "learning_rate": 2.6320054945054944e-05, + "loss": 0.2562, + "step": 17239 + }, + { + "epoch": 47.362637362637365, + "grad_norm": 18.294946670532227, + "learning_rate": 2.631868131868132e-05, + "loss": 0.3811, + "step": 17240 + }, + { + "epoch": 47.36538461538461, + "grad_norm": 16.52008056640625, + "learning_rate": 2.631730769230769e-05, + "loss": 0.4451, + "step": 17241 + }, + { + "epoch": 47.36813186813187, + "grad_norm": 7.169119834899902, + "learning_rate": 2.6315934065934067e-05, + "loss": 0.146, + "step": 17242 + }, + { + "epoch": 47.370879120879124, + "grad_norm": 15.880084991455078, + "learning_rate": 2.6314560439560444e-05, + "loss": 0.2465, + "step": 17243 + }, + { + "epoch": 47.37362637362637, + "grad_norm": 7.9049553871154785, + "learning_rate": 2.6313186813186814e-05, + "loss": 0.1307, + "step": 17244 + }, + { + "epoch": 47.37637362637363, + "grad_norm": 13.296385765075684, + "learning_rate": 2.6311813186813188e-05, + "loss": 0.2878, + "step": 17245 + }, + { + "epoch": 47.379120879120876, + "grad_norm": 11.929619789123535, + "learning_rate": 2.6310439560439558e-05, + "loss": 0.1999, + "step": 17246 + }, + { + "epoch": 47.38186813186813, + "grad_norm": 19.41872787475586, + "learning_rate": 2.6309065934065934e-05, + "loss": 0.4683, + "step": 17247 + }, + { + "epoch": 47.38461538461539, + "grad_norm": 14.20943546295166, + "learning_rate": 2.630769230769231e-05, + "loss": 0.4899, + "step": 17248 + }, + { + "epoch": 47.387362637362635, + "grad_norm": 10.563318252563477, + "learning_rate": 2.630631868131868e-05, + "loss": 0.2258, + "step": 17249 + }, + { + "epoch": 47.39010989010989, + "grad_norm": 11.81131362915039, + "learning_rate": 2.6304945054945058e-05, + "loss": 0.2895, + "step": 17250 + }, + { + "epoch": 47.392857142857146, + "grad_norm": 10.057950973510742, + "learning_rate": 2.6303571428571428e-05, + "loss": 0.236, + "step": 17251 + }, + { + "epoch": 47.395604395604394, + "grad_norm": 12.77086067199707, + "learning_rate": 2.6302197802197805e-05, + "loss": 0.3004, + "step": 17252 + }, + { + "epoch": 47.39835164835165, + "grad_norm": 16.54129981994629, + "learning_rate": 2.6300824175824178e-05, + "loss": 0.5743, + "step": 17253 + }, + { + "epoch": 47.4010989010989, + "grad_norm": 9.808599472045898, + "learning_rate": 2.6299450549450548e-05, + "loss": 0.256, + "step": 17254 + }, + { + "epoch": 47.40384615384615, + "grad_norm": 15.581132888793945, + "learning_rate": 2.6298076923076925e-05, + "loss": 0.435, + "step": 17255 + }, + { + "epoch": 47.40659340659341, + "grad_norm": 15.11998176574707, + "learning_rate": 2.6296703296703295e-05, + "loss": 0.4662, + "step": 17256 + }, + { + "epoch": 47.40934065934066, + "grad_norm": 20.37786293029785, + "learning_rate": 2.6295329670329672e-05, + "loss": 0.6506, + "step": 17257 + }, + { + "epoch": 47.41208791208791, + "grad_norm": 7.242435932159424, + "learning_rate": 2.629395604395605e-05, + "loss": 0.0771, + "step": 17258 + }, + { + "epoch": 47.41483516483517, + "grad_norm": 15.407134056091309, + "learning_rate": 2.629258241758242e-05, + "loss": 0.3915, + "step": 17259 + }, + { + "epoch": 47.417582417582416, + "grad_norm": 19.560720443725586, + "learning_rate": 2.6291208791208792e-05, + "loss": 0.6903, + "step": 17260 + }, + { + "epoch": 47.42032967032967, + "grad_norm": 14.846457481384277, + "learning_rate": 2.6289835164835162e-05, + "loss": 0.2874, + "step": 17261 + }, + { + "epoch": 47.42307692307692, + "grad_norm": 16.1901912689209, + "learning_rate": 2.628846153846154e-05, + "loss": 0.2472, + "step": 17262 + }, + { + "epoch": 47.425824175824175, + "grad_norm": 9.149925231933594, + "learning_rate": 2.6287087912087916e-05, + "loss": 0.3075, + "step": 17263 + }, + { + "epoch": 47.42857142857143, + "grad_norm": 14.18908977508545, + "learning_rate": 2.6285714285714286e-05, + "loss": 0.3306, + "step": 17264 + }, + { + "epoch": 47.43131868131868, + "grad_norm": 11.436803817749023, + "learning_rate": 2.6284340659340663e-05, + "loss": 0.2581, + "step": 17265 + }, + { + "epoch": 47.434065934065934, + "grad_norm": 16.426719665527344, + "learning_rate": 2.6282967032967033e-05, + "loss": 0.3123, + "step": 17266 + }, + { + "epoch": 47.43681318681319, + "grad_norm": 13.166582107543945, + "learning_rate": 2.628159340659341e-05, + "loss": 0.4238, + "step": 17267 + }, + { + "epoch": 47.43956043956044, + "grad_norm": 23.591426849365234, + "learning_rate": 2.6280219780219783e-05, + "loss": 0.7862, + "step": 17268 + }, + { + "epoch": 47.44230769230769, + "grad_norm": 9.160791397094727, + "learning_rate": 2.6278846153846153e-05, + "loss": 0.1747, + "step": 17269 + }, + { + "epoch": 47.44505494505494, + "grad_norm": 17.91855239868164, + "learning_rate": 2.627747252747253e-05, + "loss": 0.497, + "step": 17270 + }, + { + "epoch": 47.4478021978022, + "grad_norm": 16.59986114501953, + "learning_rate": 2.62760989010989e-05, + "loss": 0.3391, + "step": 17271 + }, + { + "epoch": 47.45054945054945, + "grad_norm": 4.0809125900268555, + "learning_rate": 2.6274725274725276e-05, + "loss": 0.1171, + "step": 17272 + }, + { + "epoch": 47.4532967032967, + "grad_norm": 17.500568389892578, + "learning_rate": 2.6273351648351653e-05, + "loss": 0.3701, + "step": 17273 + }, + { + "epoch": 47.456043956043956, + "grad_norm": 18.889633178710938, + "learning_rate": 2.6271978021978023e-05, + "loss": 0.5877, + "step": 17274 + }, + { + "epoch": 47.45879120879121, + "grad_norm": 18.57775115966797, + "learning_rate": 2.6270604395604397e-05, + "loss": 0.6746, + "step": 17275 + }, + { + "epoch": 47.46153846153846, + "grad_norm": 9.508074760437012, + "learning_rate": 2.6269230769230767e-05, + "loss": 0.2124, + "step": 17276 + }, + { + "epoch": 47.464285714285715, + "grad_norm": 5.141617774963379, + "learning_rate": 2.6267857142857143e-05, + "loss": 0.1341, + "step": 17277 + }, + { + "epoch": 47.467032967032964, + "grad_norm": 20.210601806640625, + "learning_rate": 2.626648351648352e-05, + "loss": 0.6127, + "step": 17278 + }, + { + "epoch": 47.46978021978022, + "grad_norm": 10.455612182617188, + "learning_rate": 2.626510989010989e-05, + "loss": 0.3155, + "step": 17279 + }, + { + "epoch": 47.472527472527474, + "grad_norm": 11.48849868774414, + "learning_rate": 2.6263736263736267e-05, + "loss": 0.3336, + "step": 17280 + }, + { + "epoch": 47.47527472527472, + "grad_norm": 3.4944024085998535, + "learning_rate": 2.6262362637362637e-05, + "loss": 0.0916, + "step": 17281 + }, + { + "epoch": 47.47802197802198, + "grad_norm": 15.810354232788086, + "learning_rate": 2.6260989010989014e-05, + "loss": 0.6072, + "step": 17282 + }, + { + "epoch": 47.48076923076923, + "grad_norm": 8.546021461486816, + "learning_rate": 2.6259615384615387e-05, + "loss": 0.2711, + "step": 17283 + }, + { + "epoch": 47.48351648351648, + "grad_norm": 13.423983573913574, + "learning_rate": 2.6258241758241757e-05, + "loss": 0.1854, + "step": 17284 + }, + { + "epoch": 47.48626373626374, + "grad_norm": 12.848628997802734, + "learning_rate": 2.6256868131868134e-05, + "loss": 0.3081, + "step": 17285 + }, + { + "epoch": 47.489010989010985, + "grad_norm": 3.6433751583099365, + "learning_rate": 2.6255494505494504e-05, + "loss": 0.0881, + "step": 17286 + }, + { + "epoch": 47.49175824175824, + "grad_norm": 6.73231840133667, + "learning_rate": 2.625412087912088e-05, + "loss": 0.1439, + "step": 17287 + }, + { + "epoch": 47.494505494505496, + "grad_norm": 6.8422393798828125, + "learning_rate": 2.6252747252747258e-05, + "loss": 0.0943, + "step": 17288 + }, + { + "epoch": 47.497252747252745, + "grad_norm": 6.101145267486572, + "learning_rate": 2.6251373626373628e-05, + "loss": 0.09, + "step": 17289 + }, + { + "epoch": 47.5, + "grad_norm": 9.278286933898926, + "learning_rate": 2.625e-05, + "loss": 0.253, + "step": 17290 + }, + { + "epoch": 47.502747252747255, + "grad_norm": 4.909732818603516, + "learning_rate": 2.624862637362637e-05, + "loss": 0.1458, + "step": 17291 + }, + { + "epoch": 47.505494505494504, + "grad_norm": 10.137974739074707, + "learning_rate": 2.6247252747252748e-05, + "loss": 0.1457, + "step": 17292 + }, + { + "epoch": 47.50824175824176, + "grad_norm": 7.078355312347412, + "learning_rate": 2.6245879120879125e-05, + "loss": 0.1224, + "step": 17293 + }, + { + "epoch": 47.51098901098901, + "grad_norm": 5.228044033050537, + "learning_rate": 2.6244505494505495e-05, + "loss": 0.072, + "step": 17294 + }, + { + "epoch": 47.51373626373626, + "grad_norm": 4.547136306762695, + "learning_rate": 2.624313186813187e-05, + "loss": 0.1029, + "step": 17295 + }, + { + "epoch": 47.51648351648352, + "grad_norm": 8.494446754455566, + "learning_rate": 2.624175824175824e-05, + "loss": 0.1737, + "step": 17296 + }, + { + "epoch": 47.51923076923077, + "grad_norm": 9.433069229125977, + "learning_rate": 2.624038461538462e-05, + "loss": 0.3507, + "step": 17297 + }, + { + "epoch": 47.52197802197802, + "grad_norm": 14.85103702545166, + "learning_rate": 2.6239010989010992e-05, + "loss": 0.4516, + "step": 17298 + }, + { + "epoch": 47.52472527472528, + "grad_norm": 12.994197845458984, + "learning_rate": 2.6237637362637362e-05, + "loss": 0.3025, + "step": 17299 + }, + { + "epoch": 47.527472527472526, + "grad_norm": 5.158896446228027, + "learning_rate": 2.623626373626374e-05, + "loss": 0.1097, + "step": 17300 + }, + { + "epoch": 47.53021978021978, + "grad_norm": 6.655899524688721, + "learning_rate": 2.623489010989011e-05, + "loss": 0.1151, + "step": 17301 + }, + { + "epoch": 47.532967032967036, + "grad_norm": 8.090664863586426, + "learning_rate": 2.6233516483516485e-05, + "loss": 0.2918, + "step": 17302 + }, + { + "epoch": 47.535714285714285, + "grad_norm": 3.835397243499756, + "learning_rate": 2.6232142857142862e-05, + "loss": 0.0627, + "step": 17303 + }, + { + "epoch": 47.53846153846154, + "grad_norm": 7.833704948425293, + "learning_rate": 2.6230769230769232e-05, + "loss": 0.0835, + "step": 17304 + }, + { + "epoch": 47.54120879120879, + "grad_norm": 6.547387599945068, + "learning_rate": 2.6229395604395606e-05, + "loss": 0.0695, + "step": 17305 + }, + { + "epoch": 47.543956043956044, + "grad_norm": 11.237861633300781, + "learning_rate": 2.6228021978021976e-05, + "loss": 0.2145, + "step": 17306 + }, + { + "epoch": 47.5467032967033, + "grad_norm": 6.104776859283447, + "learning_rate": 2.6226648351648352e-05, + "loss": 0.1076, + "step": 17307 + }, + { + "epoch": 47.54945054945055, + "grad_norm": 19.15179443359375, + "learning_rate": 2.622527472527473e-05, + "loss": 0.3043, + "step": 17308 + }, + { + "epoch": 47.5521978021978, + "grad_norm": 21.57694435119629, + "learning_rate": 2.62239010989011e-05, + "loss": 0.5794, + "step": 17309 + }, + { + "epoch": 47.55494505494506, + "grad_norm": 4.453803539276123, + "learning_rate": 2.6222527472527476e-05, + "loss": 0.0461, + "step": 17310 + }, + { + "epoch": 47.55769230769231, + "grad_norm": 9.084609985351562, + "learning_rate": 2.6221153846153846e-05, + "loss": 0.2725, + "step": 17311 + }, + { + "epoch": 47.56043956043956, + "grad_norm": 21.558795928955078, + "learning_rate": 2.6219780219780223e-05, + "loss": 0.4934, + "step": 17312 + }, + { + "epoch": 47.56318681318681, + "grad_norm": 11.19825553894043, + "learning_rate": 2.6218406593406596e-05, + "loss": 0.1184, + "step": 17313 + }, + { + "epoch": 47.565934065934066, + "grad_norm": 14.86231517791748, + "learning_rate": 2.6217032967032966e-05, + "loss": 0.3216, + "step": 17314 + }, + { + "epoch": 47.56868131868132, + "grad_norm": 9.280162811279297, + "learning_rate": 2.6215659340659343e-05, + "loss": 0.2008, + "step": 17315 + }, + { + "epoch": 47.57142857142857, + "grad_norm": 16.465614318847656, + "learning_rate": 2.6214285714285713e-05, + "loss": 0.5812, + "step": 17316 + }, + { + "epoch": 47.574175824175825, + "grad_norm": 18.260591506958008, + "learning_rate": 2.621291208791209e-05, + "loss": 0.4609, + "step": 17317 + }, + { + "epoch": 47.57692307692308, + "grad_norm": 23.659624099731445, + "learning_rate": 2.6211538461538467e-05, + "loss": 0.6417, + "step": 17318 + }, + { + "epoch": 47.57967032967033, + "grad_norm": 6.972861289978027, + "learning_rate": 2.6210164835164837e-05, + "loss": 0.1327, + "step": 17319 + }, + { + "epoch": 47.582417582417584, + "grad_norm": 8.53467082977295, + "learning_rate": 2.620879120879121e-05, + "loss": 0.1077, + "step": 17320 + }, + { + "epoch": 47.58516483516483, + "grad_norm": 12.53383731842041, + "learning_rate": 2.620741758241758e-05, + "loss": 0.4237, + "step": 17321 + }, + { + "epoch": 47.58791208791209, + "grad_norm": 14.494232177734375, + "learning_rate": 2.6206043956043957e-05, + "loss": 0.3605, + "step": 17322 + }, + { + "epoch": 47.59065934065934, + "grad_norm": 16.51168441772461, + "learning_rate": 2.6204670329670334e-05, + "loss": 0.3371, + "step": 17323 + }, + { + "epoch": 47.59340659340659, + "grad_norm": 6.361389636993408, + "learning_rate": 2.6203296703296704e-05, + "loss": 0.1294, + "step": 17324 + }, + { + "epoch": 47.59615384615385, + "grad_norm": 1.5418332815170288, + "learning_rate": 2.620192307692308e-05, + "loss": 0.027, + "step": 17325 + }, + { + "epoch": 47.5989010989011, + "grad_norm": 8.983003616333008, + "learning_rate": 2.620054945054945e-05, + "loss": 0.1494, + "step": 17326 + }, + { + "epoch": 47.60164835164835, + "grad_norm": 4.0462541580200195, + "learning_rate": 2.6199175824175827e-05, + "loss": 0.0818, + "step": 17327 + }, + { + "epoch": 47.604395604395606, + "grad_norm": 2.3993465900421143, + "learning_rate": 2.61978021978022e-05, + "loss": 0.0376, + "step": 17328 + }, + { + "epoch": 47.607142857142854, + "grad_norm": 5.628002643585205, + "learning_rate": 2.619642857142857e-05, + "loss": 0.1004, + "step": 17329 + }, + { + "epoch": 47.60989010989011, + "grad_norm": 2.2850615978240967, + "learning_rate": 2.6195054945054948e-05, + "loss": 0.0407, + "step": 17330 + }, + { + "epoch": 47.612637362637365, + "grad_norm": 16.33994483947754, + "learning_rate": 2.6193681318681318e-05, + "loss": 0.4594, + "step": 17331 + }, + { + "epoch": 47.61538461538461, + "grad_norm": 8.812907218933105, + "learning_rate": 2.6192307692307694e-05, + "loss": 0.2412, + "step": 17332 + }, + { + "epoch": 47.61813186813187, + "grad_norm": 18.58852195739746, + "learning_rate": 2.619093406593407e-05, + "loss": 0.6782, + "step": 17333 + }, + { + "epoch": 47.620879120879124, + "grad_norm": 9.919376373291016, + "learning_rate": 2.618956043956044e-05, + "loss": 0.207, + "step": 17334 + }, + { + "epoch": 47.62362637362637, + "grad_norm": 10.637526512145996, + "learning_rate": 2.6188186813186815e-05, + "loss": 0.184, + "step": 17335 + }, + { + "epoch": 47.62637362637363, + "grad_norm": 14.506207466125488, + "learning_rate": 2.6186813186813185e-05, + "loss": 0.2419, + "step": 17336 + }, + { + "epoch": 47.629120879120876, + "grad_norm": 14.455160140991211, + "learning_rate": 2.618543956043956e-05, + "loss": 0.2827, + "step": 17337 + }, + { + "epoch": 47.63186813186813, + "grad_norm": 20.032899856567383, + "learning_rate": 2.6184065934065938e-05, + "loss": 0.5001, + "step": 17338 + }, + { + "epoch": 47.63461538461539, + "grad_norm": 15.542850494384766, + "learning_rate": 2.6182692307692308e-05, + "loss": 0.2242, + "step": 17339 + }, + { + "epoch": 47.637362637362635, + "grad_norm": 14.18270206451416, + "learning_rate": 2.6181318681318685e-05, + "loss": 0.4239, + "step": 17340 + }, + { + "epoch": 47.64010989010989, + "grad_norm": 13.239116668701172, + "learning_rate": 2.6179945054945055e-05, + "loss": 0.1156, + "step": 17341 + }, + { + "epoch": 47.642857142857146, + "grad_norm": 3.8800222873687744, + "learning_rate": 2.617857142857143e-05, + "loss": 0.0792, + "step": 17342 + }, + { + "epoch": 47.645604395604394, + "grad_norm": 11.92992877960205, + "learning_rate": 2.6177197802197805e-05, + "loss": 0.3888, + "step": 17343 + }, + { + "epoch": 47.64835164835165, + "grad_norm": 10.823426246643066, + "learning_rate": 2.6175824175824175e-05, + "loss": 0.2316, + "step": 17344 + }, + { + "epoch": 47.6510989010989, + "grad_norm": 8.43618106842041, + "learning_rate": 2.6174450549450552e-05, + "loss": 0.1365, + "step": 17345 + }, + { + "epoch": 47.65384615384615, + "grad_norm": 4.421975135803223, + "learning_rate": 2.6173076923076922e-05, + "loss": 0.0615, + "step": 17346 + }, + { + "epoch": 47.65659340659341, + "grad_norm": 15.680882453918457, + "learning_rate": 2.61717032967033e-05, + "loss": 0.5088, + "step": 17347 + }, + { + "epoch": 47.65934065934066, + "grad_norm": 18.886001586914062, + "learning_rate": 2.6170329670329676e-05, + "loss": 0.3854, + "step": 17348 + }, + { + "epoch": 47.66208791208791, + "grad_norm": 3.8420157432556152, + "learning_rate": 2.6168956043956046e-05, + "loss": 0.1016, + "step": 17349 + }, + { + "epoch": 47.66483516483517, + "grad_norm": 15.823081970214844, + "learning_rate": 2.616758241758242e-05, + "loss": 0.3484, + "step": 17350 + }, + { + "epoch": 47.667582417582416, + "grad_norm": 13.217544555664062, + "learning_rate": 2.616620879120879e-05, + "loss": 0.344, + "step": 17351 + }, + { + "epoch": 47.67032967032967, + "grad_norm": 6.219814777374268, + "learning_rate": 2.6164835164835166e-05, + "loss": 0.1359, + "step": 17352 + }, + { + "epoch": 47.67307692307692, + "grad_norm": 10.283029556274414, + "learning_rate": 2.6163461538461543e-05, + "loss": 0.1399, + "step": 17353 + }, + { + "epoch": 47.675824175824175, + "grad_norm": 8.138348579406738, + "learning_rate": 2.6162087912087913e-05, + "loss": 0.2527, + "step": 17354 + }, + { + "epoch": 47.67857142857143, + "grad_norm": 14.1920804977417, + "learning_rate": 2.616071428571429e-05, + "loss": 0.2128, + "step": 17355 + }, + { + "epoch": 47.68131868131868, + "grad_norm": 14.292926788330078, + "learning_rate": 2.615934065934066e-05, + "loss": 0.3507, + "step": 17356 + }, + { + "epoch": 47.684065934065934, + "grad_norm": 5.110658645629883, + "learning_rate": 2.6157967032967033e-05, + "loss": 0.0699, + "step": 17357 + }, + { + "epoch": 47.68681318681319, + "grad_norm": 20.652973175048828, + "learning_rate": 2.615659340659341e-05, + "loss": 0.3428, + "step": 17358 + }, + { + "epoch": 47.68956043956044, + "grad_norm": 5.067534446716309, + "learning_rate": 2.615521978021978e-05, + "loss": 0.0821, + "step": 17359 + }, + { + "epoch": 47.69230769230769, + "grad_norm": 9.560684204101562, + "learning_rate": 2.6153846153846157e-05, + "loss": 0.3145, + "step": 17360 + }, + { + "epoch": 47.69505494505494, + "grad_norm": 8.976749420166016, + "learning_rate": 2.6152472527472527e-05, + "loss": 0.1148, + "step": 17361 + }, + { + "epoch": 47.6978021978022, + "grad_norm": 9.919673919677734, + "learning_rate": 2.6151098901098903e-05, + "loss": 0.2368, + "step": 17362 + }, + { + "epoch": 47.70054945054945, + "grad_norm": 24.424129486083984, + "learning_rate": 2.614972527472528e-05, + "loss": 0.4552, + "step": 17363 + }, + { + "epoch": 47.7032967032967, + "grad_norm": 23.734235763549805, + "learning_rate": 2.614835164835165e-05, + "loss": 0.4585, + "step": 17364 + }, + { + "epoch": 47.706043956043956, + "grad_norm": 8.511262893676758, + "learning_rate": 2.6146978021978024e-05, + "loss": 0.1421, + "step": 17365 + }, + { + "epoch": 47.70879120879121, + "grad_norm": 19.240068435668945, + "learning_rate": 2.6145604395604394e-05, + "loss": 0.7138, + "step": 17366 + }, + { + "epoch": 47.71153846153846, + "grad_norm": 19.538862228393555, + "learning_rate": 2.614423076923077e-05, + "loss": 0.5895, + "step": 17367 + }, + { + "epoch": 47.714285714285715, + "grad_norm": 9.58764934539795, + "learning_rate": 2.6142857142857147e-05, + "loss": 0.2254, + "step": 17368 + }, + { + "epoch": 47.717032967032964, + "grad_norm": 26.403095245361328, + "learning_rate": 2.6141483516483517e-05, + "loss": 0.7229, + "step": 17369 + }, + { + "epoch": 47.71978021978022, + "grad_norm": 7.441641807556152, + "learning_rate": 2.6140109890109894e-05, + "loss": 0.1263, + "step": 17370 + }, + { + "epoch": 47.722527472527474, + "grad_norm": 2.7061729431152344, + "learning_rate": 2.6138736263736264e-05, + "loss": 0.0452, + "step": 17371 + }, + { + "epoch": 47.72527472527472, + "grad_norm": 9.263777732849121, + "learning_rate": 2.6137362637362637e-05, + "loss": 0.2191, + "step": 17372 + }, + { + "epoch": 47.72802197802198, + "grad_norm": 9.465581893920898, + "learning_rate": 2.6135989010989014e-05, + "loss": 0.158, + "step": 17373 + }, + { + "epoch": 47.73076923076923, + "grad_norm": 8.36858081817627, + "learning_rate": 2.6134615384615384e-05, + "loss": 0.1227, + "step": 17374 + }, + { + "epoch": 47.73351648351648, + "grad_norm": 14.611286163330078, + "learning_rate": 2.613324175824176e-05, + "loss": 0.5583, + "step": 17375 + }, + { + "epoch": 47.73626373626374, + "grad_norm": 10.520820617675781, + "learning_rate": 2.613186813186813e-05, + "loss": 0.2468, + "step": 17376 + }, + { + "epoch": 47.73901098901099, + "grad_norm": 9.981287002563477, + "learning_rate": 2.6130494505494508e-05, + "loss": 0.1713, + "step": 17377 + }, + { + "epoch": 47.74175824175824, + "grad_norm": 10.440160751342773, + "learning_rate": 2.6129120879120885e-05, + "loss": 0.2147, + "step": 17378 + }, + { + "epoch": 47.744505494505496, + "grad_norm": 2.695249557495117, + "learning_rate": 2.6127747252747255e-05, + "loss": 0.0385, + "step": 17379 + }, + { + "epoch": 47.747252747252745, + "grad_norm": 15.39649486541748, + "learning_rate": 2.6126373626373628e-05, + "loss": 0.2598, + "step": 17380 + }, + { + "epoch": 47.75, + "grad_norm": 9.48596477508545, + "learning_rate": 2.6124999999999998e-05, + "loss": 0.0871, + "step": 17381 + }, + { + "epoch": 47.752747252747255, + "grad_norm": 12.679668426513672, + "learning_rate": 2.6123626373626375e-05, + "loss": 0.2518, + "step": 17382 + }, + { + "epoch": 47.755494505494504, + "grad_norm": 3.414201498031616, + "learning_rate": 2.612225274725275e-05, + "loss": 0.0498, + "step": 17383 + }, + { + "epoch": 47.75824175824176, + "grad_norm": 3.101267099380493, + "learning_rate": 2.612087912087912e-05, + "loss": 0.0623, + "step": 17384 + }, + { + "epoch": 47.76098901098901, + "grad_norm": 6.375417709350586, + "learning_rate": 2.61195054945055e-05, + "loss": 0.16, + "step": 17385 + }, + { + "epoch": 47.76373626373626, + "grad_norm": 16.781862258911133, + "learning_rate": 2.611813186813187e-05, + "loss": 0.5951, + "step": 17386 + }, + { + "epoch": 47.76648351648352, + "grad_norm": 13.312017440795898, + "learning_rate": 2.6116758241758242e-05, + "loss": 0.2596, + "step": 17387 + }, + { + "epoch": 47.76923076923077, + "grad_norm": 25.641084671020508, + "learning_rate": 2.611538461538462e-05, + "loss": 0.6668, + "step": 17388 + }, + { + "epoch": 47.77197802197802, + "grad_norm": 21.21333122253418, + "learning_rate": 2.611401098901099e-05, + "loss": 0.3418, + "step": 17389 + }, + { + "epoch": 47.77472527472528, + "grad_norm": 12.028106689453125, + "learning_rate": 2.6112637362637365e-05, + "loss": 0.3716, + "step": 17390 + }, + { + "epoch": 47.777472527472526, + "grad_norm": 11.22791862487793, + "learning_rate": 2.6111263736263736e-05, + "loss": 0.3743, + "step": 17391 + }, + { + "epoch": 47.78021978021978, + "grad_norm": 11.197015762329102, + "learning_rate": 2.6109890109890112e-05, + "loss": 0.2323, + "step": 17392 + }, + { + "epoch": 47.782967032967036, + "grad_norm": 8.131093978881836, + "learning_rate": 2.610851648351649e-05, + "loss": 0.2079, + "step": 17393 + }, + { + "epoch": 47.785714285714285, + "grad_norm": 16.98356819152832, + "learning_rate": 2.610714285714286e-05, + "loss": 0.4363, + "step": 17394 + }, + { + "epoch": 47.78846153846154, + "grad_norm": 13.164422035217285, + "learning_rate": 2.6105769230769233e-05, + "loss": 0.245, + "step": 17395 + }, + { + "epoch": 47.79120879120879, + "grad_norm": 13.025799751281738, + "learning_rate": 2.6104395604395603e-05, + "loss": 0.1885, + "step": 17396 + }, + { + "epoch": 47.793956043956044, + "grad_norm": 17.567745208740234, + "learning_rate": 2.610302197802198e-05, + "loss": 0.2662, + "step": 17397 + }, + { + "epoch": 47.7967032967033, + "grad_norm": 8.56596851348877, + "learning_rate": 2.6101648351648356e-05, + "loss": 0.1331, + "step": 17398 + }, + { + "epoch": 47.79945054945055, + "grad_norm": 15.274518013000488, + "learning_rate": 2.6100274725274726e-05, + "loss": 0.4857, + "step": 17399 + }, + { + "epoch": 47.8021978021978, + "grad_norm": 10.130899429321289, + "learning_rate": 2.6098901098901103e-05, + "loss": 0.2414, + "step": 17400 + }, + { + "epoch": 47.80494505494506, + "grad_norm": 24.056198120117188, + "learning_rate": 2.6097527472527473e-05, + "loss": 0.566, + "step": 17401 + }, + { + "epoch": 47.80769230769231, + "grad_norm": 14.803784370422363, + "learning_rate": 2.6096153846153846e-05, + "loss": 0.2097, + "step": 17402 + }, + { + "epoch": 47.81043956043956, + "grad_norm": 14.043143272399902, + "learning_rate": 2.6094780219780223e-05, + "loss": 0.3102, + "step": 17403 + }, + { + "epoch": 47.81318681318681, + "grad_norm": 14.861841201782227, + "learning_rate": 2.6093406593406593e-05, + "loss": 0.4673, + "step": 17404 + }, + { + "epoch": 47.815934065934066, + "grad_norm": 9.781205177307129, + "learning_rate": 2.609203296703297e-05, + "loss": 0.1791, + "step": 17405 + }, + { + "epoch": 47.81868131868132, + "grad_norm": 15.753067016601562, + "learning_rate": 2.609065934065934e-05, + "loss": 0.3467, + "step": 17406 + }, + { + "epoch": 47.82142857142857, + "grad_norm": 19.248815536499023, + "learning_rate": 2.6089285714285717e-05, + "loss": 0.6019, + "step": 17407 + }, + { + "epoch": 47.824175824175825, + "grad_norm": 16.796445846557617, + "learning_rate": 2.6087912087912087e-05, + "loss": 0.585, + "step": 17408 + }, + { + "epoch": 47.82692307692308, + "grad_norm": 17.19902801513672, + "learning_rate": 2.6086538461538464e-05, + "loss": 0.5967, + "step": 17409 + }, + { + "epoch": 47.82967032967033, + "grad_norm": 15.381000518798828, + "learning_rate": 2.6085164835164837e-05, + "loss": 0.5689, + "step": 17410 + }, + { + "epoch": 47.832417582417584, + "grad_norm": 6.305978298187256, + "learning_rate": 2.6083791208791207e-05, + "loss": 0.0986, + "step": 17411 + }, + { + "epoch": 47.83516483516483, + "grad_norm": 21.652257919311523, + "learning_rate": 2.6082417582417584e-05, + "loss": 0.6163, + "step": 17412 + }, + { + "epoch": 47.83791208791209, + "grad_norm": 3.940342664718628, + "learning_rate": 2.6081043956043954e-05, + "loss": 0.0906, + "step": 17413 + }, + { + "epoch": 47.84065934065934, + "grad_norm": 10.656538009643555, + "learning_rate": 2.607967032967033e-05, + "loss": 0.204, + "step": 17414 + }, + { + "epoch": 47.84340659340659, + "grad_norm": 15.382559776306152, + "learning_rate": 2.6078296703296707e-05, + "loss": 0.361, + "step": 17415 + }, + { + "epoch": 47.84615384615385, + "grad_norm": 13.741037368774414, + "learning_rate": 2.6076923076923077e-05, + "loss": 0.1814, + "step": 17416 + }, + { + "epoch": 47.8489010989011, + "grad_norm": 6.161917686462402, + "learning_rate": 2.607554945054945e-05, + "loss": 0.1453, + "step": 17417 + }, + { + "epoch": 47.85164835164835, + "grad_norm": 8.04800033569336, + "learning_rate": 2.6074175824175824e-05, + "loss": 0.1221, + "step": 17418 + }, + { + "epoch": 47.854395604395606, + "grad_norm": 6.077328681945801, + "learning_rate": 2.6072802197802198e-05, + "loss": 0.1494, + "step": 17419 + }, + { + "epoch": 47.857142857142854, + "grad_norm": 20.763673782348633, + "learning_rate": 2.6071428571428574e-05, + "loss": 0.4248, + "step": 17420 + }, + { + "epoch": 47.85989010989011, + "grad_norm": 14.414860725402832, + "learning_rate": 2.6070054945054944e-05, + "loss": 0.4159, + "step": 17421 + }, + { + "epoch": 47.862637362637365, + "grad_norm": 5.799196720123291, + "learning_rate": 2.606868131868132e-05, + "loss": 0.0735, + "step": 17422 + }, + { + "epoch": 47.86538461538461, + "grad_norm": 8.908646583557129, + "learning_rate": 2.606730769230769e-05, + "loss": 0.2033, + "step": 17423 + }, + { + "epoch": 47.86813186813187, + "grad_norm": 7.514125347137451, + "learning_rate": 2.6065934065934068e-05, + "loss": 0.189, + "step": 17424 + }, + { + "epoch": 47.870879120879124, + "grad_norm": 10.658056259155273, + "learning_rate": 2.606456043956044e-05, + "loss": 0.3069, + "step": 17425 + }, + { + "epoch": 47.87362637362637, + "grad_norm": 16.877229690551758, + "learning_rate": 2.606318681318681e-05, + "loss": 0.5128, + "step": 17426 + }, + { + "epoch": 47.87637362637363, + "grad_norm": 14.235614776611328, + "learning_rate": 2.606181318681319e-05, + "loss": 0.3917, + "step": 17427 + }, + { + "epoch": 47.879120879120876, + "grad_norm": 15.323441505432129, + "learning_rate": 2.606043956043956e-05, + "loss": 0.3476, + "step": 17428 + }, + { + "epoch": 47.88186813186813, + "grad_norm": 4.090129852294922, + "learning_rate": 2.6059065934065935e-05, + "loss": 0.0491, + "step": 17429 + }, + { + "epoch": 47.88461538461539, + "grad_norm": 11.887350082397461, + "learning_rate": 2.6057692307692312e-05, + "loss": 0.2317, + "step": 17430 + }, + { + "epoch": 47.887362637362635, + "grad_norm": 20.236820220947266, + "learning_rate": 2.6056318681318682e-05, + "loss": 0.401, + "step": 17431 + }, + { + "epoch": 47.89010989010989, + "grad_norm": 25.097145080566406, + "learning_rate": 2.6054945054945055e-05, + "loss": 0.6994, + "step": 17432 + }, + { + "epoch": 47.892857142857146, + "grad_norm": 6.284339427947998, + "learning_rate": 2.605357142857143e-05, + "loss": 0.1403, + "step": 17433 + }, + { + "epoch": 47.895604395604394, + "grad_norm": 12.883635520935059, + "learning_rate": 2.6052197802197802e-05, + "loss": 0.2718, + "step": 17434 + }, + { + "epoch": 47.89835164835165, + "grad_norm": 17.417770385742188, + "learning_rate": 2.605082417582418e-05, + "loss": 0.477, + "step": 17435 + }, + { + "epoch": 47.9010989010989, + "grad_norm": 11.848587989807129, + "learning_rate": 2.604945054945055e-05, + "loss": 0.3379, + "step": 17436 + }, + { + "epoch": 47.90384615384615, + "grad_norm": 12.649103164672852, + "learning_rate": 2.6048076923076926e-05, + "loss": 0.4379, + "step": 17437 + }, + { + "epoch": 47.90659340659341, + "grad_norm": 26.23058319091797, + "learning_rate": 2.6046703296703296e-05, + "loss": 0.7563, + "step": 17438 + }, + { + "epoch": 47.90934065934066, + "grad_norm": 8.531590461730957, + "learning_rate": 2.6045329670329673e-05, + "loss": 0.157, + "step": 17439 + }, + { + "epoch": 47.91208791208791, + "grad_norm": 24.834842681884766, + "learning_rate": 2.6043956043956046e-05, + "loss": 0.5526, + "step": 17440 + }, + { + "epoch": 47.91483516483517, + "grad_norm": 10.494952201843262, + "learning_rate": 2.6042582417582416e-05, + "loss": 0.172, + "step": 17441 + }, + { + "epoch": 47.917582417582416, + "grad_norm": 15.35400104522705, + "learning_rate": 2.6041208791208793e-05, + "loss": 0.3855, + "step": 17442 + }, + { + "epoch": 47.92032967032967, + "grad_norm": 19.52849006652832, + "learning_rate": 2.6039835164835163e-05, + "loss": 0.4891, + "step": 17443 + }, + { + "epoch": 47.92307692307692, + "grad_norm": 11.99397087097168, + "learning_rate": 2.603846153846154e-05, + "loss": 0.2604, + "step": 17444 + }, + { + "epoch": 47.925824175824175, + "grad_norm": 7.326359748840332, + "learning_rate": 2.6037087912087916e-05, + "loss": 0.1138, + "step": 17445 + }, + { + "epoch": 47.92857142857143, + "grad_norm": 5.6804609298706055, + "learning_rate": 2.6035714285714286e-05, + "loss": 0.1586, + "step": 17446 + }, + { + "epoch": 47.93131868131868, + "grad_norm": 12.05353832244873, + "learning_rate": 2.603434065934066e-05, + "loss": 0.1835, + "step": 17447 + }, + { + "epoch": 47.934065934065934, + "grad_norm": 17.279991149902344, + "learning_rate": 2.6032967032967033e-05, + "loss": 0.5015, + "step": 17448 + }, + { + "epoch": 47.93681318681319, + "grad_norm": 10.772603988647461, + "learning_rate": 2.6031593406593407e-05, + "loss": 0.2004, + "step": 17449 + }, + { + "epoch": 47.93956043956044, + "grad_norm": 16.799041748046875, + "learning_rate": 2.6030219780219783e-05, + "loss": 0.3888, + "step": 17450 + }, + { + "epoch": 47.94230769230769, + "grad_norm": 24.690080642700195, + "learning_rate": 2.6028846153846153e-05, + "loss": 0.6086, + "step": 17451 + }, + { + "epoch": 47.94505494505494, + "grad_norm": 5.540375709533691, + "learning_rate": 2.602747252747253e-05, + "loss": 0.1055, + "step": 17452 + }, + { + "epoch": 47.9478021978022, + "grad_norm": 6.803598880767822, + "learning_rate": 2.60260989010989e-05, + "loss": 0.1313, + "step": 17453 + }, + { + "epoch": 47.95054945054945, + "grad_norm": 20.551923751831055, + "learning_rate": 2.6024725274725277e-05, + "loss": 0.6473, + "step": 17454 + }, + { + "epoch": 47.9532967032967, + "grad_norm": 6.72373104095459, + "learning_rate": 2.602335164835165e-05, + "loss": 0.0595, + "step": 17455 + }, + { + "epoch": 47.956043956043956, + "grad_norm": 10.701581001281738, + "learning_rate": 2.602197802197802e-05, + "loss": 0.2947, + "step": 17456 + }, + { + "epoch": 47.95879120879121, + "grad_norm": 16.985570907592773, + "learning_rate": 2.6020604395604397e-05, + "loss": 0.3176, + "step": 17457 + }, + { + "epoch": 47.96153846153846, + "grad_norm": 10.400656700134277, + "learning_rate": 2.6019230769230767e-05, + "loss": 0.3288, + "step": 17458 + }, + { + "epoch": 47.964285714285715, + "grad_norm": 1.4587899446487427, + "learning_rate": 2.6017857142857144e-05, + "loss": 0.0159, + "step": 17459 + }, + { + "epoch": 47.967032967032964, + "grad_norm": 5.386379241943359, + "learning_rate": 2.601648351648352e-05, + "loss": 0.0822, + "step": 17460 + }, + { + "epoch": 47.96978021978022, + "grad_norm": 12.015254020690918, + "learning_rate": 2.601510989010989e-05, + "loss": 0.2055, + "step": 17461 + }, + { + "epoch": 47.972527472527474, + "grad_norm": 19.074052810668945, + "learning_rate": 2.6013736263736264e-05, + "loss": 0.3891, + "step": 17462 + }, + { + "epoch": 47.97527472527472, + "grad_norm": 1.8543790578842163, + "learning_rate": 2.6012362637362638e-05, + "loss": 0.0378, + "step": 17463 + }, + { + "epoch": 47.97802197802198, + "grad_norm": 11.064554214477539, + "learning_rate": 2.601098901098901e-05, + "loss": 0.2315, + "step": 17464 + }, + { + "epoch": 47.98076923076923, + "grad_norm": 17.373619079589844, + "learning_rate": 2.6009615384615388e-05, + "loss": 0.2225, + "step": 17465 + }, + { + "epoch": 47.98351648351648, + "grad_norm": 13.413677215576172, + "learning_rate": 2.6008241758241758e-05, + "loss": 0.3567, + "step": 17466 + }, + { + "epoch": 47.98626373626374, + "grad_norm": 6.153037071228027, + "learning_rate": 2.6006868131868135e-05, + "loss": 0.1107, + "step": 17467 + }, + { + "epoch": 47.98901098901099, + "grad_norm": 16.32100486755371, + "learning_rate": 2.6005494505494505e-05, + "loss": 0.3522, + "step": 17468 + }, + { + "epoch": 47.99175824175824, + "grad_norm": 5.612778186798096, + "learning_rate": 2.600412087912088e-05, + "loss": 0.0864, + "step": 17469 + }, + { + "epoch": 47.994505494505496, + "grad_norm": 9.317848205566406, + "learning_rate": 2.6002747252747255e-05, + "loss": 0.1643, + "step": 17470 + }, + { + "epoch": 47.997252747252745, + "grad_norm": 5.930396556854248, + "learning_rate": 2.6001373626373625e-05, + "loss": 0.1279, + "step": 17471 + }, + { + "epoch": 48.0, + "grad_norm": 50.758995056152344, + "learning_rate": 2.6000000000000002e-05, + "loss": 1.2902, + "step": 17472 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.8168044077134986, + "eval_f1": 0.8245434920090375, + "eval_f1_DuraRiadoRio_64x64": 0.8683274021352313, + "eval_f1_Mole_64x64": 0.8163265306122449, + "eval_f1_Quebrado_64x64": 0.8666666666666667, + "eval_f1_RiadoRio_64x64": 0.7008086253369272, + "eval_f1_RioFechado_64x64": 0.8705882352941177, + "eval_loss": 0.774443507194519, + "eval_precision": 0.8579702966062517, + "eval_precision_DuraRiadoRio_64x64": 0.8905109489051095, + "eval_precision_Mole_64x64": 0.9900990099009901, + "eval_precision_Quebrado_64x64": 0.8333333333333334, + "eval_precision_RiadoRio_64x64": 0.593607305936073, + "eval_precision_RioFechado_64x64": 0.9823008849557522, + "eval_recall": 0.8162795486368501, + "eval_recall_DuraRiadoRio_64x64": 0.8472222222222222, + "eval_recall_Mole_64x64": 0.6944444444444444, + "eval_recall_Quebrado_64x64": 0.9027777777777778, + "eval_recall_RiadoRio_64x64": 0.8552631578947368, + "eval_recall_RioFechado_64x64": 0.7816901408450704, + "eval_runtime": 1.713, + "eval_samples_per_second": 423.811, + "eval_steps_per_second": 26.853, + "step": 17472 + }, + { + "epoch": 48.002747252747255, + "grad_norm": 8.79784870147705, + "learning_rate": 2.5998626373626372e-05, + "loss": 0.2007, + "step": 17473 + }, + { + "epoch": 48.005494505494504, + "grad_norm": 4.77112340927124, + "learning_rate": 2.599725274725275e-05, + "loss": 0.0631, + "step": 17474 + }, + { + "epoch": 48.00824175824176, + "grad_norm": 14.893538475036621, + "learning_rate": 2.5995879120879125e-05, + "loss": 0.2315, + "step": 17475 + }, + { + "epoch": 48.010989010989015, + "grad_norm": 11.297122955322266, + "learning_rate": 2.5994505494505495e-05, + "loss": 0.268, + "step": 17476 + }, + { + "epoch": 48.01373626373626, + "grad_norm": 18.21546745300293, + "learning_rate": 2.599313186813187e-05, + "loss": 0.4941, + "step": 17477 + }, + { + "epoch": 48.01648351648352, + "grad_norm": 6.9329609870910645, + "learning_rate": 2.5991758241758242e-05, + "loss": 0.1439, + "step": 17478 + }, + { + "epoch": 48.01923076923077, + "grad_norm": 14.3359375, + "learning_rate": 2.5990384615384616e-05, + "loss": 0.2662, + "step": 17479 + }, + { + "epoch": 48.02197802197802, + "grad_norm": 9.429073333740234, + "learning_rate": 2.5989010989010992e-05, + "loss": 0.2629, + "step": 17480 + }, + { + "epoch": 48.02472527472528, + "grad_norm": 10.771421432495117, + "learning_rate": 2.5987637362637362e-05, + "loss": 0.1675, + "step": 17481 + }, + { + "epoch": 48.027472527472526, + "grad_norm": 6.154287815093994, + "learning_rate": 2.598626373626374e-05, + "loss": 0.1382, + "step": 17482 + }, + { + "epoch": 48.03021978021978, + "grad_norm": 9.857621192932129, + "learning_rate": 2.598489010989011e-05, + "loss": 0.1677, + "step": 17483 + }, + { + "epoch": 48.032967032967036, + "grad_norm": 19.470157623291016, + "learning_rate": 2.5983516483516486e-05, + "loss": 0.5683, + "step": 17484 + }, + { + "epoch": 48.035714285714285, + "grad_norm": 22.23851203918457, + "learning_rate": 2.598214285714286e-05, + "loss": 0.6575, + "step": 17485 + }, + { + "epoch": 48.03846153846154, + "grad_norm": 12.81548023223877, + "learning_rate": 2.598076923076923e-05, + "loss": 0.2275, + "step": 17486 + }, + { + "epoch": 48.04120879120879, + "grad_norm": 15.505489349365234, + "learning_rate": 2.5979395604395606e-05, + "loss": 0.3299, + "step": 17487 + }, + { + "epoch": 48.043956043956044, + "grad_norm": 8.382671356201172, + "learning_rate": 2.5978021978021976e-05, + "loss": 0.1446, + "step": 17488 + }, + { + "epoch": 48.0467032967033, + "grad_norm": 10.642098426818848, + "learning_rate": 2.5976648351648353e-05, + "loss": 0.1257, + "step": 17489 + }, + { + "epoch": 48.04945054945055, + "grad_norm": 3.167235851287842, + "learning_rate": 2.597527472527473e-05, + "loss": 0.0347, + "step": 17490 + }, + { + "epoch": 48.0521978021978, + "grad_norm": 8.702589988708496, + "learning_rate": 2.59739010989011e-05, + "loss": 0.2515, + "step": 17491 + }, + { + "epoch": 48.05494505494506, + "grad_norm": 13.720142364501953, + "learning_rate": 2.5972527472527473e-05, + "loss": 0.3661, + "step": 17492 + }, + { + "epoch": 48.05769230769231, + "grad_norm": 17.790830612182617, + "learning_rate": 2.5971153846153847e-05, + "loss": 0.326, + "step": 17493 + }, + { + "epoch": 48.06043956043956, + "grad_norm": 10.084871292114258, + "learning_rate": 2.596978021978022e-05, + "loss": 0.1781, + "step": 17494 + }, + { + "epoch": 48.06318681318681, + "grad_norm": 8.185820579528809, + "learning_rate": 2.5968406593406597e-05, + "loss": 0.198, + "step": 17495 + }, + { + "epoch": 48.065934065934066, + "grad_norm": 15.14883804321289, + "learning_rate": 2.5967032967032967e-05, + "loss": 0.4725, + "step": 17496 + }, + { + "epoch": 48.06868131868132, + "grad_norm": 4.764971733093262, + "learning_rate": 2.5965659340659344e-05, + "loss": 0.0777, + "step": 17497 + }, + { + "epoch": 48.07142857142857, + "grad_norm": 11.021069526672363, + "learning_rate": 2.5964285714285714e-05, + "loss": 0.3822, + "step": 17498 + }, + { + "epoch": 48.074175824175825, + "grad_norm": 13.39116096496582, + "learning_rate": 2.596291208791209e-05, + "loss": 0.2362, + "step": 17499 + }, + { + "epoch": 48.07692307692308, + "grad_norm": 9.79859447479248, + "learning_rate": 2.5961538461538464e-05, + "loss": 0.2094, + "step": 17500 + }, + { + "epoch": 48.07967032967033, + "grad_norm": 3.5203537940979004, + "learning_rate": 2.5960164835164834e-05, + "loss": 0.0624, + "step": 17501 + }, + { + "epoch": 48.082417582417584, + "grad_norm": 14.945846557617188, + "learning_rate": 2.595879120879121e-05, + "loss": 0.2432, + "step": 17502 + }, + { + "epoch": 48.08516483516483, + "grad_norm": 8.684974670410156, + "learning_rate": 2.595741758241758e-05, + "loss": 0.1701, + "step": 17503 + }, + { + "epoch": 48.08791208791209, + "grad_norm": 16.700719833374023, + "learning_rate": 2.5956043956043958e-05, + "loss": 0.3854, + "step": 17504 + }, + { + "epoch": 48.09065934065934, + "grad_norm": 9.327634811401367, + "learning_rate": 2.5954670329670334e-05, + "loss": 0.1994, + "step": 17505 + }, + { + "epoch": 48.09340659340659, + "grad_norm": 9.958038330078125, + "learning_rate": 2.5953296703296704e-05, + "loss": 0.2006, + "step": 17506 + }, + { + "epoch": 48.09615384615385, + "grad_norm": 5.059261798858643, + "learning_rate": 2.5951923076923078e-05, + "loss": 0.1075, + "step": 17507 + }, + { + "epoch": 48.0989010989011, + "grad_norm": 9.782784461975098, + "learning_rate": 2.595054945054945e-05, + "loss": 0.4216, + "step": 17508 + }, + { + "epoch": 48.10164835164835, + "grad_norm": 9.913899421691895, + "learning_rate": 2.5949175824175825e-05, + "loss": 0.1811, + "step": 17509 + }, + { + "epoch": 48.104395604395606, + "grad_norm": 19.26412010192871, + "learning_rate": 2.59478021978022e-05, + "loss": 0.3817, + "step": 17510 + }, + { + "epoch": 48.107142857142854, + "grad_norm": 20.281328201293945, + "learning_rate": 2.594642857142857e-05, + "loss": 0.6592, + "step": 17511 + }, + { + "epoch": 48.10989010989011, + "grad_norm": 17.74080467224121, + "learning_rate": 2.5945054945054948e-05, + "loss": 0.2829, + "step": 17512 + }, + { + "epoch": 48.112637362637365, + "grad_norm": 16.305187225341797, + "learning_rate": 2.5943681318681318e-05, + "loss": 0.3083, + "step": 17513 + }, + { + "epoch": 48.11538461538461, + "grad_norm": 18.102725982666016, + "learning_rate": 2.5942307692307695e-05, + "loss": 0.4156, + "step": 17514 + }, + { + "epoch": 48.11813186813187, + "grad_norm": 5.36336612701416, + "learning_rate": 2.594093406593407e-05, + "loss": 0.1085, + "step": 17515 + }, + { + "epoch": 48.120879120879124, + "grad_norm": 15.96771240234375, + "learning_rate": 2.593956043956044e-05, + "loss": 0.3933, + "step": 17516 + }, + { + "epoch": 48.12362637362637, + "grad_norm": 16.218326568603516, + "learning_rate": 2.5938186813186815e-05, + "loss": 0.5836, + "step": 17517 + }, + { + "epoch": 48.12637362637363, + "grad_norm": 16.3139705657959, + "learning_rate": 2.5936813186813185e-05, + "loss": 0.3797, + "step": 17518 + }, + { + "epoch": 48.129120879120876, + "grad_norm": 6.995763301849365, + "learning_rate": 2.5935439560439562e-05, + "loss": 0.1111, + "step": 17519 + }, + { + "epoch": 48.13186813186813, + "grad_norm": 7.661044597625732, + "learning_rate": 2.593406593406594e-05, + "loss": 0.2222, + "step": 17520 + }, + { + "epoch": 48.13461538461539, + "grad_norm": 15.311161994934082, + "learning_rate": 2.593269230769231e-05, + "loss": 0.3839, + "step": 17521 + }, + { + "epoch": 48.137362637362635, + "grad_norm": 9.595455169677734, + "learning_rate": 2.5931318681318682e-05, + "loss": 0.075, + "step": 17522 + }, + { + "epoch": 48.14010989010989, + "grad_norm": 7.833017826080322, + "learning_rate": 2.5929945054945052e-05, + "loss": 0.1718, + "step": 17523 + }, + { + "epoch": 48.142857142857146, + "grad_norm": 10.31669807434082, + "learning_rate": 2.592857142857143e-05, + "loss": 0.18, + "step": 17524 + }, + { + "epoch": 48.145604395604394, + "grad_norm": 6.939246654510498, + "learning_rate": 2.5927197802197806e-05, + "loss": 0.1155, + "step": 17525 + }, + { + "epoch": 48.14835164835165, + "grad_norm": 6.545838832855225, + "learning_rate": 2.5925824175824176e-05, + "loss": 0.1397, + "step": 17526 + }, + { + "epoch": 48.1510989010989, + "grad_norm": 21.375747680664062, + "learning_rate": 2.5924450549450553e-05, + "loss": 0.7283, + "step": 17527 + }, + { + "epoch": 48.15384615384615, + "grad_norm": 16.40635108947754, + "learning_rate": 2.5923076923076923e-05, + "loss": 0.5989, + "step": 17528 + }, + { + "epoch": 48.15659340659341, + "grad_norm": 1.6139588356018066, + "learning_rate": 2.59217032967033e-05, + "loss": 0.0706, + "step": 17529 + }, + { + "epoch": 48.15934065934066, + "grad_norm": 18.67658233642578, + "learning_rate": 2.5920329670329673e-05, + "loss": 0.6449, + "step": 17530 + }, + { + "epoch": 48.16208791208791, + "grad_norm": 7.5615949630737305, + "learning_rate": 2.5918956043956043e-05, + "loss": 0.2119, + "step": 17531 + }, + { + "epoch": 48.16483516483517, + "grad_norm": 16.55354118347168, + "learning_rate": 2.591758241758242e-05, + "loss": 0.2635, + "step": 17532 + }, + { + "epoch": 48.167582417582416, + "grad_norm": 9.363330841064453, + "learning_rate": 2.591620879120879e-05, + "loss": 0.2131, + "step": 17533 + }, + { + "epoch": 48.17032967032967, + "grad_norm": 4.934901714324951, + "learning_rate": 2.5914835164835167e-05, + "loss": 0.0736, + "step": 17534 + }, + { + "epoch": 48.17307692307692, + "grad_norm": 8.167081832885742, + "learning_rate": 2.5913461538461543e-05, + "loss": 0.1175, + "step": 17535 + }, + { + "epoch": 48.175824175824175, + "grad_norm": 18.626310348510742, + "learning_rate": 2.5912087912087913e-05, + "loss": 0.5107, + "step": 17536 + }, + { + "epoch": 48.17857142857143, + "grad_norm": 12.416261672973633, + "learning_rate": 2.5910714285714287e-05, + "loss": 0.2939, + "step": 17537 + }, + { + "epoch": 48.18131868131868, + "grad_norm": 5.860850811004639, + "learning_rate": 2.5909340659340657e-05, + "loss": 0.1322, + "step": 17538 + }, + { + "epoch": 48.184065934065934, + "grad_norm": 6.987998008728027, + "learning_rate": 2.5907967032967034e-05, + "loss": 0.1005, + "step": 17539 + }, + { + "epoch": 48.18681318681319, + "grad_norm": 15.425063133239746, + "learning_rate": 2.590659340659341e-05, + "loss": 0.3721, + "step": 17540 + }, + { + "epoch": 48.18956043956044, + "grad_norm": 14.494478225708008, + "learning_rate": 2.590521978021978e-05, + "loss": 0.3379, + "step": 17541 + }, + { + "epoch": 48.19230769230769, + "grad_norm": 13.930500984191895, + "learning_rate": 2.5903846153846157e-05, + "loss": 0.2798, + "step": 17542 + }, + { + "epoch": 48.19505494505494, + "grad_norm": 17.03575897216797, + "learning_rate": 2.5902472527472527e-05, + "loss": 0.3866, + "step": 17543 + }, + { + "epoch": 48.1978021978022, + "grad_norm": 4.150413513183594, + "learning_rate": 2.5901098901098904e-05, + "loss": 0.1252, + "step": 17544 + }, + { + "epoch": 48.20054945054945, + "grad_norm": 15.388261795043945, + "learning_rate": 2.5899725274725277e-05, + "loss": 0.322, + "step": 17545 + }, + { + "epoch": 48.2032967032967, + "grad_norm": 4.1973958015441895, + "learning_rate": 2.5898351648351647e-05, + "loss": 0.0626, + "step": 17546 + }, + { + "epoch": 48.206043956043956, + "grad_norm": 26.070661544799805, + "learning_rate": 2.5896978021978024e-05, + "loss": 0.449, + "step": 17547 + }, + { + "epoch": 48.20879120879121, + "grad_norm": 6.637587547302246, + "learning_rate": 2.5895604395604394e-05, + "loss": 0.1117, + "step": 17548 + }, + { + "epoch": 48.21153846153846, + "grad_norm": 2.369021415710449, + "learning_rate": 2.589423076923077e-05, + "loss": 0.0427, + "step": 17549 + }, + { + "epoch": 48.214285714285715, + "grad_norm": 24.305849075317383, + "learning_rate": 2.5892857142857148e-05, + "loss": 0.7225, + "step": 17550 + }, + { + "epoch": 48.217032967032964, + "grad_norm": 11.048579216003418, + "learning_rate": 2.5891483516483518e-05, + "loss": 0.1927, + "step": 17551 + }, + { + "epoch": 48.21978021978022, + "grad_norm": 17.292219161987305, + "learning_rate": 2.589010989010989e-05, + "loss": 0.3429, + "step": 17552 + }, + { + "epoch": 48.222527472527474, + "grad_norm": 6.477325439453125, + "learning_rate": 2.588873626373626e-05, + "loss": 0.1086, + "step": 17553 + }, + { + "epoch": 48.22527472527472, + "grad_norm": 25.065685272216797, + "learning_rate": 2.5887362637362638e-05, + "loss": 0.5023, + "step": 17554 + }, + { + "epoch": 48.22802197802198, + "grad_norm": 13.731429100036621, + "learning_rate": 2.5885989010989015e-05, + "loss": 0.3301, + "step": 17555 + }, + { + "epoch": 48.23076923076923, + "grad_norm": 18.080669403076172, + "learning_rate": 2.5884615384615385e-05, + "loss": 0.4373, + "step": 17556 + }, + { + "epoch": 48.23351648351648, + "grad_norm": 11.529447555541992, + "learning_rate": 2.5883241758241762e-05, + "loss": 0.255, + "step": 17557 + }, + { + "epoch": 48.23626373626374, + "grad_norm": 12.013978004455566, + "learning_rate": 2.5881868131868132e-05, + "loss": 0.2472, + "step": 17558 + }, + { + "epoch": 48.239010989010985, + "grad_norm": 6.486544609069824, + "learning_rate": 2.588049450549451e-05, + "loss": 0.0901, + "step": 17559 + }, + { + "epoch": 48.24175824175824, + "grad_norm": 8.115477561950684, + "learning_rate": 2.5879120879120882e-05, + "loss": 0.1363, + "step": 17560 + }, + { + "epoch": 48.244505494505496, + "grad_norm": 13.595870018005371, + "learning_rate": 2.5877747252747252e-05, + "loss": 0.301, + "step": 17561 + }, + { + "epoch": 48.247252747252745, + "grad_norm": 8.092015266418457, + "learning_rate": 2.587637362637363e-05, + "loss": 0.2907, + "step": 17562 + }, + { + "epoch": 48.25, + "grad_norm": 22.734180450439453, + "learning_rate": 2.5875e-05, + "loss": 0.3775, + "step": 17563 + }, + { + "epoch": 48.252747252747255, + "grad_norm": 6.150089740753174, + "learning_rate": 2.5873626373626376e-05, + "loss": 0.1476, + "step": 17564 + }, + { + "epoch": 48.255494505494504, + "grad_norm": 12.277754783630371, + "learning_rate": 2.5872252747252752e-05, + "loss": 0.3352, + "step": 17565 + }, + { + "epoch": 48.25824175824176, + "grad_norm": 9.02490234375, + "learning_rate": 2.5870879120879122e-05, + "loss": 0.1094, + "step": 17566 + }, + { + "epoch": 48.260989010989015, + "grad_norm": 13.570401191711426, + "learning_rate": 2.5869505494505496e-05, + "loss": 0.2476, + "step": 17567 + }, + { + "epoch": 48.26373626373626, + "grad_norm": 16.50487518310547, + "learning_rate": 2.5868131868131866e-05, + "loss": 0.5436, + "step": 17568 + }, + { + "epoch": 48.26648351648352, + "grad_norm": 19.295190811157227, + "learning_rate": 2.5866758241758243e-05, + "loss": 0.793, + "step": 17569 + }, + { + "epoch": 48.26923076923077, + "grad_norm": 16.954307556152344, + "learning_rate": 2.586538461538462e-05, + "loss": 0.3521, + "step": 17570 + }, + { + "epoch": 48.27197802197802, + "grad_norm": 6.121532440185547, + "learning_rate": 2.586401098901099e-05, + "loss": 0.1701, + "step": 17571 + }, + { + "epoch": 48.27472527472528, + "grad_norm": 12.290703773498535, + "learning_rate": 2.5862637362637366e-05, + "loss": 0.2, + "step": 17572 + }, + { + "epoch": 48.277472527472526, + "grad_norm": 9.165225982666016, + "learning_rate": 2.5861263736263736e-05, + "loss": 0.1497, + "step": 17573 + }, + { + "epoch": 48.28021978021978, + "grad_norm": 11.719622611999512, + "learning_rate": 2.5859890109890113e-05, + "loss": 0.2944, + "step": 17574 + }, + { + "epoch": 48.282967032967036, + "grad_norm": 10.355831146240234, + "learning_rate": 2.5858516483516486e-05, + "loss": 0.1418, + "step": 17575 + }, + { + "epoch": 48.285714285714285, + "grad_norm": 10.450675010681152, + "learning_rate": 2.5857142857142856e-05, + "loss": 0.1555, + "step": 17576 + }, + { + "epoch": 48.28846153846154, + "grad_norm": 16.38675880432129, + "learning_rate": 2.5855769230769233e-05, + "loss": 0.9112, + "step": 17577 + }, + { + "epoch": 48.29120879120879, + "grad_norm": 12.61646842956543, + "learning_rate": 2.5854395604395603e-05, + "loss": 0.3175, + "step": 17578 + }, + { + "epoch": 48.293956043956044, + "grad_norm": 14.479281425476074, + "learning_rate": 2.585302197802198e-05, + "loss": 0.4203, + "step": 17579 + }, + { + "epoch": 48.2967032967033, + "grad_norm": 11.089468002319336, + "learning_rate": 2.5851648351648357e-05, + "loss": 0.221, + "step": 17580 + }, + { + "epoch": 48.29945054945055, + "grad_norm": 26.335092544555664, + "learning_rate": 2.5850274725274727e-05, + "loss": 0.8285, + "step": 17581 + }, + { + "epoch": 48.3021978021978, + "grad_norm": 6.306833744049072, + "learning_rate": 2.58489010989011e-05, + "loss": 0.2023, + "step": 17582 + }, + { + "epoch": 48.30494505494506, + "grad_norm": 12.628175735473633, + "learning_rate": 2.584752747252747e-05, + "loss": 0.3151, + "step": 17583 + }, + { + "epoch": 48.30769230769231, + "grad_norm": 6.079700946807861, + "learning_rate": 2.5846153846153847e-05, + "loss": 0.1034, + "step": 17584 + }, + { + "epoch": 48.31043956043956, + "grad_norm": 11.884176254272461, + "learning_rate": 2.5844780219780224e-05, + "loss": 0.1869, + "step": 17585 + }, + { + "epoch": 48.31318681318681, + "grad_norm": 12.072164535522461, + "learning_rate": 2.5843406593406594e-05, + "loss": 0.2433, + "step": 17586 + }, + { + "epoch": 48.315934065934066, + "grad_norm": 12.144071578979492, + "learning_rate": 2.584203296703297e-05, + "loss": 0.2128, + "step": 17587 + }, + { + "epoch": 48.31868131868132, + "grad_norm": 26.57925796508789, + "learning_rate": 2.584065934065934e-05, + "loss": 0.6929, + "step": 17588 + }, + { + "epoch": 48.32142857142857, + "grad_norm": 19.738182067871094, + "learning_rate": 2.5839285714285717e-05, + "loss": 0.3736, + "step": 17589 + }, + { + "epoch": 48.324175824175825, + "grad_norm": 7.598310947418213, + "learning_rate": 2.583791208791209e-05, + "loss": 0.1778, + "step": 17590 + }, + { + "epoch": 48.32692307692308, + "grad_norm": 1.692028284072876, + "learning_rate": 2.583653846153846e-05, + "loss": 0.0194, + "step": 17591 + }, + { + "epoch": 48.32967032967033, + "grad_norm": 5.306708812713623, + "learning_rate": 2.5835164835164838e-05, + "loss": 0.1196, + "step": 17592 + }, + { + "epoch": 48.332417582417584, + "grad_norm": 7.544980049133301, + "learning_rate": 2.5833791208791208e-05, + "loss": 0.1413, + "step": 17593 + }, + { + "epoch": 48.33516483516483, + "grad_norm": 3.7424838542938232, + "learning_rate": 2.5832417582417585e-05, + "loss": 0.0659, + "step": 17594 + }, + { + "epoch": 48.33791208791209, + "grad_norm": 6.8811163902282715, + "learning_rate": 2.583104395604396e-05, + "loss": 0.1235, + "step": 17595 + }, + { + "epoch": 48.34065934065934, + "grad_norm": 11.615238189697266, + "learning_rate": 2.582967032967033e-05, + "loss": 0.2871, + "step": 17596 + }, + { + "epoch": 48.34340659340659, + "grad_norm": 12.610482215881348, + "learning_rate": 2.5828296703296705e-05, + "loss": 0.253, + "step": 17597 + }, + { + "epoch": 48.34615384615385, + "grad_norm": 16.75773811340332, + "learning_rate": 2.5826923076923075e-05, + "loss": 0.3828, + "step": 17598 + }, + { + "epoch": 48.3489010989011, + "grad_norm": 11.929718017578125, + "learning_rate": 2.582554945054945e-05, + "loss": 0.2197, + "step": 17599 + }, + { + "epoch": 48.35164835164835, + "grad_norm": 23.50381851196289, + "learning_rate": 2.582417582417583e-05, + "loss": 0.8348, + "step": 17600 + }, + { + "epoch": 48.354395604395606, + "grad_norm": 12.808391571044922, + "learning_rate": 2.58228021978022e-05, + "loss": 0.3594, + "step": 17601 + }, + { + "epoch": 48.357142857142854, + "grad_norm": 8.287216186523438, + "learning_rate": 2.5821428571428575e-05, + "loss": 0.1373, + "step": 17602 + }, + { + "epoch": 48.35989010989011, + "grad_norm": 15.17048168182373, + "learning_rate": 2.5820054945054945e-05, + "loss": 0.3334, + "step": 17603 + }, + { + "epoch": 48.362637362637365, + "grad_norm": 10.795105934143066, + "learning_rate": 2.5818681318681322e-05, + "loss": 0.1937, + "step": 17604 + }, + { + "epoch": 48.36538461538461, + "grad_norm": 1.2191835641860962, + "learning_rate": 2.5817307692307695e-05, + "loss": 0.0137, + "step": 17605 + }, + { + "epoch": 48.36813186813187, + "grad_norm": 6.008065223693848, + "learning_rate": 2.5815934065934065e-05, + "loss": 0.079, + "step": 17606 + }, + { + "epoch": 48.370879120879124, + "grad_norm": 6.815036773681641, + "learning_rate": 2.5814560439560442e-05, + "loss": 0.1979, + "step": 17607 + }, + { + "epoch": 48.37362637362637, + "grad_norm": 17.085712432861328, + "learning_rate": 2.5813186813186812e-05, + "loss": 0.5116, + "step": 17608 + }, + { + "epoch": 48.37637362637363, + "grad_norm": 12.240211486816406, + "learning_rate": 2.581181318681319e-05, + "loss": 0.1732, + "step": 17609 + }, + { + "epoch": 48.379120879120876, + "grad_norm": 17.016864776611328, + "learning_rate": 2.5810439560439566e-05, + "loss": 0.4467, + "step": 17610 + }, + { + "epoch": 48.38186813186813, + "grad_norm": 12.65159797668457, + "learning_rate": 2.5809065934065936e-05, + "loss": 0.5087, + "step": 17611 + }, + { + "epoch": 48.38461538461539, + "grad_norm": 7.834897518157959, + "learning_rate": 2.580769230769231e-05, + "loss": 0.1886, + "step": 17612 + }, + { + "epoch": 48.387362637362635, + "grad_norm": 7.4080305099487305, + "learning_rate": 2.580631868131868e-05, + "loss": 0.1341, + "step": 17613 + }, + { + "epoch": 48.39010989010989, + "grad_norm": 10.196364402770996, + "learning_rate": 2.5804945054945056e-05, + "loss": 0.3481, + "step": 17614 + }, + { + "epoch": 48.392857142857146, + "grad_norm": 9.1120023727417, + "learning_rate": 2.5803571428571433e-05, + "loss": 0.1194, + "step": 17615 + }, + { + "epoch": 48.395604395604394, + "grad_norm": 18.973196029663086, + "learning_rate": 2.5802197802197803e-05, + "loss": 0.2659, + "step": 17616 + }, + { + "epoch": 48.39835164835165, + "grad_norm": 7.402097702026367, + "learning_rate": 2.580082417582418e-05, + "loss": 0.2061, + "step": 17617 + }, + { + "epoch": 48.4010989010989, + "grad_norm": 22.75821304321289, + "learning_rate": 2.579945054945055e-05, + "loss": 0.4975, + "step": 17618 + }, + { + "epoch": 48.40384615384615, + "grad_norm": 18.19085693359375, + "learning_rate": 2.5798076923076926e-05, + "loss": 0.298, + "step": 17619 + }, + { + "epoch": 48.40659340659341, + "grad_norm": 13.753243446350098, + "learning_rate": 2.57967032967033e-05, + "loss": 0.4058, + "step": 17620 + }, + { + "epoch": 48.40934065934066, + "grad_norm": 7.428514003753662, + "learning_rate": 2.579532967032967e-05, + "loss": 0.1333, + "step": 17621 + }, + { + "epoch": 48.41208791208791, + "grad_norm": 9.864649772644043, + "learning_rate": 2.5793956043956047e-05, + "loss": 0.1787, + "step": 17622 + }, + { + "epoch": 48.41483516483517, + "grad_norm": 8.814007759094238, + "learning_rate": 2.5792582417582417e-05, + "loss": 0.1716, + "step": 17623 + }, + { + "epoch": 48.417582417582416, + "grad_norm": 3.8254706859588623, + "learning_rate": 2.5791208791208794e-05, + "loss": 0.0667, + "step": 17624 + }, + { + "epoch": 48.42032967032967, + "grad_norm": 15.298874855041504, + "learning_rate": 2.578983516483517e-05, + "loss": 0.2793, + "step": 17625 + }, + { + "epoch": 48.42307692307692, + "grad_norm": 10.020243644714355, + "learning_rate": 2.578846153846154e-05, + "loss": 0.2413, + "step": 17626 + }, + { + "epoch": 48.425824175824175, + "grad_norm": 3.2642340660095215, + "learning_rate": 2.5787087912087914e-05, + "loss": 0.0439, + "step": 17627 + }, + { + "epoch": 48.42857142857143, + "grad_norm": 9.153223991394043, + "learning_rate": 2.5785714285714284e-05, + "loss": 0.3205, + "step": 17628 + }, + { + "epoch": 48.43131868131868, + "grad_norm": 9.702567100524902, + "learning_rate": 2.578434065934066e-05, + "loss": 0.1826, + "step": 17629 + }, + { + "epoch": 48.434065934065934, + "grad_norm": 11.203105926513672, + "learning_rate": 2.5782967032967037e-05, + "loss": 0.2102, + "step": 17630 + }, + { + "epoch": 48.43681318681319, + "grad_norm": 10.504806518554688, + "learning_rate": 2.5781593406593407e-05, + "loss": 0.3747, + "step": 17631 + }, + { + "epoch": 48.43956043956044, + "grad_norm": 12.411768913269043, + "learning_rate": 2.5780219780219784e-05, + "loss": 0.2592, + "step": 17632 + }, + { + "epoch": 48.44230769230769, + "grad_norm": 4.966740608215332, + "learning_rate": 2.5778846153846154e-05, + "loss": 0.1102, + "step": 17633 + }, + { + "epoch": 48.44505494505494, + "grad_norm": 7.305582046508789, + "learning_rate": 2.577747252747253e-05, + "loss": 0.1251, + "step": 17634 + }, + { + "epoch": 48.4478021978022, + "grad_norm": 10.263527870178223, + "learning_rate": 2.57760989010989e-05, + "loss": 0.2042, + "step": 17635 + }, + { + "epoch": 48.45054945054945, + "grad_norm": 19.749866485595703, + "learning_rate": 2.5774725274725274e-05, + "loss": 0.3942, + "step": 17636 + }, + { + "epoch": 48.4532967032967, + "grad_norm": 6.295070648193359, + "learning_rate": 2.577335164835165e-05, + "loss": 0.1852, + "step": 17637 + }, + { + "epoch": 48.456043956043956, + "grad_norm": 11.727836608886719, + "learning_rate": 2.577197802197802e-05, + "loss": 0.2161, + "step": 17638 + }, + { + "epoch": 48.45879120879121, + "grad_norm": 11.32756519317627, + "learning_rate": 2.5770604395604398e-05, + "loss": 0.344, + "step": 17639 + }, + { + "epoch": 48.46153846153846, + "grad_norm": 19.580699920654297, + "learning_rate": 2.5769230769230768e-05, + "loss": 0.877, + "step": 17640 + }, + { + "epoch": 48.464285714285715, + "grad_norm": 9.799625396728516, + "learning_rate": 2.5767857142857145e-05, + "loss": 0.2717, + "step": 17641 + }, + { + "epoch": 48.467032967032964, + "grad_norm": 10.27149486541748, + "learning_rate": 2.5766483516483518e-05, + "loss": 0.2744, + "step": 17642 + }, + { + "epoch": 48.46978021978022, + "grad_norm": 16.25603675842285, + "learning_rate": 2.5765109890109888e-05, + "loss": 0.6113, + "step": 17643 + }, + { + "epoch": 48.472527472527474, + "grad_norm": 5.189461708068848, + "learning_rate": 2.5763736263736265e-05, + "loss": 0.0721, + "step": 17644 + }, + { + "epoch": 48.47527472527472, + "grad_norm": 19.794755935668945, + "learning_rate": 2.5762362637362635e-05, + "loss": 0.4725, + "step": 17645 + }, + { + "epoch": 48.47802197802198, + "grad_norm": 12.32939624786377, + "learning_rate": 2.5760989010989012e-05, + "loss": 0.2449, + "step": 17646 + }, + { + "epoch": 48.48076923076923, + "grad_norm": 10.912202835083008, + "learning_rate": 2.575961538461539e-05, + "loss": 0.3433, + "step": 17647 + }, + { + "epoch": 48.48351648351648, + "grad_norm": 21.432523727416992, + "learning_rate": 2.575824175824176e-05, + "loss": 0.4195, + "step": 17648 + }, + { + "epoch": 48.48626373626374, + "grad_norm": 7.613550186157227, + "learning_rate": 2.5756868131868132e-05, + "loss": 0.1782, + "step": 17649 + }, + { + "epoch": 48.489010989010985, + "grad_norm": 23.642595291137695, + "learning_rate": 2.5755494505494505e-05, + "loss": 0.6483, + "step": 17650 + }, + { + "epoch": 48.49175824175824, + "grad_norm": 13.499917984008789, + "learning_rate": 2.575412087912088e-05, + "loss": 0.4637, + "step": 17651 + }, + { + "epoch": 48.494505494505496, + "grad_norm": 11.14340877532959, + "learning_rate": 2.5752747252747256e-05, + "loss": 0.4611, + "step": 17652 + }, + { + "epoch": 48.497252747252745, + "grad_norm": 16.246845245361328, + "learning_rate": 2.5751373626373626e-05, + "loss": 0.2188, + "step": 17653 + }, + { + "epoch": 48.5, + "grad_norm": 17.29405975341797, + "learning_rate": 2.5750000000000002e-05, + "loss": 0.6456, + "step": 17654 + }, + { + "epoch": 48.502747252747255, + "grad_norm": 13.584375381469727, + "learning_rate": 2.5748626373626373e-05, + "loss": 0.1409, + "step": 17655 + }, + { + "epoch": 48.505494505494504, + "grad_norm": 14.09654712677002, + "learning_rate": 2.574725274725275e-05, + "loss": 0.3374, + "step": 17656 + }, + { + "epoch": 48.50824175824176, + "grad_norm": 14.168272972106934, + "learning_rate": 2.5745879120879123e-05, + "loss": 0.4612, + "step": 17657 + }, + { + "epoch": 48.51098901098901, + "grad_norm": 6.426126956939697, + "learning_rate": 2.5744505494505493e-05, + "loss": 0.1994, + "step": 17658 + }, + { + "epoch": 48.51373626373626, + "grad_norm": 7.312405586242676, + "learning_rate": 2.574313186813187e-05, + "loss": 0.1691, + "step": 17659 + }, + { + "epoch": 48.51648351648352, + "grad_norm": 19.959638595581055, + "learning_rate": 2.574175824175824e-05, + "loss": 0.3148, + "step": 17660 + }, + { + "epoch": 48.51923076923077, + "grad_norm": 9.515829086303711, + "learning_rate": 2.5740384615384616e-05, + "loss": 0.2234, + "step": 17661 + }, + { + "epoch": 48.52197802197802, + "grad_norm": 1.7452929019927979, + "learning_rate": 2.5739010989010993e-05, + "loss": 0.0295, + "step": 17662 + }, + { + "epoch": 48.52472527472528, + "grad_norm": 15.202777862548828, + "learning_rate": 2.5737637362637363e-05, + "loss": 0.4155, + "step": 17663 + }, + { + "epoch": 48.527472527472526, + "grad_norm": 18.683759689331055, + "learning_rate": 2.5736263736263737e-05, + "loss": 0.3999, + "step": 17664 + }, + { + "epoch": 48.53021978021978, + "grad_norm": 8.866095542907715, + "learning_rate": 2.573489010989011e-05, + "loss": 0.1995, + "step": 17665 + }, + { + "epoch": 48.532967032967036, + "grad_norm": 11.400243759155273, + "learning_rate": 2.5733516483516483e-05, + "loss": 0.2765, + "step": 17666 + }, + { + "epoch": 48.535714285714285, + "grad_norm": 8.016047477722168, + "learning_rate": 2.573214285714286e-05, + "loss": 0.1039, + "step": 17667 + }, + { + "epoch": 48.53846153846154, + "grad_norm": 14.135640144348145, + "learning_rate": 2.573076923076923e-05, + "loss": 0.4911, + "step": 17668 + }, + { + "epoch": 48.54120879120879, + "grad_norm": 20.670381546020508, + "learning_rate": 2.5729395604395607e-05, + "loss": 0.7338, + "step": 17669 + }, + { + "epoch": 48.543956043956044, + "grad_norm": 13.001977920532227, + "learning_rate": 2.5728021978021977e-05, + "loss": 0.5322, + "step": 17670 + }, + { + "epoch": 48.5467032967033, + "grad_norm": 14.102445602416992, + "learning_rate": 2.5726648351648354e-05, + "loss": 0.283, + "step": 17671 + }, + { + "epoch": 48.54945054945055, + "grad_norm": 17.84710121154785, + "learning_rate": 2.5725274725274727e-05, + "loss": 0.3373, + "step": 17672 + }, + { + "epoch": 48.5521978021978, + "grad_norm": 22.238182067871094, + "learning_rate": 2.5723901098901097e-05, + "loss": 1.1047, + "step": 17673 + }, + { + "epoch": 48.55494505494506, + "grad_norm": 17.538951873779297, + "learning_rate": 2.5722527472527474e-05, + "loss": 0.1961, + "step": 17674 + }, + { + "epoch": 48.55769230769231, + "grad_norm": 8.09304141998291, + "learning_rate": 2.5721153846153844e-05, + "loss": 0.1964, + "step": 17675 + }, + { + "epoch": 48.56043956043956, + "grad_norm": 12.95245361328125, + "learning_rate": 2.571978021978022e-05, + "loss": 0.254, + "step": 17676 + }, + { + "epoch": 48.56318681318681, + "grad_norm": 8.906588554382324, + "learning_rate": 2.5718406593406598e-05, + "loss": 0.4018, + "step": 17677 + }, + { + "epoch": 48.565934065934066, + "grad_norm": 13.836773872375488, + "learning_rate": 2.5717032967032968e-05, + "loss": 0.2683, + "step": 17678 + }, + { + "epoch": 48.56868131868132, + "grad_norm": 20.497556686401367, + "learning_rate": 2.571565934065934e-05, + "loss": 0.5716, + "step": 17679 + }, + { + "epoch": 48.57142857142857, + "grad_norm": 15.636780738830566, + "learning_rate": 2.5714285714285714e-05, + "loss": 0.475, + "step": 17680 + }, + { + "epoch": 48.574175824175825, + "grad_norm": 4.598755359649658, + "learning_rate": 2.5712912087912088e-05, + "loss": 0.1011, + "step": 17681 + }, + { + "epoch": 48.57692307692308, + "grad_norm": 5.337921142578125, + "learning_rate": 2.5711538461538465e-05, + "loss": 0.1378, + "step": 17682 + }, + { + "epoch": 48.57967032967033, + "grad_norm": 9.255762100219727, + "learning_rate": 2.5710164835164835e-05, + "loss": 0.1453, + "step": 17683 + }, + { + "epoch": 48.582417582417584, + "grad_norm": 14.243037223815918, + "learning_rate": 2.570879120879121e-05, + "loss": 0.3273, + "step": 17684 + }, + { + "epoch": 48.58516483516483, + "grad_norm": 9.886838912963867, + "learning_rate": 2.570741758241758e-05, + "loss": 0.2423, + "step": 17685 + }, + { + "epoch": 48.58791208791209, + "grad_norm": 12.352495193481445, + "learning_rate": 2.5706043956043958e-05, + "loss": 0.3372, + "step": 17686 + }, + { + "epoch": 48.59065934065934, + "grad_norm": 9.091702461242676, + "learning_rate": 2.570467032967033e-05, + "loss": 0.2389, + "step": 17687 + }, + { + "epoch": 48.59340659340659, + "grad_norm": 15.600025177001953, + "learning_rate": 2.5703296703296702e-05, + "loss": 0.3492, + "step": 17688 + }, + { + "epoch": 48.59615384615385, + "grad_norm": 5.541799068450928, + "learning_rate": 2.570192307692308e-05, + "loss": 0.0912, + "step": 17689 + }, + { + "epoch": 48.5989010989011, + "grad_norm": 5.596834659576416, + "learning_rate": 2.570054945054945e-05, + "loss": 0.1052, + "step": 17690 + }, + { + "epoch": 48.60164835164835, + "grad_norm": 3.047471284866333, + "learning_rate": 2.5699175824175825e-05, + "loss": 0.0523, + "step": 17691 + }, + { + "epoch": 48.604395604395606, + "grad_norm": 17.752552032470703, + "learning_rate": 2.5697802197802202e-05, + "loss": 0.4536, + "step": 17692 + }, + { + "epoch": 48.607142857142854, + "grad_norm": 11.709097862243652, + "learning_rate": 2.5696428571428572e-05, + "loss": 0.3486, + "step": 17693 + }, + { + "epoch": 48.60989010989011, + "grad_norm": 4.59031343460083, + "learning_rate": 2.5695054945054946e-05, + "loss": 0.0525, + "step": 17694 + }, + { + "epoch": 48.612637362637365, + "grad_norm": 10.025436401367188, + "learning_rate": 2.569368131868132e-05, + "loss": 0.2106, + "step": 17695 + }, + { + "epoch": 48.61538461538461, + "grad_norm": 13.98670768737793, + "learning_rate": 2.5692307692307692e-05, + "loss": 0.2892, + "step": 17696 + }, + { + "epoch": 48.61813186813187, + "grad_norm": 9.861759185791016, + "learning_rate": 2.569093406593407e-05, + "loss": 0.191, + "step": 17697 + }, + { + "epoch": 48.620879120879124, + "grad_norm": 14.487689018249512, + "learning_rate": 2.568956043956044e-05, + "loss": 0.4134, + "step": 17698 + }, + { + "epoch": 48.62362637362637, + "grad_norm": 3.6302762031555176, + "learning_rate": 2.5688186813186816e-05, + "loss": 0.0894, + "step": 17699 + }, + { + "epoch": 48.62637362637363, + "grad_norm": 2.5234670639038086, + "learning_rate": 2.5686813186813186e-05, + "loss": 0.0362, + "step": 17700 + }, + { + "epoch": 48.629120879120876, + "grad_norm": 4.315249919891357, + "learning_rate": 2.5685439560439563e-05, + "loss": 0.1071, + "step": 17701 + }, + { + "epoch": 48.63186813186813, + "grad_norm": 12.830035209655762, + "learning_rate": 2.5684065934065936e-05, + "loss": 0.2221, + "step": 17702 + }, + { + "epoch": 48.63461538461539, + "grad_norm": 11.254149436950684, + "learning_rate": 2.5682692307692306e-05, + "loss": 0.3492, + "step": 17703 + }, + { + "epoch": 48.637362637362635, + "grad_norm": 7.264063835144043, + "learning_rate": 2.5681318681318683e-05, + "loss": 0.1254, + "step": 17704 + }, + { + "epoch": 48.64010989010989, + "grad_norm": 9.012598991394043, + "learning_rate": 2.5679945054945053e-05, + "loss": 0.1945, + "step": 17705 + }, + { + "epoch": 48.642857142857146, + "grad_norm": 9.47858715057373, + "learning_rate": 2.567857142857143e-05, + "loss": 0.2459, + "step": 17706 + }, + { + "epoch": 48.645604395604394, + "grad_norm": 9.804883003234863, + "learning_rate": 2.5677197802197807e-05, + "loss": 0.2982, + "step": 17707 + }, + { + "epoch": 48.64835164835165, + "grad_norm": 6.801069259643555, + "learning_rate": 2.5675824175824177e-05, + "loss": 0.1452, + "step": 17708 + }, + { + "epoch": 48.6510989010989, + "grad_norm": 10.267233848571777, + "learning_rate": 2.567445054945055e-05, + "loss": 0.2898, + "step": 17709 + }, + { + "epoch": 48.65384615384615, + "grad_norm": 17.93521499633789, + "learning_rate": 2.5673076923076923e-05, + "loss": 0.3879, + "step": 17710 + }, + { + "epoch": 48.65659340659341, + "grad_norm": 13.667112350463867, + "learning_rate": 2.5671703296703297e-05, + "loss": 0.2141, + "step": 17711 + }, + { + "epoch": 48.65934065934066, + "grad_norm": 15.814189910888672, + "learning_rate": 2.5670329670329674e-05, + "loss": 0.3432, + "step": 17712 + }, + { + "epoch": 48.66208791208791, + "grad_norm": 3.288776397705078, + "learning_rate": 2.5668956043956044e-05, + "loss": 0.0723, + "step": 17713 + }, + { + "epoch": 48.66483516483517, + "grad_norm": 10.881824493408203, + "learning_rate": 2.566758241758242e-05, + "loss": 0.2963, + "step": 17714 + }, + { + "epoch": 48.667582417582416, + "grad_norm": 5.302093029022217, + "learning_rate": 2.566620879120879e-05, + "loss": 0.1137, + "step": 17715 + }, + { + "epoch": 48.67032967032967, + "grad_norm": 7.330077171325684, + "learning_rate": 2.5664835164835167e-05, + "loss": 0.2274, + "step": 17716 + }, + { + "epoch": 48.67307692307692, + "grad_norm": 13.957902908325195, + "learning_rate": 2.566346153846154e-05, + "loss": 0.2894, + "step": 17717 + }, + { + "epoch": 48.675824175824175, + "grad_norm": 17.183027267456055, + "learning_rate": 2.566208791208791e-05, + "loss": 0.2134, + "step": 17718 + }, + { + "epoch": 48.67857142857143, + "grad_norm": 4.75800895690918, + "learning_rate": 2.5660714285714287e-05, + "loss": 0.0965, + "step": 17719 + }, + { + "epoch": 48.68131868131868, + "grad_norm": 12.980286598205566, + "learning_rate": 2.5659340659340658e-05, + "loss": 0.197, + "step": 17720 + }, + { + "epoch": 48.684065934065934, + "grad_norm": 10.38006591796875, + "learning_rate": 2.5657967032967034e-05, + "loss": 0.3052, + "step": 17721 + }, + { + "epoch": 48.68681318681319, + "grad_norm": 19.578184127807617, + "learning_rate": 2.565659340659341e-05, + "loss": 0.5175, + "step": 17722 + }, + { + "epoch": 48.68956043956044, + "grad_norm": 15.947216033935547, + "learning_rate": 2.565521978021978e-05, + "loss": 0.6143, + "step": 17723 + }, + { + "epoch": 48.69230769230769, + "grad_norm": 15.720986366271973, + "learning_rate": 2.5653846153846155e-05, + "loss": 0.1969, + "step": 17724 + }, + { + "epoch": 48.69505494505494, + "grad_norm": 14.93790054321289, + "learning_rate": 2.5652472527472528e-05, + "loss": 0.3513, + "step": 17725 + }, + { + "epoch": 48.6978021978022, + "grad_norm": 7.194995880126953, + "learning_rate": 2.56510989010989e-05, + "loss": 0.1913, + "step": 17726 + }, + { + "epoch": 48.70054945054945, + "grad_norm": 13.207966804504395, + "learning_rate": 2.5649725274725278e-05, + "loss": 0.2959, + "step": 17727 + }, + { + "epoch": 48.7032967032967, + "grad_norm": 16.921789169311523, + "learning_rate": 2.5648351648351648e-05, + "loss": 0.4921, + "step": 17728 + }, + { + "epoch": 48.706043956043956, + "grad_norm": 7.468595027923584, + "learning_rate": 2.5646978021978025e-05, + "loss": 0.1991, + "step": 17729 + }, + { + "epoch": 48.70879120879121, + "grad_norm": 15.132587432861328, + "learning_rate": 2.5645604395604395e-05, + "loss": 0.2976, + "step": 17730 + }, + { + "epoch": 48.71153846153846, + "grad_norm": 13.048726081848145, + "learning_rate": 2.5644230769230772e-05, + "loss": 0.37, + "step": 17731 + }, + { + "epoch": 48.714285714285715, + "grad_norm": 9.642279624938965, + "learning_rate": 2.5642857142857145e-05, + "loss": 0.2266, + "step": 17732 + }, + { + "epoch": 48.717032967032964, + "grad_norm": 6.865755081176758, + "learning_rate": 2.5641483516483515e-05, + "loss": 0.163, + "step": 17733 + }, + { + "epoch": 48.71978021978022, + "grad_norm": 12.224665641784668, + "learning_rate": 2.5640109890109892e-05, + "loss": 0.1958, + "step": 17734 + }, + { + "epoch": 48.722527472527474, + "grad_norm": 12.876893997192383, + "learning_rate": 2.5638736263736262e-05, + "loss": 0.3026, + "step": 17735 + }, + { + "epoch": 48.72527472527472, + "grad_norm": 4.851276874542236, + "learning_rate": 2.563736263736264e-05, + "loss": 0.0982, + "step": 17736 + }, + { + "epoch": 48.72802197802198, + "grad_norm": 12.205170631408691, + "learning_rate": 2.5635989010989016e-05, + "loss": 0.1931, + "step": 17737 + }, + { + "epoch": 48.73076923076923, + "grad_norm": 11.947999000549316, + "learning_rate": 2.5634615384615386e-05, + "loss": 0.2461, + "step": 17738 + }, + { + "epoch": 48.73351648351648, + "grad_norm": 10.785025596618652, + "learning_rate": 2.563324175824176e-05, + "loss": 0.2018, + "step": 17739 + }, + { + "epoch": 48.73626373626374, + "grad_norm": 9.140079498291016, + "learning_rate": 2.5631868131868132e-05, + "loss": 0.1332, + "step": 17740 + }, + { + "epoch": 48.73901098901099, + "grad_norm": 15.686537742614746, + "learning_rate": 2.5630494505494506e-05, + "loss": 0.2843, + "step": 17741 + }, + { + "epoch": 48.74175824175824, + "grad_norm": 9.915573120117188, + "learning_rate": 2.5629120879120883e-05, + "loss": 0.1283, + "step": 17742 + }, + { + "epoch": 48.744505494505496, + "grad_norm": 15.889199256896973, + "learning_rate": 2.5627747252747253e-05, + "loss": 0.4141, + "step": 17743 + }, + { + "epoch": 48.747252747252745, + "grad_norm": 12.674698829650879, + "learning_rate": 2.562637362637363e-05, + "loss": 0.2296, + "step": 17744 + }, + { + "epoch": 48.75, + "grad_norm": 7.337149620056152, + "learning_rate": 2.5625e-05, + "loss": 0.1319, + "step": 17745 + }, + { + "epoch": 48.752747252747255, + "grad_norm": 16.388629913330078, + "learning_rate": 2.5623626373626376e-05, + "loss": 0.5478, + "step": 17746 + }, + { + "epoch": 48.755494505494504, + "grad_norm": 21.39217185974121, + "learning_rate": 2.562225274725275e-05, + "loss": 0.2806, + "step": 17747 + }, + { + "epoch": 48.75824175824176, + "grad_norm": 10.357597351074219, + "learning_rate": 2.562087912087912e-05, + "loss": 0.3116, + "step": 17748 + }, + { + "epoch": 48.76098901098901, + "grad_norm": 2.2135841846466064, + "learning_rate": 2.5619505494505496e-05, + "loss": 0.036, + "step": 17749 + }, + { + "epoch": 48.76373626373626, + "grad_norm": 21.792314529418945, + "learning_rate": 2.5618131868131866e-05, + "loss": 0.9534, + "step": 17750 + }, + { + "epoch": 48.76648351648352, + "grad_norm": 5.410080909729004, + "learning_rate": 2.5616758241758243e-05, + "loss": 0.0766, + "step": 17751 + }, + { + "epoch": 48.76923076923077, + "grad_norm": 14.931729316711426, + "learning_rate": 2.561538461538462e-05, + "loss": 0.2273, + "step": 17752 + }, + { + "epoch": 48.77197802197802, + "grad_norm": 7.768756866455078, + "learning_rate": 2.561401098901099e-05, + "loss": 0.2315, + "step": 17753 + }, + { + "epoch": 48.77472527472528, + "grad_norm": 6.84895133972168, + "learning_rate": 2.5612637362637363e-05, + "loss": 0.1671, + "step": 17754 + }, + { + "epoch": 48.777472527472526, + "grad_norm": 7.947444438934326, + "learning_rate": 2.5611263736263737e-05, + "loss": 0.2278, + "step": 17755 + }, + { + "epoch": 48.78021978021978, + "grad_norm": 8.657981872558594, + "learning_rate": 2.560989010989011e-05, + "loss": 0.2141, + "step": 17756 + }, + { + "epoch": 48.782967032967036, + "grad_norm": 11.613181114196777, + "learning_rate": 2.5608516483516487e-05, + "loss": 0.2596, + "step": 17757 + }, + { + "epoch": 48.785714285714285, + "grad_norm": 12.258694648742676, + "learning_rate": 2.5607142857142857e-05, + "loss": 0.3184, + "step": 17758 + }, + { + "epoch": 48.78846153846154, + "grad_norm": 14.96179485321045, + "learning_rate": 2.5605769230769234e-05, + "loss": 0.4038, + "step": 17759 + }, + { + "epoch": 48.79120879120879, + "grad_norm": 22.277606964111328, + "learning_rate": 2.5604395604395604e-05, + "loss": 0.5361, + "step": 17760 + }, + { + "epoch": 48.793956043956044, + "grad_norm": 20.59958267211914, + "learning_rate": 2.560302197802198e-05, + "loss": 0.4348, + "step": 17761 + }, + { + "epoch": 48.7967032967033, + "grad_norm": 3.1701934337615967, + "learning_rate": 2.5601648351648354e-05, + "loss": 0.0717, + "step": 17762 + }, + { + "epoch": 48.79945054945055, + "grad_norm": 11.556201934814453, + "learning_rate": 2.5600274725274724e-05, + "loss": 0.2989, + "step": 17763 + }, + { + "epoch": 48.8021978021978, + "grad_norm": 8.140898704528809, + "learning_rate": 2.55989010989011e-05, + "loss": 0.1183, + "step": 17764 + }, + { + "epoch": 48.80494505494506, + "grad_norm": 11.964777946472168, + "learning_rate": 2.559752747252747e-05, + "loss": 0.3416, + "step": 17765 + }, + { + "epoch": 48.80769230769231, + "grad_norm": 13.113816261291504, + "learning_rate": 2.5596153846153848e-05, + "loss": 0.22, + "step": 17766 + }, + { + "epoch": 48.81043956043956, + "grad_norm": 8.97851848602295, + "learning_rate": 2.5594780219780225e-05, + "loss": 0.1376, + "step": 17767 + }, + { + "epoch": 48.81318681318681, + "grad_norm": 3.9696455001831055, + "learning_rate": 2.5593406593406595e-05, + "loss": 0.0653, + "step": 17768 + }, + { + "epoch": 48.815934065934066, + "grad_norm": 5.675086975097656, + "learning_rate": 2.5592032967032968e-05, + "loss": 0.1473, + "step": 17769 + }, + { + "epoch": 48.81868131868132, + "grad_norm": 11.969749450683594, + "learning_rate": 2.559065934065934e-05, + "loss": 0.1779, + "step": 17770 + }, + { + "epoch": 48.82142857142857, + "grad_norm": 25.99173355102539, + "learning_rate": 2.5589285714285715e-05, + "loss": 0.8209, + "step": 17771 + }, + { + "epoch": 48.824175824175825, + "grad_norm": 22.50714111328125, + "learning_rate": 2.558791208791209e-05, + "loss": 0.3925, + "step": 17772 + }, + { + "epoch": 48.82692307692308, + "grad_norm": 20.22031021118164, + "learning_rate": 2.558653846153846e-05, + "loss": 0.4629, + "step": 17773 + }, + { + "epoch": 48.82967032967033, + "grad_norm": 8.26822566986084, + "learning_rate": 2.558516483516484e-05, + "loss": 0.0992, + "step": 17774 + }, + { + "epoch": 48.832417582417584, + "grad_norm": 22.176305770874023, + "learning_rate": 2.558379120879121e-05, + "loss": 0.6694, + "step": 17775 + }, + { + "epoch": 48.83516483516483, + "grad_norm": 11.831934928894043, + "learning_rate": 2.5582417582417585e-05, + "loss": 0.2594, + "step": 17776 + }, + { + "epoch": 48.83791208791209, + "grad_norm": 12.391545295715332, + "learning_rate": 2.558104395604396e-05, + "loss": 0.3798, + "step": 17777 + }, + { + "epoch": 48.84065934065934, + "grad_norm": 2.866497278213501, + "learning_rate": 2.557967032967033e-05, + "loss": 0.0468, + "step": 17778 + }, + { + "epoch": 48.84340659340659, + "grad_norm": 10.062018394470215, + "learning_rate": 2.5578296703296705e-05, + "loss": 0.2257, + "step": 17779 + }, + { + "epoch": 48.84615384615385, + "grad_norm": 8.138503074645996, + "learning_rate": 2.5576923076923075e-05, + "loss": 0.1803, + "step": 17780 + }, + { + "epoch": 48.8489010989011, + "grad_norm": 13.36031723022461, + "learning_rate": 2.5575549450549452e-05, + "loss": 0.337, + "step": 17781 + }, + { + "epoch": 48.85164835164835, + "grad_norm": 17.937381744384766, + "learning_rate": 2.557417582417583e-05, + "loss": 0.3409, + "step": 17782 + }, + { + "epoch": 48.854395604395606, + "grad_norm": 17.913644790649414, + "learning_rate": 2.55728021978022e-05, + "loss": 0.428, + "step": 17783 + }, + { + "epoch": 48.857142857142854, + "grad_norm": 12.178959846496582, + "learning_rate": 2.5571428571428572e-05, + "loss": 0.2514, + "step": 17784 + }, + { + "epoch": 48.85989010989011, + "grad_norm": 9.002984046936035, + "learning_rate": 2.5570054945054946e-05, + "loss": 0.1521, + "step": 17785 + }, + { + "epoch": 48.862637362637365, + "grad_norm": 12.814900398254395, + "learning_rate": 2.556868131868132e-05, + "loss": 0.2322, + "step": 17786 + }, + { + "epoch": 48.86538461538461, + "grad_norm": 10.58932113647461, + "learning_rate": 2.5567307692307696e-05, + "loss": 0.2039, + "step": 17787 + }, + { + "epoch": 48.86813186813187, + "grad_norm": 9.46733570098877, + "learning_rate": 2.5565934065934066e-05, + "loss": 0.2729, + "step": 17788 + }, + { + "epoch": 48.870879120879124, + "grad_norm": 5.996547222137451, + "learning_rate": 2.5564560439560443e-05, + "loss": 0.098, + "step": 17789 + }, + { + "epoch": 48.87362637362637, + "grad_norm": 16.838960647583008, + "learning_rate": 2.5563186813186813e-05, + "loss": 0.4173, + "step": 17790 + }, + { + "epoch": 48.87637362637363, + "grad_norm": 8.289955139160156, + "learning_rate": 2.556181318681319e-05, + "loss": 0.0884, + "step": 17791 + }, + { + "epoch": 48.879120879120876, + "grad_norm": 9.090203285217285, + "learning_rate": 2.5560439560439563e-05, + "loss": 0.1841, + "step": 17792 + }, + { + "epoch": 48.88186813186813, + "grad_norm": 14.26217269897461, + "learning_rate": 2.5559065934065933e-05, + "loss": 0.3709, + "step": 17793 + }, + { + "epoch": 48.88461538461539, + "grad_norm": 10.159232139587402, + "learning_rate": 2.555769230769231e-05, + "loss": 0.2073, + "step": 17794 + }, + { + "epoch": 48.887362637362635, + "grad_norm": 14.579408645629883, + "learning_rate": 2.555631868131868e-05, + "loss": 0.4035, + "step": 17795 + }, + { + "epoch": 48.89010989010989, + "grad_norm": 7.561164379119873, + "learning_rate": 2.5554945054945057e-05, + "loss": 0.1894, + "step": 17796 + }, + { + "epoch": 48.892857142857146, + "grad_norm": 13.407777786254883, + "learning_rate": 2.5553571428571434e-05, + "loss": 0.3771, + "step": 17797 + }, + { + "epoch": 48.895604395604394, + "grad_norm": 7.240907192230225, + "learning_rate": 2.5552197802197804e-05, + "loss": 0.1127, + "step": 17798 + }, + { + "epoch": 48.89835164835165, + "grad_norm": 16.74774932861328, + "learning_rate": 2.5550824175824177e-05, + "loss": 0.4307, + "step": 17799 + }, + { + "epoch": 48.9010989010989, + "grad_norm": 7.770371913909912, + "learning_rate": 2.554945054945055e-05, + "loss": 0.2575, + "step": 17800 + }, + { + "epoch": 48.90384615384615, + "grad_norm": 14.935503005981445, + "learning_rate": 2.5548076923076924e-05, + "loss": 0.1963, + "step": 17801 + }, + { + "epoch": 48.90659340659341, + "grad_norm": 17.020023345947266, + "learning_rate": 2.55467032967033e-05, + "loss": 0.414, + "step": 17802 + }, + { + "epoch": 48.90934065934066, + "grad_norm": 12.407827377319336, + "learning_rate": 2.554532967032967e-05, + "loss": 0.1902, + "step": 17803 + }, + { + "epoch": 48.91208791208791, + "grad_norm": 11.182031631469727, + "learning_rate": 2.5543956043956047e-05, + "loss": 0.2317, + "step": 17804 + }, + { + "epoch": 48.91483516483517, + "grad_norm": 5.719195365905762, + "learning_rate": 2.5542582417582417e-05, + "loss": 0.0917, + "step": 17805 + }, + { + "epoch": 48.917582417582416, + "grad_norm": 25.84471893310547, + "learning_rate": 2.5541208791208794e-05, + "loss": 0.9611, + "step": 17806 + }, + { + "epoch": 48.92032967032967, + "grad_norm": 4.320766925811768, + "learning_rate": 2.5539835164835168e-05, + "loss": 0.0659, + "step": 17807 + }, + { + "epoch": 48.92307692307692, + "grad_norm": 8.802901268005371, + "learning_rate": 2.5538461538461538e-05, + "loss": 0.1886, + "step": 17808 + }, + { + "epoch": 48.925824175824175, + "grad_norm": 8.666199684143066, + "learning_rate": 2.5537087912087914e-05, + "loss": 0.2172, + "step": 17809 + }, + { + "epoch": 48.92857142857143, + "grad_norm": 23.039155960083008, + "learning_rate": 2.5535714285714284e-05, + "loss": 0.4219, + "step": 17810 + }, + { + "epoch": 48.93131868131868, + "grad_norm": 9.176413536071777, + "learning_rate": 2.553434065934066e-05, + "loss": 0.1754, + "step": 17811 + }, + { + "epoch": 48.934065934065934, + "grad_norm": 14.585932731628418, + "learning_rate": 2.5532967032967038e-05, + "loss": 0.2233, + "step": 17812 + }, + { + "epoch": 48.93681318681319, + "grad_norm": 14.378700256347656, + "learning_rate": 2.5531593406593408e-05, + "loss": 0.3086, + "step": 17813 + }, + { + "epoch": 48.93956043956044, + "grad_norm": 8.036042213439941, + "learning_rate": 2.553021978021978e-05, + "loss": 0.2086, + "step": 17814 + }, + { + "epoch": 48.94230769230769, + "grad_norm": 2.478198289871216, + "learning_rate": 2.5528846153846155e-05, + "loss": 0.0355, + "step": 17815 + }, + { + "epoch": 48.94505494505494, + "grad_norm": 18.760698318481445, + "learning_rate": 2.5527472527472528e-05, + "loss": 0.5073, + "step": 17816 + }, + { + "epoch": 48.9478021978022, + "grad_norm": 19.951858520507812, + "learning_rate": 2.5526098901098905e-05, + "loss": 0.6393, + "step": 17817 + }, + { + "epoch": 48.95054945054945, + "grad_norm": 3.8841309547424316, + "learning_rate": 2.5524725274725275e-05, + "loss": 0.075, + "step": 17818 + }, + { + "epoch": 48.9532967032967, + "grad_norm": 16.093095779418945, + "learning_rate": 2.5523351648351652e-05, + "loss": 0.5246, + "step": 17819 + }, + { + "epoch": 48.956043956043956, + "grad_norm": 7.011856555938721, + "learning_rate": 2.5521978021978022e-05, + "loss": 0.0914, + "step": 17820 + }, + { + "epoch": 48.95879120879121, + "grad_norm": 11.6406831741333, + "learning_rate": 2.55206043956044e-05, + "loss": 0.2939, + "step": 17821 + }, + { + "epoch": 48.96153846153846, + "grad_norm": 7.716949939727783, + "learning_rate": 2.5519230769230772e-05, + "loss": 0.1617, + "step": 17822 + }, + { + "epoch": 48.964285714285715, + "grad_norm": 14.599810600280762, + "learning_rate": 2.5517857142857142e-05, + "loss": 0.4492, + "step": 17823 + }, + { + "epoch": 48.967032967032964, + "grad_norm": 24.84742546081543, + "learning_rate": 2.551648351648352e-05, + "loss": 0.9198, + "step": 17824 + }, + { + "epoch": 48.96978021978022, + "grad_norm": 5.881344318389893, + "learning_rate": 2.551510989010989e-05, + "loss": 0.1141, + "step": 17825 + }, + { + "epoch": 48.972527472527474, + "grad_norm": 13.504659652709961, + "learning_rate": 2.5513736263736266e-05, + "loss": 0.1783, + "step": 17826 + }, + { + "epoch": 48.97527472527472, + "grad_norm": 9.800169944763184, + "learning_rate": 2.5512362637362643e-05, + "loss": 0.173, + "step": 17827 + }, + { + "epoch": 48.97802197802198, + "grad_norm": 10.010269165039062, + "learning_rate": 2.5510989010989013e-05, + "loss": 0.1592, + "step": 17828 + }, + { + "epoch": 48.98076923076923, + "grad_norm": 25.089353561401367, + "learning_rate": 2.5509615384615386e-05, + "loss": 0.7451, + "step": 17829 + }, + { + "epoch": 48.98351648351648, + "grad_norm": 2.7541329860687256, + "learning_rate": 2.5508241758241756e-05, + "loss": 0.0415, + "step": 17830 + }, + { + "epoch": 48.98626373626374, + "grad_norm": 6.621020317077637, + "learning_rate": 2.5506868131868133e-05, + "loss": 0.1509, + "step": 17831 + }, + { + "epoch": 48.98901098901099, + "grad_norm": 16.81723976135254, + "learning_rate": 2.550549450549451e-05, + "loss": 0.4326, + "step": 17832 + }, + { + "epoch": 48.99175824175824, + "grad_norm": 7.747212886810303, + "learning_rate": 2.550412087912088e-05, + "loss": 0.177, + "step": 17833 + }, + { + "epoch": 48.994505494505496, + "grad_norm": 16.840099334716797, + "learning_rate": 2.5502747252747256e-05, + "loss": 0.3213, + "step": 17834 + }, + { + "epoch": 48.997252747252745, + "grad_norm": 7.660411834716797, + "learning_rate": 2.5501373626373626e-05, + "loss": 0.1179, + "step": 17835 + }, + { + "epoch": 49.0, + "grad_norm": 49.24665451049805, + "learning_rate": 2.5500000000000003e-05, + "loss": 1.074, + "step": 17836 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.41046831955922863, + "eval_f1": 0.30653677175641725, + "eval_f1_DuraRiadoRio_64x64": 0.09271523178807947, + "eval_f1_Mole_64x64": 0.10526315789473684, + "eval_f1_Quebrado_64x64": 0.642369020501139, + "eval_f1_RiadoRio_64x64": 0.4523364485981308, + "eval_f1_RioFechado_64x64": 0.24, + "eval_loss": 5.583917617797852, + "eval_precision": 0.6860513262017885, + "eval_precision_DuraRiadoRio_64x64": 1.0, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.47796610169491527, + "eval_precision_RiadoRio_64x64": 0.31592689295039167, + "eval_precision_RioFechado_64x64": 0.6363636363636364, + "eval_recall": 0.4054546577711885, + "eval_recall_DuraRiadoRio_64x64": 0.04861111111111111, + "eval_recall_Mole_64x64": 0.05555555555555555, + "eval_recall_Quebrado_64x64": 0.9791666666666666, + "eval_recall_RiadoRio_64x64": 0.7960526315789473, + "eval_recall_RioFechado_64x64": 0.14788732394366197, + "eval_runtime": 1.7462, + "eval_samples_per_second": 415.768, + "eval_steps_per_second": 26.343, + "step": 17836 + }, + { + "epoch": 49.002747252747255, + "grad_norm": 6.126096725463867, + "learning_rate": 2.5498626373626377e-05, + "loss": 0.1373, + "step": 17837 + }, + { + "epoch": 49.005494505494504, + "grad_norm": 7.704735279083252, + "learning_rate": 2.5497252747252747e-05, + "loss": 0.175, + "step": 17838 + }, + { + "epoch": 49.00824175824176, + "grad_norm": 7.776268005371094, + "learning_rate": 2.5495879120879123e-05, + "loss": 0.1422, + "step": 17839 + }, + { + "epoch": 49.010989010989015, + "grad_norm": 8.426531791687012, + "learning_rate": 2.5494505494505493e-05, + "loss": 0.1364, + "step": 17840 + }, + { + "epoch": 49.01373626373626, + "grad_norm": 10.316164016723633, + "learning_rate": 2.549313186813187e-05, + "loss": 0.2091, + "step": 17841 + }, + { + "epoch": 49.01648351648352, + "grad_norm": 13.084583282470703, + "learning_rate": 2.5491758241758247e-05, + "loss": 0.3578, + "step": 17842 + }, + { + "epoch": 49.01923076923077, + "grad_norm": 10.328594207763672, + "learning_rate": 2.5490384615384617e-05, + "loss": 0.1777, + "step": 17843 + }, + { + "epoch": 49.02197802197802, + "grad_norm": 8.12331771850586, + "learning_rate": 2.548901098901099e-05, + "loss": 0.1595, + "step": 17844 + }, + { + "epoch": 49.02472527472528, + "grad_norm": 14.456036567687988, + "learning_rate": 2.548763736263736e-05, + "loss": 0.3045, + "step": 17845 + }, + { + "epoch": 49.027472527472526, + "grad_norm": 2.9941396713256836, + "learning_rate": 2.5486263736263737e-05, + "loss": 0.0591, + "step": 17846 + }, + { + "epoch": 49.03021978021978, + "grad_norm": 10.626236915588379, + "learning_rate": 2.5484890109890114e-05, + "loss": 0.2143, + "step": 17847 + }, + { + "epoch": 49.032967032967036, + "grad_norm": 7.398962497711182, + "learning_rate": 2.5483516483516484e-05, + "loss": 0.128, + "step": 17848 + }, + { + "epoch": 49.035714285714285, + "grad_norm": 13.829300880432129, + "learning_rate": 2.548214285714286e-05, + "loss": 0.3697, + "step": 17849 + }, + { + "epoch": 49.03846153846154, + "grad_norm": 12.3118257522583, + "learning_rate": 2.548076923076923e-05, + "loss": 0.193, + "step": 17850 + }, + { + "epoch": 49.04120879120879, + "grad_norm": 11.069372177124023, + "learning_rate": 2.5479395604395608e-05, + "loss": 0.3391, + "step": 17851 + }, + { + "epoch": 49.043956043956044, + "grad_norm": 14.505724906921387, + "learning_rate": 2.547802197802198e-05, + "loss": 0.2138, + "step": 17852 + }, + { + "epoch": 49.0467032967033, + "grad_norm": 1.777430772781372, + "learning_rate": 2.547664835164835e-05, + "loss": 0.0314, + "step": 17853 + }, + { + "epoch": 49.04945054945055, + "grad_norm": 11.926108360290527, + "learning_rate": 2.5475274725274728e-05, + "loss": 0.3194, + "step": 17854 + }, + { + "epoch": 49.0521978021978, + "grad_norm": 26.180593490600586, + "learning_rate": 2.5473901098901098e-05, + "loss": 0.8752, + "step": 17855 + }, + { + "epoch": 49.05494505494506, + "grad_norm": 13.33410358428955, + "learning_rate": 2.5472527472527475e-05, + "loss": 0.2554, + "step": 17856 + }, + { + "epoch": 49.05769230769231, + "grad_norm": 13.946805953979492, + "learning_rate": 2.547115384615385e-05, + "loss": 0.2181, + "step": 17857 + }, + { + "epoch": 49.06043956043956, + "grad_norm": 24.129966735839844, + "learning_rate": 2.546978021978022e-05, + "loss": 0.7979, + "step": 17858 + }, + { + "epoch": 49.06318681318681, + "grad_norm": 14.33501148223877, + "learning_rate": 2.5468406593406595e-05, + "loss": 0.4086, + "step": 17859 + }, + { + "epoch": 49.065934065934066, + "grad_norm": 16.450101852416992, + "learning_rate": 2.5467032967032965e-05, + "loss": 0.3833, + "step": 17860 + }, + { + "epoch": 49.06868131868132, + "grad_norm": 15.854449272155762, + "learning_rate": 2.5465659340659342e-05, + "loss": 0.261, + "step": 17861 + }, + { + "epoch": 49.07142857142857, + "grad_norm": 3.0312914848327637, + "learning_rate": 2.5464285714285712e-05, + "loss": 0.0381, + "step": 17862 + }, + { + "epoch": 49.074175824175825, + "grad_norm": 13.606073379516602, + "learning_rate": 2.546291208791209e-05, + "loss": 0.4111, + "step": 17863 + }, + { + "epoch": 49.07692307692308, + "grad_norm": 14.039041519165039, + "learning_rate": 2.5461538461538465e-05, + "loss": 0.2716, + "step": 17864 + }, + { + "epoch": 49.07967032967033, + "grad_norm": 8.104549407958984, + "learning_rate": 2.5460164835164835e-05, + "loss": 0.3576, + "step": 17865 + }, + { + "epoch": 49.082417582417584, + "grad_norm": 12.24514102935791, + "learning_rate": 2.5458791208791212e-05, + "loss": 0.1373, + "step": 17866 + }, + { + "epoch": 49.08516483516483, + "grad_norm": 17.17860984802246, + "learning_rate": 2.5457417582417582e-05, + "loss": 0.4159, + "step": 17867 + }, + { + "epoch": 49.08791208791209, + "grad_norm": 17.288633346557617, + "learning_rate": 2.5456043956043956e-05, + "loss": 0.2455, + "step": 17868 + }, + { + "epoch": 49.09065934065934, + "grad_norm": 11.969414710998535, + "learning_rate": 2.5454670329670332e-05, + "loss": 0.164, + "step": 17869 + }, + { + "epoch": 49.09340659340659, + "grad_norm": 5.609821319580078, + "learning_rate": 2.5453296703296702e-05, + "loss": 0.1122, + "step": 17870 + }, + { + "epoch": 49.09615384615385, + "grad_norm": 11.913156509399414, + "learning_rate": 2.545192307692308e-05, + "loss": 0.2386, + "step": 17871 + }, + { + "epoch": 49.0989010989011, + "grad_norm": 11.901877403259277, + "learning_rate": 2.545054945054945e-05, + "loss": 0.251, + "step": 17872 + }, + { + "epoch": 49.10164835164835, + "grad_norm": 11.732121467590332, + "learning_rate": 2.5449175824175826e-05, + "loss": 0.2943, + "step": 17873 + }, + { + "epoch": 49.104395604395606, + "grad_norm": 18.620824813842773, + "learning_rate": 2.54478021978022e-05, + "loss": 0.3739, + "step": 17874 + }, + { + "epoch": 49.107142857142854, + "grad_norm": 11.154107093811035, + "learning_rate": 2.544642857142857e-05, + "loss": 0.2155, + "step": 17875 + }, + { + "epoch": 49.10989010989011, + "grad_norm": 11.434205055236816, + "learning_rate": 2.5445054945054946e-05, + "loss": 0.349, + "step": 17876 + }, + { + "epoch": 49.112637362637365, + "grad_norm": 22.28744125366211, + "learning_rate": 2.5443681318681316e-05, + "loss": 0.5267, + "step": 17877 + }, + { + "epoch": 49.11538461538461, + "grad_norm": 4.2694597244262695, + "learning_rate": 2.5442307692307693e-05, + "loss": 0.1249, + "step": 17878 + }, + { + "epoch": 49.11813186813187, + "grad_norm": 8.568028450012207, + "learning_rate": 2.544093406593407e-05, + "loss": 0.1081, + "step": 17879 + }, + { + "epoch": 49.120879120879124, + "grad_norm": 18.230716705322266, + "learning_rate": 2.543956043956044e-05, + "loss": 0.2495, + "step": 17880 + }, + { + "epoch": 49.12362637362637, + "grad_norm": 17.519519805908203, + "learning_rate": 2.5438186813186817e-05, + "loss": 0.3629, + "step": 17881 + }, + { + "epoch": 49.12637362637363, + "grad_norm": 8.604352951049805, + "learning_rate": 2.5436813186813187e-05, + "loss": 0.1398, + "step": 17882 + }, + { + "epoch": 49.129120879120876, + "grad_norm": 9.95962142944336, + "learning_rate": 2.543543956043956e-05, + "loss": 0.0963, + "step": 17883 + }, + { + "epoch": 49.13186813186813, + "grad_norm": 8.470718383789062, + "learning_rate": 2.5434065934065937e-05, + "loss": 0.1402, + "step": 17884 + }, + { + "epoch": 49.13461538461539, + "grad_norm": 4.634867191314697, + "learning_rate": 2.5432692307692307e-05, + "loss": 0.099, + "step": 17885 + }, + { + "epoch": 49.137362637362635, + "grad_norm": 14.267294883728027, + "learning_rate": 2.5431318681318684e-05, + "loss": 0.4312, + "step": 17886 + }, + { + "epoch": 49.14010989010989, + "grad_norm": 15.49941635131836, + "learning_rate": 2.5429945054945054e-05, + "loss": 0.3371, + "step": 17887 + }, + { + "epoch": 49.142857142857146, + "grad_norm": 12.860214233398438, + "learning_rate": 2.542857142857143e-05, + "loss": 0.2622, + "step": 17888 + }, + { + "epoch": 49.145604395604394, + "grad_norm": 17.598487854003906, + "learning_rate": 2.5427197802197804e-05, + "loss": 0.479, + "step": 17889 + }, + { + "epoch": 49.14835164835165, + "grad_norm": 17.863147735595703, + "learning_rate": 2.5425824175824174e-05, + "loss": 0.4669, + "step": 17890 + }, + { + "epoch": 49.1510989010989, + "grad_norm": 11.143142700195312, + "learning_rate": 2.542445054945055e-05, + "loss": 0.2104, + "step": 17891 + }, + { + "epoch": 49.15384615384615, + "grad_norm": 18.71783447265625, + "learning_rate": 2.542307692307692e-05, + "loss": 0.4103, + "step": 17892 + }, + { + "epoch": 49.15659340659341, + "grad_norm": 10.24393081665039, + "learning_rate": 2.5421703296703298e-05, + "loss": 0.1887, + "step": 17893 + }, + { + "epoch": 49.15934065934066, + "grad_norm": 4.872195720672607, + "learning_rate": 2.5420329670329674e-05, + "loss": 0.0802, + "step": 17894 + }, + { + "epoch": 49.16208791208791, + "grad_norm": 6.374049186706543, + "learning_rate": 2.5418956043956044e-05, + "loss": 0.0664, + "step": 17895 + }, + { + "epoch": 49.16483516483517, + "grad_norm": 8.490580558776855, + "learning_rate": 2.541758241758242e-05, + "loss": 0.2762, + "step": 17896 + }, + { + "epoch": 49.167582417582416, + "grad_norm": 9.769783973693848, + "learning_rate": 2.541620879120879e-05, + "loss": 0.1276, + "step": 17897 + }, + { + "epoch": 49.17032967032967, + "grad_norm": 10.031067848205566, + "learning_rate": 2.5414835164835165e-05, + "loss": 0.2266, + "step": 17898 + }, + { + "epoch": 49.17307692307692, + "grad_norm": 8.133520126342773, + "learning_rate": 2.541346153846154e-05, + "loss": 0.1296, + "step": 17899 + }, + { + "epoch": 49.175824175824175, + "grad_norm": 14.493476867675781, + "learning_rate": 2.541208791208791e-05, + "loss": 0.4229, + "step": 17900 + }, + { + "epoch": 49.17857142857143, + "grad_norm": 2.274308443069458, + "learning_rate": 2.5410714285714288e-05, + "loss": 0.0544, + "step": 17901 + }, + { + "epoch": 49.18131868131868, + "grad_norm": 18.319889068603516, + "learning_rate": 2.5409340659340658e-05, + "loss": 0.6263, + "step": 17902 + }, + { + "epoch": 49.184065934065934, + "grad_norm": 16.525171279907227, + "learning_rate": 2.5407967032967035e-05, + "loss": 0.3261, + "step": 17903 + }, + { + "epoch": 49.18681318681319, + "grad_norm": 8.654464721679688, + "learning_rate": 2.540659340659341e-05, + "loss": 0.1463, + "step": 17904 + }, + { + "epoch": 49.18956043956044, + "grad_norm": 10.468099594116211, + "learning_rate": 2.540521978021978e-05, + "loss": 0.3746, + "step": 17905 + }, + { + "epoch": 49.19230769230769, + "grad_norm": 15.919147491455078, + "learning_rate": 2.5403846153846155e-05, + "loss": 0.3334, + "step": 17906 + }, + { + "epoch": 49.19505494505494, + "grad_norm": 11.303801536560059, + "learning_rate": 2.5402472527472525e-05, + "loss": 0.4598, + "step": 17907 + }, + { + "epoch": 49.1978021978022, + "grad_norm": 17.802326202392578, + "learning_rate": 2.5401098901098902e-05, + "loss": 0.406, + "step": 17908 + }, + { + "epoch": 49.20054945054945, + "grad_norm": 8.236441612243652, + "learning_rate": 2.539972527472528e-05, + "loss": 0.1134, + "step": 17909 + }, + { + "epoch": 49.2032967032967, + "grad_norm": 10.027070999145508, + "learning_rate": 2.539835164835165e-05, + "loss": 0.2709, + "step": 17910 + }, + { + "epoch": 49.206043956043956, + "grad_norm": 5.639066219329834, + "learning_rate": 2.5396978021978026e-05, + "loss": 0.1155, + "step": 17911 + }, + { + "epoch": 49.20879120879121, + "grad_norm": 15.735100746154785, + "learning_rate": 2.5395604395604396e-05, + "loss": 0.5945, + "step": 17912 + }, + { + "epoch": 49.21153846153846, + "grad_norm": 16.908355712890625, + "learning_rate": 2.539423076923077e-05, + "loss": 0.4602, + "step": 17913 + }, + { + "epoch": 49.214285714285715, + "grad_norm": 12.555261611938477, + "learning_rate": 2.5392857142857146e-05, + "loss": 0.2751, + "step": 17914 + }, + { + "epoch": 49.217032967032964, + "grad_norm": 16.50872230529785, + "learning_rate": 2.5391483516483516e-05, + "loss": 0.4601, + "step": 17915 + }, + { + "epoch": 49.21978021978022, + "grad_norm": 13.440149307250977, + "learning_rate": 2.5390109890109893e-05, + "loss": 0.2879, + "step": 17916 + }, + { + "epoch": 49.222527472527474, + "grad_norm": 6.339029788970947, + "learning_rate": 2.5388736263736263e-05, + "loss": 0.0976, + "step": 17917 + }, + { + "epoch": 49.22527472527472, + "grad_norm": 11.645484924316406, + "learning_rate": 2.538736263736264e-05, + "loss": 0.2706, + "step": 17918 + }, + { + "epoch": 49.22802197802198, + "grad_norm": 5.045288562774658, + "learning_rate": 2.5385989010989013e-05, + "loss": 0.0794, + "step": 17919 + }, + { + "epoch": 49.23076923076923, + "grad_norm": 3.756744146347046, + "learning_rate": 2.5384615384615383e-05, + "loss": 0.0828, + "step": 17920 + }, + { + "epoch": 49.23351648351648, + "grad_norm": 6.411107063293457, + "learning_rate": 2.538324175824176e-05, + "loss": 0.198, + "step": 17921 + }, + { + "epoch": 49.23626373626374, + "grad_norm": 4.277527332305908, + "learning_rate": 2.538186813186813e-05, + "loss": 0.0678, + "step": 17922 + }, + { + "epoch": 49.239010989010985, + "grad_norm": 19.550350189208984, + "learning_rate": 2.5380494505494507e-05, + "loss": 0.5296, + "step": 17923 + }, + { + "epoch": 49.24175824175824, + "grad_norm": 11.024368286132812, + "learning_rate": 2.5379120879120883e-05, + "loss": 0.1553, + "step": 17924 + }, + { + "epoch": 49.244505494505496, + "grad_norm": 6.318696022033691, + "learning_rate": 2.5377747252747253e-05, + "loss": 0.1316, + "step": 17925 + }, + { + "epoch": 49.247252747252745, + "grad_norm": 23.990188598632812, + "learning_rate": 2.537637362637363e-05, + "loss": 0.4492, + "step": 17926 + }, + { + "epoch": 49.25, + "grad_norm": 11.283445358276367, + "learning_rate": 2.5375e-05, + "loss": 0.3901, + "step": 17927 + }, + { + "epoch": 49.252747252747255, + "grad_norm": 8.027983665466309, + "learning_rate": 2.5373626373626374e-05, + "loss": 0.1885, + "step": 17928 + }, + { + "epoch": 49.255494505494504, + "grad_norm": 23.74376106262207, + "learning_rate": 2.537225274725275e-05, + "loss": 0.6408, + "step": 17929 + }, + { + "epoch": 49.25824175824176, + "grad_norm": 12.471945762634277, + "learning_rate": 2.537087912087912e-05, + "loss": 0.2639, + "step": 17930 + }, + { + "epoch": 49.260989010989015, + "grad_norm": 23.66696548461914, + "learning_rate": 2.5369505494505497e-05, + "loss": 0.2937, + "step": 17931 + }, + { + "epoch": 49.26373626373626, + "grad_norm": 10.623218536376953, + "learning_rate": 2.5368131868131867e-05, + "loss": 0.3006, + "step": 17932 + }, + { + "epoch": 49.26648351648352, + "grad_norm": 15.422887802124023, + "learning_rate": 2.5366758241758244e-05, + "loss": 0.3253, + "step": 17933 + }, + { + "epoch": 49.26923076923077, + "grad_norm": 4.595606327056885, + "learning_rate": 2.5365384615384617e-05, + "loss": 0.0799, + "step": 17934 + }, + { + "epoch": 49.27197802197802, + "grad_norm": 7.640251636505127, + "learning_rate": 2.5364010989010987e-05, + "loss": 0.1434, + "step": 17935 + }, + { + "epoch": 49.27472527472528, + "grad_norm": 14.237427711486816, + "learning_rate": 2.5362637362637364e-05, + "loss": 0.4132, + "step": 17936 + }, + { + "epoch": 49.277472527472526, + "grad_norm": 7.160999774932861, + "learning_rate": 2.5361263736263734e-05, + "loss": 0.1017, + "step": 17937 + }, + { + "epoch": 49.28021978021978, + "grad_norm": 8.441855430603027, + "learning_rate": 2.535989010989011e-05, + "loss": 0.1966, + "step": 17938 + }, + { + "epoch": 49.282967032967036, + "grad_norm": 2.1045238971710205, + "learning_rate": 2.5358516483516488e-05, + "loss": 0.0396, + "step": 17939 + }, + { + "epoch": 49.285714285714285, + "grad_norm": 9.25843620300293, + "learning_rate": 2.5357142857142858e-05, + "loss": 0.2271, + "step": 17940 + }, + { + "epoch": 49.28846153846154, + "grad_norm": 12.739964485168457, + "learning_rate": 2.5355769230769235e-05, + "loss": 0.2774, + "step": 17941 + }, + { + "epoch": 49.29120879120879, + "grad_norm": 7.408907890319824, + "learning_rate": 2.5354395604395605e-05, + "loss": 0.0929, + "step": 17942 + }, + { + "epoch": 49.293956043956044, + "grad_norm": 15.010862350463867, + "learning_rate": 2.5353021978021978e-05, + "loss": 0.3779, + "step": 17943 + }, + { + "epoch": 49.2967032967033, + "grad_norm": 9.429492950439453, + "learning_rate": 2.5351648351648355e-05, + "loss": 0.3001, + "step": 17944 + }, + { + "epoch": 49.29945054945055, + "grad_norm": 17.946090698242188, + "learning_rate": 2.5350274725274725e-05, + "loss": 0.6107, + "step": 17945 + }, + { + "epoch": 49.3021978021978, + "grad_norm": 9.641889572143555, + "learning_rate": 2.53489010989011e-05, + "loss": 0.1827, + "step": 17946 + }, + { + "epoch": 49.30494505494506, + "grad_norm": 13.897856712341309, + "learning_rate": 2.534752747252747e-05, + "loss": 0.2241, + "step": 17947 + }, + { + "epoch": 49.30769230769231, + "grad_norm": 16.60097885131836, + "learning_rate": 2.534615384615385e-05, + "loss": 0.4246, + "step": 17948 + }, + { + "epoch": 49.31043956043956, + "grad_norm": 5.921530246734619, + "learning_rate": 2.5344780219780222e-05, + "loss": 0.1129, + "step": 17949 + }, + { + "epoch": 49.31318681318681, + "grad_norm": 8.029272079467773, + "learning_rate": 2.5343406593406592e-05, + "loss": 0.2031, + "step": 17950 + }, + { + "epoch": 49.315934065934066, + "grad_norm": 11.395703315734863, + "learning_rate": 2.534203296703297e-05, + "loss": 0.2235, + "step": 17951 + }, + { + "epoch": 49.31868131868132, + "grad_norm": 7.56351375579834, + "learning_rate": 2.534065934065934e-05, + "loss": 0.1162, + "step": 17952 + }, + { + "epoch": 49.32142857142857, + "grad_norm": 9.051813125610352, + "learning_rate": 2.5339285714285716e-05, + "loss": 0.1355, + "step": 17953 + }, + { + "epoch": 49.324175824175825, + "grad_norm": 9.371792793273926, + "learning_rate": 2.5337912087912092e-05, + "loss": 0.2958, + "step": 17954 + }, + { + "epoch": 49.32692307692308, + "grad_norm": 23.42078399658203, + "learning_rate": 2.5336538461538462e-05, + "loss": 0.8676, + "step": 17955 + }, + { + "epoch": 49.32967032967033, + "grad_norm": 6.892885684967041, + "learning_rate": 2.5335164835164836e-05, + "loss": 0.1861, + "step": 17956 + }, + { + "epoch": 49.332417582417584, + "grad_norm": 14.959259986877441, + "learning_rate": 2.533379120879121e-05, + "loss": 0.4409, + "step": 17957 + }, + { + "epoch": 49.33516483516483, + "grad_norm": 8.609489440917969, + "learning_rate": 2.5332417582417583e-05, + "loss": 0.141, + "step": 17958 + }, + { + "epoch": 49.33791208791209, + "grad_norm": 17.14305877685547, + "learning_rate": 2.533104395604396e-05, + "loss": 0.5521, + "step": 17959 + }, + { + "epoch": 49.34065934065934, + "grad_norm": 8.924546241760254, + "learning_rate": 2.532967032967033e-05, + "loss": 0.1626, + "step": 17960 + }, + { + "epoch": 49.34340659340659, + "grad_norm": 8.954856872558594, + "learning_rate": 2.5328296703296706e-05, + "loss": 0.1653, + "step": 17961 + }, + { + "epoch": 49.34615384615385, + "grad_norm": 2.1652164459228516, + "learning_rate": 2.5326923076923076e-05, + "loss": 0.0303, + "step": 17962 + }, + { + "epoch": 49.3489010989011, + "grad_norm": 9.392557144165039, + "learning_rate": 2.5325549450549453e-05, + "loss": 0.1609, + "step": 17963 + }, + { + "epoch": 49.35164835164835, + "grad_norm": 1.8866767883300781, + "learning_rate": 2.5324175824175826e-05, + "loss": 0.0327, + "step": 17964 + }, + { + "epoch": 49.354395604395606, + "grad_norm": 3.701897144317627, + "learning_rate": 2.5322802197802196e-05, + "loss": 0.1046, + "step": 17965 + }, + { + "epoch": 49.357142857142854, + "grad_norm": 11.205214500427246, + "learning_rate": 2.5321428571428573e-05, + "loss": 0.262, + "step": 17966 + }, + { + "epoch": 49.35989010989011, + "grad_norm": 2.1604506969451904, + "learning_rate": 2.5320054945054943e-05, + "loss": 0.05, + "step": 17967 + }, + { + "epoch": 49.362637362637365, + "grad_norm": 9.62006664276123, + "learning_rate": 2.531868131868132e-05, + "loss": 0.1781, + "step": 17968 + }, + { + "epoch": 49.36538461538461, + "grad_norm": 15.467459678649902, + "learning_rate": 2.5317307692307697e-05, + "loss": 0.2539, + "step": 17969 + }, + { + "epoch": 49.36813186813187, + "grad_norm": 9.085183143615723, + "learning_rate": 2.5315934065934067e-05, + "loss": 0.1402, + "step": 17970 + }, + { + "epoch": 49.370879120879124, + "grad_norm": 12.536578178405762, + "learning_rate": 2.531456043956044e-05, + "loss": 0.3491, + "step": 17971 + }, + { + "epoch": 49.37362637362637, + "grad_norm": 8.195021629333496, + "learning_rate": 2.5313186813186814e-05, + "loss": 0.1797, + "step": 17972 + }, + { + "epoch": 49.37637362637363, + "grad_norm": 7.284593105316162, + "learning_rate": 2.5311813186813187e-05, + "loss": 0.1104, + "step": 17973 + }, + { + "epoch": 49.379120879120876, + "grad_norm": 14.321738243103027, + "learning_rate": 2.5310439560439564e-05, + "loss": 0.1861, + "step": 17974 + }, + { + "epoch": 49.38186813186813, + "grad_norm": 3.149102210998535, + "learning_rate": 2.5309065934065934e-05, + "loss": 0.0579, + "step": 17975 + }, + { + "epoch": 49.38461538461539, + "grad_norm": 7.1093316078186035, + "learning_rate": 2.530769230769231e-05, + "loss": 0.1769, + "step": 17976 + }, + { + "epoch": 49.387362637362635, + "grad_norm": 12.005056381225586, + "learning_rate": 2.530631868131868e-05, + "loss": 0.3154, + "step": 17977 + }, + { + "epoch": 49.39010989010989, + "grad_norm": 15.142053604125977, + "learning_rate": 2.5304945054945057e-05, + "loss": 0.2993, + "step": 17978 + }, + { + "epoch": 49.392857142857146, + "grad_norm": 8.215664863586426, + "learning_rate": 2.530357142857143e-05, + "loss": 0.1475, + "step": 17979 + }, + { + "epoch": 49.395604395604394, + "grad_norm": 17.89952278137207, + "learning_rate": 2.53021978021978e-05, + "loss": 0.437, + "step": 17980 + }, + { + "epoch": 49.39835164835165, + "grad_norm": 7.383872032165527, + "learning_rate": 2.5300824175824178e-05, + "loss": 0.1017, + "step": 17981 + }, + { + "epoch": 49.4010989010989, + "grad_norm": 10.730690956115723, + "learning_rate": 2.5299450549450548e-05, + "loss": 0.1992, + "step": 17982 + }, + { + "epoch": 49.40384615384615, + "grad_norm": 3.610494613647461, + "learning_rate": 2.5298076923076924e-05, + "loss": 0.0541, + "step": 17983 + }, + { + "epoch": 49.40659340659341, + "grad_norm": 10.883550643920898, + "learning_rate": 2.52967032967033e-05, + "loss": 0.3475, + "step": 17984 + }, + { + "epoch": 49.40934065934066, + "grad_norm": 11.964673042297363, + "learning_rate": 2.529532967032967e-05, + "loss": 0.2665, + "step": 17985 + }, + { + "epoch": 49.41208791208791, + "grad_norm": 13.857878684997559, + "learning_rate": 2.5293956043956045e-05, + "loss": 0.1902, + "step": 17986 + }, + { + "epoch": 49.41483516483517, + "grad_norm": 15.597675323486328, + "learning_rate": 2.5292582417582418e-05, + "loss": 0.7022, + "step": 17987 + }, + { + "epoch": 49.417582417582416, + "grad_norm": 21.319795608520508, + "learning_rate": 2.529120879120879e-05, + "loss": 0.4961, + "step": 17988 + }, + { + "epoch": 49.42032967032967, + "grad_norm": 9.518660545349121, + "learning_rate": 2.528983516483517e-05, + "loss": 0.2569, + "step": 17989 + }, + { + "epoch": 49.42307692307692, + "grad_norm": 9.96481704711914, + "learning_rate": 2.528846153846154e-05, + "loss": 0.3093, + "step": 17990 + }, + { + "epoch": 49.425824175824175, + "grad_norm": 15.533333778381348, + "learning_rate": 2.5287087912087915e-05, + "loss": 0.2668, + "step": 17991 + }, + { + "epoch": 49.42857142857143, + "grad_norm": 6.852975368499756, + "learning_rate": 2.5285714285714285e-05, + "loss": 0.1697, + "step": 17992 + }, + { + "epoch": 49.43131868131868, + "grad_norm": 10.892672538757324, + "learning_rate": 2.5284340659340662e-05, + "loss": 0.2863, + "step": 17993 + }, + { + "epoch": 49.434065934065934, + "grad_norm": 3.2824065685272217, + "learning_rate": 2.5282967032967035e-05, + "loss": 0.0423, + "step": 17994 + }, + { + "epoch": 49.43681318681319, + "grad_norm": 17.44612693786621, + "learning_rate": 2.5281593406593405e-05, + "loss": 0.2941, + "step": 17995 + }, + { + "epoch": 49.43956043956044, + "grad_norm": 15.585558891296387, + "learning_rate": 2.5280219780219782e-05, + "loss": 0.3505, + "step": 17996 + }, + { + "epoch": 49.44230769230769, + "grad_norm": 11.847749710083008, + "learning_rate": 2.5278846153846152e-05, + "loss": 0.2071, + "step": 17997 + }, + { + "epoch": 49.44505494505494, + "grad_norm": 3.8664145469665527, + "learning_rate": 2.527747252747253e-05, + "loss": 0.0534, + "step": 17998 + }, + { + "epoch": 49.4478021978022, + "grad_norm": 7.4469075202941895, + "learning_rate": 2.5276098901098906e-05, + "loss": 0.1632, + "step": 17999 + }, + { + "epoch": 49.45054945054945, + "grad_norm": 3.6370999813079834, + "learning_rate": 2.5274725274725276e-05, + "loss": 0.0517, + "step": 18000 + }, + { + "epoch": 49.4532967032967, + "grad_norm": 8.561993598937988, + "learning_rate": 2.527335164835165e-05, + "loss": 0.1991, + "step": 18001 + }, + { + "epoch": 49.456043956043956, + "grad_norm": 17.99018096923828, + "learning_rate": 2.5271978021978023e-05, + "loss": 0.374, + "step": 18002 + }, + { + "epoch": 49.45879120879121, + "grad_norm": 16.143468856811523, + "learning_rate": 2.5270604395604396e-05, + "loss": 0.4335, + "step": 18003 + }, + { + "epoch": 49.46153846153846, + "grad_norm": 14.289507865905762, + "learning_rate": 2.5269230769230773e-05, + "loss": 0.4011, + "step": 18004 + }, + { + "epoch": 49.464285714285715, + "grad_norm": 10.265084266662598, + "learning_rate": 2.5267857142857143e-05, + "loss": 0.297, + "step": 18005 + }, + { + "epoch": 49.467032967032964, + "grad_norm": 10.655991554260254, + "learning_rate": 2.526648351648352e-05, + "loss": 0.3431, + "step": 18006 + }, + { + "epoch": 49.46978021978022, + "grad_norm": 12.014655113220215, + "learning_rate": 2.526510989010989e-05, + "loss": 0.2321, + "step": 18007 + }, + { + "epoch": 49.472527472527474, + "grad_norm": 8.421186447143555, + "learning_rate": 2.5263736263736266e-05, + "loss": 0.2166, + "step": 18008 + }, + { + "epoch": 49.47527472527472, + "grad_norm": 18.088285446166992, + "learning_rate": 2.526236263736264e-05, + "loss": 0.4994, + "step": 18009 + }, + { + "epoch": 49.47802197802198, + "grad_norm": 10.547478675842285, + "learning_rate": 2.526098901098901e-05, + "loss": 0.1112, + "step": 18010 + }, + { + "epoch": 49.48076923076923, + "grad_norm": 12.308764457702637, + "learning_rate": 2.5259615384615387e-05, + "loss": 0.3914, + "step": 18011 + }, + { + "epoch": 49.48351648351648, + "grad_norm": 11.124271392822266, + "learning_rate": 2.5258241758241757e-05, + "loss": 0.1936, + "step": 18012 + }, + { + "epoch": 49.48626373626374, + "grad_norm": 9.269023895263672, + "learning_rate": 2.5256868131868133e-05, + "loss": 0.1477, + "step": 18013 + }, + { + "epoch": 49.489010989010985, + "grad_norm": 21.484981536865234, + "learning_rate": 2.525549450549451e-05, + "loss": 0.624, + "step": 18014 + }, + { + "epoch": 49.49175824175824, + "grad_norm": 11.122051239013672, + "learning_rate": 2.525412087912088e-05, + "loss": 0.2051, + "step": 18015 + }, + { + "epoch": 49.494505494505496, + "grad_norm": 11.673626899719238, + "learning_rate": 2.5252747252747254e-05, + "loss": 0.2472, + "step": 18016 + }, + { + "epoch": 49.497252747252745, + "grad_norm": 8.45416259765625, + "learning_rate": 2.5251373626373627e-05, + "loss": 0.1347, + "step": 18017 + }, + { + "epoch": 49.5, + "grad_norm": 14.62176513671875, + "learning_rate": 2.525e-05, + "loss": 0.1844, + "step": 18018 + }, + { + "epoch": 49.502747252747255, + "grad_norm": 17.41153335571289, + "learning_rate": 2.5248626373626377e-05, + "loss": 0.223, + "step": 18019 + }, + { + "epoch": 49.505494505494504, + "grad_norm": 8.48060417175293, + "learning_rate": 2.5247252747252747e-05, + "loss": 0.1643, + "step": 18020 + }, + { + "epoch": 49.50824175824176, + "grad_norm": 5.691101551055908, + "learning_rate": 2.5245879120879124e-05, + "loss": 0.0906, + "step": 18021 + }, + { + "epoch": 49.51098901098901, + "grad_norm": 12.634267807006836, + "learning_rate": 2.5244505494505494e-05, + "loss": 0.355, + "step": 18022 + }, + { + "epoch": 49.51373626373626, + "grad_norm": 14.542390823364258, + "learning_rate": 2.524313186813187e-05, + "loss": 0.2869, + "step": 18023 + }, + { + "epoch": 49.51648351648352, + "grad_norm": 15.629072189331055, + "learning_rate": 2.5241758241758244e-05, + "loss": 0.2745, + "step": 18024 + }, + { + "epoch": 49.51923076923077, + "grad_norm": 11.230657577514648, + "learning_rate": 2.5240384615384614e-05, + "loss": 0.4583, + "step": 18025 + }, + { + "epoch": 49.52197802197802, + "grad_norm": 9.780689239501953, + "learning_rate": 2.523901098901099e-05, + "loss": 0.2171, + "step": 18026 + }, + { + "epoch": 49.52472527472528, + "grad_norm": 11.257162094116211, + "learning_rate": 2.523763736263736e-05, + "loss": 0.2264, + "step": 18027 + }, + { + "epoch": 49.527472527472526, + "grad_norm": 5.757430553436279, + "learning_rate": 2.5236263736263738e-05, + "loss": 0.1088, + "step": 18028 + }, + { + "epoch": 49.53021978021978, + "grad_norm": 9.72566032409668, + "learning_rate": 2.5234890109890115e-05, + "loss": 0.1494, + "step": 18029 + }, + { + "epoch": 49.532967032967036, + "grad_norm": 13.580294609069824, + "learning_rate": 2.5233516483516485e-05, + "loss": 0.4576, + "step": 18030 + }, + { + "epoch": 49.535714285714285, + "grad_norm": 10.736595153808594, + "learning_rate": 2.5232142857142858e-05, + "loss": 0.4377, + "step": 18031 + }, + { + "epoch": 49.53846153846154, + "grad_norm": 11.530890464782715, + "learning_rate": 2.523076923076923e-05, + "loss": 0.2612, + "step": 18032 + }, + { + "epoch": 49.54120879120879, + "grad_norm": 6.399957656860352, + "learning_rate": 2.5229395604395605e-05, + "loss": 0.1022, + "step": 18033 + }, + { + "epoch": 49.543956043956044, + "grad_norm": 9.054939270019531, + "learning_rate": 2.5228021978021982e-05, + "loss": 0.1742, + "step": 18034 + }, + { + "epoch": 49.5467032967033, + "grad_norm": 14.222136497497559, + "learning_rate": 2.5226648351648352e-05, + "loss": 0.3629, + "step": 18035 + }, + { + "epoch": 49.54945054945055, + "grad_norm": 1.4729946851730347, + "learning_rate": 2.522527472527473e-05, + "loss": 0.0147, + "step": 18036 + }, + { + "epoch": 49.5521978021978, + "grad_norm": 25.56587791442871, + "learning_rate": 2.52239010989011e-05, + "loss": 0.5093, + "step": 18037 + }, + { + "epoch": 49.55494505494506, + "grad_norm": 19.3468017578125, + "learning_rate": 2.5222527472527475e-05, + "loss": 0.3395, + "step": 18038 + }, + { + "epoch": 49.55769230769231, + "grad_norm": 10.931350708007812, + "learning_rate": 2.522115384615385e-05, + "loss": 0.2069, + "step": 18039 + }, + { + "epoch": 49.56043956043956, + "grad_norm": 8.710160255432129, + "learning_rate": 2.521978021978022e-05, + "loss": 0.1841, + "step": 18040 + }, + { + "epoch": 49.56318681318681, + "grad_norm": 15.104308128356934, + "learning_rate": 2.5218406593406596e-05, + "loss": 0.3493, + "step": 18041 + }, + { + "epoch": 49.565934065934066, + "grad_norm": 5.661465644836426, + "learning_rate": 2.5217032967032966e-05, + "loss": 0.1004, + "step": 18042 + }, + { + "epoch": 49.56868131868132, + "grad_norm": 5.826298713684082, + "learning_rate": 2.5215659340659342e-05, + "loss": 0.097, + "step": 18043 + }, + { + "epoch": 49.57142857142857, + "grad_norm": 8.175372123718262, + "learning_rate": 2.521428571428572e-05, + "loss": 0.1998, + "step": 18044 + }, + { + "epoch": 49.574175824175825, + "grad_norm": 7.470742225646973, + "learning_rate": 2.521291208791209e-05, + "loss": 0.2438, + "step": 18045 + }, + { + "epoch": 49.57692307692308, + "grad_norm": 6.014983654022217, + "learning_rate": 2.5211538461538463e-05, + "loss": 0.0972, + "step": 18046 + }, + { + "epoch": 49.57967032967033, + "grad_norm": 6.069437503814697, + "learning_rate": 2.5210164835164836e-05, + "loss": 0.0987, + "step": 18047 + }, + { + "epoch": 49.582417582417584, + "grad_norm": 10.669798851013184, + "learning_rate": 2.520879120879121e-05, + "loss": 0.1169, + "step": 18048 + }, + { + "epoch": 49.58516483516483, + "grad_norm": 4.606900691986084, + "learning_rate": 2.5207417582417586e-05, + "loss": 0.1124, + "step": 18049 + }, + { + "epoch": 49.58791208791209, + "grad_norm": 14.93424129486084, + "learning_rate": 2.5206043956043956e-05, + "loss": 0.3357, + "step": 18050 + }, + { + "epoch": 49.59065934065934, + "grad_norm": 19.146953582763672, + "learning_rate": 2.5204670329670333e-05, + "loss": 0.3822, + "step": 18051 + }, + { + "epoch": 49.59340659340659, + "grad_norm": 14.104288101196289, + "learning_rate": 2.5203296703296703e-05, + "loss": 0.6335, + "step": 18052 + }, + { + "epoch": 49.59615384615385, + "grad_norm": 9.23088264465332, + "learning_rate": 2.520192307692308e-05, + "loss": 0.1334, + "step": 18053 + }, + { + "epoch": 49.5989010989011, + "grad_norm": 21.345611572265625, + "learning_rate": 2.5200549450549453e-05, + "loss": 0.2941, + "step": 18054 + }, + { + "epoch": 49.60164835164835, + "grad_norm": 9.250635147094727, + "learning_rate": 2.5199175824175823e-05, + "loss": 0.2081, + "step": 18055 + }, + { + "epoch": 49.604395604395606, + "grad_norm": 21.77407455444336, + "learning_rate": 2.51978021978022e-05, + "loss": 0.4962, + "step": 18056 + }, + { + "epoch": 49.607142857142854, + "grad_norm": 12.421256065368652, + "learning_rate": 2.519642857142857e-05, + "loss": 0.4519, + "step": 18057 + }, + { + "epoch": 49.60989010989011, + "grad_norm": 18.956111907958984, + "learning_rate": 2.5195054945054947e-05, + "loss": 0.4047, + "step": 18058 + }, + { + "epoch": 49.612637362637365, + "grad_norm": 18.046432495117188, + "learning_rate": 2.5193681318681324e-05, + "loss": 0.3844, + "step": 18059 + }, + { + "epoch": 49.61538461538461, + "grad_norm": 25.138044357299805, + "learning_rate": 2.5192307692307694e-05, + "loss": 0.4602, + "step": 18060 + }, + { + "epoch": 49.61813186813187, + "grad_norm": 14.341666221618652, + "learning_rate": 2.5190934065934067e-05, + "loss": 0.2961, + "step": 18061 + }, + { + "epoch": 49.620879120879124, + "grad_norm": 15.612939834594727, + "learning_rate": 2.518956043956044e-05, + "loss": 0.3931, + "step": 18062 + }, + { + "epoch": 49.62362637362637, + "grad_norm": 8.9619140625, + "learning_rate": 2.5188186813186814e-05, + "loss": 0.1292, + "step": 18063 + }, + { + "epoch": 49.62637362637363, + "grad_norm": 11.006502151489258, + "learning_rate": 2.518681318681319e-05, + "loss": 0.2179, + "step": 18064 + }, + { + "epoch": 49.629120879120876, + "grad_norm": 22.39128303527832, + "learning_rate": 2.518543956043956e-05, + "loss": 0.8974, + "step": 18065 + }, + { + "epoch": 49.63186813186813, + "grad_norm": 9.212936401367188, + "learning_rate": 2.5184065934065938e-05, + "loss": 0.2257, + "step": 18066 + }, + { + "epoch": 49.63461538461539, + "grad_norm": 4.140016078948975, + "learning_rate": 2.5182692307692308e-05, + "loss": 0.0596, + "step": 18067 + }, + { + "epoch": 49.637362637362635, + "grad_norm": 15.605707168579102, + "learning_rate": 2.5181318681318684e-05, + "loss": 0.2965, + "step": 18068 + }, + { + "epoch": 49.64010989010989, + "grad_norm": 7.384472370147705, + "learning_rate": 2.5179945054945058e-05, + "loss": 0.1745, + "step": 18069 + }, + { + "epoch": 49.642857142857146, + "grad_norm": 6.050748348236084, + "learning_rate": 2.5178571428571428e-05, + "loss": 0.0909, + "step": 18070 + }, + { + "epoch": 49.645604395604394, + "grad_norm": 5.771355152130127, + "learning_rate": 2.5177197802197805e-05, + "loss": 0.0812, + "step": 18071 + }, + { + "epoch": 49.64835164835165, + "grad_norm": 18.616315841674805, + "learning_rate": 2.5175824175824175e-05, + "loss": 0.4524, + "step": 18072 + }, + { + "epoch": 49.6510989010989, + "grad_norm": 19.489566802978516, + "learning_rate": 2.517445054945055e-05, + "loss": 0.7845, + "step": 18073 + }, + { + "epoch": 49.65384615384615, + "grad_norm": 5.909056186676025, + "learning_rate": 2.5173076923076928e-05, + "loss": 0.2386, + "step": 18074 + }, + { + "epoch": 49.65659340659341, + "grad_norm": 4.017361640930176, + "learning_rate": 2.5171703296703298e-05, + "loss": 0.0623, + "step": 18075 + }, + { + "epoch": 49.65934065934066, + "grad_norm": 9.552122116088867, + "learning_rate": 2.517032967032967e-05, + "loss": 0.1954, + "step": 18076 + }, + { + "epoch": 49.66208791208791, + "grad_norm": 15.844687461853027, + "learning_rate": 2.5168956043956045e-05, + "loss": 0.5026, + "step": 18077 + }, + { + "epoch": 49.66483516483517, + "grad_norm": 15.311335563659668, + "learning_rate": 2.516758241758242e-05, + "loss": 0.3117, + "step": 18078 + }, + { + "epoch": 49.667582417582416, + "grad_norm": 13.563616752624512, + "learning_rate": 2.5166208791208795e-05, + "loss": 0.2093, + "step": 18079 + }, + { + "epoch": 49.67032967032967, + "grad_norm": 11.544203758239746, + "learning_rate": 2.5164835164835165e-05, + "loss": 0.1525, + "step": 18080 + }, + { + "epoch": 49.67307692307692, + "grad_norm": 9.430242538452148, + "learning_rate": 2.5163461538461542e-05, + "loss": 0.143, + "step": 18081 + }, + { + "epoch": 49.675824175824175, + "grad_norm": 3.542604446411133, + "learning_rate": 2.5162087912087912e-05, + "loss": 0.0769, + "step": 18082 + }, + { + "epoch": 49.67857142857143, + "grad_norm": 14.022672653198242, + "learning_rate": 2.516071428571429e-05, + "loss": 0.2132, + "step": 18083 + }, + { + "epoch": 49.68131868131868, + "grad_norm": 11.299764633178711, + "learning_rate": 2.5159340659340662e-05, + "loss": 0.1409, + "step": 18084 + }, + { + "epoch": 49.684065934065934, + "grad_norm": 12.975247383117676, + "learning_rate": 2.5157967032967032e-05, + "loss": 0.3828, + "step": 18085 + }, + { + "epoch": 49.68681318681319, + "grad_norm": 12.511350631713867, + "learning_rate": 2.515659340659341e-05, + "loss": 0.134, + "step": 18086 + }, + { + "epoch": 49.68956043956044, + "grad_norm": 3.838620662689209, + "learning_rate": 2.515521978021978e-05, + "loss": 0.036, + "step": 18087 + }, + { + "epoch": 49.69230769230769, + "grad_norm": 12.278550148010254, + "learning_rate": 2.5153846153846156e-05, + "loss": 0.2841, + "step": 18088 + }, + { + "epoch": 49.69505494505494, + "grad_norm": 8.587711334228516, + "learning_rate": 2.5152472527472526e-05, + "loss": 0.206, + "step": 18089 + }, + { + "epoch": 49.6978021978022, + "grad_norm": 9.58765983581543, + "learning_rate": 2.5151098901098903e-05, + "loss": 0.1842, + "step": 18090 + }, + { + "epoch": 49.70054945054945, + "grad_norm": 8.909403800964355, + "learning_rate": 2.5149725274725276e-05, + "loss": 0.3017, + "step": 18091 + }, + { + "epoch": 49.7032967032967, + "grad_norm": 4.631957530975342, + "learning_rate": 2.514835164835165e-05, + "loss": 0.1043, + "step": 18092 + }, + { + "epoch": 49.706043956043956, + "grad_norm": 13.639777183532715, + "learning_rate": 2.5146978021978023e-05, + "loss": 0.2479, + "step": 18093 + }, + { + "epoch": 49.70879120879121, + "grad_norm": 17.84382438659668, + "learning_rate": 2.5145604395604393e-05, + "loss": 0.6071, + "step": 18094 + }, + { + "epoch": 49.71153846153846, + "grad_norm": 4.685956954956055, + "learning_rate": 2.514423076923077e-05, + "loss": 0.0893, + "step": 18095 + }, + { + "epoch": 49.714285714285715, + "grad_norm": 3.533895969390869, + "learning_rate": 2.5142857142857147e-05, + "loss": 0.0677, + "step": 18096 + }, + { + "epoch": 49.717032967032964, + "grad_norm": 17.844932556152344, + "learning_rate": 2.5141483516483517e-05, + "loss": 0.2616, + "step": 18097 + }, + { + "epoch": 49.71978021978022, + "grad_norm": 8.514618873596191, + "learning_rate": 2.5140109890109893e-05, + "loss": 0.2283, + "step": 18098 + }, + { + "epoch": 49.722527472527474, + "grad_norm": 10.439958572387695, + "learning_rate": 2.5138736263736263e-05, + "loss": 0.3365, + "step": 18099 + }, + { + "epoch": 49.72527472527472, + "grad_norm": 8.303277015686035, + "learning_rate": 2.5137362637362637e-05, + "loss": 0.1823, + "step": 18100 + }, + { + "epoch": 49.72802197802198, + "grad_norm": 22.484575271606445, + "learning_rate": 2.5135989010989014e-05, + "loss": 0.5436, + "step": 18101 + }, + { + "epoch": 49.73076923076923, + "grad_norm": 9.568926811218262, + "learning_rate": 2.5134615384615384e-05, + "loss": 0.217, + "step": 18102 + }, + { + "epoch": 49.73351648351648, + "grad_norm": 12.228405952453613, + "learning_rate": 2.513324175824176e-05, + "loss": 0.224, + "step": 18103 + }, + { + "epoch": 49.73626373626374, + "grad_norm": 4.195041656494141, + "learning_rate": 2.513186813186813e-05, + "loss": 0.0582, + "step": 18104 + }, + { + "epoch": 49.73901098901099, + "grad_norm": 15.718873977661133, + "learning_rate": 2.5130494505494507e-05, + "loss": 0.3196, + "step": 18105 + }, + { + "epoch": 49.74175824175824, + "grad_norm": 16.0654239654541, + "learning_rate": 2.512912087912088e-05, + "loss": 0.4324, + "step": 18106 + }, + { + "epoch": 49.744505494505496, + "grad_norm": 14.430463790893555, + "learning_rate": 2.5127747252747254e-05, + "loss": 0.2743, + "step": 18107 + }, + { + "epoch": 49.747252747252745, + "grad_norm": 16.305606842041016, + "learning_rate": 2.5126373626373627e-05, + "loss": 0.4425, + "step": 18108 + }, + { + "epoch": 49.75, + "grad_norm": 12.407713890075684, + "learning_rate": 2.5124999999999997e-05, + "loss": 0.3645, + "step": 18109 + }, + { + "epoch": 49.752747252747255, + "grad_norm": 21.20265769958496, + "learning_rate": 2.5123626373626374e-05, + "loss": 0.6833, + "step": 18110 + }, + { + "epoch": 49.755494505494504, + "grad_norm": 12.614421844482422, + "learning_rate": 2.512225274725275e-05, + "loss": 0.2897, + "step": 18111 + }, + { + "epoch": 49.75824175824176, + "grad_norm": 6.761368274688721, + "learning_rate": 2.512087912087912e-05, + "loss": 0.1106, + "step": 18112 + }, + { + "epoch": 49.76098901098901, + "grad_norm": 19.554487228393555, + "learning_rate": 2.5119505494505498e-05, + "loss": 0.655, + "step": 18113 + }, + { + "epoch": 49.76373626373626, + "grad_norm": 4.07014274597168, + "learning_rate": 2.5118131868131868e-05, + "loss": 0.0658, + "step": 18114 + }, + { + "epoch": 49.76648351648352, + "grad_norm": 11.186603546142578, + "learning_rate": 2.511675824175824e-05, + "loss": 0.2189, + "step": 18115 + }, + { + "epoch": 49.76923076923077, + "grad_norm": 7.5231146812438965, + "learning_rate": 2.5115384615384618e-05, + "loss": 0.2008, + "step": 18116 + }, + { + "epoch": 49.77197802197802, + "grad_norm": 1.2641808986663818, + "learning_rate": 2.5114010989010988e-05, + "loss": 0.022, + "step": 18117 + }, + { + "epoch": 49.77472527472528, + "grad_norm": 11.552436828613281, + "learning_rate": 2.5112637362637365e-05, + "loss": 0.2253, + "step": 18118 + }, + { + "epoch": 49.777472527472526, + "grad_norm": 17.94102668762207, + "learning_rate": 2.5111263736263735e-05, + "loss": 0.4507, + "step": 18119 + }, + { + "epoch": 49.78021978021978, + "grad_norm": 7.479806900024414, + "learning_rate": 2.5109890109890112e-05, + "loss": 0.1829, + "step": 18120 + }, + { + "epoch": 49.782967032967036, + "grad_norm": 12.674209594726562, + "learning_rate": 2.5108516483516485e-05, + "loss": 0.3194, + "step": 18121 + }, + { + "epoch": 49.785714285714285, + "grad_norm": 13.717808723449707, + "learning_rate": 2.510714285714286e-05, + "loss": 0.3285, + "step": 18122 + }, + { + "epoch": 49.78846153846154, + "grad_norm": 13.333441734313965, + "learning_rate": 2.5105769230769232e-05, + "loss": 0.3055, + "step": 18123 + }, + { + "epoch": 49.79120879120879, + "grad_norm": 17.41960906982422, + "learning_rate": 2.5104395604395602e-05, + "loss": 0.2772, + "step": 18124 + }, + { + "epoch": 49.793956043956044, + "grad_norm": 15.686525344848633, + "learning_rate": 2.510302197802198e-05, + "loss": 0.4993, + "step": 18125 + }, + { + "epoch": 49.7967032967033, + "grad_norm": 24.005863189697266, + "learning_rate": 2.5101648351648356e-05, + "loss": 0.8527, + "step": 18126 + }, + { + "epoch": 49.79945054945055, + "grad_norm": 9.330541610717773, + "learning_rate": 2.5100274725274726e-05, + "loss": 0.188, + "step": 18127 + }, + { + "epoch": 49.8021978021978, + "grad_norm": 12.158135414123535, + "learning_rate": 2.5098901098901102e-05, + "loss": 0.2242, + "step": 18128 + }, + { + "epoch": 49.80494505494506, + "grad_norm": 10.353336334228516, + "learning_rate": 2.5097527472527472e-05, + "loss": 0.3212, + "step": 18129 + }, + { + "epoch": 49.80769230769231, + "grad_norm": 17.65234375, + "learning_rate": 2.5096153846153846e-05, + "loss": 0.5051, + "step": 18130 + }, + { + "epoch": 49.81043956043956, + "grad_norm": 11.996671676635742, + "learning_rate": 2.5094780219780223e-05, + "loss": 0.1871, + "step": 18131 + }, + { + "epoch": 49.81318681318681, + "grad_norm": 13.225960731506348, + "learning_rate": 2.5093406593406593e-05, + "loss": 0.1842, + "step": 18132 + }, + { + "epoch": 49.815934065934066, + "grad_norm": 17.890714645385742, + "learning_rate": 2.509203296703297e-05, + "loss": 0.3335, + "step": 18133 + }, + { + "epoch": 49.81868131868132, + "grad_norm": 11.476308822631836, + "learning_rate": 2.509065934065934e-05, + "loss": 0.3293, + "step": 18134 + }, + { + "epoch": 49.82142857142857, + "grad_norm": 11.04344654083252, + "learning_rate": 2.5089285714285716e-05, + "loss": 0.1032, + "step": 18135 + }, + { + "epoch": 49.824175824175825, + "grad_norm": 18.094608306884766, + "learning_rate": 2.508791208791209e-05, + "loss": 0.7018, + "step": 18136 + }, + { + "epoch": 49.82692307692308, + "grad_norm": 17.302080154418945, + "learning_rate": 2.5086538461538463e-05, + "loss": 0.4539, + "step": 18137 + }, + { + "epoch": 49.82967032967033, + "grad_norm": 14.964987754821777, + "learning_rate": 2.5085164835164836e-05, + "loss": 0.3697, + "step": 18138 + }, + { + "epoch": 49.832417582417584, + "grad_norm": 6.906111717224121, + "learning_rate": 2.5083791208791206e-05, + "loss": 0.1514, + "step": 18139 + }, + { + "epoch": 49.83516483516483, + "grad_norm": 10.422301292419434, + "learning_rate": 2.5082417582417583e-05, + "loss": 0.2217, + "step": 18140 + }, + { + "epoch": 49.83791208791209, + "grad_norm": 20.680662155151367, + "learning_rate": 2.508104395604396e-05, + "loss": 0.442, + "step": 18141 + }, + { + "epoch": 49.84065934065934, + "grad_norm": 11.071791648864746, + "learning_rate": 2.507967032967033e-05, + "loss": 0.3102, + "step": 18142 + }, + { + "epoch": 49.84340659340659, + "grad_norm": 15.19719409942627, + "learning_rate": 2.5078296703296707e-05, + "loss": 0.3815, + "step": 18143 + }, + { + "epoch": 49.84615384615385, + "grad_norm": 11.362295150756836, + "learning_rate": 2.5076923076923077e-05, + "loss": 0.2005, + "step": 18144 + }, + { + "epoch": 49.8489010989011, + "grad_norm": 11.239618301391602, + "learning_rate": 2.507554945054945e-05, + "loss": 0.1754, + "step": 18145 + }, + { + "epoch": 49.85164835164835, + "grad_norm": 13.144160270690918, + "learning_rate": 2.5074175824175827e-05, + "loss": 0.3173, + "step": 18146 + }, + { + "epoch": 49.854395604395606, + "grad_norm": 12.885457992553711, + "learning_rate": 2.5072802197802197e-05, + "loss": 0.235, + "step": 18147 + }, + { + "epoch": 49.857142857142854, + "grad_norm": 14.439298629760742, + "learning_rate": 2.5071428571428574e-05, + "loss": 0.2927, + "step": 18148 + }, + { + "epoch": 49.85989010989011, + "grad_norm": 16.751829147338867, + "learning_rate": 2.5070054945054944e-05, + "loss": 0.1893, + "step": 18149 + }, + { + "epoch": 49.862637362637365, + "grad_norm": 21.716955184936523, + "learning_rate": 2.506868131868132e-05, + "loss": 0.7264, + "step": 18150 + }, + { + "epoch": 49.86538461538461, + "grad_norm": 8.612074851989746, + "learning_rate": 2.5067307692307694e-05, + "loss": 0.1482, + "step": 18151 + }, + { + "epoch": 49.86813186813187, + "grad_norm": 19.069067001342773, + "learning_rate": 2.5065934065934064e-05, + "loss": 0.3792, + "step": 18152 + }, + { + "epoch": 49.870879120879124, + "grad_norm": 17.75701141357422, + "learning_rate": 2.506456043956044e-05, + "loss": 0.5569, + "step": 18153 + }, + { + "epoch": 49.87362637362637, + "grad_norm": 20.160072326660156, + "learning_rate": 2.506318681318681e-05, + "loss": 0.549, + "step": 18154 + }, + { + "epoch": 49.87637362637363, + "grad_norm": 11.127686500549316, + "learning_rate": 2.5061813186813188e-05, + "loss": 0.1933, + "step": 18155 + }, + { + "epoch": 49.879120879120876, + "grad_norm": 16.50539207458496, + "learning_rate": 2.5060439560439565e-05, + "loss": 0.5459, + "step": 18156 + }, + { + "epoch": 49.88186813186813, + "grad_norm": 8.037392616271973, + "learning_rate": 2.5059065934065935e-05, + "loss": 0.1442, + "step": 18157 + }, + { + "epoch": 49.88461538461539, + "grad_norm": 9.896159172058105, + "learning_rate": 2.505769230769231e-05, + "loss": 0.1714, + "step": 18158 + }, + { + "epoch": 49.887362637362635, + "grad_norm": 8.783759117126465, + "learning_rate": 2.505631868131868e-05, + "loss": 0.1069, + "step": 18159 + }, + { + "epoch": 49.89010989010989, + "grad_norm": 10.33008861541748, + "learning_rate": 2.5054945054945055e-05, + "loss": 0.175, + "step": 18160 + }, + { + "epoch": 49.892857142857146, + "grad_norm": 11.045662879943848, + "learning_rate": 2.505357142857143e-05, + "loss": 0.2335, + "step": 18161 + }, + { + "epoch": 49.895604395604394, + "grad_norm": 13.713234901428223, + "learning_rate": 2.50521978021978e-05, + "loss": 0.3522, + "step": 18162 + }, + { + "epoch": 49.89835164835165, + "grad_norm": 12.353307723999023, + "learning_rate": 2.505082417582418e-05, + "loss": 0.1946, + "step": 18163 + }, + { + "epoch": 49.9010989010989, + "grad_norm": 8.82427978515625, + "learning_rate": 2.504945054945055e-05, + "loss": 0.1138, + "step": 18164 + }, + { + "epoch": 49.90384615384615, + "grad_norm": 10.185198783874512, + "learning_rate": 2.5048076923076925e-05, + "loss": 0.3047, + "step": 18165 + }, + { + "epoch": 49.90659340659341, + "grad_norm": 7.573143482208252, + "learning_rate": 2.50467032967033e-05, + "loss": 0.1232, + "step": 18166 + }, + { + "epoch": 49.90934065934066, + "grad_norm": 15.44147777557373, + "learning_rate": 2.504532967032967e-05, + "loss": 0.3998, + "step": 18167 + }, + { + "epoch": 49.91208791208791, + "grad_norm": 4.711562156677246, + "learning_rate": 2.5043956043956045e-05, + "loss": 0.0478, + "step": 18168 + }, + { + "epoch": 49.91483516483517, + "grad_norm": 5.16933012008667, + "learning_rate": 2.5042582417582415e-05, + "loss": 0.0747, + "step": 18169 + }, + { + "epoch": 49.917582417582416, + "grad_norm": 12.789078712463379, + "learning_rate": 2.5041208791208792e-05, + "loss": 0.2817, + "step": 18170 + }, + { + "epoch": 49.92032967032967, + "grad_norm": 7.087686061859131, + "learning_rate": 2.503983516483517e-05, + "loss": 0.1008, + "step": 18171 + }, + { + "epoch": 49.92307692307692, + "grad_norm": 11.855371475219727, + "learning_rate": 2.503846153846154e-05, + "loss": 0.4067, + "step": 18172 + }, + { + "epoch": 49.925824175824175, + "grad_norm": 4.740017890930176, + "learning_rate": 2.5037087912087916e-05, + "loss": 0.077, + "step": 18173 + }, + { + "epoch": 49.92857142857143, + "grad_norm": 5.596473217010498, + "learning_rate": 2.5035714285714286e-05, + "loss": 0.0914, + "step": 18174 + }, + { + "epoch": 49.93131868131868, + "grad_norm": 18.881078720092773, + "learning_rate": 2.503434065934066e-05, + "loss": 0.632, + "step": 18175 + }, + { + "epoch": 49.934065934065934, + "grad_norm": 6.123483657836914, + "learning_rate": 2.5032967032967036e-05, + "loss": 0.1272, + "step": 18176 + }, + { + "epoch": 49.93681318681319, + "grad_norm": 11.504762649536133, + "learning_rate": 2.5031593406593406e-05, + "loss": 0.1652, + "step": 18177 + }, + { + "epoch": 49.93956043956044, + "grad_norm": 10.410545349121094, + "learning_rate": 2.5030219780219783e-05, + "loss": 0.126, + "step": 18178 + }, + { + "epoch": 49.94230769230769, + "grad_norm": 2.9576432704925537, + "learning_rate": 2.5028846153846153e-05, + "loss": 0.0401, + "step": 18179 + }, + { + "epoch": 49.94505494505494, + "grad_norm": 15.794256210327148, + "learning_rate": 2.502747252747253e-05, + "loss": 0.3086, + "step": 18180 + }, + { + "epoch": 49.9478021978022, + "grad_norm": 14.718915939331055, + "learning_rate": 2.5026098901098903e-05, + "loss": 0.4423, + "step": 18181 + }, + { + "epoch": 49.95054945054945, + "grad_norm": 6.870554447174072, + "learning_rate": 2.5024725274725273e-05, + "loss": 0.0698, + "step": 18182 + }, + { + "epoch": 49.9532967032967, + "grad_norm": 10.64074420928955, + "learning_rate": 2.502335164835165e-05, + "loss": 0.2847, + "step": 18183 + }, + { + "epoch": 49.956043956043956, + "grad_norm": 18.644716262817383, + "learning_rate": 2.502197802197802e-05, + "loss": 0.4707, + "step": 18184 + }, + { + "epoch": 49.95879120879121, + "grad_norm": 20.697309494018555, + "learning_rate": 2.5020604395604397e-05, + "loss": 0.5551, + "step": 18185 + }, + { + "epoch": 49.96153846153846, + "grad_norm": 6.38948917388916, + "learning_rate": 2.5019230769230774e-05, + "loss": 0.075, + "step": 18186 + }, + { + "epoch": 49.964285714285715, + "grad_norm": 7.66073751449585, + "learning_rate": 2.5017857142857144e-05, + "loss": 0.1385, + "step": 18187 + }, + { + "epoch": 49.967032967032964, + "grad_norm": 20.41083526611328, + "learning_rate": 2.501648351648352e-05, + "loss": 0.8518, + "step": 18188 + }, + { + "epoch": 49.96978021978022, + "grad_norm": 8.476832389831543, + "learning_rate": 2.501510989010989e-05, + "loss": 0.1387, + "step": 18189 + }, + { + "epoch": 49.972527472527474, + "grad_norm": 3.6811580657958984, + "learning_rate": 2.5013736263736264e-05, + "loss": 0.1008, + "step": 18190 + }, + { + "epoch": 49.97527472527472, + "grad_norm": 11.538488388061523, + "learning_rate": 2.501236263736264e-05, + "loss": 0.2021, + "step": 18191 + }, + { + "epoch": 49.97802197802198, + "grad_norm": 16.978967666625977, + "learning_rate": 2.501098901098901e-05, + "loss": 0.381, + "step": 18192 + }, + { + "epoch": 49.98076923076923, + "grad_norm": 28.077259063720703, + "learning_rate": 2.5009615384615387e-05, + "loss": 0.6828, + "step": 18193 + }, + { + "epoch": 49.98351648351648, + "grad_norm": 6.8683624267578125, + "learning_rate": 2.5008241758241757e-05, + "loss": 0.1882, + "step": 18194 + }, + { + "epoch": 49.98626373626374, + "grad_norm": 8.547205924987793, + "learning_rate": 2.5006868131868134e-05, + "loss": 0.2132, + "step": 18195 + }, + { + "epoch": 49.98901098901099, + "grad_norm": 9.474966049194336, + "learning_rate": 2.5005494505494508e-05, + "loss": 0.1751, + "step": 18196 + }, + { + "epoch": 49.99175824175824, + "grad_norm": 19.029497146606445, + "learning_rate": 2.5004120879120878e-05, + "loss": 0.6063, + "step": 18197 + }, + { + "epoch": 49.994505494505496, + "grad_norm": 9.034748077392578, + "learning_rate": 2.5002747252747254e-05, + "loss": 0.1643, + "step": 18198 + }, + { + "epoch": 49.997252747252745, + "grad_norm": 18.043752670288086, + "learning_rate": 2.5001373626373624e-05, + "loss": 0.6349, + "step": 18199 + }, + { + "epoch": 50.0, + "grad_norm": 31.220985412597656, + "learning_rate": 2.5e-05, + "loss": 0.3016, + "step": 18200 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.7272727272727273, + "eval_f1": 0.7389100071709817, + "eval_f1_DuraRiadoRio_64x64": 0.6614173228346457, + "eval_f1_Mole_64x64": 0.7372881355932204, + "eval_f1_Quebrado_64x64": 0.858085808580858, + "eval_f1_RiadoRio_64x64": 0.5931372549019608, + "eval_f1_RioFechado_64x64": 0.8446215139442231, + "eval_loss": 1.38997220993042, + "eval_precision": 0.7944063829325344, + "eval_precision_DuraRiadoRio_64x64": 0.7636363636363637, + "eval_precision_Mole_64x64": 0.9456521739130435, + "eval_precision_Quebrado_64x64": 0.8176100628930818, + "eval_precision_RiadoRio_64x64": 0.47265625, + "eval_precision_RioFechado_64x64": 0.9724770642201835, + "eval_recall": 0.7265618565192323, + "eval_recall_DuraRiadoRio_64x64": 0.5833333333333334, + "eval_recall_Mole_64x64": 0.6041666666666666, + "eval_recall_Quebrado_64x64": 0.9027777777777778, + "eval_recall_RiadoRio_64x64": 0.7960526315789473, + "eval_recall_RioFechado_64x64": 0.7464788732394366, + "eval_runtime": 1.7727, + "eval_samples_per_second": 409.551, + "eval_steps_per_second": 25.95, + "step": 18200 + }, + { + "epoch": 50.002747252747255, + "grad_norm": 15.262948036193848, + "learning_rate": 2.4998626373626375e-05, + "loss": 0.3584, + "step": 18201 + }, + { + "epoch": 50.005494505494504, + "grad_norm": 10.107856750488281, + "learning_rate": 2.4997252747252748e-05, + "loss": 0.122, + "step": 18202 + }, + { + "epoch": 50.00824175824176, + "grad_norm": 17.337352752685547, + "learning_rate": 2.4995879120879125e-05, + "loss": 0.424, + "step": 18203 + }, + { + "epoch": 50.010989010989015, + "grad_norm": 14.167949676513672, + "learning_rate": 2.4994505494505495e-05, + "loss": 0.185, + "step": 18204 + }, + { + "epoch": 50.01373626373626, + "grad_norm": 11.16149616241455, + "learning_rate": 2.4993131868131868e-05, + "loss": 0.3287, + "step": 18205 + }, + { + "epoch": 50.01648351648352, + "grad_norm": 3.574406623840332, + "learning_rate": 2.499175824175824e-05, + "loss": 0.0738, + "step": 18206 + }, + { + "epoch": 50.01923076923077, + "grad_norm": 8.12822151184082, + "learning_rate": 2.4990384615384615e-05, + "loss": 0.1677, + "step": 18207 + }, + { + "epoch": 50.02197802197802, + "grad_norm": 14.328808784484863, + "learning_rate": 2.4989010989010992e-05, + "loss": 0.3191, + "step": 18208 + }, + { + "epoch": 50.02472527472528, + "grad_norm": 10.408897399902344, + "learning_rate": 2.4987637362637365e-05, + "loss": 0.1542, + "step": 18209 + }, + { + "epoch": 50.027472527472526, + "grad_norm": 10.401135444641113, + "learning_rate": 2.498626373626374e-05, + "loss": 0.1299, + "step": 18210 + }, + { + "epoch": 50.03021978021978, + "grad_norm": 15.29190731048584, + "learning_rate": 2.4984890109890112e-05, + "loss": 0.3275, + "step": 18211 + }, + { + "epoch": 50.032967032967036, + "grad_norm": 5.906000137329102, + "learning_rate": 2.4983516483516482e-05, + "loss": 0.1083, + "step": 18212 + }, + { + "epoch": 50.035714285714285, + "grad_norm": 2.0294992923736572, + "learning_rate": 2.498214285714286e-05, + "loss": 0.0279, + "step": 18213 + }, + { + "epoch": 50.03846153846154, + "grad_norm": 12.55721378326416, + "learning_rate": 2.4980769230769232e-05, + "loss": 0.2125, + "step": 18214 + }, + { + "epoch": 50.04120879120879, + "grad_norm": 12.731034278869629, + "learning_rate": 2.4979395604395606e-05, + "loss": 0.2777, + "step": 18215 + }, + { + "epoch": 50.043956043956044, + "grad_norm": 11.921950340270996, + "learning_rate": 2.497802197802198e-05, + "loss": 0.3126, + "step": 18216 + }, + { + "epoch": 50.0467032967033, + "grad_norm": 19.240337371826172, + "learning_rate": 2.4976648351648353e-05, + "loss": 0.4809, + "step": 18217 + }, + { + "epoch": 50.04945054945055, + "grad_norm": 7.821190357208252, + "learning_rate": 2.497527472527473e-05, + "loss": 0.205, + "step": 18218 + }, + { + "epoch": 50.0521978021978, + "grad_norm": 9.452760696411133, + "learning_rate": 2.49739010989011e-05, + "loss": 0.2791, + "step": 18219 + }, + { + "epoch": 50.05494505494506, + "grad_norm": 12.267154693603516, + "learning_rate": 2.4972527472527473e-05, + "loss": 0.2188, + "step": 18220 + }, + { + "epoch": 50.05769230769231, + "grad_norm": 20.502758026123047, + "learning_rate": 2.4971153846153846e-05, + "loss": 0.7067, + "step": 18221 + }, + { + "epoch": 50.06043956043956, + "grad_norm": 14.29536247253418, + "learning_rate": 2.496978021978022e-05, + "loss": 0.3364, + "step": 18222 + }, + { + "epoch": 50.06318681318681, + "grad_norm": 25.47106170654297, + "learning_rate": 2.4968406593406596e-05, + "loss": 0.9142, + "step": 18223 + }, + { + "epoch": 50.065934065934066, + "grad_norm": 14.874009132385254, + "learning_rate": 2.496703296703297e-05, + "loss": 0.2684, + "step": 18224 + }, + { + "epoch": 50.06868131868132, + "grad_norm": 17.898059844970703, + "learning_rate": 2.4965659340659343e-05, + "loss": 0.3436, + "step": 18225 + }, + { + "epoch": 50.07142857142857, + "grad_norm": 9.556965827941895, + "learning_rate": 2.4964285714285717e-05, + "loss": 0.2098, + "step": 18226 + }, + { + "epoch": 50.074175824175825, + "grad_norm": 7.357883453369141, + "learning_rate": 2.4962912087912087e-05, + "loss": 0.1938, + "step": 18227 + }, + { + "epoch": 50.07692307692308, + "grad_norm": 13.24398422241211, + "learning_rate": 2.4961538461538463e-05, + "loss": 0.359, + "step": 18228 + }, + { + "epoch": 50.07967032967033, + "grad_norm": 8.351566314697266, + "learning_rate": 2.4960164835164837e-05, + "loss": 0.0892, + "step": 18229 + }, + { + "epoch": 50.082417582417584, + "grad_norm": 11.920632362365723, + "learning_rate": 2.495879120879121e-05, + "loss": 0.2809, + "step": 18230 + }, + { + "epoch": 50.08516483516483, + "grad_norm": 9.446697235107422, + "learning_rate": 2.4957417582417584e-05, + "loss": 0.0986, + "step": 18231 + }, + { + "epoch": 50.08791208791209, + "grad_norm": 11.302340507507324, + "learning_rate": 2.4956043956043957e-05, + "loss": 0.1661, + "step": 18232 + }, + { + "epoch": 50.09065934065934, + "grad_norm": 13.482458114624023, + "learning_rate": 2.4954670329670334e-05, + "loss": 0.3077, + "step": 18233 + }, + { + "epoch": 50.09340659340659, + "grad_norm": 12.760053634643555, + "learning_rate": 2.4953296703296704e-05, + "loss": 0.4361, + "step": 18234 + }, + { + "epoch": 50.09615384615385, + "grad_norm": 5.095923900604248, + "learning_rate": 2.4951923076923077e-05, + "loss": 0.0621, + "step": 18235 + }, + { + "epoch": 50.0989010989011, + "grad_norm": 12.332701683044434, + "learning_rate": 2.495054945054945e-05, + "loss": 0.2323, + "step": 18236 + }, + { + "epoch": 50.10164835164835, + "grad_norm": 8.780406951904297, + "learning_rate": 2.4949175824175824e-05, + "loss": 0.1548, + "step": 18237 + }, + { + "epoch": 50.104395604395606, + "grad_norm": 7.952641487121582, + "learning_rate": 2.49478021978022e-05, + "loss": 0.0588, + "step": 18238 + }, + { + "epoch": 50.107142857142854, + "grad_norm": 11.982203483581543, + "learning_rate": 2.4946428571428574e-05, + "loss": 0.3729, + "step": 18239 + }, + { + "epoch": 50.10989010989011, + "grad_norm": 6.620995998382568, + "learning_rate": 2.4945054945054948e-05, + "loss": 0.0752, + "step": 18240 + }, + { + "epoch": 50.112637362637365, + "grad_norm": 13.22763442993164, + "learning_rate": 2.494368131868132e-05, + "loss": 0.5481, + "step": 18241 + }, + { + "epoch": 50.11538461538461, + "grad_norm": 13.046829223632812, + "learning_rate": 2.494230769230769e-05, + "loss": 0.1984, + "step": 18242 + }, + { + "epoch": 50.11813186813187, + "grad_norm": 2.3959643840789795, + "learning_rate": 2.4940934065934068e-05, + "loss": 0.032, + "step": 18243 + }, + { + "epoch": 50.120879120879124, + "grad_norm": 11.38235855102539, + "learning_rate": 2.493956043956044e-05, + "loss": 0.3321, + "step": 18244 + }, + { + "epoch": 50.12362637362637, + "grad_norm": 18.015043258666992, + "learning_rate": 2.4938186813186815e-05, + "loss": 0.4177, + "step": 18245 + }, + { + "epoch": 50.12637362637363, + "grad_norm": 6.424105167388916, + "learning_rate": 2.4936813186813188e-05, + "loss": 0.1266, + "step": 18246 + }, + { + "epoch": 50.129120879120876, + "grad_norm": 11.76779556274414, + "learning_rate": 2.493543956043956e-05, + "loss": 0.2764, + "step": 18247 + }, + { + "epoch": 50.13186813186813, + "grad_norm": 22.641645431518555, + "learning_rate": 2.4934065934065938e-05, + "loss": 0.5735, + "step": 18248 + }, + { + "epoch": 50.13461538461539, + "grad_norm": 16.323198318481445, + "learning_rate": 2.4932692307692308e-05, + "loss": 0.364, + "step": 18249 + }, + { + "epoch": 50.137362637362635, + "grad_norm": 18.650835037231445, + "learning_rate": 2.4931318681318682e-05, + "loss": 0.4193, + "step": 18250 + }, + { + "epoch": 50.14010989010989, + "grad_norm": 3.9419219493865967, + "learning_rate": 2.4929945054945055e-05, + "loss": 0.0773, + "step": 18251 + }, + { + "epoch": 50.142857142857146, + "grad_norm": 8.753691673278809, + "learning_rate": 2.492857142857143e-05, + "loss": 0.1226, + "step": 18252 + }, + { + "epoch": 50.145604395604394, + "grad_norm": 12.199334144592285, + "learning_rate": 2.4927197802197805e-05, + "loss": 0.3278, + "step": 18253 + }, + { + "epoch": 50.14835164835165, + "grad_norm": 5.848683834075928, + "learning_rate": 2.492582417582418e-05, + "loss": 0.2198, + "step": 18254 + }, + { + "epoch": 50.1510989010989, + "grad_norm": 10.54658317565918, + "learning_rate": 2.4924450549450552e-05, + "loss": 0.5085, + "step": 18255 + }, + { + "epoch": 50.15384615384615, + "grad_norm": 20.974512100219727, + "learning_rate": 2.4923076923076926e-05, + "loss": 0.528, + "step": 18256 + }, + { + "epoch": 50.15659340659341, + "grad_norm": 5.2791290283203125, + "learning_rate": 2.4921703296703296e-05, + "loss": 0.0836, + "step": 18257 + }, + { + "epoch": 50.15934065934066, + "grad_norm": 21.652307510375977, + "learning_rate": 2.492032967032967e-05, + "loss": 0.5124, + "step": 18258 + }, + { + "epoch": 50.16208791208791, + "grad_norm": 19.633583068847656, + "learning_rate": 2.4918956043956046e-05, + "loss": 0.742, + "step": 18259 + }, + { + "epoch": 50.16483516483517, + "grad_norm": 11.72704792022705, + "learning_rate": 2.491758241758242e-05, + "loss": 0.2528, + "step": 18260 + }, + { + "epoch": 50.167582417582416, + "grad_norm": 9.578920364379883, + "learning_rate": 2.4916208791208793e-05, + "loss": 0.3051, + "step": 18261 + }, + { + "epoch": 50.17032967032967, + "grad_norm": 6.4465718269348145, + "learning_rate": 2.4914835164835166e-05, + "loss": 0.0362, + "step": 18262 + }, + { + "epoch": 50.17307692307692, + "grad_norm": 11.960505485534668, + "learning_rate": 2.491346153846154e-05, + "loss": 0.144, + "step": 18263 + }, + { + "epoch": 50.175824175824175, + "grad_norm": 1.7479894161224365, + "learning_rate": 2.4912087912087913e-05, + "loss": 0.0215, + "step": 18264 + }, + { + "epoch": 50.17857142857143, + "grad_norm": 18.90633773803711, + "learning_rate": 2.4910714285714286e-05, + "loss": 0.558, + "step": 18265 + }, + { + "epoch": 50.18131868131868, + "grad_norm": 13.546575546264648, + "learning_rate": 2.490934065934066e-05, + "loss": 0.2545, + "step": 18266 + }, + { + "epoch": 50.184065934065934, + "grad_norm": 14.553644180297852, + "learning_rate": 2.4907967032967033e-05, + "loss": 0.2173, + "step": 18267 + }, + { + "epoch": 50.18681318681319, + "grad_norm": 6.288443565368652, + "learning_rate": 2.4906593406593406e-05, + "loss": 0.1253, + "step": 18268 + }, + { + "epoch": 50.18956043956044, + "grad_norm": 17.899499893188477, + "learning_rate": 2.4905219780219783e-05, + "loss": 0.3704, + "step": 18269 + }, + { + "epoch": 50.19230769230769, + "grad_norm": 17.193359375, + "learning_rate": 2.4903846153846157e-05, + "loss": 0.5672, + "step": 18270 + }, + { + "epoch": 50.19505494505494, + "grad_norm": 14.712421417236328, + "learning_rate": 2.490247252747253e-05, + "loss": 0.4334, + "step": 18271 + }, + { + "epoch": 50.1978021978022, + "grad_norm": 13.51211929321289, + "learning_rate": 2.49010989010989e-05, + "loss": 0.4173, + "step": 18272 + }, + { + "epoch": 50.20054945054945, + "grad_norm": 9.691883087158203, + "learning_rate": 2.4899725274725273e-05, + "loss": 0.2391, + "step": 18273 + }, + { + "epoch": 50.2032967032967, + "grad_norm": 20.506500244140625, + "learning_rate": 2.489835164835165e-05, + "loss": 0.7035, + "step": 18274 + }, + { + "epoch": 50.206043956043956, + "grad_norm": 6.076739311218262, + "learning_rate": 2.4896978021978024e-05, + "loss": 0.0785, + "step": 18275 + }, + { + "epoch": 50.20879120879121, + "grad_norm": 8.888456344604492, + "learning_rate": 2.4895604395604397e-05, + "loss": 0.1886, + "step": 18276 + }, + { + "epoch": 50.21153846153846, + "grad_norm": 16.89920425415039, + "learning_rate": 2.489423076923077e-05, + "loss": 0.5098, + "step": 18277 + }, + { + "epoch": 50.214285714285715, + "grad_norm": 9.4568452835083, + "learning_rate": 2.4892857142857144e-05, + "loss": 0.1537, + "step": 18278 + }, + { + "epoch": 50.217032967032964, + "grad_norm": 9.11567497253418, + "learning_rate": 2.4891483516483517e-05, + "loss": 0.3526, + "step": 18279 + }, + { + "epoch": 50.21978021978022, + "grad_norm": 12.66511344909668, + "learning_rate": 2.489010989010989e-05, + "loss": 0.3631, + "step": 18280 + }, + { + "epoch": 50.222527472527474, + "grad_norm": 5.712050914764404, + "learning_rate": 2.4888736263736264e-05, + "loss": 0.0994, + "step": 18281 + }, + { + "epoch": 50.22527472527472, + "grad_norm": 9.481884002685547, + "learning_rate": 2.4887362637362637e-05, + "loss": 0.1914, + "step": 18282 + }, + { + "epoch": 50.22802197802198, + "grad_norm": 8.876440048217773, + "learning_rate": 2.488598901098901e-05, + "loss": 0.3114, + "step": 18283 + }, + { + "epoch": 50.23076923076923, + "grad_norm": 12.405682563781738, + "learning_rate": 2.4884615384615388e-05, + "loss": 0.2747, + "step": 18284 + }, + { + "epoch": 50.23351648351648, + "grad_norm": 10.058039665222168, + "learning_rate": 2.488324175824176e-05, + "loss": 0.1985, + "step": 18285 + }, + { + "epoch": 50.23626373626374, + "grad_norm": 12.858858108520508, + "learning_rate": 2.4881868131868135e-05, + "loss": 0.2521, + "step": 18286 + }, + { + "epoch": 50.239010989010985, + "grad_norm": 9.51638126373291, + "learning_rate": 2.4880494505494505e-05, + "loss": 0.1734, + "step": 18287 + }, + { + "epoch": 50.24175824175824, + "grad_norm": 20.136455535888672, + "learning_rate": 2.4879120879120878e-05, + "loss": 0.6622, + "step": 18288 + }, + { + "epoch": 50.244505494505496, + "grad_norm": 18.165754318237305, + "learning_rate": 2.4877747252747255e-05, + "loss": 0.5185, + "step": 18289 + }, + { + "epoch": 50.247252747252745, + "grad_norm": 11.770294189453125, + "learning_rate": 2.4876373626373628e-05, + "loss": 0.3865, + "step": 18290 + }, + { + "epoch": 50.25, + "grad_norm": 13.517887115478516, + "learning_rate": 2.4875e-05, + "loss": 0.4617, + "step": 18291 + }, + { + "epoch": 50.252747252747255, + "grad_norm": 7.962602138519287, + "learning_rate": 2.4873626373626375e-05, + "loss": 0.2305, + "step": 18292 + }, + { + "epoch": 50.255494505494504, + "grad_norm": 11.739705085754395, + "learning_rate": 2.487225274725275e-05, + "loss": 0.3206, + "step": 18293 + }, + { + "epoch": 50.25824175824176, + "grad_norm": 8.057198524475098, + "learning_rate": 2.4870879120879122e-05, + "loss": 0.1549, + "step": 18294 + }, + { + "epoch": 50.260989010989015, + "grad_norm": 14.372982025146484, + "learning_rate": 2.4869505494505495e-05, + "loss": 0.3093, + "step": 18295 + }, + { + "epoch": 50.26373626373626, + "grad_norm": 4.3196821212768555, + "learning_rate": 2.486813186813187e-05, + "loss": 0.07, + "step": 18296 + }, + { + "epoch": 50.26648351648352, + "grad_norm": 9.824506759643555, + "learning_rate": 2.4866758241758242e-05, + "loss": 0.2681, + "step": 18297 + }, + { + "epoch": 50.26923076923077, + "grad_norm": 11.982107162475586, + "learning_rate": 2.4865384615384615e-05, + "loss": 0.3134, + "step": 18298 + }, + { + "epoch": 50.27197802197802, + "grad_norm": 14.785367012023926, + "learning_rate": 2.4864010989010992e-05, + "loss": 0.1015, + "step": 18299 + }, + { + "epoch": 50.27472527472528, + "grad_norm": 7.345458984375, + "learning_rate": 2.4862637362637366e-05, + "loss": 0.1128, + "step": 18300 + }, + { + "epoch": 50.277472527472526, + "grad_norm": 17.24576759338379, + "learning_rate": 2.486126373626374e-05, + "loss": 0.479, + "step": 18301 + }, + { + "epoch": 50.28021978021978, + "grad_norm": 12.244491577148438, + "learning_rate": 2.485989010989011e-05, + "loss": 0.1715, + "step": 18302 + }, + { + "epoch": 50.282967032967036, + "grad_norm": 2.2242279052734375, + "learning_rate": 2.4858516483516482e-05, + "loss": 0.0311, + "step": 18303 + }, + { + "epoch": 50.285714285714285, + "grad_norm": 15.029800415039062, + "learning_rate": 2.485714285714286e-05, + "loss": 0.3467, + "step": 18304 + }, + { + "epoch": 50.28846153846154, + "grad_norm": 9.128732681274414, + "learning_rate": 2.4855769230769233e-05, + "loss": 0.14, + "step": 18305 + }, + { + "epoch": 50.29120879120879, + "grad_norm": 14.313745498657227, + "learning_rate": 2.4854395604395606e-05, + "loss": 0.3716, + "step": 18306 + }, + { + "epoch": 50.293956043956044, + "grad_norm": 12.562667846679688, + "learning_rate": 2.485302197802198e-05, + "loss": 0.2373, + "step": 18307 + }, + { + "epoch": 50.2967032967033, + "grad_norm": 8.092941284179688, + "learning_rate": 2.4851648351648353e-05, + "loss": 0.1925, + "step": 18308 + }, + { + "epoch": 50.29945054945055, + "grad_norm": 16.932024002075195, + "learning_rate": 2.4850274725274726e-05, + "loss": 0.3858, + "step": 18309 + }, + { + "epoch": 50.3021978021978, + "grad_norm": 25.271162033081055, + "learning_rate": 2.48489010989011e-05, + "loss": 0.4161, + "step": 18310 + }, + { + "epoch": 50.30494505494506, + "grad_norm": 17.505765914916992, + "learning_rate": 2.4847527472527473e-05, + "loss": 0.4012, + "step": 18311 + }, + { + "epoch": 50.30769230769231, + "grad_norm": 17.869287490844727, + "learning_rate": 2.4846153846153846e-05, + "loss": 0.3629, + "step": 18312 + }, + { + "epoch": 50.31043956043956, + "grad_norm": 5.74057674407959, + "learning_rate": 2.484478021978022e-05, + "loss": 0.1303, + "step": 18313 + }, + { + "epoch": 50.31318681318681, + "grad_norm": 6.970426082611084, + "learning_rate": 2.4843406593406597e-05, + "loss": 0.1182, + "step": 18314 + }, + { + "epoch": 50.315934065934066, + "grad_norm": 12.496482849121094, + "learning_rate": 2.484203296703297e-05, + "loss": 0.3141, + "step": 18315 + }, + { + "epoch": 50.31868131868132, + "grad_norm": 6.818646430969238, + "learning_rate": 2.4840659340659343e-05, + "loss": 0.1312, + "step": 18316 + }, + { + "epoch": 50.32142857142857, + "grad_norm": 12.043134689331055, + "learning_rate": 2.4839285714285714e-05, + "loss": 0.2904, + "step": 18317 + }, + { + "epoch": 50.324175824175825, + "grad_norm": 17.74146270751953, + "learning_rate": 2.4837912087912087e-05, + "loss": 0.6781, + "step": 18318 + }, + { + "epoch": 50.32692307692308, + "grad_norm": 5.561356544494629, + "learning_rate": 2.4836538461538464e-05, + "loss": 0.1574, + "step": 18319 + }, + { + "epoch": 50.32967032967033, + "grad_norm": 16.212574005126953, + "learning_rate": 2.4835164835164837e-05, + "loss": 0.438, + "step": 18320 + }, + { + "epoch": 50.332417582417584, + "grad_norm": 5.790278911590576, + "learning_rate": 2.483379120879121e-05, + "loss": 0.1256, + "step": 18321 + }, + { + "epoch": 50.33516483516483, + "grad_norm": 1.703991413116455, + "learning_rate": 2.4832417582417584e-05, + "loss": 0.034, + "step": 18322 + }, + { + "epoch": 50.33791208791209, + "grad_norm": 24.65196990966797, + "learning_rate": 2.4831043956043957e-05, + "loss": 0.909, + "step": 18323 + }, + { + "epoch": 50.34065934065934, + "grad_norm": 7.640778541564941, + "learning_rate": 2.482967032967033e-05, + "loss": 0.0945, + "step": 18324 + }, + { + "epoch": 50.34340659340659, + "grad_norm": 17.575490951538086, + "learning_rate": 2.4828296703296704e-05, + "loss": 0.4447, + "step": 18325 + }, + { + "epoch": 50.34615384615385, + "grad_norm": 13.331463813781738, + "learning_rate": 2.4826923076923078e-05, + "loss": 0.3758, + "step": 18326 + }, + { + "epoch": 50.3489010989011, + "grad_norm": 15.798810005187988, + "learning_rate": 2.482554945054945e-05, + "loss": 0.5988, + "step": 18327 + }, + { + "epoch": 50.35164835164835, + "grad_norm": 11.111923217773438, + "learning_rate": 2.4824175824175824e-05, + "loss": 0.3021, + "step": 18328 + }, + { + "epoch": 50.354395604395606, + "grad_norm": 16.149168014526367, + "learning_rate": 2.48228021978022e-05, + "loss": 0.3927, + "step": 18329 + }, + { + "epoch": 50.357142857142854, + "grad_norm": 11.39303970336914, + "learning_rate": 2.4821428571428575e-05, + "loss": 0.1508, + "step": 18330 + }, + { + "epoch": 50.35989010989011, + "grad_norm": 10.261666297912598, + "learning_rate": 2.4820054945054948e-05, + "loss": 0.2506, + "step": 18331 + }, + { + "epoch": 50.362637362637365, + "grad_norm": 10.121928215026855, + "learning_rate": 2.4818681318681318e-05, + "loss": 0.1949, + "step": 18332 + }, + { + "epoch": 50.36538461538461, + "grad_norm": 23.00611686706543, + "learning_rate": 2.481730769230769e-05, + "loss": 0.9656, + "step": 18333 + }, + { + "epoch": 50.36813186813187, + "grad_norm": 14.46940803527832, + "learning_rate": 2.4815934065934068e-05, + "loss": 0.286, + "step": 18334 + }, + { + "epoch": 50.370879120879124, + "grad_norm": 6.691548824310303, + "learning_rate": 2.481456043956044e-05, + "loss": 0.1513, + "step": 18335 + }, + { + "epoch": 50.37362637362637, + "grad_norm": 5.508373260498047, + "learning_rate": 2.4813186813186815e-05, + "loss": 0.1686, + "step": 18336 + }, + { + "epoch": 50.37637362637363, + "grad_norm": 13.782159805297852, + "learning_rate": 2.481181318681319e-05, + "loss": 0.2323, + "step": 18337 + }, + { + "epoch": 50.379120879120876, + "grad_norm": 14.932801246643066, + "learning_rate": 2.4810439560439562e-05, + "loss": 0.3731, + "step": 18338 + }, + { + "epoch": 50.38186813186813, + "grad_norm": 10.173127174377441, + "learning_rate": 2.4809065934065935e-05, + "loss": 0.2248, + "step": 18339 + }, + { + "epoch": 50.38461538461539, + "grad_norm": 7.96952486038208, + "learning_rate": 2.480769230769231e-05, + "loss": 0.1728, + "step": 18340 + }, + { + "epoch": 50.387362637362635, + "grad_norm": 10.73643684387207, + "learning_rate": 2.4806318681318682e-05, + "loss": 0.2115, + "step": 18341 + }, + { + "epoch": 50.39010989010989, + "grad_norm": 11.05236530303955, + "learning_rate": 2.4804945054945055e-05, + "loss": 0.22, + "step": 18342 + }, + { + "epoch": 50.392857142857146, + "grad_norm": 5.2871856689453125, + "learning_rate": 2.480357142857143e-05, + "loss": 0.1018, + "step": 18343 + }, + { + "epoch": 50.395604395604394, + "grad_norm": 12.957736015319824, + "learning_rate": 2.4802197802197806e-05, + "loss": 0.2136, + "step": 18344 + }, + { + "epoch": 50.39835164835165, + "grad_norm": 5.010703086853027, + "learning_rate": 2.480082417582418e-05, + "loss": 0.1265, + "step": 18345 + }, + { + "epoch": 50.4010989010989, + "grad_norm": 8.11981201171875, + "learning_rate": 2.4799450549450552e-05, + "loss": 0.1122, + "step": 18346 + }, + { + "epoch": 50.40384615384615, + "grad_norm": 2.755481243133545, + "learning_rate": 2.4798076923076922e-05, + "loss": 0.0535, + "step": 18347 + }, + { + "epoch": 50.40659340659341, + "grad_norm": 17.287649154663086, + "learning_rate": 2.4796703296703296e-05, + "loss": 0.3604, + "step": 18348 + }, + { + "epoch": 50.40934065934066, + "grad_norm": 6.020715236663818, + "learning_rate": 2.4795329670329673e-05, + "loss": 0.0881, + "step": 18349 + }, + { + "epoch": 50.41208791208791, + "grad_norm": 12.012567520141602, + "learning_rate": 2.4793956043956046e-05, + "loss": 0.2775, + "step": 18350 + }, + { + "epoch": 50.41483516483517, + "grad_norm": 13.873011589050293, + "learning_rate": 2.479258241758242e-05, + "loss": 0.4809, + "step": 18351 + }, + { + "epoch": 50.417582417582416, + "grad_norm": 12.998224258422852, + "learning_rate": 2.4791208791208793e-05, + "loss": 0.1955, + "step": 18352 + }, + { + "epoch": 50.42032967032967, + "grad_norm": 7.924031734466553, + "learning_rate": 2.4789835164835166e-05, + "loss": 0.0886, + "step": 18353 + }, + { + "epoch": 50.42307692307692, + "grad_norm": 19.4061336517334, + "learning_rate": 2.478846153846154e-05, + "loss": 0.7939, + "step": 18354 + }, + { + "epoch": 50.425824175824175, + "grad_norm": 6.081733226776123, + "learning_rate": 2.4787087912087913e-05, + "loss": 0.137, + "step": 18355 + }, + { + "epoch": 50.42857142857143, + "grad_norm": 17.833534240722656, + "learning_rate": 2.4785714285714287e-05, + "loss": 0.4088, + "step": 18356 + }, + { + "epoch": 50.43131868131868, + "grad_norm": 4.16845703125, + "learning_rate": 2.478434065934066e-05, + "loss": 0.0603, + "step": 18357 + }, + { + "epoch": 50.434065934065934, + "grad_norm": 14.057124137878418, + "learning_rate": 2.4782967032967033e-05, + "loss": 0.4729, + "step": 18358 + }, + { + "epoch": 50.43681318681319, + "grad_norm": 14.962899208068848, + "learning_rate": 2.478159340659341e-05, + "loss": 0.3857, + "step": 18359 + }, + { + "epoch": 50.43956043956044, + "grad_norm": 7.959346294403076, + "learning_rate": 2.4780219780219784e-05, + "loss": 0.2401, + "step": 18360 + }, + { + "epoch": 50.44230769230769, + "grad_norm": 9.412677764892578, + "learning_rate": 2.4778846153846154e-05, + "loss": 0.1915, + "step": 18361 + }, + { + "epoch": 50.44505494505494, + "grad_norm": 4.536544322967529, + "learning_rate": 2.4777472527472527e-05, + "loss": 0.0841, + "step": 18362 + }, + { + "epoch": 50.4478021978022, + "grad_norm": 2.9079794883728027, + "learning_rate": 2.47760989010989e-05, + "loss": 0.0545, + "step": 18363 + }, + { + "epoch": 50.45054945054945, + "grad_norm": 5.067519664764404, + "learning_rate": 2.4774725274725277e-05, + "loss": 0.1115, + "step": 18364 + }, + { + "epoch": 50.4532967032967, + "grad_norm": 12.498956680297852, + "learning_rate": 2.477335164835165e-05, + "loss": 0.2073, + "step": 18365 + }, + { + "epoch": 50.456043956043956, + "grad_norm": 13.354635238647461, + "learning_rate": 2.4771978021978024e-05, + "loss": 0.3568, + "step": 18366 + }, + { + "epoch": 50.45879120879121, + "grad_norm": 9.17451000213623, + "learning_rate": 2.4770604395604397e-05, + "loss": 0.1413, + "step": 18367 + }, + { + "epoch": 50.46153846153846, + "grad_norm": 21.948457717895508, + "learning_rate": 2.476923076923077e-05, + "loss": 0.6323, + "step": 18368 + }, + { + "epoch": 50.464285714285715, + "grad_norm": 15.684430122375488, + "learning_rate": 2.4767857142857144e-05, + "loss": 0.49, + "step": 18369 + }, + { + "epoch": 50.467032967032964, + "grad_norm": 10.376327514648438, + "learning_rate": 2.4766483516483518e-05, + "loss": 0.2076, + "step": 18370 + }, + { + "epoch": 50.46978021978022, + "grad_norm": 3.879695415496826, + "learning_rate": 2.476510989010989e-05, + "loss": 0.0614, + "step": 18371 + }, + { + "epoch": 50.472527472527474, + "grad_norm": 17.139978408813477, + "learning_rate": 2.4763736263736264e-05, + "loss": 0.1913, + "step": 18372 + }, + { + "epoch": 50.47527472527472, + "grad_norm": 5.994316101074219, + "learning_rate": 2.4762362637362638e-05, + "loss": 0.0919, + "step": 18373 + }, + { + "epoch": 50.47802197802198, + "grad_norm": 6.7912797927856445, + "learning_rate": 2.476098901098901e-05, + "loss": 0.1255, + "step": 18374 + }, + { + "epoch": 50.48076923076923, + "grad_norm": 19.35894775390625, + "learning_rate": 2.4759615384615388e-05, + "loss": 0.2377, + "step": 18375 + }, + { + "epoch": 50.48351648351648, + "grad_norm": 14.807470321655273, + "learning_rate": 2.4758241758241758e-05, + "loss": 0.4401, + "step": 18376 + }, + { + "epoch": 50.48626373626374, + "grad_norm": 6.5939154624938965, + "learning_rate": 2.475686813186813e-05, + "loss": 0.1051, + "step": 18377 + }, + { + "epoch": 50.489010989010985, + "grad_norm": 11.712698936462402, + "learning_rate": 2.4755494505494505e-05, + "loss": 0.2527, + "step": 18378 + }, + { + "epoch": 50.49175824175824, + "grad_norm": 18.436588287353516, + "learning_rate": 2.4754120879120878e-05, + "loss": 0.3136, + "step": 18379 + }, + { + "epoch": 50.494505494505496, + "grad_norm": 16.33101463317871, + "learning_rate": 2.4752747252747255e-05, + "loss": 0.5116, + "step": 18380 + }, + { + "epoch": 50.497252747252745, + "grad_norm": 7.8171000480651855, + "learning_rate": 2.475137362637363e-05, + "loss": 0.094, + "step": 18381 + }, + { + "epoch": 50.5, + "grad_norm": 12.119641304016113, + "learning_rate": 2.4750000000000002e-05, + "loss": 0.2196, + "step": 18382 + }, + { + "epoch": 50.502747252747255, + "grad_norm": 9.752212524414062, + "learning_rate": 2.4748626373626375e-05, + "loss": 0.1304, + "step": 18383 + }, + { + "epoch": 50.505494505494504, + "grad_norm": 18.540348052978516, + "learning_rate": 2.474725274725275e-05, + "loss": 0.5674, + "step": 18384 + }, + { + "epoch": 50.50824175824176, + "grad_norm": 10.72168254852295, + "learning_rate": 2.4745879120879122e-05, + "loss": 0.1778, + "step": 18385 + }, + { + "epoch": 50.51098901098901, + "grad_norm": 10.340740203857422, + "learning_rate": 2.4744505494505496e-05, + "loss": 0.157, + "step": 18386 + }, + { + "epoch": 50.51373626373626, + "grad_norm": 32.58360290527344, + "learning_rate": 2.474313186813187e-05, + "loss": 1.0187, + "step": 18387 + }, + { + "epoch": 50.51648351648352, + "grad_norm": 14.697453498840332, + "learning_rate": 2.4741758241758242e-05, + "loss": 0.2479, + "step": 18388 + }, + { + "epoch": 50.51923076923077, + "grad_norm": 7.991328716278076, + "learning_rate": 2.4740384615384616e-05, + "loss": 0.1367, + "step": 18389 + }, + { + "epoch": 50.52197802197802, + "grad_norm": 19.378957748413086, + "learning_rate": 2.4739010989010993e-05, + "loss": 0.481, + "step": 18390 + }, + { + "epoch": 50.52472527472528, + "grad_norm": 23.84382438659668, + "learning_rate": 2.4737637362637363e-05, + "loss": 0.5667, + "step": 18391 + }, + { + "epoch": 50.527472527472526, + "grad_norm": 13.147663116455078, + "learning_rate": 2.4736263736263736e-05, + "loss": 0.5631, + "step": 18392 + }, + { + "epoch": 50.53021978021978, + "grad_norm": 3.6827476024627686, + "learning_rate": 2.473489010989011e-05, + "loss": 0.0672, + "step": 18393 + }, + { + "epoch": 50.532967032967036, + "grad_norm": 12.65986156463623, + "learning_rate": 2.4733516483516483e-05, + "loss": 0.3768, + "step": 18394 + }, + { + "epoch": 50.535714285714285, + "grad_norm": 13.098658561706543, + "learning_rate": 2.473214285714286e-05, + "loss": 0.4105, + "step": 18395 + }, + { + "epoch": 50.53846153846154, + "grad_norm": 6.630882740020752, + "learning_rate": 2.4730769230769233e-05, + "loss": 0.1123, + "step": 18396 + }, + { + "epoch": 50.54120879120879, + "grad_norm": 17.201505661010742, + "learning_rate": 2.4729395604395606e-05, + "loss": 0.2405, + "step": 18397 + }, + { + "epoch": 50.543956043956044, + "grad_norm": 6.557401180267334, + "learning_rate": 2.472802197802198e-05, + "loss": 0.201, + "step": 18398 + }, + { + "epoch": 50.5467032967033, + "grad_norm": 19.157501220703125, + "learning_rate": 2.4726648351648353e-05, + "loss": 0.5533, + "step": 18399 + }, + { + "epoch": 50.54945054945055, + "grad_norm": 9.772337913513184, + "learning_rate": 2.4725274725274727e-05, + "loss": 0.352, + "step": 18400 + }, + { + "epoch": 50.5521978021978, + "grad_norm": 13.895374298095703, + "learning_rate": 2.47239010989011e-05, + "loss": 0.419, + "step": 18401 + }, + { + "epoch": 50.55494505494506, + "grad_norm": 15.294305801391602, + "learning_rate": 2.4722527472527473e-05, + "loss": 0.4651, + "step": 18402 + }, + { + "epoch": 50.55769230769231, + "grad_norm": 10.268876075744629, + "learning_rate": 2.4721153846153847e-05, + "loss": 0.2062, + "step": 18403 + }, + { + "epoch": 50.56043956043956, + "grad_norm": 4.8963470458984375, + "learning_rate": 2.471978021978022e-05, + "loss": 0.0578, + "step": 18404 + }, + { + "epoch": 50.56318681318681, + "grad_norm": 8.416974067687988, + "learning_rate": 2.4718406593406597e-05, + "loss": 0.0887, + "step": 18405 + }, + { + "epoch": 50.565934065934066, + "grad_norm": 22.149959564208984, + "learning_rate": 2.4717032967032967e-05, + "loss": 0.6894, + "step": 18406 + }, + { + "epoch": 50.56868131868132, + "grad_norm": 7.857143402099609, + "learning_rate": 2.471565934065934e-05, + "loss": 0.178, + "step": 18407 + }, + { + "epoch": 50.57142857142857, + "grad_norm": 19.027645111083984, + "learning_rate": 2.4714285714285714e-05, + "loss": 0.5574, + "step": 18408 + }, + { + "epoch": 50.574175824175825, + "grad_norm": 19.202484130859375, + "learning_rate": 2.4712912087912087e-05, + "loss": 0.3395, + "step": 18409 + }, + { + "epoch": 50.57692307692308, + "grad_norm": 17.22821617126465, + "learning_rate": 2.4711538461538464e-05, + "loss": 0.4583, + "step": 18410 + }, + { + "epoch": 50.57967032967033, + "grad_norm": 15.196329116821289, + "learning_rate": 2.4710164835164837e-05, + "loss": 0.3883, + "step": 18411 + }, + { + "epoch": 50.582417582417584, + "grad_norm": 6.881158828735352, + "learning_rate": 2.470879120879121e-05, + "loss": 0.1203, + "step": 18412 + }, + { + "epoch": 50.58516483516483, + "grad_norm": 14.254135131835938, + "learning_rate": 2.4707417582417584e-05, + "loss": 0.3052, + "step": 18413 + }, + { + "epoch": 50.58791208791209, + "grad_norm": 6.463897228240967, + "learning_rate": 2.4706043956043958e-05, + "loss": 0.0856, + "step": 18414 + }, + { + "epoch": 50.59065934065934, + "grad_norm": 18.7353515625, + "learning_rate": 2.470467032967033e-05, + "loss": 0.8738, + "step": 18415 + }, + { + "epoch": 50.59340659340659, + "grad_norm": 13.63709831237793, + "learning_rate": 2.4703296703296705e-05, + "loss": 0.2619, + "step": 18416 + }, + { + "epoch": 50.59615384615385, + "grad_norm": 14.426471710205078, + "learning_rate": 2.4701923076923078e-05, + "loss": 0.2766, + "step": 18417 + }, + { + "epoch": 50.5989010989011, + "grad_norm": 14.163179397583008, + "learning_rate": 2.470054945054945e-05, + "loss": 0.5363, + "step": 18418 + }, + { + "epoch": 50.60164835164835, + "grad_norm": 3.5985119342803955, + "learning_rate": 2.4699175824175825e-05, + "loss": 0.1759, + "step": 18419 + }, + { + "epoch": 50.604395604395606, + "grad_norm": 8.916227340698242, + "learning_rate": 2.46978021978022e-05, + "loss": 0.0961, + "step": 18420 + }, + { + "epoch": 50.607142857142854, + "grad_norm": 10.79638957977295, + "learning_rate": 2.469642857142857e-05, + "loss": 0.1529, + "step": 18421 + }, + { + "epoch": 50.60989010989011, + "grad_norm": 20.133922576904297, + "learning_rate": 2.4695054945054945e-05, + "loss": 0.4342, + "step": 18422 + }, + { + "epoch": 50.612637362637365, + "grad_norm": 10.152950286865234, + "learning_rate": 2.469368131868132e-05, + "loss": 0.1833, + "step": 18423 + }, + { + "epoch": 50.61538461538461, + "grad_norm": 18.562652587890625, + "learning_rate": 2.4692307692307692e-05, + "loss": 0.5351, + "step": 18424 + }, + { + "epoch": 50.61813186813187, + "grad_norm": 9.615168571472168, + "learning_rate": 2.469093406593407e-05, + "loss": 0.1903, + "step": 18425 + }, + { + "epoch": 50.620879120879124, + "grad_norm": 15.982534408569336, + "learning_rate": 2.4689560439560442e-05, + "loss": 0.5246, + "step": 18426 + }, + { + "epoch": 50.62362637362637, + "grad_norm": 9.112537384033203, + "learning_rate": 2.4688186813186815e-05, + "loss": 0.1653, + "step": 18427 + }, + { + "epoch": 50.62637362637363, + "grad_norm": 9.312053680419922, + "learning_rate": 2.468681318681319e-05, + "loss": 0.2037, + "step": 18428 + }, + { + "epoch": 50.629120879120876, + "grad_norm": 5.79099702835083, + "learning_rate": 2.4685439560439562e-05, + "loss": 0.0842, + "step": 18429 + }, + { + "epoch": 50.63186813186813, + "grad_norm": 11.403640747070312, + "learning_rate": 2.4684065934065936e-05, + "loss": 0.1483, + "step": 18430 + }, + { + "epoch": 50.63461538461539, + "grad_norm": 12.062557220458984, + "learning_rate": 2.468269230769231e-05, + "loss": 0.3263, + "step": 18431 + }, + { + "epoch": 50.637362637362635, + "grad_norm": 5.624183177947998, + "learning_rate": 2.4681318681318682e-05, + "loss": 0.1134, + "step": 18432 + }, + { + "epoch": 50.64010989010989, + "grad_norm": 13.376102447509766, + "learning_rate": 2.4679945054945056e-05, + "loss": 0.365, + "step": 18433 + }, + { + "epoch": 50.642857142857146, + "grad_norm": 11.39104175567627, + "learning_rate": 2.467857142857143e-05, + "loss": 0.3459, + "step": 18434 + }, + { + "epoch": 50.645604395604394, + "grad_norm": 18.122726440429688, + "learning_rate": 2.4677197802197806e-05, + "loss": 0.451, + "step": 18435 + }, + { + "epoch": 50.64835164835165, + "grad_norm": 7.262494087219238, + "learning_rate": 2.4675824175824176e-05, + "loss": 0.2124, + "step": 18436 + }, + { + "epoch": 50.6510989010989, + "grad_norm": 2.515950918197632, + "learning_rate": 2.467445054945055e-05, + "loss": 0.0438, + "step": 18437 + }, + { + "epoch": 50.65384615384615, + "grad_norm": 18.031038284301758, + "learning_rate": 2.4673076923076923e-05, + "loss": 0.4327, + "step": 18438 + }, + { + "epoch": 50.65659340659341, + "grad_norm": 5.701266288757324, + "learning_rate": 2.4671703296703296e-05, + "loss": 0.1847, + "step": 18439 + }, + { + "epoch": 50.65934065934066, + "grad_norm": 14.33263111114502, + "learning_rate": 2.4670329670329673e-05, + "loss": 0.4156, + "step": 18440 + }, + { + "epoch": 50.66208791208791, + "grad_norm": 10.076897621154785, + "learning_rate": 2.4668956043956046e-05, + "loss": 0.1273, + "step": 18441 + }, + { + "epoch": 50.66483516483517, + "grad_norm": 12.868593215942383, + "learning_rate": 2.466758241758242e-05, + "loss": 0.2876, + "step": 18442 + }, + { + "epoch": 50.667582417582416, + "grad_norm": 9.054227828979492, + "learning_rate": 2.4666208791208793e-05, + "loss": 0.2614, + "step": 18443 + }, + { + "epoch": 50.67032967032967, + "grad_norm": 12.525673866271973, + "learning_rate": 2.4664835164835167e-05, + "loss": 0.211, + "step": 18444 + }, + { + "epoch": 50.67307692307692, + "grad_norm": 7.543834686279297, + "learning_rate": 2.466346153846154e-05, + "loss": 0.1591, + "step": 18445 + }, + { + "epoch": 50.675824175824175, + "grad_norm": 10.7825927734375, + "learning_rate": 2.4662087912087913e-05, + "loss": 0.2929, + "step": 18446 + }, + { + "epoch": 50.67857142857143, + "grad_norm": 6.453470706939697, + "learning_rate": 2.4660714285714287e-05, + "loss": 0.1831, + "step": 18447 + }, + { + "epoch": 50.68131868131868, + "grad_norm": 2.806483030319214, + "learning_rate": 2.465934065934066e-05, + "loss": 0.0626, + "step": 18448 + }, + { + "epoch": 50.684065934065934, + "grad_norm": 18.658109664916992, + "learning_rate": 2.4657967032967034e-05, + "loss": 0.4148, + "step": 18449 + }, + { + "epoch": 50.68681318681319, + "grad_norm": 7.7261528968811035, + "learning_rate": 2.465659340659341e-05, + "loss": 0.1035, + "step": 18450 + }, + { + "epoch": 50.68956043956044, + "grad_norm": 13.677504539489746, + "learning_rate": 2.465521978021978e-05, + "loss": 0.2037, + "step": 18451 + }, + { + "epoch": 50.69230769230769, + "grad_norm": 13.261990547180176, + "learning_rate": 2.4653846153846154e-05, + "loss": 0.2409, + "step": 18452 + }, + { + "epoch": 50.69505494505494, + "grad_norm": 12.431510925292969, + "learning_rate": 2.4652472527472527e-05, + "loss": 0.2249, + "step": 18453 + }, + { + "epoch": 50.6978021978022, + "grad_norm": 5.663641452789307, + "learning_rate": 2.46510989010989e-05, + "loss": 0.1322, + "step": 18454 + }, + { + "epoch": 50.70054945054945, + "grad_norm": 5.265176296234131, + "learning_rate": 2.4649725274725278e-05, + "loss": 0.1177, + "step": 18455 + }, + { + "epoch": 50.7032967032967, + "grad_norm": 19.11187171936035, + "learning_rate": 2.464835164835165e-05, + "loss": 0.5568, + "step": 18456 + }, + { + "epoch": 50.706043956043956, + "grad_norm": 11.292534828186035, + "learning_rate": 2.4646978021978024e-05, + "loss": 0.2711, + "step": 18457 + }, + { + "epoch": 50.70879120879121, + "grad_norm": 14.006011009216309, + "learning_rate": 2.4645604395604398e-05, + "loss": 0.291, + "step": 18458 + }, + { + "epoch": 50.71153846153846, + "grad_norm": 10.913496971130371, + "learning_rate": 2.4644230769230768e-05, + "loss": 0.1988, + "step": 18459 + }, + { + "epoch": 50.714285714285715, + "grad_norm": 1.7314629554748535, + "learning_rate": 2.4642857142857145e-05, + "loss": 0.0292, + "step": 18460 + }, + { + "epoch": 50.717032967032964, + "grad_norm": 24.647489547729492, + "learning_rate": 2.4641483516483518e-05, + "loss": 0.5897, + "step": 18461 + }, + { + "epoch": 50.71978021978022, + "grad_norm": 17.24432373046875, + "learning_rate": 2.464010989010989e-05, + "loss": 0.5712, + "step": 18462 + }, + { + "epoch": 50.722527472527474, + "grad_norm": 8.03701114654541, + "learning_rate": 2.4638736263736265e-05, + "loss": 0.1177, + "step": 18463 + }, + { + "epoch": 50.72527472527472, + "grad_norm": 25.018091201782227, + "learning_rate": 2.4637362637362638e-05, + "loss": 0.8617, + "step": 18464 + }, + { + "epoch": 50.72802197802198, + "grad_norm": 13.7254056930542, + "learning_rate": 2.4635989010989015e-05, + "loss": 0.2427, + "step": 18465 + }, + { + "epoch": 50.73076923076923, + "grad_norm": 20.034395217895508, + "learning_rate": 2.4634615384615385e-05, + "loss": 0.3917, + "step": 18466 + }, + { + "epoch": 50.73351648351648, + "grad_norm": 6.708505153656006, + "learning_rate": 2.463324175824176e-05, + "loss": 0.2446, + "step": 18467 + }, + { + "epoch": 50.73626373626374, + "grad_norm": 18.494400024414062, + "learning_rate": 2.4631868131868132e-05, + "loss": 0.4269, + "step": 18468 + }, + { + "epoch": 50.73901098901099, + "grad_norm": 6.8144378662109375, + "learning_rate": 2.4630494505494505e-05, + "loss": 0.2093, + "step": 18469 + }, + { + "epoch": 50.74175824175824, + "grad_norm": 14.677576065063477, + "learning_rate": 2.4629120879120882e-05, + "loss": 0.4474, + "step": 18470 + }, + { + "epoch": 50.744505494505496, + "grad_norm": 12.052693367004395, + "learning_rate": 2.4627747252747255e-05, + "loss": 0.2568, + "step": 18471 + }, + { + "epoch": 50.747252747252745, + "grad_norm": 7.072293758392334, + "learning_rate": 2.462637362637363e-05, + "loss": 0.0915, + "step": 18472 + }, + { + "epoch": 50.75, + "grad_norm": 5.7973151206970215, + "learning_rate": 2.4625000000000002e-05, + "loss": 0.1088, + "step": 18473 + }, + { + "epoch": 50.752747252747255, + "grad_norm": 3.536315441131592, + "learning_rate": 2.4623626373626372e-05, + "loss": 0.0581, + "step": 18474 + }, + { + "epoch": 50.755494505494504, + "grad_norm": 6.9234514236450195, + "learning_rate": 2.462225274725275e-05, + "loss": 0.1109, + "step": 18475 + }, + { + "epoch": 50.75824175824176, + "grad_norm": 9.835331916809082, + "learning_rate": 2.4620879120879122e-05, + "loss": 0.2746, + "step": 18476 + }, + { + "epoch": 50.76098901098901, + "grad_norm": 8.975927352905273, + "learning_rate": 2.4619505494505496e-05, + "loss": 0.1724, + "step": 18477 + }, + { + "epoch": 50.76373626373626, + "grad_norm": 10.223014831542969, + "learning_rate": 2.461813186813187e-05, + "loss": 0.1817, + "step": 18478 + }, + { + "epoch": 50.76648351648352, + "grad_norm": 12.371513366699219, + "learning_rate": 2.4616758241758243e-05, + "loss": 0.3341, + "step": 18479 + }, + { + "epoch": 50.76923076923077, + "grad_norm": 25.052379608154297, + "learning_rate": 2.461538461538462e-05, + "loss": 0.6622, + "step": 18480 + }, + { + "epoch": 50.77197802197802, + "grad_norm": 1.8628193140029907, + "learning_rate": 2.461401098901099e-05, + "loss": 0.0273, + "step": 18481 + }, + { + "epoch": 50.77472527472528, + "grad_norm": 9.02260971069336, + "learning_rate": 2.4612637362637363e-05, + "loss": 0.2083, + "step": 18482 + }, + { + "epoch": 50.777472527472526, + "grad_norm": 11.799701690673828, + "learning_rate": 2.4611263736263736e-05, + "loss": 0.2928, + "step": 18483 + }, + { + "epoch": 50.78021978021978, + "grad_norm": 16.19417381286621, + "learning_rate": 2.460989010989011e-05, + "loss": 0.4509, + "step": 18484 + }, + { + "epoch": 50.782967032967036, + "grad_norm": 12.824163436889648, + "learning_rate": 2.4608516483516483e-05, + "loss": 0.2634, + "step": 18485 + }, + { + "epoch": 50.785714285714285, + "grad_norm": 18.712799072265625, + "learning_rate": 2.460714285714286e-05, + "loss": 0.52, + "step": 18486 + }, + { + "epoch": 50.78846153846154, + "grad_norm": 8.830501556396484, + "learning_rate": 2.4605769230769233e-05, + "loss": 0.1927, + "step": 18487 + }, + { + "epoch": 50.79120879120879, + "grad_norm": 8.709269523620605, + "learning_rate": 2.4604395604395607e-05, + "loss": 0.1689, + "step": 18488 + }, + { + "epoch": 50.793956043956044, + "grad_norm": 0.6718313097953796, + "learning_rate": 2.4603021978021977e-05, + "loss": 0.0114, + "step": 18489 + }, + { + "epoch": 50.7967032967033, + "grad_norm": 11.305594444274902, + "learning_rate": 2.460164835164835e-05, + "loss": 0.2823, + "step": 18490 + }, + { + "epoch": 50.79945054945055, + "grad_norm": 4.320345401763916, + "learning_rate": 2.4600274725274727e-05, + "loss": 0.071, + "step": 18491 + }, + { + "epoch": 50.8021978021978, + "grad_norm": 14.07672119140625, + "learning_rate": 2.45989010989011e-05, + "loss": 0.2219, + "step": 18492 + }, + { + "epoch": 50.80494505494506, + "grad_norm": 24.08987045288086, + "learning_rate": 2.4597527472527474e-05, + "loss": 0.4579, + "step": 18493 + }, + { + "epoch": 50.80769230769231, + "grad_norm": 5.426334381103516, + "learning_rate": 2.4596153846153847e-05, + "loss": 0.1204, + "step": 18494 + }, + { + "epoch": 50.81043956043956, + "grad_norm": 6.421660423278809, + "learning_rate": 2.459478021978022e-05, + "loss": 0.0948, + "step": 18495 + }, + { + "epoch": 50.81318681318681, + "grad_norm": 27.12846565246582, + "learning_rate": 2.4593406593406594e-05, + "loss": 0.9193, + "step": 18496 + }, + { + "epoch": 50.815934065934066, + "grad_norm": 12.076297760009766, + "learning_rate": 2.4592032967032967e-05, + "loss": 0.1465, + "step": 18497 + }, + { + "epoch": 50.81868131868132, + "grad_norm": 18.259300231933594, + "learning_rate": 2.459065934065934e-05, + "loss": 0.2984, + "step": 18498 + }, + { + "epoch": 50.82142857142857, + "grad_norm": 5.495354175567627, + "learning_rate": 2.4589285714285714e-05, + "loss": 0.0949, + "step": 18499 + }, + { + "epoch": 50.824175824175825, + "grad_norm": 17.502565383911133, + "learning_rate": 2.4587912087912088e-05, + "loss": 0.4029, + "step": 18500 + }, + { + "epoch": 50.82692307692308, + "grad_norm": 1.7907447814941406, + "learning_rate": 2.4586538461538464e-05, + "loss": 0.0316, + "step": 18501 + }, + { + "epoch": 50.82967032967033, + "grad_norm": 20.87866973876953, + "learning_rate": 2.4585164835164838e-05, + "loss": 0.3508, + "step": 18502 + }, + { + "epoch": 50.832417582417584, + "grad_norm": 8.237123489379883, + "learning_rate": 2.458379120879121e-05, + "loss": 0.1504, + "step": 18503 + }, + { + "epoch": 50.83516483516483, + "grad_norm": 8.871162414550781, + "learning_rate": 2.458241758241758e-05, + "loss": 0.2552, + "step": 18504 + }, + { + "epoch": 50.83791208791209, + "grad_norm": 9.826956748962402, + "learning_rate": 2.4581043956043955e-05, + "loss": 0.2113, + "step": 18505 + }, + { + "epoch": 50.84065934065934, + "grad_norm": 12.955400466918945, + "learning_rate": 2.457967032967033e-05, + "loss": 0.2185, + "step": 18506 + }, + { + "epoch": 50.84340659340659, + "grad_norm": 19.32383918762207, + "learning_rate": 2.4578296703296705e-05, + "loss": 0.2296, + "step": 18507 + }, + { + "epoch": 50.84615384615385, + "grad_norm": 6.203744888305664, + "learning_rate": 2.4576923076923078e-05, + "loss": 0.1062, + "step": 18508 + }, + { + "epoch": 50.8489010989011, + "grad_norm": 12.599677085876465, + "learning_rate": 2.457554945054945e-05, + "loss": 0.1921, + "step": 18509 + }, + { + "epoch": 50.85164835164835, + "grad_norm": 13.922955513000488, + "learning_rate": 2.4574175824175825e-05, + "loss": 0.1972, + "step": 18510 + }, + { + "epoch": 50.854395604395606, + "grad_norm": 15.428590774536133, + "learning_rate": 2.45728021978022e-05, + "loss": 0.2277, + "step": 18511 + }, + { + "epoch": 50.857142857142854, + "grad_norm": 0.5364649295806885, + "learning_rate": 2.4571428571428572e-05, + "loss": 0.0105, + "step": 18512 + }, + { + "epoch": 50.85989010989011, + "grad_norm": 19.733745574951172, + "learning_rate": 2.4570054945054945e-05, + "loss": 0.2695, + "step": 18513 + }, + { + "epoch": 50.862637362637365, + "grad_norm": 8.869171142578125, + "learning_rate": 2.456868131868132e-05, + "loss": 0.0918, + "step": 18514 + }, + { + "epoch": 50.86538461538461, + "grad_norm": 13.512310981750488, + "learning_rate": 2.4567307692307692e-05, + "loss": 0.2577, + "step": 18515 + }, + { + "epoch": 50.86813186813187, + "grad_norm": 5.3409929275512695, + "learning_rate": 2.456593406593407e-05, + "loss": 0.0588, + "step": 18516 + }, + { + "epoch": 50.870879120879124, + "grad_norm": 8.208113670349121, + "learning_rate": 2.4564560439560442e-05, + "loss": 0.1865, + "step": 18517 + }, + { + "epoch": 50.87362637362637, + "grad_norm": 17.36876106262207, + "learning_rate": 2.4563186813186816e-05, + "loss": 0.5006, + "step": 18518 + }, + { + "epoch": 50.87637362637363, + "grad_norm": 15.796038627624512, + "learning_rate": 2.4561813186813186e-05, + "loss": 0.2804, + "step": 18519 + }, + { + "epoch": 50.879120879120876, + "grad_norm": 3.942312717437744, + "learning_rate": 2.456043956043956e-05, + "loss": 0.0963, + "step": 18520 + }, + { + "epoch": 50.88186813186813, + "grad_norm": 15.318824768066406, + "learning_rate": 2.4559065934065936e-05, + "loss": 0.2951, + "step": 18521 + }, + { + "epoch": 50.88461538461539, + "grad_norm": 18.089889526367188, + "learning_rate": 2.455769230769231e-05, + "loss": 0.3649, + "step": 18522 + }, + { + "epoch": 50.887362637362635, + "grad_norm": 7.839176177978516, + "learning_rate": 2.4556318681318683e-05, + "loss": 0.0828, + "step": 18523 + }, + { + "epoch": 50.89010989010989, + "grad_norm": 7.424612522125244, + "learning_rate": 2.4554945054945056e-05, + "loss": 0.2253, + "step": 18524 + }, + { + "epoch": 50.892857142857146, + "grad_norm": 12.95469856262207, + "learning_rate": 2.455357142857143e-05, + "loss": 0.2719, + "step": 18525 + }, + { + "epoch": 50.895604395604394, + "grad_norm": 7.022561073303223, + "learning_rate": 2.4552197802197803e-05, + "loss": 0.1253, + "step": 18526 + }, + { + "epoch": 50.89835164835165, + "grad_norm": 8.949341773986816, + "learning_rate": 2.4550824175824176e-05, + "loss": 0.1557, + "step": 18527 + }, + { + "epoch": 50.9010989010989, + "grad_norm": 8.211669921875, + "learning_rate": 2.454945054945055e-05, + "loss": 0.1632, + "step": 18528 + }, + { + "epoch": 50.90384615384615, + "grad_norm": 1.1928025484085083, + "learning_rate": 2.4548076923076923e-05, + "loss": 0.018, + "step": 18529 + }, + { + "epoch": 50.90659340659341, + "grad_norm": 10.490026473999023, + "learning_rate": 2.4546703296703297e-05, + "loss": 0.275, + "step": 18530 + }, + { + "epoch": 50.90934065934066, + "grad_norm": 8.400999069213867, + "learning_rate": 2.4545329670329673e-05, + "loss": 0.0984, + "step": 18531 + }, + { + "epoch": 50.91208791208791, + "grad_norm": 21.033071517944336, + "learning_rate": 2.4543956043956047e-05, + "loss": 0.4523, + "step": 18532 + }, + { + "epoch": 50.91483516483517, + "grad_norm": 11.424301147460938, + "learning_rate": 2.454258241758242e-05, + "loss": 0.1616, + "step": 18533 + }, + { + "epoch": 50.917582417582416, + "grad_norm": 8.321510314941406, + "learning_rate": 2.454120879120879e-05, + "loss": 0.2858, + "step": 18534 + }, + { + "epoch": 50.92032967032967, + "grad_norm": 6.636722087860107, + "learning_rate": 2.4539835164835164e-05, + "loss": 0.1037, + "step": 18535 + }, + { + "epoch": 50.92307692307692, + "grad_norm": 15.787357330322266, + "learning_rate": 2.453846153846154e-05, + "loss": 0.1443, + "step": 18536 + }, + { + "epoch": 50.925824175824175, + "grad_norm": 18.869842529296875, + "learning_rate": 2.4537087912087914e-05, + "loss": 0.3834, + "step": 18537 + }, + { + "epoch": 50.92857142857143, + "grad_norm": 9.41806411743164, + "learning_rate": 2.4535714285714287e-05, + "loss": 0.2415, + "step": 18538 + }, + { + "epoch": 50.93131868131868, + "grad_norm": 15.878946304321289, + "learning_rate": 2.453434065934066e-05, + "loss": 0.3616, + "step": 18539 + }, + { + "epoch": 50.934065934065934, + "grad_norm": 12.262648582458496, + "learning_rate": 2.4532967032967034e-05, + "loss": 0.2147, + "step": 18540 + }, + { + "epoch": 50.93681318681319, + "grad_norm": 17.824323654174805, + "learning_rate": 2.4531593406593407e-05, + "loss": 0.3174, + "step": 18541 + }, + { + "epoch": 50.93956043956044, + "grad_norm": 11.823444366455078, + "learning_rate": 2.453021978021978e-05, + "loss": 0.2146, + "step": 18542 + }, + { + "epoch": 50.94230769230769, + "grad_norm": 9.713882446289062, + "learning_rate": 2.4528846153846154e-05, + "loss": 0.2168, + "step": 18543 + }, + { + "epoch": 50.94505494505494, + "grad_norm": 12.639979362487793, + "learning_rate": 2.4527472527472528e-05, + "loss": 0.1545, + "step": 18544 + }, + { + "epoch": 50.9478021978022, + "grad_norm": 15.77000617980957, + "learning_rate": 2.45260989010989e-05, + "loss": 0.4699, + "step": 18545 + }, + { + "epoch": 50.95054945054945, + "grad_norm": 17.21281623840332, + "learning_rate": 2.4524725274725278e-05, + "loss": 0.2049, + "step": 18546 + }, + { + "epoch": 50.9532967032967, + "grad_norm": 11.115711212158203, + "learning_rate": 2.452335164835165e-05, + "loss": 0.3698, + "step": 18547 + }, + { + "epoch": 50.956043956043956, + "grad_norm": 2.786261558532715, + "learning_rate": 2.4521978021978025e-05, + "loss": 0.0431, + "step": 18548 + }, + { + "epoch": 50.95879120879121, + "grad_norm": 19.928255081176758, + "learning_rate": 2.4520604395604395e-05, + "loss": 0.1855, + "step": 18549 + }, + { + "epoch": 50.96153846153846, + "grad_norm": 25.54145050048828, + "learning_rate": 2.4519230769230768e-05, + "loss": 0.7812, + "step": 18550 + }, + { + "epoch": 50.964285714285715, + "grad_norm": 18.320327758789062, + "learning_rate": 2.4517857142857145e-05, + "loss": 0.4527, + "step": 18551 + }, + { + "epoch": 50.967032967032964, + "grad_norm": 1.4888046979904175, + "learning_rate": 2.451648351648352e-05, + "loss": 0.0232, + "step": 18552 + }, + { + "epoch": 50.96978021978022, + "grad_norm": 21.31104850769043, + "learning_rate": 2.4515109890109892e-05, + "loss": 0.2987, + "step": 18553 + }, + { + "epoch": 50.972527472527474, + "grad_norm": 9.067699432373047, + "learning_rate": 2.4513736263736265e-05, + "loss": 0.2267, + "step": 18554 + }, + { + "epoch": 50.97527472527472, + "grad_norm": 6.871059894561768, + "learning_rate": 2.451236263736264e-05, + "loss": 0.0918, + "step": 18555 + }, + { + "epoch": 50.97802197802198, + "grad_norm": 5.98443078994751, + "learning_rate": 2.4510989010989012e-05, + "loss": 0.1678, + "step": 18556 + }, + { + "epoch": 50.98076923076923, + "grad_norm": 16.73758316040039, + "learning_rate": 2.4509615384615385e-05, + "loss": 0.452, + "step": 18557 + }, + { + "epoch": 50.98351648351648, + "grad_norm": 10.51363468170166, + "learning_rate": 2.450824175824176e-05, + "loss": 0.1386, + "step": 18558 + }, + { + "epoch": 50.98626373626374, + "grad_norm": 19.567787170410156, + "learning_rate": 2.4506868131868132e-05, + "loss": 0.7043, + "step": 18559 + }, + { + "epoch": 50.98901098901099, + "grad_norm": 23.496627807617188, + "learning_rate": 2.4505494505494506e-05, + "loss": 0.5949, + "step": 18560 + }, + { + "epoch": 50.99175824175824, + "grad_norm": 4.20676326751709, + "learning_rate": 2.4504120879120882e-05, + "loss": 0.0499, + "step": 18561 + }, + { + "epoch": 50.994505494505496, + "grad_norm": 6.26230525970459, + "learning_rate": 2.4502747252747256e-05, + "loss": 0.0728, + "step": 18562 + }, + { + "epoch": 50.997252747252745, + "grad_norm": 15.371087074279785, + "learning_rate": 2.450137362637363e-05, + "loss": 0.3143, + "step": 18563 + }, + { + "epoch": 51.0, + "grad_norm": 53.170738220214844, + "learning_rate": 2.45e-05, + "loss": 0.7923, + "step": 18564 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.7369146005509641, + "eval_f1": 0.7273263615927157, + "eval_f1_DuraRiadoRio_64x64": 0.7027027027027027, + "eval_f1_Mole_64x64": 0.8373493975903614, + "eval_f1_Quebrado_64x64": 0.712707182320442, + "eval_f1_RiadoRio_64x64": 0.7522935779816514, + "eval_f1_RioFechado_64x64": 0.631578947368421, + "eval_loss": 1.1039292812347412, + "eval_precision": 0.8038073182233058, + "eval_precision_DuraRiadoRio_64x64": 1.0, + "eval_precision_Mole_64x64": 0.7393617021276596, + "eval_precision_Quebrado_64x64": 0.591743119266055, + "eval_precision_RiadoRio_64x64": 0.7028571428571428, + "eval_precision_RioFechado_64x64": 0.9850746268656716, + "eval_recall": 0.7353554072975866, + "eval_recall_DuraRiadoRio_64x64": 0.5416666666666666, + "eval_recall_Mole_64x64": 0.9652777777777778, + "eval_recall_Quebrado_64x64": 0.8958333333333334, + "eval_recall_RiadoRio_64x64": 0.8092105263157895, + "eval_recall_RioFechado_64x64": 0.4647887323943662, + "eval_runtime": 1.7206, + "eval_samples_per_second": 421.951, + "eval_steps_per_second": 26.735, + "step": 18564 + }, + { + "epoch": 51.002747252747255, + "grad_norm": 8.440004348754883, + "learning_rate": 2.4498626373626373e-05, + "loss": 0.1635, + "step": 18565 + }, + { + "epoch": 51.005494505494504, + "grad_norm": 5.623804569244385, + "learning_rate": 2.449725274725275e-05, + "loss": 0.0651, + "step": 18566 + }, + { + "epoch": 51.00824175824176, + "grad_norm": 17.339427947998047, + "learning_rate": 2.4495879120879123e-05, + "loss": 0.4451, + "step": 18567 + }, + { + "epoch": 51.010989010989015, + "grad_norm": 9.044841766357422, + "learning_rate": 2.4494505494505496e-05, + "loss": 0.1072, + "step": 18568 + }, + { + "epoch": 51.01373626373626, + "grad_norm": 7.776215553283691, + "learning_rate": 2.449313186813187e-05, + "loss": 0.1396, + "step": 18569 + }, + { + "epoch": 51.01648351648352, + "grad_norm": 6.565979957580566, + "learning_rate": 2.4491758241758243e-05, + "loss": 0.0864, + "step": 18570 + }, + { + "epoch": 51.01923076923077, + "grad_norm": 17.02381134033203, + "learning_rate": 2.4490384615384616e-05, + "loss": 0.5373, + "step": 18571 + }, + { + "epoch": 51.02197802197802, + "grad_norm": 10.341996192932129, + "learning_rate": 2.448901098901099e-05, + "loss": 0.1637, + "step": 18572 + }, + { + "epoch": 51.02472527472528, + "grad_norm": 6.268888473510742, + "learning_rate": 2.4487637362637363e-05, + "loss": 0.1178, + "step": 18573 + }, + { + "epoch": 51.027472527472526, + "grad_norm": 10.534815788269043, + "learning_rate": 2.4486263736263737e-05, + "loss": 0.3303, + "step": 18574 + }, + { + "epoch": 51.03021978021978, + "grad_norm": 6.137232780456543, + "learning_rate": 2.448489010989011e-05, + "loss": 0.1303, + "step": 18575 + }, + { + "epoch": 51.032967032967036, + "grad_norm": 15.561816215515137, + "learning_rate": 2.4483516483516487e-05, + "loss": 0.2805, + "step": 18576 + }, + { + "epoch": 51.035714285714285, + "grad_norm": 25.219141006469727, + "learning_rate": 2.448214285714286e-05, + "loss": 0.8024, + "step": 18577 + }, + { + "epoch": 51.03846153846154, + "grad_norm": 8.896700859069824, + "learning_rate": 2.4480769230769234e-05, + "loss": 0.2098, + "step": 18578 + }, + { + "epoch": 51.04120879120879, + "grad_norm": 17.40224838256836, + "learning_rate": 2.4479395604395604e-05, + "loss": 0.2027, + "step": 18579 + }, + { + "epoch": 51.043956043956044, + "grad_norm": 6.386281490325928, + "learning_rate": 2.4478021978021977e-05, + "loss": 0.175, + "step": 18580 + }, + { + "epoch": 51.0467032967033, + "grad_norm": 4.663552284240723, + "learning_rate": 2.4476648351648354e-05, + "loss": 0.0947, + "step": 18581 + }, + { + "epoch": 51.04945054945055, + "grad_norm": 7.316348075866699, + "learning_rate": 2.4475274725274727e-05, + "loss": 0.0567, + "step": 18582 + }, + { + "epoch": 51.0521978021978, + "grad_norm": 8.94416332244873, + "learning_rate": 2.44739010989011e-05, + "loss": 0.1936, + "step": 18583 + }, + { + "epoch": 51.05494505494506, + "grad_norm": 12.999402046203613, + "learning_rate": 2.4472527472527474e-05, + "loss": 0.267, + "step": 18584 + }, + { + "epoch": 51.05769230769231, + "grad_norm": 17.655960083007812, + "learning_rate": 2.4471153846153848e-05, + "loss": 0.5362, + "step": 18585 + }, + { + "epoch": 51.06043956043956, + "grad_norm": 18.140661239624023, + "learning_rate": 2.446978021978022e-05, + "loss": 0.4222, + "step": 18586 + }, + { + "epoch": 51.06318681318681, + "grad_norm": 17.65653419494629, + "learning_rate": 2.4468406593406594e-05, + "loss": 0.2909, + "step": 18587 + }, + { + "epoch": 51.065934065934066, + "grad_norm": 12.510252952575684, + "learning_rate": 2.4467032967032968e-05, + "loss": 0.2045, + "step": 18588 + }, + { + "epoch": 51.06868131868132, + "grad_norm": 9.983504295349121, + "learning_rate": 2.446565934065934e-05, + "loss": 0.209, + "step": 18589 + }, + { + "epoch": 51.07142857142857, + "grad_norm": 18.6010684967041, + "learning_rate": 2.4464285714285715e-05, + "loss": 0.2948, + "step": 18590 + }, + { + "epoch": 51.074175824175825, + "grad_norm": 23.584949493408203, + "learning_rate": 2.446291208791209e-05, + "loss": 0.5459, + "step": 18591 + }, + { + "epoch": 51.07692307692308, + "grad_norm": 18.46225929260254, + "learning_rate": 2.4461538461538465e-05, + "loss": 0.4297, + "step": 18592 + }, + { + "epoch": 51.07967032967033, + "grad_norm": 11.66281795501709, + "learning_rate": 2.4460164835164838e-05, + "loss": 0.1593, + "step": 18593 + }, + { + "epoch": 51.082417582417584, + "grad_norm": 5.151332378387451, + "learning_rate": 2.4458791208791208e-05, + "loss": 0.1387, + "step": 18594 + }, + { + "epoch": 51.08516483516483, + "grad_norm": 4.42681884765625, + "learning_rate": 2.445741758241758e-05, + "loss": 0.0579, + "step": 18595 + }, + { + "epoch": 51.08791208791209, + "grad_norm": 19.35489845275879, + "learning_rate": 2.445604395604396e-05, + "loss": 0.4761, + "step": 18596 + }, + { + "epoch": 51.09065934065934, + "grad_norm": 11.884329795837402, + "learning_rate": 2.4454670329670332e-05, + "loss": 0.1547, + "step": 18597 + }, + { + "epoch": 51.09340659340659, + "grad_norm": 21.029617309570312, + "learning_rate": 2.4453296703296705e-05, + "loss": 0.5252, + "step": 18598 + }, + { + "epoch": 51.09615384615385, + "grad_norm": 13.034573554992676, + "learning_rate": 2.445192307692308e-05, + "loss": 0.4228, + "step": 18599 + }, + { + "epoch": 51.0989010989011, + "grad_norm": 15.666509628295898, + "learning_rate": 2.4450549450549452e-05, + "loss": 0.1663, + "step": 18600 + }, + { + "epoch": 51.10164835164835, + "grad_norm": 14.366336822509766, + "learning_rate": 2.4449175824175825e-05, + "loss": 0.2441, + "step": 18601 + }, + { + "epoch": 51.104395604395606, + "grad_norm": 5.473278045654297, + "learning_rate": 2.44478021978022e-05, + "loss": 0.1358, + "step": 18602 + }, + { + "epoch": 51.107142857142854, + "grad_norm": 7.372230529785156, + "learning_rate": 2.4446428571428572e-05, + "loss": 0.1657, + "step": 18603 + }, + { + "epoch": 51.10989010989011, + "grad_norm": 3.521578073501587, + "learning_rate": 2.4445054945054946e-05, + "loss": 0.0608, + "step": 18604 + }, + { + "epoch": 51.112637362637365, + "grad_norm": 7.017311096191406, + "learning_rate": 2.444368131868132e-05, + "loss": 0.1439, + "step": 18605 + }, + { + "epoch": 51.11538461538461, + "grad_norm": 7.640329360961914, + "learning_rate": 2.4442307692307692e-05, + "loss": 0.1158, + "step": 18606 + }, + { + "epoch": 51.11813186813187, + "grad_norm": 6.890217304229736, + "learning_rate": 2.444093406593407e-05, + "loss": 0.1409, + "step": 18607 + }, + { + "epoch": 51.120879120879124, + "grad_norm": 8.045551300048828, + "learning_rate": 2.4439560439560443e-05, + "loss": 0.1345, + "step": 18608 + }, + { + "epoch": 51.12362637362637, + "grad_norm": 7.050987720489502, + "learning_rate": 2.4438186813186813e-05, + "loss": 0.2129, + "step": 18609 + }, + { + "epoch": 51.12637362637363, + "grad_norm": 3.447796106338501, + "learning_rate": 2.4436813186813186e-05, + "loss": 0.0576, + "step": 18610 + }, + { + "epoch": 51.129120879120876, + "grad_norm": 3.768815040588379, + "learning_rate": 2.443543956043956e-05, + "loss": 0.0714, + "step": 18611 + }, + { + "epoch": 51.13186813186813, + "grad_norm": 5.165244102478027, + "learning_rate": 2.4434065934065936e-05, + "loss": 0.1271, + "step": 18612 + }, + { + "epoch": 51.13461538461539, + "grad_norm": 5.913595199584961, + "learning_rate": 2.443269230769231e-05, + "loss": 0.1691, + "step": 18613 + }, + { + "epoch": 51.137362637362635, + "grad_norm": 8.647171020507812, + "learning_rate": 2.4431318681318683e-05, + "loss": 0.0955, + "step": 18614 + }, + { + "epoch": 51.14010989010989, + "grad_norm": 11.680983543395996, + "learning_rate": 2.4429945054945057e-05, + "loss": 0.3333, + "step": 18615 + }, + { + "epoch": 51.142857142857146, + "grad_norm": 3.740900993347168, + "learning_rate": 2.442857142857143e-05, + "loss": 0.079, + "step": 18616 + }, + { + "epoch": 51.145604395604394, + "grad_norm": 12.189668655395508, + "learning_rate": 2.4427197802197803e-05, + "loss": 0.3749, + "step": 18617 + }, + { + "epoch": 51.14835164835165, + "grad_norm": 7.766152381896973, + "learning_rate": 2.4425824175824177e-05, + "loss": 0.0958, + "step": 18618 + }, + { + "epoch": 51.1510989010989, + "grad_norm": 13.41185474395752, + "learning_rate": 2.442445054945055e-05, + "loss": 0.3129, + "step": 18619 + }, + { + "epoch": 51.15384615384615, + "grad_norm": 13.035822868347168, + "learning_rate": 2.4423076923076924e-05, + "loss": 0.2888, + "step": 18620 + }, + { + "epoch": 51.15659340659341, + "grad_norm": 14.84530258178711, + "learning_rate": 2.4421703296703297e-05, + "loss": 0.2593, + "step": 18621 + }, + { + "epoch": 51.15934065934066, + "grad_norm": 20.233877182006836, + "learning_rate": 2.4420329670329674e-05, + "loss": 0.49, + "step": 18622 + }, + { + "epoch": 51.16208791208791, + "grad_norm": 19.08037567138672, + "learning_rate": 2.4418956043956047e-05, + "loss": 0.8503, + "step": 18623 + }, + { + "epoch": 51.16483516483517, + "grad_norm": 19.499961853027344, + "learning_rate": 2.4417582417582417e-05, + "loss": 0.6281, + "step": 18624 + }, + { + "epoch": 51.167582417582416, + "grad_norm": 21.85009765625, + "learning_rate": 2.441620879120879e-05, + "loss": 0.9455, + "step": 18625 + }, + { + "epoch": 51.17032967032967, + "grad_norm": 10.1666898727417, + "learning_rate": 2.4414835164835164e-05, + "loss": 0.1864, + "step": 18626 + }, + { + "epoch": 51.17307692307692, + "grad_norm": 15.680821418762207, + "learning_rate": 2.441346153846154e-05, + "loss": 0.5137, + "step": 18627 + }, + { + "epoch": 51.175824175824175, + "grad_norm": 8.648238182067871, + "learning_rate": 2.4412087912087914e-05, + "loss": 0.1754, + "step": 18628 + }, + { + "epoch": 51.17857142857143, + "grad_norm": 2.576681613922119, + "learning_rate": 2.4410714285714288e-05, + "loss": 0.0348, + "step": 18629 + }, + { + "epoch": 51.18131868131868, + "grad_norm": 16.454240798950195, + "learning_rate": 2.440934065934066e-05, + "loss": 0.4107, + "step": 18630 + }, + { + "epoch": 51.184065934065934, + "grad_norm": 9.928869247436523, + "learning_rate": 2.4407967032967034e-05, + "loss": 0.1733, + "step": 18631 + }, + { + "epoch": 51.18681318681319, + "grad_norm": 5.857160568237305, + "learning_rate": 2.4406593406593408e-05, + "loss": 0.1707, + "step": 18632 + }, + { + "epoch": 51.18956043956044, + "grad_norm": 12.027329444885254, + "learning_rate": 2.440521978021978e-05, + "loss": 0.1465, + "step": 18633 + }, + { + "epoch": 51.19230769230769, + "grad_norm": 13.605646133422852, + "learning_rate": 2.4403846153846155e-05, + "loss": 0.3029, + "step": 18634 + }, + { + "epoch": 51.19505494505494, + "grad_norm": 14.410624504089355, + "learning_rate": 2.4402472527472528e-05, + "loss": 0.2892, + "step": 18635 + }, + { + "epoch": 51.1978021978022, + "grad_norm": 11.593282699584961, + "learning_rate": 2.44010989010989e-05, + "loss": 0.3475, + "step": 18636 + }, + { + "epoch": 51.20054945054945, + "grad_norm": 12.03031063079834, + "learning_rate": 2.4399725274725278e-05, + "loss": 0.2727, + "step": 18637 + }, + { + "epoch": 51.2032967032967, + "grad_norm": 13.929895401000977, + "learning_rate": 2.439835164835165e-05, + "loss": 0.2557, + "step": 18638 + }, + { + "epoch": 51.206043956043956, + "grad_norm": 3.754849672317505, + "learning_rate": 2.439697802197802e-05, + "loss": 0.036, + "step": 18639 + }, + { + "epoch": 51.20879120879121, + "grad_norm": 11.319534301757812, + "learning_rate": 2.4395604395604395e-05, + "loss": 0.1466, + "step": 18640 + }, + { + "epoch": 51.21153846153846, + "grad_norm": 6.700606346130371, + "learning_rate": 2.439423076923077e-05, + "loss": 0.1003, + "step": 18641 + }, + { + "epoch": 51.214285714285715, + "grad_norm": 9.503656387329102, + "learning_rate": 2.4392857142857145e-05, + "loss": 0.1869, + "step": 18642 + }, + { + "epoch": 51.217032967032964, + "grad_norm": 17.14701271057129, + "learning_rate": 2.439148351648352e-05, + "loss": 0.3352, + "step": 18643 + }, + { + "epoch": 51.21978021978022, + "grad_norm": 7.797513008117676, + "learning_rate": 2.4390109890109892e-05, + "loss": 0.0845, + "step": 18644 + }, + { + "epoch": 51.222527472527474, + "grad_norm": 11.401596069335938, + "learning_rate": 2.4388736263736265e-05, + "loss": 0.2006, + "step": 18645 + }, + { + "epoch": 51.22527472527472, + "grad_norm": 10.304593086242676, + "learning_rate": 2.438736263736264e-05, + "loss": 0.1371, + "step": 18646 + }, + { + "epoch": 51.22802197802198, + "grad_norm": 7.834935665130615, + "learning_rate": 2.4385989010989012e-05, + "loss": 0.1048, + "step": 18647 + }, + { + "epoch": 51.23076923076923, + "grad_norm": 16.77483558654785, + "learning_rate": 2.4384615384615386e-05, + "loss": 0.352, + "step": 18648 + }, + { + "epoch": 51.23351648351648, + "grad_norm": 9.797444343566895, + "learning_rate": 2.438324175824176e-05, + "loss": 0.1759, + "step": 18649 + }, + { + "epoch": 51.23626373626374, + "grad_norm": 13.286118507385254, + "learning_rate": 2.4381868131868133e-05, + "loss": 0.2492, + "step": 18650 + }, + { + "epoch": 51.239010989010985, + "grad_norm": 13.290573120117188, + "learning_rate": 2.4380494505494506e-05, + "loss": 0.4091, + "step": 18651 + }, + { + "epoch": 51.24175824175824, + "grad_norm": 13.66683292388916, + "learning_rate": 2.4379120879120883e-05, + "loss": 0.4472, + "step": 18652 + }, + { + "epoch": 51.244505494505496, + "grad_norm": 0.5851102471351624, + "learning_rate": 2.4377747252747256e-05, + "loss": 0.0064, + "step": 18653 + }, + { + "epoch": 51.247252747252745, + "grad_norm": 18.257675170898438, + "learning_rate": 2.4376373626373626e-05, + "loss": 0.3281, + "step": 18654 + }, + { + "epoch": 51.25, + "grad_norm": 6.147756099700928, + "learning_rate": 2.4375e-05, + "loss": 0.1042, + "step": 18655 + }, + { + "epoch": 51.252747252747255, + "grad_norm": 10.858558654785156, + "learning_rate": 2.4373626373626373e-05, + "loss": 0.1779, + "step": 18656 + }, + { + "epoch": 51.255494505494504, + "grad_norm": 4.3195695877075195, + "learning_rate": 2.437225274725275e-05, + "loss": 0.0844, + "step": 18657 + }, + { + "epoch": 51.25824175824176, + "grad_norm": 2.6655426025390625, + "learning_rate": 2.4370879120879123e-05, + "loss": 0.0426, + "step": 18658 + }, + { + "epoch": 51.260989010989015, + "grad_norm": 10.317072868347168, + "learning_rate": 2.4369505494505497e-05, + "loss": 0.1446, + "step": 18659 + }, + { + "epoch": 51.26373626373626, + "grad_norm": 12.743474960327148, + "learning_rate": 2.436813186813187e-05, + "loss": 0.301, + "step": 18660 + }, + { + "epoch": 51.26648351648352, + "grad_norm": 21.454792022705078, + "learning_rate": 2.4366758241758243e-05, + "loss": 0.584, + "step": 18661 + }, + { + "epoch": 51.26923076923077, + "grad_norm": 15.729326248168945, + "learning_rate": 2.4365384615384617e-05, + "loss": 0.4593, + "step": 18662 + }, + { + "epoch": 51.27197802197802, + "grad_norm": 5.556654930114746, + "learning_rate": 2.436401098901099e-05, + "loss": 0.0799, + "step": 18663 + }, + { + "epoch": 51.27472527472528, + "grad_norm": 20.027484893798828, + "learning_rate": 2.4362637362637364e-05, + "loss": 0.725, + "step": 18664 + }, + { + "epoch": 51.277472527472526, + "grad_norm": 4.24289608001709, + "learning_rate": 2.4361263736263737e-05, + "loss": 0.0529, + "step": 18665 + }, + { + "epoch": 51.28021978021978, + "grad_norm": 9.696690559387207, + "learning_rate": 2.435989010989011e-05, + "loss": 0.203, + "step": 18666 + }, + { + "epoch": 51.282967032967036, + "grad_norm": 31.891036987304688, + "learning_rate": 2.4358516483516487e-05, + "loss": 0.9824, + "step": 18667 + }, + { + "epoch": 51.285714285714285, + "grad_norm": 5.377978324890137, + "learning_rate": 2.4357142857142857e-05, + "loss": 0.1102, + "step": 18668 + }, + { + "epoch": 51.28846153846154, + "grad_norm": 13.709168434143066, + "learning_rate": 2.435576923076923e-05, + "loss": 0.2696, + "step": 18669 + }, + { + "epoch": 51.29120879120879, + "grad_norm": 6.289733409881592, + "learning_rate": 2.4354395604395604e-05, + "loss": 0.1115, + "step": 18670 + }, + { + "epoch": 51.293956043956044, + "grad_norm": 7.09307861328125, + "learning_rate": 2.4353021978021977e-05, + "loss": 0.0946, + "step": 18671 + }, + { + "epoch": 51.2967032967033, + "grad_norm": 13.957763671875, + "learning_rate": 2.4351648351648354e-05, + "loss": 0.2554, + "step": 18672 + }, + { + "epoch": 51.29945054945055, + "grad_norm": 13.641867637634277, + "learning_rate": 2.4350274725274728e-05, + "loss": 0.3903, + "step": 18673 + }, + { + "epoch": 51.3021978021978, + "grad_norm": 10.71665096282959, + "learning_rate": 2.43489010989011e-05, + "loss": 0.1457, + "step": 18674 + }, + { + "epoch": 51.30494505494506, + "grad_norm": 7.546292781829834, + "learning_rate": 2.4347527472527474e-05, + "loss": 0.1941, + "step": 18675 + }, + { + "epoch": 51.30769230769231, + "grad_norm": 12.515214920043945, + "learning_rate": 2.4346153846153848e-05, + "loss": 0.436, + "step": 18676 + }, + { + "epoch": 51.31043956043956, + "grad_norm": 18.358474731445312, + "learning_rate": 2.434478021978022e-05, + "loss": 0.468, + "step": 18677 + }, + { + "epoch": 51.31318681318681, + "grad_norm": 13.938000679016113, + "learning_rate": 2.4343406593406595e-05, + "loss": 0.1745, + "step": 18678 + }, + { + "epoch": 51.315934065934066, + "grad_norm": 5.227616310119629, + "learning_rate": 2.4342032967032968e-05, + "loss": 0.0996, + "step": 18679 + }, + { + "epoch": 51.31868131868132, + "grad_norm": 22.021448135375977, + "learning_rate": 2.434065934065934e-05, + "loss": 0.3641, + "step": 18680 + }, + { + "epoch": 51.32142857142857, + "grad_norm": 14.681745529174805, + "learning_rate": 2.4339285714285715e-05, + "loss": 0.3818, + "step": 18681 + }, + { + "epoch": 51.324175824175825, + "grad_norm": 23.755138397216797, + "learning_rate": 2.4337912087912092e-05, + "loss": 0.5432, + "step": 18682 + }, + { + "epoch": 51.32692307692308, + "grad_norm": 24.323654174804688, + "learning_rate": 2.4336538461538462e-05, + "loss": 0.3586, + "step": 18683 + }, + { + "epoch": 51.32967032967033, + "grad_norm": 14.603889465332031, + "learning_rate": 2.4335164835164835e-05, + "loss": 0.3162, + "step": 18684 + }, + { + "epoch": 51.332417582417584, + "grad_norm": 6.550812721252441, + "learning_rate": 2.433379120879121e-05, + "loss": 0.1061, + "step": 18685 + }, + { + "epoch": 51.33516483516483, + "grad_norm": 18.70282745361328, + "learning_rate": 2.4332417582417582e-05, + "loss": 0.6072, + "step": 18686 + }, + { + "epoch": 51.33791208791209, + "grad_norm": 15.010038375854492, + "learning_rate": 2.433104395604396e-05, + "loss": 0.2522, + "step": 18687 + }, + { + "epoch": 51.34065934065934, + "grad_norm": 11.820711135864258, + "learning_rate": 2.4329670329670332e-05, + "loss": 0.1174, + "step": 18688 + }, + { + "epoch": 51.34340659340659, + "grad_norm": 7.851017475128174, + "learning_rate": 2.4328296703296706e-05, + "loss": 0.1901, + "step": 18689 + }, + { + "epoch": 51.34615384615385, + "grad_norm": 5.779754638671875, + "learning_rate": 2.432692307692308e-05, + "loss": 0.0871, + "step": 18690 + }, + { + "epoch": 51.3489010989011, + "grad_norm": 6.662512302398682, + "learning_rate": 2.4325549450549452e-05, + "loss": 0.1026, + "step": 18691 + }, + { + "epoch": 51.35164835164835, + "grad_norm": 14.879264831542969, + "learning_rate": 2.4324175824175826e-05, + "loss": 0.211, + "step": 18692 + }, + { + "epoch": 51.354395604395606, + "grad_norm": 22.879610061645508, + "learning_rate": 2.43228021978022e-05, + "loss": 0.8697, + "step": 18693 + }, + { + "epoch": 51.357142857142854, + "grad_norm": 15.730548858642578, + "learning_rate": 2.4321428571428573e-05, + "loss": 0.3536, + "step": 18694 + }, + { + "epoch": 51.35989010989011, + "grad_norm": 3.7361042499542236, + "learning_rate": 2.4320054945054946e-05, + "loss": 0.0716, + "step": 18695 + }, + { + "epoch": 51.362637362637365, + "grad_norm": 12.013859748840332, + "learning_rate": 2.431868131868132e-05, + "loss": 0.3903, + "step": 18696 + }, + { + "epoch": 51.36538461538461, + "grad_norm": 19.915973663330078, + "learning_rate": 2.4317307692307696e-05, + "loss": 0.4117, + "step": 18697 + }, + { + "epoch": 51.36813186813187, + "grad_norm": 3.1887664794921875, + "learning_rate": 2.4315934065934066e-05, + "loss": 0.0621, + "step": 18698 + }, + { + "epoch": 51.370879120879124, + "grad_norm": 14.965392112731934, + "learning_rate": 2.431456043956044e-05, + "loss": 0.2957, + "step": 18699 + }, + { + "epoch": 51.37362637362637, + "grad_norm": 7.233717918395996, + "learning_rate": 2.4313186813186813e-05, + "loss": 0.1121, + "step": 18700 + }, + { + "epoch": 51.37637362637363, + "grad_norm": 8.499533653259277, + "learning_rate": 2.4311813186813186e-05, + "loss": 0.1021, + "step": 18701 + }, + { + "epoch": 51.379120879120876, + "grad_norm": 26.085041046142578, + "learning_rate": 2.4310439560439563e-05, + "loss": 0.842, + "step": 18702 + }, + { + "epoch": 51.38186813186813, + "grad_norm": 9.891016006469727, + "learning_rate": 2.4309065934065937e-05, + "loss": 0.1889, + "step": 18703 + }, + { + "epoch": 51.38461538461539, + "grad_norm": 10.75387954711914, + "learning_rate": 2.430769230769231e-05, + "loss": 0.1462, + "step": 18704 + }, + { + "epoch": 51.387362637362635, + "grad_norm": 9.093304634094238, + "learning_rate": 2.4306318681318683e-05, + "loss": 0.2185, + "step": 18705 + }, + { + "epoch": 51.39010989010989, + "grad_norm": 14.357354164123535, + "learning_rate": 2.4304945054945057e-05, + "loss": 0.36, + "step": 18706 + }, + { + "epoch": 51.392857142857146, + "grad_norm": 4.811977386474609, + "learning_rate": 2.430357142857143e-05, + "loss": 0.0865, + "step": 18707 + }, + { + "epoch": 51.395604395604394, + "grad_norm": 20.813859939575195, + "learning_rate": 2.4302197802197804e-05, + "loss": 0.5671, + "step": 18708 + }, + { + "epoch": 51.39835164835165, + "grad_norm": 10.359542846679688, + "learning_rate": 2.4300824175824177e-05, + "loss": 0.1912, + "step": 18709 + }, + { + "epoch": 51.4010989010989, + "grad_norm": 10.200196266174316, + "learning_rate": 2.429945054945055e-05, + "loss": 0.1499, + "step": 18710 + }, + { + "epoch": 51.40384615384615, + "grad_norm": 7.9047627449035645, + "learning_rate": 2.4298076923076924e-05, + "loss": 0.1747, + "step": 18711 + }, + { + "epoch": 51.40659340659341, + "grad_norm": 7.917699337005615, + "learning_rate": 2.4296703296703297e-05, + "loss": 0.2847, + "step": 18712 + }, + { + "epoch": 51.40934065934066, + "grad_norm": 4.694465160369873, + "learning_rate": 2.429532967032967e-05, + "loss": 0.0929, + "step": 18713 + }, + { + "epoch": 51.41208791208791, + "grad_norm": 8.099543571472168, + "learning_rate": 2.4293956043956044e-05, + "loss": 0.2094, + "step": 18714 + }, + { + "epoch": 51.41483516483517, + "grad_norm": 5.990137577056885, + "learning_rate": 2.4292582417582418e-05, + "loss": 0.1136, + "step": 18715 + }, + { + "epoch": 51.417582417582416, + "grad_norm": 13.884023666381836, + "learning_rate": 2.429120879120879e-05, + "loss": 0.3806, + "step": 18716 + }, + { + "epoch": 51.42032967032967, + "grad_norm": 8.234347343444824, + "learning_rate": 2.4289835164835164e-05, + "loss": 0.2455, + "step": 18717 + }, + { + "epoch": 51.42307692307692, + "grad_norm": 21.517541885375977, + "learning_rate": 2.428846153846154e-05, + "loss": 0.6748, + "step": 18718 + }, + { + "epoch": 51.425824175824175, + "grad_norm": 4.271588325500488, + "learning_rate": 2.4287087912087915e-05, + "loss": 0.0607, + "step": 18719 + }, + { + "epoch": 51.42857142857143, + "grad_norm": 13.127242088317871, + "learning_rate": 2.4285714285714288e-05, + "loss": 0.2703, + "step": 18720 + }, + { + "epoch": 51.43131868131868, + "grad_norm": 21.52369499206543, + "learning_rate": 2.428434065934066e-05, + "loss": 0.7195, + "step": 18721 + }, + { + "epoch": 51.434065934065934, + "grad_norm": 6.462644577026367, + "learning_rate": 2.428296703296703e-05, + "loss": 0.1317, + "step": 18722 + }, + { + "epoch": 51.43681318681319, + "grad_norm": 13.050768852233887, + "learning_rate": 2.4281593406593408e-05, + "loss": 0.1909, + "step": 18723 + }, + { + "epoch": 51.43956043956044, + "grad_norm": 7.814455032348633, + "learning_rate": 2.428021978021978e-05, + "loss": 0.1404, + "step": 18724 + }, + { + "epoch": 51.44230769230769, + "grad_norm": 19.861793518066406, + "learning_rate": 2.4278846153846155e-05, + "loss": 0.5227, + "step": 18725 + }, + { + "epoch": 51.44505494505494, + "grad_norm": 7.973069667816162, + "learning_rate": 2.427747252747253e-05, + "loss": 0.2383, + "step": 18726 + }, + { + "epoch": 51.4478021978022, + "grad_norm": 11.007450103759766, + "learning_rate": 2.4276098901098902e-05, + "loss": 0.2476, + "step": 18727 + }, + { + "epoch": 51.45054945054945, + "grad_norm": 15.996194839477539, + "learning_rate": 2.4274725274725275e-05, + "loss": 0.5494, + "step": 18728 + }, + { + "epoch": 51.4532967032967, + "grad_norm": 15.043187141418457, + "learning_rate": 2.427335164835165e-05, + "loss": 0.4672, + "step": 18729 + }, + { + "epoch": 51.456043956043956, + "grad_norm": 24.4291934967041, + "learning_rate": 2.4271978021978022e-05, + "loss": 0.7594, + "step": 18730 + }, + { + "epoch": 51.45879120879121, + "grad_norm": 10.778189659118652, + "learning_rate": 2.4270604395604395e-05, + "loss": 0.0909, + "step": 18731 + }, + { + "epoch": 51.46153846153846, + "grad_norm": 5.910645484924316, + "learning_rate": 2.426923076923077e-05, + "loss": 0.1277, + "step": 18732 + }, + { + "epoch": 51.464285714285715, + "grad_norm": 12.308778762817383, + "learning_rate": 2.4267857142857146e-05, + "loss": 0.2447, + "step": 18733 + }, + { + "epoch": 51.467032967032964, + "grad_norm": 16.181188583374023, + "learning_rate": 2.426648351648352e-05, + "loss": 0.3479, + "step": 18734 + }, + { + "epoch": 51.46978021978022, + "grad_norm": 7.223036289215088, + "learning_rate": 2.4265109890109892e-05, + "loss": 0.1007, + "step": 18735 + }, + { + "epoch": 51.472527472527474, + "grad_norm": 14.703862190246582, + "learning_rate": 2.4263736263736266e-05, + "loss": 0.3914, + "step": 18736 + }, + { + "epoch": 51.47527472527472, + "grad_norm": 13.305204391479492, + "learning_rate": 2.4262362637362636e-05, + "loss": 0.3225, + "step": 18737 + }, + { + "epoch": 51.47802197802198, + "grad_norm": 8.379003524780273, + "learning_rate": 2.4260989010989013e-05, + "loss": 0.1899, + "step": 18738 + }, + { + "epoch": 51.48076923076923, + "grad_norm": 8.214738845825195, + "learning_rate": 2.4259615384615386e-05, + "loss": 0.1476, + "step": 18739 + }, + { + "epoch": 51.48351648351648, + "grad_norm": 1.1862679719924927, + "learning_rate": 2.425824175824176e-05, + "loss": 0.0208, + "step": 18740 + }, + { + "epoch": 51.48626373626374, + "grad_norm": 10.012259483337402, + "learning_rate": 2.4256868131868133e-05, + "loss": 0.3343, + "step": 18741 + }, + { + "epoch": 51.489010989010985, + "grad_norm": 16.765066146850586, + "learning_rate": 2.4255494505494506e-05, + "loss": 0.2953, + "step": 18742 + }, + { + "epoch": 51.49175824175824, + "grad_norm": 5.152205467224121, + "learning_rate": 2.425412087912088e-05, + "loss": 0.0808, + "step": 18743 + }, + { + "epoch": 51.494505494505496, + "grad_norm": 8.13357162475586, + "learning_rate": 2.4252747252747253e-05, + "loss": 0.137, + "step": 18744 + }, + { + "epoch": 51.497252747252745, + "grad_norm": 13.64263916015625, + "learning_rate": 2.4251373626373627e-05, + "loss": 0.2996, + "step": 18745 + }, + { + "epoch": 51.5, + "grad_norm": 15.535055160522461, + "learning_rate": 2.425e-05, + "loss": 0.5679, + "step": 18746 + }, + { + "epoch": 51.502747252747255, + "grad_norm": 8.148849487304688, + "learning_rate": 2.4248626373626373e-05, + "loss": 0.1785, + "step": 18747 + }, + { + "epoch": 51.505494505494504, + "grad_norm": 15.41915512084961, + "learning_rate": 2.424725274725275e-05, + "loss": 0.2205, + "step": 18748 + }, + { + "epoch": 51.50824175824176, + "grad_norm": 15.557751655578613, + "learning_rate": 2.4245879120879124e-05, + "loss": 0.2902, + "step": 18749 + }, + { + "epoch": 51.51098901098901, + "grad_norm": 9.5827054977417, + "learning_rate": 2.4244505494505497e-05, + "loss": 0.1125, + "step": 18750 + }, + { + "epoch": 51.51373626373626, + "grad_norm": 11.912271499633789, + "learning_rate": 2.424313186813187e-05, + "loss": 0.2905, + "step": 18751 + }, + { + "epoch": 51.51648351648352, + "grad_norm": 5.061017036437988, + "learning_rate": 2.424175824175824e-05, + "loss": 0.0936, + "step": 18752 + }, + { + "epoch": 51.51923076923077, + "grad_norm": 9.221435546875, + "learning_rate": 2.4240384615384617e-05, + "loss": 0.1368, + "step": 18753 + }, + { + "epoch": 51.52197802197802, + "grad_norm": 11.163492202758789, + "learning_rate": 2.423901098901099e-05, + "loss": 0.3658, + "step": 18754 + }, + { + "epoch": 51.52472527472528, + "grad_norm": 1.2779297828674316, + "learning_rate": 2.4237637362637364e-05, + "loss": 0.0206, + "step": 18755 + }, + { + "epoch": 51.527472527472526, + "grad_norm": 14.295872688293457, + "learning_rate": 2.4236263736263737e-05, + "loss": 0.2761, + "step": 18756 + }, + { + "epoch": 51.53021978021978, + "grad_norm": 5.8826069831848145, + "learning_rate": 2.423489010989011e-05, + "loss": 0.0802, + "step": 18757 + }, + { + "epoch": 51.532967032967036, + "grad_norm": 8.681744575500488, + "learning_rate": 2.4233516483516484e-05, + "loss": 0.2655, + "step": 18758 + }, + { + "epoch": 51.535714285714285, + "grad_norm": 0.975516676902771, + "learning_rate": 2.4232142857142858e-05, + "loss": 0.0141, + "step": 18759 + }, + { + "epoch": 51.53846153846154, + "grad_norm": 13.573801040649414, + "learning_rate": 2.423076923076923e-05, + "loss": 0.3249, + "step": 18760 + }, + { + "epoch": 51.54120879120879, + "grad_norm": 8.458121299743652, + "learning_rate": 2.4229395604395604e-05, + "loss": 0.1822, + "step": 18761 + }, + { + "epoch": 51.543956043956044, + "grad_norm": 3.6291074752807617, + "learning_rate": 2.4228021978021978e-05, + "loss": 0.0359, + "step": 18762 + }, + { + "epoch": 51.5467032967033, + "grad_norm": 8.934993743896484, + "learning_rate": 2.4226648351648355e-05, + "loss": 0.0925, + "step": 18763 + }, + { + "epoch": 51.54945054945055, + "grad_norm": 22.067811965942383, + "learning_rate": 2.4225274725274728e-05, + "loss": 0.4591, + "step": 18764 + }, + { + "epoch": 51.5521978021978, + "grad_norm": 19.633033752441406, + "learning_rate": 2.42239010989011e-05, + "loss": 0.8249, + "step": 18765 + }, + { + "epoch": 51.55494505494506, + "grad_norm": 6.964170455932617, + "learning_rate": 2.422252747252747e-05, + "loss": 0.2222, + "step": 18766 + }, + { + "epoch": 51.55769230769231, + "grad_norm": 18.153255462646484, + "learning_rate": 2.4221153846153845e-05, + "loss": 0.4816, + "step": 18767 + }, + { + "epoch": 51.56043956043956, + "grad_norm": 7.840514659881592, + "learning_rate": 2.421978021978022e-05, + "loss": 0.1341, + "step": 18768 + }, + { + "epoch": 51.56318681318681, + "grad_norm": 9.249896049499512, + "learning_rate": 2.4218406593406595e-05, + "loss": 0.1858, + "step": 18769 + }, + { + "epoch": 51.565934065934066, + "grad_norm": 15.939668655395508, + "learning_rate": 2.421703296703297e-05, + "loss": 0.3459, + "step": 18770 + }, + { + "epoch": 51.56868131868132, + "grad_norm": 19.35074806213379, + "learning_rate": 2.4215659340659342e-05, + "loss": 0.5085, + "step": 18771 + }, + { + "epoch": 51.57142857142857, + "grad_norm": 6.962579250335693, + "learning_rate": 2.4214285714285715e-05, + "loss": 0.0884, + "step": 18772 + }, + { + "epoch": 51.574175824175825, + "grad_norm": 22.580293655395508, + "learning_rate": 2.421291208791209e-05, + "loss": 0.7064, + "step": 18773 + }, + { + "epoch": 51.57692307692308, + "grad_norm": 20.154232025146484, + "learning_rate": 2.4211538461538462e-05, + "loss": 0.5113, + "step": 18774 + }, + { + "epoch": 51.57967032967033, + "grad_norm": 6.743130207061768, + "learning_rate": 2.4210164835164835e-05, + "loss": 0.1492, + "step": 18775 + }, + { + "epoch": 51.582417582417584, + "grad_norm": 7.820840358734131, + "learning_rate": 2.420879120879121e-05, + "loss": 0.2078, + "step": 18776 + }, + { + "epoch": 51.58516483516483, + "grad_norm": 16.41765785217285, + "learning_rate": 2.4207417582417582e-05, + "loss": 0.3644, + "step": 18777 + }, + { + "epoch": 51.58791208791209, + "grad_norm": 13.27090072631836, + "learning_rate": 2.420604395604396e-05, + "loss": 0.2101, + "step": 18778 + }, + { + "epoch": 51.59065934065934, + "grad_norm": 12.19913387298584, + "learning_rate": 2.4204670329670332e-05, + "loss": 0.362, + "step": 18779 + }, + { + "epoch": 51.59340659340659, + "grad_norm": 18.896089553833008, + "learning_rate": 2.4203296703296706e-05, + "loss": 0.3823, + "step": 18780 + }, + { + "epoch": 51.59615384615385, + "grad_norm": 23.370763778686523, + "learning_rate": 2.4201923076923076e-05, + "loss": 0.755, + "step": 18781 + }, + { + "epoch": 51.5989010989011, + "grad_norm": 7.377760410308838, + "learning_rate": 2.420054945054945e-05, + "loss": 0.1237, + "step": 18782 + }, + { + "epoch": 51.60164835164835, + "grad_norm": 12.77538013458252, + "learning_rate": 2.4199175824175826e-05, + "loss": 0.3269, + "step": 18783 + }, + { + "epoch": 51.604395604395606, + "grad_norm": 4.094379425048828, + "learning_rate": 2.41978021978022e-05, + "loss": 0.0717, + "step": 18784 + }, + { + "epoch": 51.607142857142854, + "grad_norm": 8.60322380065918, + "learning_rate": 2.4196428571428573e-05, + "loss": 0.315, + "step": 18785 + }, + { + "epoch": 51.60989010989011, + "grad_norm": 12.195295333862305, + "learning_rate": 2.4195054945054946e-05, + "loss": 0.2199, + "step": 18786 + }, + { + "epoch": 51.612637362637365, + "grad_norm": 6.142305850982666, + "learning_rate": 2.419368131868132e-05, + "loss": 0.1149, + "step": 18787 + }, + { + "epoch": 51.61538461538461, + "grad_norm": 10.644683837890625, + "learning_rate": 2.4192307692307693e-05, + "loss": 0.1576, + "step": 18788 + }, + { + "epoch": 51.61813186813187, + "grad_norm": 11.642865180969238, + "learning_rate": 2.4190934065934067e-05, + "loss": 0.4168, + "step": 18789 + }, + { + "epoch": 51.620879120879124, + "grad_norm": 14.944981575012207, + "learning_rate": 2.418956043956044e-05, + "loss": 0.3111, + "step": 18790 + }, + { + "epoch": 51.62362637362637, + "grad_norm": 7.6075849533081055, + "learning_rate": 2.4188186813186813e-05, + "loss": 0.1223, + "step": 18791 + }, + { + "epoch": 51.62637362637363, + "grad_norm": 21.20457649230957, + "learning_rate": 2.4186813186813187e-05, + "loss": 0.6196, + "step": 18792 + }, + { + "epoch": 51.629120879120876, + "grad_norm": 8.30579948425293, + "learning_rate": 2.4185439560439564e-05, + "loss": 0.2629, + "step": 18793 + }, + { + "epoch": 51.63186813186813, + "grad_norm": 3.045276641845703, + "learning_rate": 2.4184065934065937e-05, + "loss": 0.1796, + "step": 18794 + }, + { + "epoch": 51.63461538461539, + "grad_norm": 21.86692237854004, + "learning_rate": 2.418269230769231e-05, + "loss": 0.4796, + "step": 18795 + }, + { + "epoch": 51.637362637362635, + "grad_norm": 15.596253395080566, + "learning_rate": 2.418131868131868e-05, + "loss": 0.331, + "step": 18796 + }, + { + "epoch": 51.64010989010989, + "grad_norm": 5.81257963180542, + "learning_rate": 2.4179945054945054e-05, + "loss": 0.1024, + "step": 18797 + }, + { + "epoch": 51.642857142857146, + "grad_norm": 5.313624382019043, + "learning_rate": 2.417857142857143e-05, + "loss": 0.1199, + "step": 18798 + }, + { + "epoch": 51.645604395604394, + "grad_norm": 9.894281387329102, + "learning_rate": 2.4177197802197804e-05, + "loss": 0.1825, + "step": 18799 + }, + { + "epoch": 51.64835164835165, + "grad_norm": 6.407942295074463, + "learning_rate": 2.4175824175824177e-05, + "loss": 0.1581, + "step": 18800 + }, + { + "epoch": 51.6510989010989, + "grad_norm": 2.1212055683135986, + "learning_rate": 2.417445054945055e-05, + "loss": 0.0343, + "step": 18801 + }, + { + "epoch": 51.65384615384615, + "grad_norm": 1.9754111766815186, + "learning_rate": 2.4173076923076924e-05, + "loss": 0.0622, + "step": 18802 + }, + { + "epoch": 51.65659340659341, + "grad_norm": 3.5267133712768555, + "learning_rate": 2.4171703296703298e-05, + "loss": 0.0509, + "step": 18803 + }, + { + "epoch": 51.65934065934066, + "grad_norm": 8.316095352172852, + "learning_rate": 2.417032967032967e-05, + "loss": 0.0954, + "step": 18804 + }, + { + "epoch": 51.66208791208791, + "grad_norm": 13.896988868713379, + "learning_rate": 2.4168956043956044e-05, + "loss": 0.21, + "step": 18805 + }, + { + "epoch": 51.66483516483517, + "grad_norm": 23.46578598022461, + "learning_rate": 2.4167582417582418e-05, + "loss": 0.5568, + "step": 18806 + }, + { + "epoch": 51.667582417582416, + "grad_norm": 1.727649450302124, + "learning_rate": 2.416620879120879e-05, + "loss": 0.0299, + "step": 18807 + }, + { + "epoch": 51.67032967032967, + "grad_norm": 12.154504776000977, + "learning_rate": 2.4164835164835168e-05, + "loss": 0.244, + "step": 18808 + }, + { + "epoch": 51.67307692307692, + "grad_norm": 3.889181613922119, + "learning_rate": 2.416346153846154e-05, + "loss": 0.0602, + "step": 18809 + }, + { + "epoch": 51.675824175824175, + "grad_norm": 13.327445030212402, + "learning_rate": 2.4162087912087915e-05, + "loss": 0.394, + "step": 18810 + }, + { + "epoch": 51.67857142857143, + "grad_norm": 10.928811073303223, + "learning_rate": 2.4160714285714285e-05, + "loss": 0.1562, + "step": 18811 + }, + { + "epoch": 51.68131868131868, + "grad_norm": 14.257166862487793, + "learning_rate": 2.415934065934066e-05, + "loss": 0.5278, + "step": 18812 + }, + { + "epoch": 51.684065934065934, + "grad_norm": 9.319940567016602, + "learning_rate": 2.4157967032967035e-05, + "loss": 0.1496, + "step": 18813 + }, + { + "epoch": 51.68681318681319, + "grad_norm": 4.770284175872803, + "learning_rate": 2.415659340659341e-05, + "loss": 0.0754, + "step": 18814 + }, + { + "epoch": 51.68956043956044, + "grad_norm": 17.718172073364258, + "learning_rate": 2.4155219780219782e-05, + "loss": 0.285, + "step": 18815 + }, + { + "epoch": 51.69230769230769, + "grad_norm": 19.712480545043945, + "learning_rate": 2.4153846153846155e-05, + "loss": 0.3692, + "step": 18816 + }, + { + "epoch": 51.69505494505494, + "grad_norm": 3.1046841144561768, + "learning_rate": 2.415247252747253e-05, + "loss": 0.0521, + "step": 18817 + }, + { + "epoch": 51.6978021978022, + "grad_norm": 8.12160873413086, + "learning_rate": 2.4151098901098902e-05, + "loss": 0.1449, + "step": 18818 + }, + { + "epoch": 51.70054945054945, + "grad_norm": 9.17368221282959, + "learning_rate": 2.4149725274725276e-05, + "loss": 0.124, + "step": 18819 + }, + { + "epoch": 51.7032967032967, + "grad_norm": 6.280440330505371, + "learning_rate": 2.414835164835165e-05, + "loss": 0.1221, + "step": 18820 + }, + { + "epoch": 51.706043956043956, + "grad_norm": 17.397241592407227, + "learning_rate": 2.4146978021978022e-05, + "loss": 0.4455, + "step": 18821 + }, + { + "epoch": 51.70879120879121, + "grad_norm": 18.469043731689453, + "learning_rate": 2.4145604395604396e-05, + "loss": 0.3078, + "step": 18822 + }, + { + "epoch": 51.71153846153846, + "grad_norm": 5.58757209777832, + "learning_rate": 2.4144230769230773e-05, + "loss": 0.1142, + "step": 18823 + }, + { + "epoch": 51.714285714285715, + "grad_norm": 8.573395729064941, + "learning_rate": 2.4142857142857146e-05, + "loss": 0.1483, + "step": 18824 + }, + { + "epoch": 51.717032967032964, + "grad_norm": 16.261003494262695, + "learning_rate": 2.414148351648352e-05, + "loss": 0.7492, + "step": 18825 + }, + { + "epoch": 51.71978021978022, + "grad_norm": 9.945527076721191, + "learning_rate": 2.414010989010989e-05, + "loss": 0.1629, + "step": 18826 + }, + { + "epoch": 51.722527472527474, + "grad_norm": 10.010990142822266, + "learning_rate": 2.4138736263736263e-05, + "loss": 0.1662, + "step": 18827 + }, + { + "epoch": 51.72527472527472, + "grad_norm": 5.668041229248047, + "learning_rate": 2.4137362637362636e-05, + "loss": 0.0552, + "step": 18828 + }, + { + "epoch": 51.72802197802198, + "grad_norm": 6.738184928894043, + "learning_rate": 2.4135989010989013e-05, + "loss": 0.1078, + "step": 18829 + }, + { + "epoch": 51.73076923076923, + "grad_norm": 3.6112234592437744, + "learning_rate": 2.4134615384615386e-05, + "loss": 0.0808, + "step": 18830 + }, + { + "epoch": 51.73351648351648, + "grad_norm": 15.109490394592285, + "learning_rate": 2.413324175824176e-05, + "loss": 0.5004, + "step": 18831 + }, + { + "epoch": 51.73626373626374, + "grad_norm": 17.056055068969727, + "learning_rate": 2.4131868131868133e-05, + "loss": 0.4314, + "step": 18832 + }, + { + "epoch": 51.73901098901099, + "grad_norm": 5.339197635650635, + "learning_rate": 2.4130494505494507e-05, + "loss": 0.077, + "step": 18833 + }, + { + "epoch": 51.74175824175824, + "grad_norm": 16.034894943237305, + "learning_rate": 2.412912087912088e-05, + "loss": 0.2405, + "step": 18834 + }, + { + "epoch": 51.744505494505496, + "grad_norm": 8.588306427001953, + "learning_rate": 2.4127747252747253e-05, + "loss": 0.2759, + "step": 18835 + }, + { + "epoch": 51.747252747252745, + "grad_norm": 1.9779595136642456, + "learning_rate": 2.4126373626373627e-05, + "loss": 0.0148, + "step": 18836 + }, + { + "epoch": 51.75, + "grad_norm": 15.94797420501709, + "learning_rate": 2.4125e-05, + "loss": 0.3817, + "step": 18837 + }, + { + "epoch": 51.752747252747255, + "grad_norm": 10.987448692321777, + "learning_rate": 2.4123626373626374e-05, + "loss": 0.1889, + "step": 18838 + }, + { + "epoch": 51.755494505494504, + "grad_norm": 11.11363697052002, + "learning_rate": 2.412225274725275e-05, + "loss": 0.2212, + "step": 18839 + }, + { + "epoch": 51.75824175824176, + "grad_norm": 9.9993314743042, + "learning_rate": 2.4120879120879124e-05, + "loss": 0.2924, + "step": 18840 + }, + { + "epoch": 51.76098901098901, + "grad_norm": 5.664467811584473, + "learning_rate": 2.4119505494505494e-05, + "loss": 0.0756, + "step": 18841 + }, + { + "epoch": 51.76373626373626, + "grad_norm": 16.917495727539062, + "learning_rate": 2.4118131868131867e-05, + "loss": 0.4505, + "step": 18842 + }, + { + "epoch": 51.76648351648352, + "grad_norm": 3.086284637451172, + "learning_rate": 2.411675824175824e-05, + "loss": 0.0764, + "step": 18843 + }, + { + "epoch": 51.76923076923077, + "grad_norm": 13.635149002075195, + "learning_rate": 2.4115384615384617e-05, + "loss": 0.3417, + "step": 18844 + }, + { + "epoch": 51.77197802197802, + "grad_norm": 5.597698211669922, + "learning_rate": 2.411401098901099e-05, + "loss": 0.1507, + "step": 18845 + }, + { + "epoch": 51.77472527472528, + "grad_norm": 20.214370727539062, + "learning_rate": 2.4112637362637364e-05, + "loss": 0.692, + "step": 18846 + }, + { + "epoch": 51.777472527472526, + "grad_norm": 18.316829681396484, + "learning_rate": 2.4111263736263738e-05, + "loss": 0.2575, + "step": 18847 + }, + { + "epoch": 51.78021978021978, + "grad_norm": 6.239499568939209, + "learning_rate": 2.410989010989011e-05, + "loss": 0.139, + "step": 18848 + }, + { + "epoch": 51.782967032967036, + "grad_norm": 8.162506103515625, + "learning_rate": 2.4108516483516485e-05, + "loss": 0.095, + "step": 18849 + }, + { + "epoch": 51.785714285714285, + "grad_norm": 11.313274383544922, + "learning_rate": 2.4107142857142858e-05, + "loss": 0.2036, + "step": 18850 + }, + { + "epoch": 51.78846153846154, + "grad_norm": 5.394557952880859, + "learning_rate": 2.410576923076923e-05, + "loss": 0.0665, + "step": 18851 + }, + { + "epoch": 51.79120879120879, + "grad_norm": 10.975240707397461, + "learning_rate": 2.4104395604395605e-05, + "loss": 0.2273, + "step": 18852 + }, + { + "epoch": 51.793956043956044, + "grad_norm": 8.911018371582031, + "learning_rate": 2.4103021978021978e-05, + "loss": 0.1036, + "step": 18853 + }, + { + "epoch": 51.7967032967033, + "grad_norm": 8.161556243896484, + "learning_rate": 2.4101648351648355e-05, + "loss": 0.0822, + "step": 18854 + }, + { + "epoch": 51.79945054945055, + "grad_norm": 13.491460800170898, + "learning_rate": 2.410027472527473e-05, + "loss": 0.1652, + "step": 18855 + }, + { + "epoch": 51.8021978021978, + "grad_norm": 10.558555603027344, + "learning_rate": 2.40989010989011e-05, + "loss": 0.1068, + "step": 18856 + }, + { + "epoch": 51.80494505494506, + "grad_norm": 12.253747940063477, + "learning_rate": 2.4097527472527472e-05, + "loss": 0.3525, + "step": 18857 + }, + { + "epoch": 51.80769230769231, + "grad_norm": 10.396452903747559, + "learning_rate": 2.4096153846153845e-05, + "loss": 0.2375, + "step": 18858 + }, + { + "epoch": 51.81043956043956, + "grad_norm": 15.43228816986084, + "learning_rate": 2.4094780219780222e-05, + "loss": 0.3761, + "step": 18859 + }, + { + "epoch": 51.81318681318681, + "grad_norm": 18.993947982788086, + "learning_rate": 2.4093406593406595e-05, + "loss": 0.3199, + "step": 18860 + }, + { + "epoch": 51.815934065934066, + "grad_norm": 10.514355659484863, + "learning_rate": 2.409203296703297e-05, + "loss": 0.1599, + "step": 18861 + }, + { + "epoch": 51.81868131868132, + "grad_norm": 11.068185806274414, + "learning_rate": 2.4090659340659342e-05, + "loss": 0.1889, + "step": 18862 + }, + { + "epoch": 51.82142857142857, + "grad_norm": 14.805119514465332, + "learning_rate": 2.4089285714285716e-05, + "loss": 0.397, + "step": 18863 + }, + { + "epoch": 51.824175824175825, + "grad_norm": 13.607007026672363, + "learning_rate": 2.408791208791209e-05, + "loss": 0.5688, + "step": 18864 + }, + { + "epoch": 51.82692307692308, + "grad_norm": 16.737154006958008, + "learning_rate": 2.4086538461538462e-05, + "loss": 0.534, + "step": 18865 + }, + { + "epoch": 51.82967032967033, + "grad_norm": 17.702613830566406, + "learning_rate": 2.4085164835164836e-05, + "loss": 0.3819, + "step": 18866 + }, + { + "epoch": 51.832417582417584, + "grad_norm": 8.349672317504883, + "learning_rate": 2.408379120879121e-05, + "loss": 0.2548, + "step": 18867 + }, + { + "epoch": 51.83516483516483, + "grad_norm": 5.068817138671875, + "learning_rate": 2.4082417582417583e-05, + "loss": 0.0982, + "step": 18868 + }, + { + "epoch": 51.83791208791209, + "grad_norm": 4.936983108520508, + "learning_rate": 2.408104395604396e-05, + "loss": 0.123, + "step": 18869 + }, + { + "epoch": 51.84065934065934, + "grad_norm": 5.872823715209961, + "learning_rate": 2.4079670329670333e-05, + "loss": 0.0642, + "step": 18870 + }, + { + "epoch": 51.84340659340659, + "grad_norm": 10.37586784362793, + "learning_rate": 2.4078296703296703e-05, + "loss": 0.1381, + "step": 18871 + }, + { + "epoch": 51.84615384615385, + "grad_norm": 9.61363697052002, + "learning_rate": 2.4076923076923076e-05, + "loss": 0.1183, + "step": 18872 + }, + { + "epoch": 51.8489010989011, + "grad_norm": 14.596420288085938, + "learning_rate": 2.407554945054945e-05, + "loss": 0.2039, + "step": 18873 + }, + { + "epoch": 51.85164835164835, + "grad_norm": 11.873197555541992, + "learning_rate": 2.4074175824175826e-05, + "loss": 0.2572, + "step": 18874 + }, + { + "epoch": 51.854395604395606, + "grad_norm": 7.436465740203857, + "learning_rate": 2.40728021978022e-05, + "loss": 0.104, + "step": 18875 + }, + { + "epoch": 51.857142857142854, + "grad_norm": 8.025407791137695, + "learning_rate": 2.4071428571428573e-05, + "loss": 0.2013, + "step": 18876 + }, + { + "epoch": 51.85989010989011, + "grad_norm": 15.764289855957031, + "learning_rate": 2.4070054945054947e-05, + "loss": 0.3559, + "step": 18877 + }, + { + "epoch": 51.862637362637365, + "grad_norm": 11.70983600616455, + "learning_rate": 2.406868131868132e-05, + "loss": 0.2567, + "step": 18878 + }, + { + "epoch": 51.86538461538461, + "grad_norm": 16.943614959716797, + "learning_rate": 2.4067307692307694e-05, + "loss": 0.3279, + "step": 18879 + }, + { + "epoch": 51.86813186813187, + "grad_norm": 14.279805183410645, + "learning_rate": 2.4065934065934067e-05, + "loss": 0.2065, + "step": 18880 + }, + { + "epoch": 51.870879120879124, + "grad_norm": 15.865132331848145, + "learning_rate": 2.406456043956044e-05, + "loss": 0.3574, + "step": 18881 + }, + { + "epoch": 51.87362637362637, + "grad_norm": 5.611038684844971, + "learning_rate": 2.4063186813186814e-05, + "loss": 0.0686, + "step": 18882 + }, + { + "epoch": 51.87637362637363, + "grad_norm": 17.43079376220703, + "learning_rate": 2.4061813186813187e-05, + "loss": 0.5746, + "step": 18883 + }, + { + "epoch": 51.879120879120876, + "grad_norm": 13.3075590133667, + "learning_rate": 2.4060439560439564e-05, + "loss": 0.2745, + "step": 18884 + }, + { + "epoch": 51.88186813186813, + "grad_norm": 9.57225227355957, + "learning_rate": 2.4059065934065937e-05, + "loss": 0.0894, + "step": 18885 + }, + { + "epoch": 51.88461538461539, + "grad_norm": 6.268735408782959, + "learning_rate": 2.4057692307692307e-05, + "loss": 0.091, + "step": 18886 + }, + { + "epoch": 51.887362637362635, + "grad_norm": 6.644599437713623, + "learning_rate": 2.405631868131868e-05, + "loss": 0.1605, + "step": 18887 + }, + { + "epoch": 51.89010989010989, + "grad_norm": 17.042640686035156, + "learning_rate": 2.4054945054945054e-05, + "loss": 0.2411, + "step": 18888 + }, + { + "epoch": 51.892857142857146, + "grad_norm": 12.025150299072266, + "learning_rate": 2.405357142857143e-05, + "loss": 0.2037, + "step": 18889 + }, + { + "epoch": 51.895604395604394, + "grad_norm": 11.698531150817871, + "learning_rate": 2.4052197802197804e-05, + "loss": 0.2022, + "step": 18890 + }, + { + "epoch": 51.89835164835165, + "grad_norm": 19.385223388671875, + "learning_rate": 2.4050824175824178e-05, + "loss": 0.4133, + "step": 18891 + }, + { + "epoch": 51.9010989010989, + "grad_norm": 9.831689834594727, + "learning_rate": 2.404945054945055e-05, + "loss": 0.1867, + "step": 18892 + }, + { + "epoch": 51.90384615384615, + "grad_norm": 5.440754413604736, + "learning_rate": 2.4048076923076925e-05, + "loss": 0.0945, + "step": 18893 + }, + { + "epoch": 51.90659340659341, + "grad_norm": 21.442577362060547, + "learning_rate": 2.4046703296703298e-05, + "loss": 0.5627, + "step": 18894 + }, + { + "epoch": 51.90934065934066, + "grad_norm": 11.022895812988281, + "learning_rate": 2.404532967032967e-05, + "loss": 0.241, + "step": 18895 + }, + { + "epoch": 51.91208791208791, + "grad_norm": 18.54766273498535, + "learning_rate": 2.4043956043956045e-05, + "loss": 0.4776, + "step": 18896 + }, + { + "epoch": 51.91483516483517, + "grad_norm": 16.236547470092773, + "learning_rate": 2.4042582417582418e-05, + "loss": 0.4648, + "step": 18897 + }, + { + "epoch": 51.917582417582416, + "grad_norm": 17.356597900390625, + "learning_rate": 2.404120879120879e-05, + "loss": 0.499, + "step": 18898 + }, + { + "epoch": 51.92032967032967, + "grad_norm": 16.507938385009766, + "learning_rate": 2.403983516483517e-05, + "loss": 0.3431, + "step": 18899 + }, + { + "epoch": 51.92307692307692, + "grad_norm": 19.11421775817871, + "learning_rate": 2.4038461538461542e-05, + "loss": 0.474, + "step": 18900 + }, + { + "epoch": 51.925824175824175, + "grad_norm": 7.229626655578613, + "learning_rate": 2.4037087912087912e-05, + "loss": 0.1975, + "step": 18901 + }, + { + "epoch": 51.92857142857143, + "grad_norm": 8.334163665771484, + "learning_rate": 2.4035714285714285e-05, + "loss": 0.197, + "step": 18902 + }, + { + "epoch": 51.93131868131868, + "grad_norm": 10.174251556396484, + "learning_rate": 2.403434065934066e-05, + "loss": 0.2363, + "step": 18903 + }, + { + "epoch": 51.934065934065934, + "grad_norm": 4.0308380126953125, + "learning_rate": 2.4032967032967035e-05, + "loss": 0.0617, + "step": 18904 + }, + { + "epoch": 51.93681318681319, + "grad_norm": 5.387941360473633, + "learning_rate": 2.403159340659341e-05, + "loss": 0.0573, + "step": 18905 + }, + { + "epoch": 51.93956043956044, + "grad_norm": 6.878311634063721, + "learning_rate": 2.4030219780219782e-05, + "loss": 0.1626, + "step": 18906 + }, + { + "epoch": 51.94230769230769, + "grad_norm": 16.001733779907227, + "learning_rate": 2.4028846153846156e-05, + "loss": 0.5322, + "step": 18907 + }, + { + "epoch": 51.94505494505494, + "grad_norm": 16.663644790649414, + "learning_rate": 2.402747252747253e-05, + "loss": 0.4973, + "step": 18908 + }, + { + "epoch": 51.9478021978022, + "grad_norm": 26.358972549438477, + "learning_rate": 2.4026098901098902e-05, + "loss": 0.6387, + "step": 18909 + }, + { + "epoch": 51.95054945054945, + "grad_norm": 14.776972770690918, + "learning_rate": 2.4024725274725276e-05, + "loss": 0.3735, + "step": 18910 + }, + { + "epoch": 51.9532967032967, + "grad_norm": 5.637134552001953, + "learning_rate": 2.402335164835165e-05, + "loss": 0.1069, + "step": 18911 + }, + { + "epoch": 51.956043956043956, + "grad_norm": 9.206908226013184, + "learning_rate": 2.4021978021978023e-05, + "loss": 0.1645, + "step": 18912 + }, + { + "epoch": 51.95879120879121, + "grad_norm": 16.964418411254883, + "learning_rate": 2.4020604395604396e-05, + "loss": 0.3435, + "step": 18913 + }, + { + "epoch": 51.96153846153846, + "grad_norm": 8.106393814086914, + "learning_rate": 2.4019230769230773e-05, + "loss": 0.092, + "step": 18914 + }, + { + "epoch": 51.964285714285715, + "grad_norm": 11.942667961120605, + "learning_rate": 2.4017857142857146e-05, + "loss": 0.1918, + "step": 18915 + }, + { + "epoch": 51.967032967032964, + "grad_norm": 12.277341842651367, + "learning_rate": 2.4016483516483516e-05, + "loss": 0.2893, + "step": 18916 + }, + { + "epoch": 51.96978021978022, + "grad_norm": 15.067891120910645, + "learning_rate": 2.401510989010989e-05, + "loss": 0.2636, + "step": 18917 + }, + { + "epoch": 51.972527472527474, + "grad_norm": 29.297096252441406, + "learning_rate": 2.4013736263736263e-05, + "loss": 0.8267, + "step": 18918 + }, + { + "epoch": 51.97527472527472, + "grad_norm": 20.061159133911133, + "learning_rate": 2.401236263736264e-05, + "loss": 0.6237, + "step": 18919 + }, + { + "epoch": 51.97802197802198, + "grad_norm": 4.638757705688477, + "learning_rate": 2.4010989010989013e-05, + "loss": 0.0686, + "step": 18920 + }, + { + "epoch": 51.98076923076923, + "grad_norm": 2.4602341651916504, + "learning_rate": 2.4009615384615387e-05, + "loss": 0.0276, + "step": 18921 + }, + { + "epoch": 51.98351648351648, + "grad_norm": 17.4133358001709, + "learning_rate": 2.400824175824176e-05, + "loss": 0.3764, + "step": 18922 + }, + { + "epoch": 51.98626373626374, + "grad_norm": 15.210431098937988, + "learning_rate": 2.4006868131868134e-05, + "loss": 0.4458, + "step": 18923 + }, + { + "epoch": 51.98901098901099, + "grad_norm": 17.725078582763672, + "learning_rate": 2.4005494505494507e-05, + "loss": 0.3382, + "step": 18924 + }, + { + "epoch": 51.99175824175824, + "grad_norm": 13.821084022521973, + "learning_rate": 2.400412087912088e-05, + "loss": 0.1941, + "step": 18925 + }, + { + "epoch": 51.994505494505496, + "grad_norm": 12.911752700805664, + "learning_rate": 2.4002747252747254e-05, + "loss": 0.1442, + "step": 18926 + }, + { + "epoch": 51.997252747252745, + "grad_norm": 11.030359268188477, + "learning_rate": 2.4001373626373627e-05, + "loss": 0.2663, + "step": 18927 + }, + { + "epoch": 52.0, + "grad_norm": 8.189894676208496, + "learning_rate": 2.4e-05, + "loss": 0.079, + "step": 18928 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.768595041322314, + "eval_f1": 0.7711653951159282, + "eval_f1_DuraRiadoRio_64x64": 0.7272727272727273, + "eval_f1_Mole_64x64": 0.8130081300813008, + "eval_f1_Quebrado_64x64": 0.8045977011494253, + "eval_f1_RiadoRio_64x64": 0.7062146892655368, + "eval_f1_RioFechado_64x64": 0.8047337278106509, + "eval_loss": 0.9145449995994568, + "eval_precision": 0.8069108367509752, + "eval_precision_DuraRiadoRio_64x64": 0.8440366972477065, + "eval_precision_Mole_64x64": 0.9803921568627451, + "eval_precision_Quebrado_64x64": 0.8974358974358975, + "eval_precision_RiadoRio_64x64": 0.6188118811881188, + "eval_precision_RioFechado_64x64": 0.6938775510204082, + "eval_recall": 0.7685229799851742, + "eval_recall_DuraRiadoRio_64x64": 0.6388888888888888, + "eval_recall_Mole_64x64": 0.6944444444444444, + "eval_recall_Quebrado_64x64": 0.7291666666666666, + "eval_recall_RiadoRio_64x64": 0.8223684210526315, + "eval_recall_RioFechado_64x64": 0.9577464788732394, + "eval_runtime": 1.717, + "eval_samples_per_second": 422.829, + "eval_steps_per_second": 26.791, + "step": 18928 + }, + { + "epoch": 52.002747252747255, + "grad_norm": 6.530884265899658, + "learning_rate": 2.3998626373626377e-05, + "loss": 0.1207, + "step": 18929 + }, + { + "epoch": 52.005494505494504, + "grad_norm": 13.018933296203613, + "learning_rate": 2.399725274725275e-05, + "loss": 0.2354, + "step": 18930 + }, + { + "epoch": 52.00824175824176, + "grad_norm": 22.17930030822754, + "learning_rate": 2.399587912087912e-05, + "loss": 0.4905, + "step": 18931 + }, + { + "epoch": 52.010989010989015, + "grad_norm": 8.186440467834473, + "learning_rate": 2.3994505494505494e-05, + "loss": 0.1971, + "step": 18932 + }, + { + "epoch": 52.01373626373626, + "grad_norm": 15.047454833984375, + "learning_rate": 2.3993131868131868e-05, + "loss": 0.5222, + "step": 18933 + }, + { + "epoch": 52.01648351648352, + "grad_norm": 16.165674209594727, + "learning_rate": 2.3991758241758244e-05, + "loss": 0.4105, + "step": 18934 + }, + { + "epoch": 52.01923076923077, + "grad_norm": 7.527649402618408, + "learning_rate": 2.3990384615384618e-05, + "loss": 0.1046, + "step": 18935 + }, + { + "epoch": 52.02197802197802, + "grad_norm": 17.903779983520508, + "learning_rate": 2.398901098901099e-05, + "loss": 0.343, + "step": 18936 + }, + { + "epoch": 52.02472527472528, + "grad_norm": 10.337602615356445, + "learning_rate": 2.3987637362637365e-05, + "loss": 0.1189, + "step": 18937 + }, + { + "epoch": 52.027472527472526, + "grad_norm": 8.088327407836914, + "learning_rate": 2.3986263736263738e-05, + "loss": 0.1332, + "step": 18938 + }, + { + "epoch": 52.03021978021978, + "grad_norm": 12.501239776611328, + "learning_rate": 2.398489010989011e-05, + "loss": 0.1716, + "step": 18939 + }, + { + "epoch": 52.032967032967036, + "grad_norm": 13.329607009887695, + "learning_rate": 2.3983516483516485e-05, + "loss": 0.2492, + "step": 18940 + }, + { + "epoch": 52.035714285714285, + "grad_norm": 6.586704730987549, + "learning_rate": 2.3982142857142858e-05, + "loss": 0.0571, + "step": 18941 + }, + { + "epoch": 52.03846153846154, + "grad_norm": 8.672176361083984, + "learning_rate": 2.398076923076923e-05, + "loss": 0.1039, + "step": 18942 + }, + { + "epoch": 52.04120879120879, + "grad_norm": 6.304851055145264, + "learning_rate": 2.3979395604395605e-05, + "loss": 0.0739, + "step": 18943 + }, + { + "epoch": 52.043956043956044, + "grad_norm": 6.1476616859436035, + "learning_rate": 2.397802197802198e-05, + "loss": 0.1368, + "step": 18944 + }, + { + "epoch": 52.0467032967033, + "grad_norm": 8.432000160217285, + "learning_rate": 2.3976648351648355e-05, + "loss": 0.1484, + "step": 18945 + }, + { + "epoch": 52.04945054945055, + "grad_norm": 19.469247817993164, + "learning_rate": 2.3975274725274725e-05, + "loss": 0.3339, + "step": 18946 + }, + { + "epoch": 52.0521978021978, + "grad_norm": 7.257696151733398, + "learning_rate": 2.39739010989011e-05, + "loss": 0.1304, + "step": 18947 + }, + { + "epoch": 52.05494505494506, + "grad_norm": 7.172366619110107, + "learning_rate": 2.3972527472527472e-05, + "loss": 0.0997, + "step": 18948 + }, + { + "epoch": 52.05769230769231, + "grad_norm": 21.426189422607422, + "learning_rate": 2.3971153846153846e-05, + "loss": 0.3883, + "step": 18949 + }, + { + "epoch": 52.06043956043956, + "grad_norm": 10.04109001159668, + "learning_rate": 2.3969780219780222e-05, + "loss": 0.1889, + "step": 18950 + }, + { + "epoch": 52.06318681318681, + "grad_norm": 5.993383884429932, + "learning_rate": 2.3968406593406596e-05, + "loss": 0.1391, + "step": 18951 + }, + { + "epoch": 52.065934065934066, + "grad_norm": 19.449832916259766, + "learning_rate": 2.396703296703297e-05, + "loss": 0.3649, + "step": 18952 + }, + { + "epoch": 52.06868131868132, + "grad_norm": 3.479780673980713, + "learning_rate": 2.3965659340659343e-05, + "loss": 0.0723, + "step": 18953 + }, + { + "epoch": 52.07142857142857, + "grad_norm": 12.900141716003418, + "learning_rate": 2.3964285714285713e-05, + "loss": 0.3237, + "step": 18954 + }, + { + "epoch": 52.074175824175825, + "grad_norm": 13.100940704345703, + "learning_rate": 2.396291208791209e-05, + "loss": 0.2636, + "step": 18955 + }, + { + "epoch": 52.07692307692308, + "grad_norm": 11.31853199005127, + "learning_rate": 2.3961538461538463e-05, + "loss": 0.2719, + "step": 18956 + }, + { + "epoch": 52.07967032967033, + "grad_norm": 21.331018447875977, + "learning_rate": 2.3960164835164836e-05, + "loss": 0.3351, + "step": 18957 + }, + { + "epoch": 52.082417582417584, + "grad_norm": 21.309223175048828, + "learning_rate": 2.395879120879121e-05, + "loss": 0.4925, + "step": 18958 + }, + { + "epoch": 52.08516483516483, + "grad_norm": 12.484883308410645, + "learning_rate": 2.3957417582417583e-05, + "loss": 0.2891, + "step": 18959 + }, + { + "epoch": 52.08791208791209, + "grad_norm": 16.143522262573242, + "learning_rate": 2.395604395604396e-05, + "loss": 0.3505, + "step": 18960 + }, + { + "epoch": 52.09065934065934, + "grad_norm": 19.626068115234375, + "learning_rate": 2.395467032967033e-05, + "loss": 0.4064, + "step": 18961 + }, + { + "epoch": 52.09340659340659, + "grad_norm": 8.50545597076416, + "learning_rate": 2.3953296703296703e-05, + "loss": 0.1345, + "step": 18962 + }, + { + "epoch": 52.09615384615385, + "grad_norm": 14.771903038024902, + "learning_rate": 2.3951923076923077e-05, + "loss": 0.4037, + "step": 18963 + }, + { + "epoch": 52.0989010989011, + "grad_norm": 2.147164821624756, + "learning_rate": 2.395054945054945e-05, + "loss": 0.0363, + "step": 18964 + }, + { + "epoch": 52.10164835164835, + "grad_norm": 8.093086242675781, + "learning_rate": 2.3949175824175827e-05, + "loss": 0.0734, + "step": 18965 + }, + { + "epoch": 52.104395604395606, + "grad_norm": 23.744382858276367, + "learning_rate": 2.39478021978022e-05, + "loss": 1.091, + "step": 18966 + }, + { + "epoch": 52.107142857142854, + "grad_norm": 6.460629463195801, + "learning_rate": 2.3946428571428574e-05, + "loss": 0.1887, + "step": 18967 + }, + { + "epoch": 52.10989010989011, + "grad_norm": 15.214344024658203, + "learning_rate": 2.3945054945054947e-05, + "loss": 0.275, + "step": 18968 + }, + { + "epoch": 52.112637362637365, + "grad_norm": 9.68694019317627, + "learning_rate": 2.3943681318681317e-05, + "loss": 0.2597, + "step": 18969 + }, + { + "epoch": 52.11538461538461, + "grad_norm": 20.319358825683594, + "learning_rate": 2.3942307692307694e-05, + "loss": 0.6193, + "step": 18970 + }, + { + "epoch": 52.11813186813187, + "grad_norm": 5.6575703620910645, + "learning_rate": 2.3940934065934067e-05, + "loss": 0.0726, + "step": 18971 + }, + { + "epoch": 52.120879120879124, + "grad_norm": 17.27017593383789, + "learning_rate": 2.393956043956044e-05, + "loss": 0.4816, + "step": 18972 + }, + { + "epoch": 52.12362637362637, + "grad_norm": 17.206565856933594, + "learning_rate": 2.3938186813186814e-05, + "loss": 0.3839, + "step": 18973 + }, + { + "epoch": 52.12637362637363, + "grad_norm": 19.73472023010254, + "learning_rate": 2.3936813186813187e-05, + "loss": 0.3895, + "step": 18974 + }, + { + "epoch": 52.129120879120876, + "grad_norm": 17.6529483795166, + "learning_rate": 2.3935439560439564e-05, + "loss": 0.4064, + "step": 18975 + }, + { + "epoch": 52.13186813186813, + "grad_norm": 13.243268966674805, + "learning_rate": 2.3934065934065934e-05, + "loss": 0.2071, + "step": 18976 + }, + { + "epoch": 52.13461538461539, + "grad_norm": 13.453102111816406, + "learning_rate": 2.3932692307692308e-05, + "loss": 0.2271, + "step": 18977 + }, + { + "epoch": 52.137362637362635, + "grad_norm": 16.719280242919922, + "learning_rate": 2.393131868131868e-05, + "loss": 0.4775, + "step": 18978 + }, + { + "epoch": 52.14010989010989, + "grad_norm": 4.327800273895264, + "learning_rate": 2.3929945054945055e-05, + "loss": 0.0632, + "step": 18979 + }, + { + "epoch": 52.142857142857146, + "grad_norm": 7.698265075683594, + "learning_rate": 2.392857142857143e-05, + "loss": 0.1133, + "step": 18980 + }, + { + "epoch": 52.145604395604394, + "grad_norm": 16.050519943237305, + "learning_rate": 2.3927197802197805e-05, + "loss": 0.8018, + "step": 18981 + }, + { + "epoch": 52.14835164835165, + "grad_norm": 13.676551818847656, + "learning_rate": 2.3925824175824178e-05, + "loss": 0.3368, + "step": 18982 + }, + { + "epoch": 52.1510989010989, + "grad_norm": 12.834543228149414, + "learning_rate": 2.392445054945055e-05, + "loss": 0.2164, + "step": 18983 + }, + { + "epoch": 52.15384615384615, + "grad_norm": 21.749008178710938, + "learning_rate": 2.392307692307692e-05, + "loss": 0.4657, + "step": 18984 + }, + { + "epoch": 52.15659340659341, + "grad_norm": 7.828591346740723, + "learning_rate": 2.39217032967033e-05, + "loss": 0.1797, + "step": 18985 + }, + { + "epoch": 52.15934065934066, + "grad_norm": 6.144920349121094, + "learning_rate": 2.3920329670329672e-05, + "loss": 0.0517, + "step": 18986 + }, + { + "epoch": 52.16208791208791, + "grad_norm": 8.56581974029541, + "learning_rate": 2.3918956043956045e-05, + "loss": 0.1667, + "step": 18987 + }, + { + "epoch": 52.16483516483517, + "grad_norm": 11.36721134185791, + "learning_rate": 2.391758241758242e-05, + "loss": 0.2327, + "step": 18988 + }, + { + "epoch": 52.167582417582416, + "grad_norm": 9.646634101867676, + "learning_rate": 2.3916208791208792e-05, + "loss": 0.1725, + "step": 18989 + }, + { + "epoch": 52.17032967032967, + "grad_norm": 10.8859281539917, + "learning_rate": 2.3914835164835165e-05, + "loss": 0.1691, + "step": 18990 + }, + { + "epoch": 52.17307692307692, + "grad_norm": 29.75117301940918, + "learning_rate": 2.391346153846154e-05, + "loss": 0.9638, + "step": 18991 + }, + { + "epoch": 52.175824175824175, + "grad_norm": 19.184114456176758, + "learning_rate": 2.3912087912087912e-05, + "loss": 0.4896, + "step": 18992 + }, + { + "epoch": 52.17857142857143, + "grad_norm": 7.965236186981201, + "learning_rate": 2.3910714285714286e-05, + "loss": 0.1213, + "step": 18993 + }, + { + "epoch": 52.18131868131868, + "grad_norm": 12.791838645935059, + "learning_rate": 2.390934065934066e-05, + "loss": 0.2141, + "step": 18994 + }, + { + "epoch": 52.184065934065934, + "grad_norm": 9.0584077835083, + "learning_rate": 2.3907967032967036e-05, + "loss": 0.0601, + "step": 18995 + }, + { + "epoch": 52.18681318681319, + "grad_norm": 9.641088485717773, + "learning_rate": 2.390659340659341e-05, + "loss": 0.2109, + "step": 18996 + }, + { + "epoch": 52.18956043956044, + "grad_norm": 11.614709854125977, + "learning_rate": 2.3905219780219783e-05, + "loss": 0.2472, + "step": 18997 + }, + { + "epoch": 52.19230769230769, + "grad_norm": 5.415147304534912, + "learning_rate": 2.3903846153846156e-05, + "loss": 0.0985, + "step": 18998 + }, + { + "epoch": 52.19505494505494, + "grad_norm": 6.3198089599609375, + "learning_rate": 2.3902472527472526e-05, + "loss": 0.0723, + "step": 18999 + }, + { + "epoch": 52.1978021978022, + "grad_norm": 12.417567253112793, + "learning_rate": 2.3901098901098903e-05, + "loss": 0.1288, + "step": 19000 + }, + { + "epoch": 52.20054945054945, + "grad_norm": 11.314916610717773, + "learning_rate": 2.3899725274725276e-05, + "loss": 0.2239, + "step": 19001 + }, + { + "epoch": 52.2032967032967, + "grad_norm": 6.461954593658447, + "learning_rate": 2.389835164835165e-05, + "loss": 0.1541, + "step": 19002 + }, + { + "epoch": 52.206043956043956, + "grad_norm": 3.370823621749878, + "learning_rate": 2.3896978021978023e-05, + "loss": 0.088, + "step": 19003 + }, + { + "epoch": 52.20879120879121, + "grad_norm": 13.901491165161133, + "learning_rate": 2.3895604395604396e-05, + "loss": 0.2697, + "step": 19004 + }, + { + "epoch": 52.21153846153846, + "grad_norm": 6.1184468269348145, + "learning_rate": 2.389423076923077e-05, + "loss": 0.177, + "step": 19005 + }, + { + "epoch": 52.214285714285715, + "grad_norm": 16.45943260192871, + "learning_rate": 2.3892857142857143e-05, + "loss": 0.416, + "step": 19006 + }, + { + "epoch": 52.217032967032964, + "grad_norm": 2.554141044616699, + "learning_rate": 2.3891483516483517e-05, + "loss": 0.0963, + "step": 19007 + }, + { + "epoch": 52.21978021978022, + "grad_norm": 11.10258674621582, + "learning_rate": 2.389010989010989e-05, + "loss": 0.1297, + "step": 19008 + }, + { + "epoch": 52.222527472527474, + "grad_norm": 5.913377285003662, + "learning_rate": 2.3888736263736264e-05, + "loss": 0.065, + "step": 19009 + }, + { + "epoch": 52.22527472527472, + "grad_norm": 20.066112518310547, + "learning_rate": 2.388736263736264e-05, + "loss": 0.417, + "step": 19010 + }, + { + "epoch": 52.22802197802198, + "grad_norm": 17.958627700805664, + "learning_rate": 2.3885989010989014e-05, + "loss": 0.6792, + "step": 19011 + }, + { + "epoch": 52.23076923076923, + "grad_norm": 4.06820011138916, + "learning_rate": 2.3884615384615387e-05, + "loss": 0.0725, + "step": 19012 + }, + { + "epoch": 52.23351648351648, + "grad_norm": 10.44114875793457, + "learning_rate": 2.388324175824176e-05, + "loss": 0.3609, + "step": 19013 + }, + { + "epoch": 52.23626373626374, + "grad_norm": 5.726219654083252, + "learning_rate": 2.388186813186813e-05, + "loss": 0.1429, + "step": 19014 + }, + { + "epoch": 52.239010989010985, + "grad_norm": 2.888622760772705, + "learning_rate": 2.3880494505494507e-05, + "loss": 0.0455, + "step": 19015 + }, + { + "epoch": 52.24175824175824, + "grad_norm": 14.436322212219238, + "learning_rate": 2.387912087912088e-05, + "loss": 0.2497, + "step": 19016 + }, + { + "epoch": 52.244505494505496, + "grad_norm": 13.258448600769043, + "learning_rate": 2.3877747252747254e-05, + "loss": 0.4359, + "step": 19017 + }, + { + "epoch": 52.247252747252745, + "grad_norm": 20.493165969848633, + "learning_rate": 2.3876373626373628e-05, + "loss": 0.3602, + "step": 19018 + }, + { + "epoch": 52.25, + "grad_norm": 20.308094024658203, + "learning_rate": 2.3875e-05, + "loss": 0.4628, + "step": 19019 + }, + { + "epoch": 52.252747252747255, + "grad_norm": 9.126312255859375, + "learning_rate": 2.3873626373626374e-05, + "loss": 0.1495, + "step": 19020 + }, + { + "epoch": 52.255494505494504, + "grad_norm": 12.935785293579102, + "learning_rate": 2.3872252747252748e-05, + "loss": 0.3064, + "step": 19021 + }, + { + "epoch": 52.25824175824176, + "grad_norm": 9.02233600616455, + "learning_rate": 2.387087912087912e-05, + "loss": 0.1133, + "step": 19022 + }, + { + "epoch": 52.260989010989015, + "grad_norm": 19.154556274414062, + "learning_rate": 2.3869505494505495e-05, + "loss": 0.3775, + "step": 19023 + }, + { + "epoch": 52.26373626373626, + "grad_norm": 14.626852035522461, + "learning_rate": 2.3868131868131868e-05, + "loss": 0.3286, + "step": 19024 + }, + { + "epoch": 52.26648351648352, + "grad_norm": 25.67448616027832, + "learning_rate": 2.3866758241758245e-05, + "loss": 0.852, + "step": 19025 + }, + { + "epoch": 52.26923076923077, + "grad_norm": 5.652362823486328, + "learning_rate": 2.3865384615384618e-05, + "loss": 0.0377, + "step": 19026 + }, + { + "epoch": 52.27197802197802, + "grad_norm": 12.444112777709961, + "learning_rate": 2.386401098901099e-05, + "loss": 0.2452, + "step": 19027 + }, + { + "epoch": 52.27472527472528, + "grad_norm": 9.846888542175293, + "learning_rate": 2.3862637362637365e-05, + "loss": 0.1298, + "step": 19028 + }, + { + "epoch": 52.277472527472526, + "grad_norm": 13.424083709716797, + "learning_rate": 2.3861263736263735e-05, + "loss": 0.1995, + "step": 19029 + }, + { + "epoch": 52.28021978021978, + "grad_norm": 9.65191650390625, + "learning_rate": 2.3859890109890112e-05, + "loss": 0.179, + "step": 19030 + }, + { + "epoch": 52.282967032967036, + "grad_norm": 4.460515975952148, + "learning_rate": 2.3858516483516485e-05, + "loss": 0.0471, + "step": 19031 + }, + { + "epoch": 52.285714285714285, + "grad_norm": 16.5091609954834, + "learning_rate": 2.385714285714286e-05, + "loss": 0.1984, + "step": 19032 + }, + { + "epoch": 52.28846153846154, + "grad_norm": 6.868923664093018, + "learning_rate": 2.3855769230769232e-05, + "loss": 0.1522, + "step": 19033 + }, + { + "epoch": 52.29120879120879, + "grad_norm": 9.431503295898438, + "learning_rate": 2.3854395604395605e-05, + "loss": 0.2133, + "step": 19034 + }, + { + "epoch": 52.293956043956044, + "grad_norm": 2.879110097885132, + "learning_rate": 2.385302197802198e-05, + "loss": 0.0462, + "step": 19035 + }, + { + "epoch": 52.2967032967033, + "grad_norm": 11.425853729248047, + "learning_rate": 2.3851648351648352e-05, + "loss": 0.2962, + "step": 19036 + }, + { + "epoch": 52.29945054945055, + "grad_norm": 8.821889877319336, + "learning_rate": 2.3850274725274726e-05, + "loss": 0.1949, + "step": 19037 + }, + { + "epoch": 52.3021978021978, + "grad_norm": 4.20276403427124, + "learning_rate": 2.38489010989011e-05, + "loss": 0.0885, + "step": 19038 + }, + { + "epoch": 52.30494505494506, + "grad_norm": 11.712876319885254, + "learning_rate": 2.3847527472527472e-05, + "loss": 0.4143, + "step": 19039 + }, + { + "epoch": 52.30769230769231, + "grad_norm": 13.729281425476074, + "learning_rate": 2.384615384615385e-05, + "loss": 0.3483, + "step": 19040 + }, + { + "epoch": 52.31043956043956, + "grad_norm": 8.080764770507812, + "learning_rate": 2.3844780219780223e-05, + "loss": 0.1653, + "step": 19041 + }, + { + "epoch": 52.31318681318681, + "grad_norm": 11.689584732055664, + "learning_rate": 2.3843406593406596e-05, + "loss": 0.2206, + "step": 19042 + }, + { + "epoch": 52.315934065934066, + "grad_norm": 5.894556045532227, + "learning_rate": 2.384203296703297e-05, + "loss": 0.0838, + "step": 19043 + }, + { + "epoch": 52.31868131868132, + "grad_norm": 14.579814910888672, + "learning_rate": 2.384065934065934e-05, + "loss": 0.3043, + "step": 19044 + }, + { + "epoch": 52.32142857142857, + "grad_norm": 10.112926483154297, + "learning_rate": 2.3839285714285716e-05, + "loss": 0.2518, + "step": 19045 + }, + { + "epoch": 52.324175824175825, + "grad_norm": 13.607017517089844, + "learning_rate": 2.383791208791209e-05, + "loss": 0.4318, + "step": 19046 + }, + { + "epoch": 52.32692307692308, + "grad_norm": 5.248305320739746, + "learning_rate": 2.3836538461538463e-05, + "loss": 0.1435, + "step": 19047 + }, + { + "epoch": 52.32967032967033, + "grad_norm": 13.470280647277832, + "learning_rate": 2.3835164835164837e-05, + "loss": 0.2333, + "step": 19048 + }, + { + "epoch": 52.332417582417584, + "grad_norm": 14.616832733154297, + "learning_rate": 2.383379120879121e-05, + "loss": 0.2114, + "step": 19049 + }, + { + "epoch": 52.33516483516483, + "grad_norm": 14.83513069152832, + "learning_rate": 2.3832417582417583e-05, + "loss": 0.3288, + "step": 19050 + }, + { + "epoch": 52.33791208791209, + "grad_norm": 12.238262176513672, + "learning_rate": 2.3831043956043957e-05, + "loss": 0.2532, + "step": 19051 + }, + { + "epoch": 52.34065934065934, + "grad_norm": 10.398052215576172, + "learning_rate": 2.382967032967033e-05, + "loss": 0.2856, + "step": 19052 + }, + { + "epoch": 52.34340659340659, + "grad_norm": 10.817811965942383, + "learning_rate": 2.3828296703296704e-05, + "loss": 0.2093, + "step": 19053 + }, + { + "epoch": 52.34615384615385, + "grad_norm": 18.25817108154297, + "learning_rate": 2.3826923076923077e-05, + "loss": 0.4714, + "step": 19054 + }, + { + "epoch": 52.3489010989011, + "grad_norm": 7.535102367401123, + "learning_rate": 2.382554945054945e-05, + "loss": 0.1863, + "step": 19055 + }, + { + "epoch": 52.35164835164835, + "grad_norm": 22.965662002563477, + "learning_rate": 2.3824175824175827e-05, + "loss": 0.5634, + "step": 19056 + }, + { + "epoch": 52.354395604395606, + "grad_norm": 9.512551307678223, + "learning_rate": 2.38228021978022e-05, + "loss": 0.1583, + "step": 19057 + }, + { + "epoch": 52.357142857142854, + "grad_norm": 8.69493579864502, + "learning_rate": 2.3821428571428574e-05, + "loss": 0.1027, + "step": 19058 + }, + { + "epoch": 52.35989010989011, + "grad_norm": 3.7361221313476562, + "learning_rate": 2.3820054945054944e-05, + "loss": 0.066, + "step": 19059 + }, + { + "epoch": 52.362637362637365, + "grad_norm": 3.814789295196533, + "learning_rate": 2.3818681318681317e-05, + "loss": 0.0843, + "step": 19060 + }, + { + "epoch": 52.36538461538461, + "grad_norm": 11.530564308166504, + "learning_rate": 2.3817307692307694e-05, + "loss": 0.2713, + "step": 19061 + }, + { + "epoch": 52.36813186813187, + "grad_norm": 8.631145477294922, + "learning_rate": 2.3815934065934068e-05, + "loss": 0.2101, + "step": 19062 + }, + { + "epoch": 52.370879120879124, + "grad_norm": 19.389726638793945, + "learning_rate": 2.381456043956044e-05, + "loss": 0.5825, + "step": 19063 + }, + { + "epoch": 52.37362637362637, + "grad_norm": 16.141807556152344, + "learning_rate": 2.3813186813186814e-05, + "loss": 0.3482, + "step": 19064 + }, + { + "epoch": 52.37637362637363, + "grad_norm": 20.638595581054688, + "learning_rate": 2.3811813186813188e-05, + "loss": 0.4259, + "step": 19065 + }, + { + "epoch": 52.379120879120876, + "grad_norm": 15.724692344665527, + "learning_rate": 2.381043956043956e-05, + "loss": 0.229, + "step": 19066 + }, + { + "epoch": 52.38186813186813, + "grad_norm": 6.101935863494873, + "learning_rate": 2.3809065934065935e-05, + "loss": 0.1287, + "step": 19067 + }, + { + "epoch": 52.38461538461539, + "grad_norm": 15.157496452331543, + "learning_rate": 2.3807692307692308e-05, + "loss": 0.2958, + "step": 19068 + }, + { + "epoch": 52.387362637362635, + "grad_norm": 11.040520668029785, + "learning_rate": 2.380631868131868e-05, + "loss": 0.133, + "step": 19069 + }, + { + "epoch": 52.39010989010989, + "grad_norm": 8.550955772399902, + "learning_rate": 2.3804945054945055e-05, + "loss": 0.1791, + "step": 19070 + }, + { + "epoch": 52.392857142857146, + "grad_norm": 8.336652755737305, + "learning_rate": 2.380357142857143e-05, + "loss": 0.2053, + "step": 19071 + }, + { + "epoch": 52.395604395604394, + "grad_norm": 9.103755950927734, + "learning_rate": 2.3802197802197805e-05, + "loss": 0.1285, + "step": 19072 + }, + { + "epoch": 52.39835164835165, + "grad_norm": 5.130556583404541, + "learning_rate": 2.3800824175824175e-05, + "loss": 0.2112, + "step": 19073 + }, + { + "epoch": 52.4010989010989, + "grad_norm": 18.97941780090332, + "learning_rate": 2.379945054945055e-05, + "loss": 0.5526, + "step": 19074 + }, + { + "epoch": 52.40384615384615, + "grad_norm": 19.35570526123047, + "learning_rate": 2.3798076923076922e-05, + "loss": 0.2511, + "step": 19075 + }, + { + "epoch": 52.40659340659341, + "grad_norm": 17.05076026916504, + "learning_rate": 2.37967032967033e-05, + "loss": 0.4104, + "step": 19076 + }, + { + "epoch": 52.40934065934066, + "grad_norm": 22.274431228637695, + "learning_rate": 2.3795329670329672e-05, + "loss": 1.0856, + "step": 19077 + }, + { + "epoch": 52.41208791208791, + "grad_norm": 5.799466133117676, + "learning_rate": 2.3793956043956046e-05, + "loss": 0.0679, + "step": 19078 + }, + { + "epoch": 52.41483516483517, + "grad_norm": 9.14304256439209, + "learning_rate": 2.379258241758242e-05, + "loss": 0.1611, + "step": 19079 + }, + { + "epoch": 52.417582417582416, + "grad_norm": 21.37847137451172, + "learning_rate": 2.3791208791208792e-05, + "loss": 0.4378, + "step": 19080 + }, + { + "epoch": 52.42032967032967, + "grad_norm": 18.120481491088867, + "learning_rate": 2.3789835164835166e-05, + "loss": 0.4478, + "step": 19081 + }, + { + "epoch": 52.42307692307692, + "grad_norm": 10.762465476989746, + "learning_rate": 2.378846153846154e-05, + "loss": 0.1368, + "step": 19082 + }, + { + "epoch": 52.425824175824175, + "grad_norm": 6.804120063781738, + "learning_rate": 2.3787087912087913e-05, + "loss": 0.1099, + "step": 19083 + }, + { + "epoch": 52.42857142857143, + "grad_norm": 17.700956344604492, + "learning_rate": 2.3785714285714286e-05, + "loss": 0.4558, + "step": 19084 + }, + { + "epoch": 52.43131868131868, + "grad_norm": 13.823823928833008, + "learning_rate": 2.378434065934066e-05, + "loss": 0.1688, + "step": 19085 + }, + { + "epoch": 52.434065934065934, + "grad_norm": 18.716083526611328, + "learning_rate": 2.3782967032967036e-05, + "loss": 0.729, + "step": 19086 + }, + { + "epoch": 52.43681318681319, + "grad_norm": 11.192736625671387, + "learning_rate": 2.378159340659341e-05, + "loss": 0.2813, + "step": 19087 + }, + { + "epoch": 52.43956043956044, + "grad_norm": 15.441317558288574, + "learning_rate": 2.378021978021978e-05, + "loss": 0.4133, + "step": 19088 + }, + { + "epoch": 52.44230769230769, + "grad_norm": 20.337318420410156, + "learning_rate": 2.3778846153846153e-05, + "loss": 0.7048, + "step": 19089 + }, + { + "epoch": 52.44505494505494, + "grad_norm": 5.204235553741455, + "learning_rate": 2.3777472527472526e-05, + "loss": 0.0648, + "step": 19090 + }, + { + "epoch": 52.4478021978022, + "grad_norm": 14.661726951599121, + "learning_rate": 2.3776098901098903e-05, + "loss": 0.4193, + "step": 19091 + }, + { + "epoch": 52.45054945054945, + "grad_norm": 9.18929672241211, + "learning_rate": 2.3774725274725277e-05, + "loss": 0.0743, + "step": 19092 + }, + { + "epoch": 52.4532967032967, + "grad_norm": 14.86691665649414, + "learning_rate": 2.377335164835165e-05, + "loss": 0.4064, + "step": 19093 + }, + { + "epoch": 52.456043956043956, + "grad_norm": 19.465639114379883, + "learning_rate": 2.3771978021978023e-05, + "loss": 0.3447, + "step": 19094 + }, + { + "epoch": 52.45879120879121, + "grad_norm": 1.9251363277435303, + "learning_rate": 2.3770604395604397e-05, + "loss": 0.0306, + "step": 19095 + }, + { + "epoch": 52.46153846153846, + "grad_norm": 7.622900485992432, + "learning_rate": 2.376923076923077e-05, + "loss": 0.1518, + "step": 19096 + }, + { + "epoch": 52.464285714285715, + "grad_norm": 4.229064464569092, + "learning_rate": 2.3767857142857144e-05, + "loss": 0.0463, + "step": 19097 + }, + { + "epoch": 52.467032967032964, + "grad_norm": 26.925580978393555, + "learning_rate": 2.3766483516483517e-05, + "loss": 0.7565, + "step": 19098 + }, + { + "epoch": 52.46978021978022, + "grad_norm": 10.837953567504883, + "learning_rate": 2.376510989010989e-05, + "loss": 0.3114, + "step": 19099 + }, + { + "epoch": 52.472527472527474, + "grad_norm": 24.011314392089844, + "learning_rate": 2.3763736263736264e-05, + "loss": 0.6293, + "step": 19100 + }, + { + "epoch": 52.47527472527472, + "grad_norm": 4.790284633636475, + "learning_rate": 2.376236263736264e-05, + "loss": 0.0723, + "step": 19101 + }, + { + "epoch": 52.47802197802198, + "grad_norm": 13.075485229492188, + "learning_rate": 2.3760989010989014e-05, + "loss": 0.3714, + "step": 19102 + }, + { + "epoch": 52.48076923076923, + "grad_norm": 20.02290916442871, + "learning_rate": 2.3759615384615384e-05, + "loss": 0.5346, + "step": 19103 + }, + { + "epoch": 52.48351648351648, + "grad_norm": 14.759401321411133, + "learning_rate": 2.3758241758241757e-05, + "loss": 0.3077, + "step": 19104 + }, + { + "epoch": 52.48626373626374, + "grad_norm": 2.1876564025878906, + "learning_rate": 2.375686813186813e-05, + "loss": 0.0339, + "step": 19105 + }, + { + "epoch": 52.489010989010985, + "grad_norm": 9.55963134765625, + "learning_rate": 2.3755494505494508e-05, + "loss": 0.1475, + "step": 19106 + }, + { + "epoch": 52.49175824175824, + "grad_norm": 7.440245628356934, + "learning_rate": 2.375412087912088e-05, + "loss": 0.1235, + "step": 19107 + }, + { + "epoch": 52.494505494505496, + "grad_norm": 17.467273712158203, + "learning_rate": 2.3752747252747254e-05, + "loss": 0.4237, + "step": 19108 + }, + { + "epoch": 52.497252747252745, + "grad_norm": 13.964791297912598, + "learning_rate": 2.3751373626373628e-05, + "loss": 0.289, + "step": 19109 + }, + { + "epoch": 52.5, + "grad_norm": 7.519096851348877, + "learning_rate": 2.375e-05, + "loss": 0.1565, + "step": 19110 + }, + { + "epoch": 52.502747252747255, + "grad_norm": 14.101408004760742, + "learning_rate": 2.3748626373626375e-05, + "loss": 0.3807, + "step": 19111 + }, + { + "epoch": 52.505494505494504, + "grad_norm": 9.20604419708252, + "learning_rate": 2.3747252747252748e-05, + "loss": 0.2223, + "step": 19112 + }, + { + "epoch": 52.50824175824176, + "grad_norm": 16.322792053222656, + "learning_rate": 2.374587912087912e-05, + "loss": 0.435, + "step": 19113 + }, + { + "epoch": 52.51098901098901, + "grad_norm": 21.680850982666016, + "learning_rate": 2.3744505494505495e-05, + "loss": 0.689, + "step": 19114 + }, + { + "epoch": 52.51373626373626, + "grad_norm": 26.63181495666504, + "learning_rate": 2.374313186813187e-05, + "loss": 0.864, + "step": 19115 + }, + { + "epoch": 52.51648351648352, + "grad_norm": 11.680112838745117, + "learning_rate": 2.3741758241758245e-05, + "loss": 0.1431, + "step": 19116 + }, + { + "epoch": 52.51923076923077, + "grad_norm": 13.928923606872559, + "learning_rate": 2.374038461538462e-05, + "loss": 0.3722, + "step": 19117 + }, + { + "epoch": 52.52197802197802, + "grad_norm": 10.918875694274902, + "learning_rate": 2.373901098901099e-05, + "loss": 0.1137, + "step": 19118 + }, + { + "epoch": 52.52472527472528, + "grad_norm": 12.234241485595703, + "learning_rate": 2.3737637362637362e-05, + "loss": 0.1805, + "step": 19119 + }, + { + "epoch": 52.527472527472526, + "grad_norm": 11.555037498474121, + "learning_rate": 2.3736263736263735e-05, + "loss": 0.1787, + "step": 19120 + }, + { + "epoch": 52.53021978021978, + "grad_norm": 8.083984375, + "learning_rate": 2.3734890109890112e-05, + "loss": 0.1984, + "step": 19121 + }, + { + "epoch": 52.532967032967036, + "grad_norm": 20.488426208496094, + "learning_rate": 2.3733516483516486e-05, + "loss": 0.4251, + "step": 19122 + }, + { + "epoch": 52.535714285714285, + "grad_norm": 10.952088356018066, + "learning_rate": 2.373214285714286e-05, + "loss": 0.2379, + "step": 19123 + }, + { + "epoch": 52.53846153846154, + "grad_norm": 9.035784721374512, + "learning_rate": 2.3730769230769232e-05, + "loss": 0.1368, + "step": 19124 + }, + { + "epoch": 52.54120879120879, + "grad_norm": 6.2273850440979, + "learning_rate": 2.3729395604395606e-05, + "loss": 0.0938, + "step": 19125 + }, + { + "epoch": 52.543956043956044, + "grad_norm": 13.447212219238281, + "learning_rate": 2.372802197802198e-05, + "loss": 0.2358, + "step": 19126 + }, + { + "epoch": 52.5467032967033, + "grad_norm": 14.4240083694458, + "learning_rate": 2.3726648351648353e-05, + "loss": 0.67, + "step": 19127 + }, + { + "epoch": 52.54945054945055, + "grad_norm": 19.10523223876953, + "learning_rate": 2.3725274725274726e-05, + "loss": 0.4609, + "step": 19128 + }, + { + "epoch": 52.5521978021978, + "grad_norm": 2.5999343395233154, + "learning_rate": 2.37239010989011e-05, + "loss": 0.035, + "step": 19129 + }, + { + "epoch": 52.55494505494506, + "grad_norm": 13.124455451965332, + "learning_rate": 2.3722527472527473e-05, + "loss": 0.3147, + "step": 19130 + }, + { + "epoch": 52.55769230769231, + "grad_norm": 6.9086594581604, + "learning_rate": 2.372115384615385e-05, + "loss": 0.0811, + "step": 19131 + }, + { + "epoch": 52.56043956043956, + "grad_norm": 20.49041748046875, + "learning_rate": 2.3719780219780223e-05, + "loss": 0.519, + "step": 19132 + }, + { + "epoch": 52.56318681318681, + "grad_norm": 17.584367752075195, + "learning_rate": 2.3718406593406593e-05, + "loss": 0.6925, + "step": 19133 + }, + { + "epoch": 52.565934065934066, + "grad_norm": 5.308716297149658, + "learning_rate": 2.3717032967032966e-05, + "loss": 0.1063, + "step": 19134 + }, + { + "epoch": 52.56868131868132, + "grad_norm": 12.849385261535645, + "learning_rate": 2.371565934065934e-05, + "loss": 0.3058, + "step": 19135 + }, + { + "epoch": 52.57142857142857, + "grad_norm": 16.477428436279297, + "learning_rate": 2.3714285714285717e-05, + "loss": 0.4313, + "step": 19136 + }, + { + "epoch": 52.574175824175825, + "grad_norm": 5.5722737312316895, + "learning_rate": 2.371291208791209e-05, + "loss": 0.1531, + "step": 19137 + }, + { + "epoch": 52.57692307692308, + "grad_norm": 13.787266731262207, + "learning_rate": 2.3711538461538463e-05, + "loss": 0.2773, + "step": 19138 + }, + { + "epoch": 52.57967032967033, + "grad_norm": 20.72064208984375, + "learning_rate": 2.3710164835164837e-05, + "loss": 0.6496, + "step": 19139 + }, + { + "epoch": 52.582417582417584, + "grad_norm": 16.204532623291016, + "learning_rate": 2.370879120879121e-05, + "loss": 0.3875, + "step": 19140 + }, + { + "epoch": 52.58516483516483, + "grad_norm": 3.269192695617676, + "learning_rate": 2.3707417582417584e-05, + "loss": 0.047, + "step": 19141 + }, + { + "epoch": 52.58791208791209, + "grad_norm": 13.900705337524414, + "learning_rate": 2.3706043956043957e-05, + "loss": 0.1857, + "step": 19142 + }, + { + "epoch": 52.59065934065934, + "grad_norm": 9.460448265075684, + "learning_rate": 2.370467032967033e-05, + "loss": 0.277, + "step": 19143 + }, + { + "epoch": 52.59340659340659, + "grad_norm": 21.72840690612793, + "learning_rate": 2.3703296703296704e-05, + "loss": 0.525, + "step": 19144 + }, + { + "epoch": 52.59615384615385, + "grad_norm": 8.743253707885742, + "learning_rate": 2.3701923076923077e-05, + "loss": 0.1976, + "step": 19145 + }, + { + "epoch": 52.5989010989011, + "grad_norm": 11.5548677444458, + "learning_rate": 2.3700549450549454e-05, + "loss": 0.2446, + "step": 19146 + }, + { + "epoch": 52.60164835164835, + "grad_norm": 9.42715072631836, + "learning_rate": 2.3699175824175828e-05, + "loss": 0.1085, + "step": 19147 + }, + { + "epoch": 52.604395604395606, + "grad_norm": 14.497886657714844, + "learning_rate": 2.3697802197802198e-05, + "loss": 0.4593, + "step": 19148 + }, + { + "epoch": 52.607142857142854, + "grad_norm": 14.379312515258789, + "learning_rate": 2.369642857142857e-05, + "loss": 0.2692, + "step": 19149 + }, + { + "epoch": 52.60989010989011, + "grad_norm": 16.805158615112305, + "learning_rate": 2.3695054945054944e-05, + "loss": 0.4203, + "step": 19150 + }, + { + "epoch": 52.612637362637365, + "grad_norm": 11.575809478759766, + "learning_rate": 2.369368131868132e-05, + "loss": 0.168, + "step": 19151 + }, + { + "epoch": 52.61538461538461, + "grad_norm": 13.401639938354492, + "learning_rate": 2.3692307692307695e-05, + "loss": 0.4682, + "step": 19152 + }, + { + "epoch": 52.61813186813187, + "grad_norm": 7.2307915687561035, + "learning_rate": 2.3690934065934068e-05, + "loss": 0.1377, + "step": 19153 + }, + { + "epoch": 52.620879120879124, + "grad_norm": 9.941081047058105, + "learning_rate": 2.368956043956044e-05, + "loss": 0.2156, + "step": 19154 + }, + { + "epoch": 52.62362637362637, + "grad_norm": 3.7948081493377686, + "learning_rate": 2.3688186813186815e-05, + "loss": 0.0716, + "step": 19155 + }, + { + "epoch": 52.62637362637363, + "grad_norm": 15.048097610473633, + "learning_rate": 2.3686813186813188e-05, + "loss": 0.351, + "step": 19156 + }, + { + "epoch": 52.629120879120876, + "grad_norm": 14.366767883300781, + "learning_rate": 2.368543956043956e-05, + "loss": 0.3008, + "step": 19157 + }, + { + "epoch": 52.63186813186813, + "grad_norm": 12.016836166381836, + "learning_rate": 2.3684065934065935e-05, + "loss": 0.1655, + "step": 19158 + }, + { + "epoch": 52.63461538461539, + "grad_norm": 7.856230735778809, + "learning_rate": 2.368269230769231e-05, + "loss": 0.1163, + "step": 19159 + }, + { + "epoch": 52.637362637362635, + "grad_norm": 6.2771525382995605, + "learning_rate": 2.3681318681318682e-05, + "loss": 0.1029, + "step": 19160 + }, + { + "epoch": 52.64010989010989, + "grad_norm": 8.870689392089844, + "learning_rate": 2.367994505494506e-05, + "loss": 0.1564, + "step": 19161 + }, + { + "epoch": 52.642857142857146, + "grad_norm": 9.60010051727295, + "learning_rate": 2.3678571428571432e-05, + "loss": 0.1691, + "step": 19162 + }, + { + "epoch": 52.645604395604394, + "grad_norm": 15.161615371704102, + "learning_rate": 2.3677197802197802e-05, + "loss": 0.177, + "step": 19163 + }, + { + "epoch": 52.64835164835165, + "grad_norm": 13.41834545135498, + "learning_rate": 2.3675824175824175e-05, + "loss": 0.4335, + "step": 19164 + }, + { + "epoch": 52.6510989010989, + "grad_norm": 14.971879005432129, + "learning_rate": 2.367445054945055e-05, + "loss": 0.2875, + "step": 19165 + }, + { + "epoch": 52.65384615384615, + "grad_norm": 15.832202911376953, + "learning_rate": 2.3673076923076926e-05, + "loss": 0.4893, + "step": 19166 + }, + { + "epoch": 52.65659340659341, + "grad_norm": 10.70012378692627, + "learning_rate": 2.36717032967033e-05, + "loss": 0.1864, + "step": 19167 + }, + { + "epoch": 52.65934065934066, + "grad_norm": 6.3073015213012695, + "learning_rate": 2.3670329670329672e-05, + "loss": 0.1412, + "step": 19168 + }, + { + "epoch": 52.66208791208791, + "grad_norm": 12.445871353149414, + "learning_rate": 2.3668956043956046e-05, + "loss": 0.2064, + "step": 19169 + }, + { + "epoch": 52.66483516483517, + "grad_norm": 14.478053092956543, + "learning_rate": 2.366758241758242e-05, + "loss": 0.3013, + "step": 19170 + }, + { + "epoch": 52.667582417582416, + "grad_norm": 8.608905792236328, + "learning_rate": 2.366620879120879e-05, + "loss": 0.1916, + "step": 19171 + }, + { + "epoch": 52.67032967032967, + "grad_norm": 11.820995330810547, + "learning_rate": 2.3664835164835166e-05, + "loss": 0.2348, + "step": 19172 + }, + { + "epoch": 52.67307692307692, + "grad_norm": 14.611894607543945, + "learning_rate": 2.366346153846154e-05, + "loss": 0.245, + "step": 19173 + }, + { + "epoch": 52.675824175824175, + "grad_norm": 17.555572509765625, + "learning_rate": 2.3662087912087913e-05, + "loss": 0.2963, + "step": 19174 + }, + { + "epoch": 52.67857142857143, + "grad_norm": 8.321243286132812, + "learning_rate": 2.3660714285714286e-05, + "loss": 0.1519, + "step": 19175 + }, + { + "epoch": 52.68131868131868, + "grad_norm": 8.045738220214844, + "learning_rate": 2.365934065934066e-05, + "loss": 0.1507, + "step": 19176 + }, + { + "epoch": 52.684065934065934, + "grad_norm": 2.8838586807250977, + "learning_rate": 2.3657967032967037e-05, + "loss": 0.0342, + "step": 19177 + }, + { + "epoch": 52.68681318681319, + "grad_norm": 14.870246887207031, + "learning_rate": 2.3656593406593407e-05, + "loss": 0.3694, + "step": 19178 + }, + { + "epoch": 52.68956043956044, + "grad_norm": 12.184547424316406, + "learning_rate": 2.365521978021978e-05, + "loss": 0.303, + "step": 19179 + }, + { + "epoch": 52.69230769230769, + "grad_norm": 2.3755006790161133, + "learning_rate": 2.3653846153846153e-05, + "loss": 0.0271, + "step": 19180 + }, + { + "epoch": 52.69505494505494, + "grad_norm": 7.337393283843994, + "learning_rate": 2.3652472527472527e-05, + "loss": 0.1287, + "step": 19181 + }, + { + "epoch": 52.6978021978022, + "grad_norm": 4.324341297149658, + "learning_rate": 2.3651098901098904e-05, + "loss": 0.0639, + "step": 19182 + }, + { + "epoch": 52.70054945054945, + "grad_norm": 6.1125898361206055, + "learning_rate": 2.3649725274725277e-05, + "loss": 0.0769, + "step": 19183 + }, + { + "epoch": 52.7032967032967, + "grad_norm": 19.53936767578125, + "learning_rate": 2.364835164835165e-05, + "loss": 0.5906, + "step": 19184 + }, + { + "epoch": 52.706043956043956, + "grad_norm": 6.1429123878479, + "learning_rate": 2.3646978021978024e-05, + "loss": 0.0859, + "step": 19185 + }, + { + "epoch": 52.70879120879121, + "grad_norm": 12.121563911437988, + "learning_rate": 2.3645604395604394e-05, + "loss": 0.2338, + "step": 19186 + }, + { + "epoch": 52.71153846153846, + "grad_norm": 13.78210163116455, + "learning_rate": 2.364423076923077e-05, + "loss": 0.2855, + "step": 19187 + }, + { + "epoch": 52.714285714285715, + "grad_norm": 17.89963150024414, + "learning_rate": 2.3642857142857144e-05, + "loss": 0.3195, + "step": 19188 + }, + { + "epoch": 52.717032967032964, + "grad_norm": 23.384851455688477, + "learning_rate": 2.3641483516483517e-05, + "loss": 0.9456, + "step": 19189 + }, + { + "epoch": 52.71978021978022, + "grad_norm": 5.271002292633057, + "learning_rate": 2.364010989010989e-05, + "loss": 0.115, + "step": 19190 + }, + { + "epoch": 52.722527472527474, + "grad_norm": 7.296627044677734, + "learning_rate": 2.3638736263736264e-05, + "loss": 0.1006, + "step": 19191 + }, + { + "epoch": 52.72527472527472, + "grad_norm": 7.5337324142456055, + "learning_rate": 2.363736263736264e-05, + "loss": 0.1489, + "step": 19192 + }, + { + "epoch": 52.72802197802198, + "grad_norm": 19.154251098632812, + "learning_rate": 2.363598901098901e-05, + "loss": 0.4865, + "step": 19193 + }, + { + "epoch": 52.73076923076923, + "grad_norm": 21.643798828125, + "learning_rate": 2.3634615384615384e-05, + "loss": 0.6555, + "step": 19194 + }, + { + "epoch": 52.73351648351648, + "grad_norm": 6.046374797821045, + "learning_rate": 2.3633241758241758e-05, + "loss": 0.1558, + "step": 19195 + }, + { + "epoch": 52.73626373626374, + "grad_norm": 13.092323303222656, + "learning_rate": 2.363186813186813e-05, + "loss": 0.1991, + "step": 19196 + }, + { + "epoch": 52.73901098901099, + "grad_norm": 22.017112731933594, + "learning_rate": 2.3630494505494508e-05, + "loss": 0.42, + "step": 19197 + }, + { + "epoch": 52.74175824175824, + "grad_norm": 9.388067245483398, + "learning_rate": 2.362912087912088e-05, + "loss": 0.1188, + "step": 19198 + }, + { + "epoch": 52.744505494505496, + "grad_norm": 16.985811233520508, + "learning_rate": 2.3627747252747255e-05, + "loss": 0.6006, + "step": 19199 + }, + { + "epoch": 52.747252747252745, + "grad_norm": 10.259720802307129, + "learning_rate": 2.3626373626373628e-05, + "loss": 0.2144, + "step": 19200 + }, + { + "epoch": 52.75, + "grad_norm": 18.863666534423828, + "learning_rate": 2.3624999999999998e-05, + "loss": 0.3732, + "step": 19201 + }, + { + "epoch": 52.752747252747255, + "grad_norm": 7.891015529632568, + "learning_rate": 2.3623626373626375e-05, + "loss": 0.1546, + "step": 19202 + }, + { + "epoch": 52.755494505494504, + "grad_norm": 6.563689231872559, + "learning_rate": 2.362225274725275e-05, + "loss": 0.1387, + "step": 19203 + }, + { + "epoch": 52.75824175824176, + "grad_norm": 13.439708709716797, + "learning_rate": 2.3620879120879122e-05, + "loss": 0.3756, + "step": 19204 + }, + { + "epoch": 52.76098901098901, + "grad_norm": 7.550300598144531, + "learning_rate": 2.3619505494505495e-05, + "loss": 0.1069, + "step": 19205 + }, + { + "epoch": 52.76373626373626, + "grad_norm": 14.740991592407227, + "learning_rate": 2.361813186813187e-05, + "loss": 0.307, + "step": 19206 + }, + { + "epoch": 52.76648351648352, + "grad_norm": 13.554532051086426, + "learning_rate": 2.3616758241758245e-05, + "loss": 0.3956, + "step": 19207 + }, + { + "epoch": 52.76923076923077, + "grad_norm": 7.3759307861328125, + "learning_rate": 2.3615384615384616e-05, + "loss": 0.1038, + "step": 19208 + }, + { + "epoch": 52.77197802197802, + "grad_norm": 4.826818466186523, + "learning_rate": 2.361401098901099e-05, + "loss": 0.0874, + "step": 19209 + }, + { + "epoch": 52.77472527472528, + "grad_norm": 22.437633514404297, + "learning_rate": 2.3612637362637362e-05, + "loss": 0.5903, + "step": 19210 + }, + { + "epoch": 52.777472527472526, + "grad_norm": 7.024770736694336, + "learning_rate": 2.3611263736263736e-05, + "loss": 0.0938, + "step": 19211 + }, + { + "epoch": 52.78021978021978, + "grad_norm": 6.232275485992432, + "learning_rate": 2.3609890109890113e-05, + "loss": 0.1082, + "step": 19212 + }, + { + "epoch": 52.782967032967036, + "grad_norm": 24.435733795166016, + "learning_rate": 2.3608516483516486e-05, + "loss": 0.7325, + "step": 19213 + }, + { + "epoch": 52.785714285714285, + "grad_norm": 9.518451690673828, + "learning_rate": 2.360714285714286e-05, + "loss": 0.2213, + "step": 19214 + }, + { + "epoch": 52.78846153846154, + "grad_norm": 23.44025993347168, + "learning_rate": 2.3605769230769233e-05, + "loss": 0.8075, + "step": 19215 + }, + { + "epoch": 52.79120879120879, + "grad_norm": 6.791723728179932, + "learning_rate": 2.3604395604395603e-05, + "loss": 0.1394, + "step": 19216 + }, + { + "epoch": 52.793956043956044, + "grad_norm": 10.038360595703125, + "learning_rate": 2.360302197802198e-05, + "loss": 0.2094, + "step": 19217 + }, + { + "epoch": 52.7967032967033, + "grad_norm": 9.163687705993652, + "learning_rate": 2.3601648351648353e-05, + "loss": 0.3063, + "step": 19218 + }, + { + "epoch": 52.79945054945055, + "grad_norm": 17.275531768798828, + "learning_rate": 2.3600274725274726e-05, + "loss": 0.611, + "step": 19219 + }, + { + "epoch": 52.8021978021978, + "grad_norm": 21.93214225769043, + "learning_rate": 2.35989010989011e-05, + "loss": 0.558, + "step": 19220 + }, + { + "epoch": 52.80494505494506, + "grad_norm": 4.290927410125732, + "learning_rate": 2.3597527472527473e-05, + "loss": 0.0588, + "step": 19221 + }, + { + "epoch": 52.80769230769231, + "grad_norm": 13.564401626586914, + "learning_rate": 2.359615384615385e-05, + "loss": 0.2332, + "step": 19222 + }, + { + "epoch": 52.81043956043956, + "grad_norm": 12.18354606628418, + "learning_rate": 2.359478021978022e-05, + "loss": 0.2013, + "step": 19223 + }, + { + "epoch": 52.81318681318681, + "grad_norm": 18.963497161865234, + "learning_rate": 2.3593406593406593e-05, + "loss": 0.4141, + "step": 19224 + }, + { + "epoch": 52.815934065934066, + "grad_norm": 6.864112854003906, + "learning_rate": 2.3592032967032967e-05, + "loss": 0.085, + "step": 19225 + }, + { + "epoch": 52.81868131868132, + "grad_norm": 5.824854850769043, + "learning_rate": 2.359065934065934e-05, + "loss": 0.0963, + "step": 19226 + }, + { + "epoch": 52.82142857142857, + "grad_norm": 14.291187286376953, + "learning_rate": 2.3589285714285717e-05, + "loss": 0.2055, + "step": 19227 + }, + { + "epoch": 52.824175824175825, + "grad_norm": 6.540547847747803, + "learning_rate": 2.358791208791209e-05, + "loss": 0.0851, + "step": 19228 + }, + { + "epoch": 52.82692307692308, + "grad_norm": 14.150726318359375, + "learning_rate": 2.3586538461538464e-05, + "loss": 0.2529, + "step": 19229 + }, + { + "epoch": 52.82967032967033, + "grad_norm": 7.8083367347717285, + "learning_rate": 2.3585164835164837e-05, + "loss": 0.1541, + "step": 19230 + }, + { + "epoch": 52.832417582417584, + "grad_norm": 16.23188018798828, + "learning_rate": 2.3583791208791207e-05, + "loss": 0.3464, + "step": 19231 + }, + { + "epoch": 52.83516483516483, + "grad_norm": 9.120583534240723, + "learning_rate": 2.3582417582417584e-05, + "loss": 0.3204, + "step": 19232 + }, + { + "epoch": 52.83791208791209, + "grad_norm": 6.059032440185547, + "learning_rate": 2.3581043956043957e-05, + "loss": 0.0713, + "step": 19233 + }, + { + "epoch": 52.84065934065934, + "grad_norm": 11.477177619934082, + "learning_rate": 2.357967032967033e-05, + "loss": 0.212, + "step": 19234 + }, + { + "epoch": 52.84340659340659, + "grad_norm": 16.381378173828125, + "learning_rate": 2.3578296703296704e-05, + "loss": 0.2917, + "step": 19235 + }, + { + "epoch": 52.84615384615385, + "grad_norm": 5.815252304077148, + "learning_rate": 2.3576923076923078e-05, + "loss": 0.0814, + "step": 19236 + }, + { + "epoch": 52.8489010989011, + "grad_norm": 11.02225399017334, + "learning_rate": 2.3575549450549454e-05, + "loss": 0.2745, + "step": 19237 + }, + { + "epoch": 52.85164835164835, + "grad_norm": 13.079643249511719, + "learning_rate": 2.3574175824175824e-05, + "loss": 0.3329, + "step": 19238 + }, + { + "epoch": 52.854395604395606, + "grad_norm": 6.34092378616333, + "learning_rate": 2.3572802197802198e-05, + "loss": 0.1005, + "step": 19239 + }, + { + "epoch": 52.857142857142854, + "grad_norm": 15.994989395141602, + "learning_rate": 2.357142857142857e-05, + "loss": 0.3012, + "step": 19240 + }, + { + "epoch": 52.85989010989011, + "grad_norm": 8.306337356567383, + "learning_rate": 2.3570054945054945e-05, + "loss": 0.123, + "step": 19241 + }, + { + "epoch": 52.862637362637365, + "grad_norm": 19.120960235595703, + "learning_rate": 2.356868131868132e-05, + "loss": 0.6883, + "step": 19242 + }, + { + "epoch": 52.86538461538461, + "grad_norm": 13.06503963470459, + "learning_rate": 2.3567307692307695e-05, + "loss": 0.223, + "step": 19243 + }, + { + "epoch": 52.86813186813187, + "grad_norm": 22.037731170654297, + "learning_rate": 2.356593406593407e-05, + "loss": 0.6062, + "step": 19244 + }, + { + "epoch": 52.870879120879124, + "grad_norm": 8.948657989501953, + "learning_rate": 2.3564560439560442e-05, + "loss": 0.2141, + "step": 19245 + }, + { + "epoch": 52.87362637362637, + "grad_norm": 1.6839330196380615, + "learning_rate": 2.3563186813186812e-05, + "loss": 0.0198, + "step": 19246 + }, + { + "epoch": 52.87637362637363, + "grad_norm": 15.302447319030762, + "learning_rate": 2.356181318681319e-05, + "loss": 0.1986, + "step": 19247 + }, + { + "epoch": 52.879120879120876, + "grad_norm": 7.5393853187561035, + "learning_rate": 2.3560439560439562e-05, + "loss": 0.214, + "step": 19248 + }, + { + "epoch": 52.88186813186813, + "grad_norm": 20.586223602294922, + "learning_rate": 2.3559065934065935e-05, + "loss": 0.4993, + "step": 19249 + }, + { + "epoch": 52.88461538461539, + "grad_norm": 22.91789436340332, + "learning_rate": 2.355769230769231e-05, + "loss": 0.3873, + "step": 19250 + }, + { + "epoch": 52.887362637362635, + "grad_norm": 11.533031463623047, + "learning_rate": 2.3556318681318682e-05, + "loss": 0.214, + "step": 19251 + }, + { + "epoch": 52.89010989010989, + "grad_norm": 4.089189052581787, + "learning_rate": 2.355494505494506e-05, + "loss": 0.0512, + "step": 19252 + }, + { + "epoch": 52.892857142857146, + "grad_norm": 17.371042251586914, + "learning_rate": 2.355357142857143e-05, + "loss": 0.3157, + "step": 19253 + }, + { + "epoch": 52.895604395604394, + "grad_norm": 4.126804828643799, + "learning_rate": 2.3552197802197802e-05, + "loss": 0.0798, + "step": 19254 + }, + { + "epoch": 52.89835164835165, + "grad_norm": 16.779544830322266, + "learning_rate": 2.3550824175824176e-05, + "loss": 0.6448, + "step": 19255 + }, + { + "epoch": 52.9010989010989, + "grad_norm": 9.552488327026367, + "learning_rate": 2.354945054945055e-05, + "loss": 0.1035, + "step": 19256 + }, + { + "epoch": 52.90384615384615, + "grad_norm": 19.439456939697266, + "learning_rate": 2.3548076923076926e-05, + "loss": 0.4391, + "step": 19257 + }, + { + "epoch": 52.90659340659341, + "grad_norm": 9.292641639709473, + "learning_rate": 2.35467032967033e-05, + "loss": 0.2585, + "step": 19258 + }, + { + "epoch": 52.90934065934066, + "grad_norm": 5.1023149490356445, + "learning_rate": 2.3545329670329673e-05, + "loss": 0.0418, + "step": 19259 + }, + { + "epoch": 52.91208791208791, + "grad_norm": 16.783172607421875, + "learning_rate": 2.3543956043956046e-05, + "loss": 0.3872, + "step": 19260 + }, + { + "epoch": 52.91483516483517, + "grad_norm": 13.700491905212402, + "learning_rate": 2.3542582417582416e-05, + "loss": 0.2649, + "step": 19261 + }, + { + "epoch": 52.917582417582416, + "grad_norm": 12.61431884765625, + "learning_rate": 2.3541208791208793e-05, + "loss": 0.3058, + "step": 19262 + }, + { + "epoch": 52.92032967032967, + "grad_norm": 12.689230918884277, + "learning_rate": 2.3539835164835166e-05, + "loss": 0.4523, + "step": 19263 + }, + { + "epoch": 52.92307692307692, + "grad_norm": 9.754825592041016, + "learning_rate": 2.353846153846154e-05, + "loss": 0.2519, + "step": 19264 + }, + { + "epoch": 52.925824175824175, + "grad_norm": 7.003303050994873, + "learning_rate": 2.3537087912087913e-05, + "loss": 0.0676, + "step": 19265 + }, + { + "epoch": 52.92857142857143, + "grad_norm": 14.869396209716797, + "learning_rate": 2.3535714285714287e-05, + "loss": 0.4582, + "step": 19266 + }, + { + "epoch": 52.93131868131868, + "grad_norm": 9.68101978302002, + "learning_rate": 2.3534340659340663e-05, + "loss": 0.2022, + "step": 19267 + }, + { + "epoch": 52.934065934065934, + "grad_norm": 16.617982864379883, + "learning_rate": 2.3532967032967033e-05, + "loss": 0.48, + "step": 19268 + }, + { + "epoch": 52.93681318681319, + "grad_norm": 21.00470542907715, + "learning_rate": 2.3531593406593407e-05, + "loss": 0.3655, + "step": 19269 + }, + { + "epoch": 52.93956043956044, + "grad_norm": 3.427318811416626, + "learning_rate": 2.353021978021978e-05, + "loss": 0.0387, + "step": 19270 + }, + { + "epoch": 52.94230769230769, + "grad_norm": 1.7877027988433838, + "learning_rate": 2.3528846153846154e-05, + "loss": 0.0229, + "step": 19271 + }, + { + "epoch": 52.94505494505494, + "grad_norm": 5.296491622924805, + "learning_rate": 2.352747252747253e-05, + "loss": 0.0738, + "step": 19272 + }, + { + "epoch": 52.9478021978022, + "grad_norm": 9.740961074829102, + "learning_rate": 2.3526098901098904e-05, + "loss": 0.2765, + "step": 19273 + }, + { + "epoch": 52.95054945054945, + "grad_norm": 21.26082420349121, + "learning_rate": 2.3524725274725277e-05, + "loss": 0.4957, + "step": 19274 + }, + { + "epoch": 52.9532967032967, + "grad_norm": 21.060697555541992, + "learning_rate": 2.352335164835165e-05, + "loss": 0.4998, + "step": 19275 + }, + { + "epoch": 52.956043956043956, + "grad_norm": 7.21229887008667, + "learning_rate": 2.352197802197802e-05, + "loss": 0.1522, + "step": 19276 + }, + { + "epoch": 52.95879120879121, + "grad_norm": 10.462352752685547, + "learning_rate": 2.3520604395604398e-05, + "loss": 0.2041, + "step": 19277 + }, + { + "epoch": 52.96153846153846, + "grad_norm": 5.364800453186035, + "learning_rate": 2.351923076923077e-05, + "loss": 0.1381, + "step": 19278 + }, + { + "epoch": 52.964285714285715, + "grad_norm": 9.324991226196289, + "learning_rate": 2.3517857142857144e-05, + "loss": 0.3878, + "step": 19279 + }, + { + "epoch": 52.967032967032964, + "grad_norm": 17.774017333984375, + "learning_rate": 2.3516483516483518e-05, + "loss": 0.3547, + "step": 19280 + }, + { + "epoch": 52.96978021978022, + "grad_norm": 22.038211822509766, + "learning_rate": 2.351510989010989e-05, + "loss": 0.7132, + "step": 19281 + }, + { + "epoch": 52.972527472527474, + "grad_norm": 12.502303123474121, + "learning_rate": 2.3513736263736265e-05, + "loss": 0.2502, + "step": 19282 + }, + { + "epoch": 52.97527472527472, + "grad_norm": 3.2237730026245117, + "learning_rate": 2.3512362637362638e-05, + "loss": 0.05, + "step": 19283 + }, + { + "epoch": 52.97802197802198, + "grad_norm": 8.693912506103516, + "learning_rate": 2.351098901098901e-05, + "loss": 0.1267, + "step": 19284 + }, + { + "epoch": 52.98076923076923, + "grad_norm": 20.295085906982422, + "learning_rate": 2.3509615384615385e-05, + "loss": 0.4447, + "step": 19285 + }, + { + "epoch": 52.98351648351648, + "grad_norm": 16.14800453186035, + "learning_rate": 2.3508241758241758e-05, + "loss": 0.2323, + "step": 19286 + }, + { + "epoch": 52.98626373626374, + "grad_norm": 15.06908130645752, + "learning_rate": 2.350686813186813e-05, + "loss": 0.2678, + "step": 19287 + }, + { + "epoch": 52.98901098901099, + "grad_norm": 5.795920372009277, + "learning_rate": 2.350549450549451e-05, + "loss": 0.0881, + "step": 19288 + }, + { + "epoch": 52.99175824175824, + "grad_norm": 12.208535194396973, + "learning_rate": 2.3504120879120882e-05, + "loss": 0.2847, + "step": 19289 + }, + { + "epoch": 52.994505494505496, + "grad_norm": 16.08794403076172, + "learning_rate": 2.3502747252747255e-05, + "loss": 0.2263, + "step": 19290 + }, + { + "epoch": 52.997252747252745, + "grad_norm": 16.56456184387207, + "learning_rate": 2.3501373626373625e-05, + "loss": 0.209, + "step": 19291 + }, + { + "epoch": 53.0, + "grad_norm": 45.404319763183594, + "learning_rate": 2.35e-05, + "loss": 0.7387, + "step": 19292 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.7644628099173554, + "eval_f1": 0.7750051017861749, + "eval_f1_DuraRiadoRio_64x64": 0.7436823104693141, + "eval_f1_Mole_64x64": 0.7916666666666666, + "eval_f1_Quebrado_64x64": 0.8014981273408239, + "eval_f1_RiadoRio_64x64": 0.6599496221662469, + "eval_f1_RioFechado_64x64": 0.8782287822878229, + "eval_loss": 1.1572383642196655, + "eval_precision": 0.8182225240903896, + "eval_precision_DuraRiadoRio_64x64": 0.7744360902255639, + "eval_precision_Mole_64x64": 0.9895833333333334, + "eval_precision_Quebrado_64x64": 0.8699186991869918, + "eval_precision_RiadoRio_64x64": 0.5346938775510204, + "eval_precision_RioFechado_64x64": 0.9224806201550387, + "eval_recall": 0.7635851659665596, + "eval_recall_DuraRiadoRio_64x64": 0.7152777777777778, + "eval_recall_Mole_64x64": 0.6597222222222222, + "eval_recall_Quebrado_64x64": 0.7430555555555556, + "eval_recall_RiadoRio_64x64": 0.8618421052631579, + "eval_recall_RioFechado_64x64": 0.8380281690140845, + "eval_runtime": 1.7418, + "eval_samples_per_second": 416.813, + "eval_steps_per_second": 26.41, + "step": 19292 + }, + { + "epoch": 53.002747252747255, + "grad_norm": 4.589113235473633, + "learning_rate": 2.3498626373626375e-05, + "loss": 0.1183, + "step": 19293 + }, + { + "epoch": 53.005494505494504, + "grad_norm": 9.890780448913574, + "learning_rate": 2.349725274725275e-05, + "loss": 0.2676, + "step": 19294 + }, + { + "epoch": 53.00824175824176, + "grad_norm": 9.846538543701172, + "learning_rate": 2.3495879120879122e-05, + "loss": 0.0902, + "step": 19295 + }, + { + "epoch": 53.010989010989015, + "grad_norm": 6.66894006729126, + "learning_rate": 2.3494505494505496e-05, + "loss": 0.0847, + "step": 19296 + }, + { + "epoch": 53.01373626373626, + "grad_norm": 15.165534973144531, + "learning_rate": 2.349313186813187e-05, + "loss": 0.3284, + "step": 19297 + }, + { + "epoch": 53.01648351648352, + "grad_norm": 6.73557186126709, + "learning_rate": 2.3491758241758242e-05, + "loss": 0.097, + "step": 19298 + }, + { + "epoch": 53.01923076923077, + "grad_norm": 6.950413227081299, + "learning_rate": 2.3490384615384616e-05, + "loss": 0.1265, + "step": 19299 + }, + { + "epoch": 53.02197802197802, + "grad_norm": 21.088821411132812, + "learning_rate": 2.348901098901099e-05, + "loss": 0.5083, + "step": 19300 + }, + { + "epoch": 53.02472527472528, + "grad_norm": 13.822590827941895, + "learning_rate": 2.3487637362637363e-05, + "loss": 0.4113, + "step": 19301 + }, + { + "epoch": 53.027472527472526, + "grad_norm": 12.36194133758545, + "learning_rate": 2.3486263736263736e-05, + "loss": 0.3623, + "step": 19302 + }, + { + "epoch": 53.03021978021978, + "grad_norm": 17.372482299804688, + "learning_rate": 2.3484890109890113e-05, + "loss": 0.3582, + "step": 19303 + }, + { + "epoch": 53.032967032967036, + "grad_norm": 13.71572208404541, + "learning_rate": 2.3483516483516486e-05, + "loss": 0.1667, + "step": 19304 + }, + { + "epoch": 53.035714285714285, + "grad_norm": 19.18814468383789, + "learning_rate": 2.348214285714286e-05, + "loss": 0.2918, + "step": 19305 + }, + { + "epoch": 53.03846153846154, + "grad_norm": 16.416215896606445, + "learning_rate": 2.348076923076923e-05, + "loss": 0.5596, + "step": 19306 + }, + { + "epoch": 53.04120879120879, + "grad_norm": 19.552806854248047, + "learning_rate": 2.3479395604395603e-05, + "loss": 0.4529, + "step": 19307 + }, + { + "epoch": 53.043956043956044, + "grad_norm": 12.165647506713867, + "learning_rate": 2.347802197802198e-05, + "loss": 0.2148, + "step": 19308 + }, + { + "epoch": 53.0467032967033, + "grad_norm": 1.4295620918273926, + "learning_rate": 2.3476648351648353e-05, + "loss": 0.0244, + "step": 19309 + }, + { + "epoch": 53.04945054945055, + "grad_norm": 21.545894622802734, + "learning_rate": 2.3475274725274727e-05, + "loss": 0.4611, + "step": 19310 + }, + { + "epoch": 53.0521978021978, + "grad_norm": 13.509026527404785, + "learning_rate": 2.34739010989011e-05, + "loss": 0.2902, + "step": 19311 + }, + { + "epoch": 53.05494505494506, + "grad_norm": 15.398454666137695, + "learning_rate": 2.3472527472527474e-05, + "loss": 0.2488, + "step": 19312 + }, + { + "epoch": 53.05769230769231, + "grad_norm": 5.725854396820068, + "learning_rate": 2.3471153846153847e-05, + "loss": 0.0935, + "step": 19313 + }, + { + "epoch": 53.06043956043956, + "grad_norm": 7.459290981292725, + "learning_rate": 2.346978021978022e-05, + "loss": 0.233, + "step": 19314 + }, + { + "epoch": 53.06318681318681, + "grad_norm": 1.62526273727417, + "learning_rate": 2.3468406593406594e-05, + "loss": 0.0208, + "step": 19315 + }, + { + "epoch": 53.065934065934066, + "grad_norm": 8.843682289123535, + "learning_rate": 2.3467032967032967e-05, + "loss": 0.1255, + "step": 19316 + }, + { + "epoch": 53.06868131868132, + "grad_norm": 13.130775451660156, + "learning_rate": 2.346565934065934e-05, + "loss": 0.2429, + "step": 19317 + }, + { + "epoch": 53.07142857142857, + "grad_norm": 15.76876449584961, + "learning_rate": 2.3464285714285717e-05, + "loss": 0.4288, + "step": 19318 + }, + { + "epoch": 53.074175824175825, + "grad_norm": 12.298820495605469, + "learning_rate": 2.346291208791209e-05, + "loss": 0.2437, + "step": 19319 + }, + { + "epoch": 53.07692307692308, + "grad_norm": 9.591141700744629, + "learning_rate": 2.3461538461538464e-05, + "loss": 0.2364, + "step": 19320 + }, + { + "epoch": 53.07967032967033, + "grad_norm": 7.957492351531982, + "learning_rate": 2.3460164835164834e-05, + "loss": 0.1027, + "step": 19321 + }, + { + "epoch": 53.082417582417584, + "grad_norm": 13.919553756713867, + "learning_rate": 2.3458791208791208e-05, + "loss": 0.2701, + "step": 19322 + }, + { + "epoch": 53.08516483516483, + "grad_norm": 26.277530670166016, + "learning_rate": 2.3457417582417584e-05, + "loss": 0.5427, + "step": 19323 + }, + { + "epoch": 53.08791208791209, + "grad_norm": 6.100717067718506, + "learning_rate": 2.3456043956043958e-05, + "loss": 0.0567, + "step": 19324 + }, + { + "epoch": 53.09065934065934, + "grad_norm": 22.2961368560791, + "learning_rate": 2.345467032967033e-05, + "loss": 0.608, + "step": 19325 + }, + { + "epoch": 53.09340659340659, + "grad_norm": 18.499475479125977, + "learning_rate": 2.3453296703296705e-05, + "loss": 0.4202, + "step": 19326 + }, + { + "epoch": 53.09615384615385, + "grad_norm": 10.138314247131348, + "learning_rate": 2.3451923076923078e-05, + "loss": 0.2014, + "step": 19327 + }, + { + "epoch": 53.0989010989011, + "grad_norm": 8.500772476196289, + "learning_rate": 2.345054945054945e-05, + "loss": 0.1377, + "step": 19328 + }, + { + "epoch": 53.10164835164835, + "grad_norm": 13.459638595581055, + "learning_rate": 2.3449175824175825e-05, + "loss": 0.219, + "step": 19329 + }, + { + "epoch": 53.104395604395606, + "grad_norm": 3.4935362339019775, + "learning_rate": 2.3447802197802198e-05, + "loss": 0.0913, + "step": 19330 + }, + { + "epoch": 53.107142857142854, + "grad_norm": 11.306865692138672, + "learning_rate": 2.344642857142857e-05, + "loss": 0.2421, + "step": 19331 + }, + { + "epoch": 53.10989010989011, + "grad_norm": 1.1007428169250488, + "learning_rate": 2.3445054945054945e-05, + "loss": 0.0204, + "step": 19332 + }, + { + "epoch": 53.112637362637365, + "grad_norm": 13.665950775146484, + "learning_rate": 2.3443681318681322e-05, + "loss": 0.1568, + "step": 19333 + }, + { + "epoch": 53.11538461538461, + "grad_norm": 11.84687614440918, + "learning_rate": 2.3442307692307695e-05, + "loss": 0.1211, + "step": 19334 + }, + { + "epoch": 53.11813186813187, + "grad_norm": 19.528175354003906, + "learning_rate": 2.344093406593407e-05, + "loss": 0.5833, + "step": 19335 + }, + { + "epoch": 53.120879120879124, + "grad_norm": 22.011962890625, + "learning_rate": 2.343956043956044e-05, + "loss": 0.5633, + "step": 19336 + }, + { + "epoch": 53.12362637362637, + "grad_norm": 15.404143333435059, + "learning_rate": 2.3438186813186812e-05, + "loss": 0.3886, + "step": 19337 + }, + { + "epoch": 53.12637362637363, + "grad_norm": 12.570035934448242, + "learning_rate": 2.343681318681319e-05, + "loss": 0.1752, + "step": 19338 + }, + { + "epoch": 53.129120879120876, + "grad_norm": 11.15081787109375, + "learning_rate": 2.3435439560439562e-05, + "loss": 0.2596, + "step": 19339 + }, + { + "epoch": 53.13186813186813, + "grad_norm": 16.529712677001953, + "learning_rate": 2.3434065934065936e-05, + "loss": 0.3703, + "step": 19340 + }, + { + "epoch": 53.13461538461539, + "grad_norm": 16.607072830200195, + "learning_rate": 2.343269230769231e-05, + "loss": 0.3623, + "step": 19341 + }, + { + "epoch": 53.137362637362635, + "grad_norm": 8.741385459899902, + "learning_rate": 2.3431318681318683e-05, + "loss": 0.2031, + "step": 19342 + }, + { + "epoch": 53.14010989010989, + "grad_norm": 4.881449222564697, + "learning_rate": 2.3429945054945056e-05, + "loss": 0.127, + "step": 19343 + }, + { + "epoch": 53.142857142857146, + "grad_norm": 8.285268783569336, + "learning_rate": 2.342857142857143e-05, + "loss": 0.107, + "step": 19344 + }, + { + "epoch": 53.145604395604394, + "grad_norm": 13.108002662658691, + "learning_rate": 2.3427197802197803e-05, + "loss": 0.2973, + "step": 19345 + }, + { + "epoch": 53.14835164835165, + "grad_norm": 12.516365051269531, + "learning_rate": 2.3425824175824176e-05, + "loss": 0.3655, + "step": 19346 + }, + { + "epoch": 53.1510989010989, + "grad_norm": 15.5407133102417, + "learning_rate": 2.342445054945055e-05, + "loss": 0.3155, + "step": 19347 + }, + { + "epoch": 53.15384615384615, + "grad_norm": 13.07190990447998, + "learning_rate": 2.3423076923076926e-05, + "loss": 0.269, + "step": 19348 + }, + { + "epoch": 53.15659340659341, + "grad_norm": 4.019609451293945, + "learning_rate": 2.34217032967033e-05, + "loss": 0.0585, + "step": 19349 + }, + { + "epoch": 53.15934065934066, + "grad_norm": 17.7408504486084, + "learning_rate": 2.3420329670329673e-05, + "loss": 0.4538, + "step": 19350 + }, + { + "epoch": 53.16208791208791, + "grad_norm": 2.346402406692505, + "learning_rate": 2.3418956043956043e-05, + "loss": 0.0743, + "step": 19351 + }, + { + "epoch": 53.16483516483517, + "grad_norm": 6.213796615600586, + "learning_rate": 2.3417582417582417e-05, + "loss": 0.0883, + "step": 19352 + }, + { + "epoch": 53.167582417582416, + "grad_norm": 15.087087631225586, + "learning_rate": 2.3416208791208793e-05, + "loss": 0.4398, + "step": 19353 + }, + { + "epoch": 53.17032967032967, + "grad_norm": 17.328815460205078, + "learning_rate": 2.3414835164835167e-05, + "loss": 0.6352, + "step": 19354 + }, + { + "epoch": 53.17307692307692, + "grad_norm": 15.059659004211426, + "learning_rate": 2.341346153846154e-05, + "loss": 0.2118, + "step": 19355 + }, + { + "epoch": 53.175824175824175, + "grad_norm": 8.359420776367188, + "learning_rate": 2.3412087912087914e-05, + "loss": 0.1409, + "step": 19356 + }, + { + "epoch": 53.17857142857143, + "grad_norm": 13.412368774414062, + "learning_rate": 2.3410714285714287e-05, + "loss": 0.2396, + "step": 19357 + }, + { + "epoch": 53.18131868131868, + "grad_norm": 16.057111740112305, + "learning_rate": 2.340934065934066e-05, + "loss": 0.4046, + "step": 19358 + }, + { + "epoch": 53.184065934065934, + "grad_norm": 7.79442834854126, + "learning_rate": 2.3407967032967034e-05, + "loss": 0.2007, + "step": 19359 + }, + { + "epoch": 53.18681318681319, + "grad_norm": 8.963335990905762, + "learning_rate": 2.3406593406593407e-05, + "loss": 0.1892, + "step": 19360 + }, + { + "epoch": 53.18956043956044, + "grad_norm": 1.8251609802246094, + "learning_rate": 2.340521978021978e-05, + "loss": 0.0249, + "step": 19361 + }, + { + "epoch": 53.19230769230769, + "grad_norm": 11.296966552734375, + "learning_rate": 2.3403846153846154e-05, + "loss": 0.2176, + "step": 19362 + }, + { + "epoch": 53.19505494505494, + "grad_norm": 5.6119771003723145, + "learning_rate": 2.340247252747253e-05, + "loss": 0.1297, + "step": 19363 + }, + { + "epoch": 53.1978021978022, + "grad_norm": 5.786808490753174, + "learning_rate": 2.3401098901098904e-05, + "loss": 0.109, + "step": 19364 + }, + { + "epoch": 53.20054945054945, + "grad_norm": 8.467612266540527, + "learning_rate": 2.3399725274725278e-05, + "loss": 0.1203, + "step": 19365 + }, + { + "epoch": 53.2032967032967, + "grad_norm": 8.262134552001953, + "learning_rate": 2.3398351648351648e-05, + "loss": 0.1628, + "step": 19366 + }, + { + "epoch": 53.206043956043956, + "grad_norm": 17.101608276367188, + "learning_rate": 2.339697802197802e-05, + "loss": 0.2922, + "step": 19367 + }, + { + "epoch": 53.20879120879121, + "grad_norm": 15.441473960876465, + "learning_rate": 2.3395604395604398e-05, + "loss": 0.5326, + "step": 19368 + }, + { + "epoch": 53.21153846153846, + "grad_norm": 10.045476913452148, + "learning_rate": 2.339423076923077e-05, + "loss": 0.2334, + "step": 19369 + }, + { + "epoch": 53.214285714285715, + "grad_norm": 2.3552119731903076, + "learning_rate": 2.3392857142857145e-05, + "loss": 0.0276, + "step": 19370 + }, + { + "epoch": 53.217032967032964, + "grad_norm": 19.816211700439453, + "learning_rate": 2.3391483516483518e-05, + "loss": 0.4925, + "step": 19371 + }, + { + "epoch": 53.21978021978022, + "grad_norm": 5.395829677581787, + "learning_rate": 2.339010989010989e-05, + "loss": 0.0951, + "step": 19372 + }, + { + "epoch": 53.222527472527474, + "grad_norm": 7.613293170928955, + "learning_rate": 2.3388736263736265e-05, + "loss": 0.1152, + "step": 19373 + }, + { + "epoch": 53.22527472527472, + "grad_norm": 6.3892292976379395, + "learning_rate": 2.338736263736264e-05, + "loss": 0.0977, + "step": 19374 + }, + { + "epoch": 53.22802197802198, + "grad_norm": 8.929376602172852, + "learning_rate": 2.3385989010989012e-05, + "loss": 0.1792, + "step": 19375 + }, + { + "epoch": 53.23076923076923, + "grad_norm": 15.758131980895996, + "learning_rate": 2.3384615384615385e-05, + "loss": 0.3134, + "step": 19376 + }, + { + "epoch": 53.23351648351648, + "grad_norm": 15.726158142089844, + "learning_rate": 2.338324175824176e-05, + "loss": 0.4575, + "step": 19377 + }, + { + "epoch": 53.23626373626374, + "grad_norm": 8.56363582611084, + "learning_rate": 2.3381868131868135e-05, + "loss": 0.1581, + "step": 19378 + }, + { + "epoch": 53.239010989010985, + "grad_norm": 12.276202201843262, + "learning_rate": 2.338049450549451e-05, + "loss": 0.1663, + "step": 19379 + }, + { + "epoch": 53.24175824175824, + "grad_norm": 24.101181030273438, + "learning_rate": 2.3379120879120882e-05, + "loss": 0.5341, + "step": 19380 + }, + { + "epoch": 53.244505494505496, + "grad_norm": 17.095016479492188, + "learning_rate": 2.3377747252747252e-05, + "loss": 0.2738, + "step": 19381 + }, + { + "epoch": 53.247252747252745, + "grad_norm": 1.8409048318862915, + "learning_rate": 2.3376373626373626e-05, + "loss": 0.0352, + "step": 19382 + }, + { + "epoch": 53.25, + "grad_norm": 8.641990661621094, + "learning_rate": 2.3375000000000002e-05, + "loss": 0.1588, + "step": 19383 + }, + { + "epoch": 53.252747252747255, + "grad_norm": 8.474593162536621, + "learning_rate": 2.3373626373626376e-05, + "loss": 0.1222, + "step": 19384 + }, + { + "epoch": 53.255494505494504, + "grad_norm": 10.903610229492188, + "learning_rate": 2.337225274725275e-05, + "loss": 0.1746, + "step": 19385 + }, + { + "epoch": 53.25824175824176, + "grad_norm": 12.747611045837402, + "learning_rate": 2.3370879120879123e-05, + "loss": 0.3006, + "step": 19386 + }, + { + "epoch": 53.260989010989015, + "grad_norm": 11.636277198791504, + "learning_rate": 2.3369505494505496e-05, + "loss": 0.1396, + "step": 19387 + }, + { + "epoch": 53.26373626373626, + "grad_norm": 23.18965721130371, + "learning_rate": 2.336813186813187e-05, + "loss": 0.4943, + "step": 19388 + }, + { + "epoch": 53.26648351648352, + "grad_norm": 13.039203643798828, + "learning_rate": 2.3366758241758243e-05, + "loss": 0.2288, + "step": 19389 + }, + { + "epoch": 53.26923076923077, + "grad_norm": 12.322382926940918, + "learning_rate": 2.3365384615384616e-05, + "loss": 0.171, + "step": 19390 + }, + { + "epoch": 53.27197802197802, + "grad_norm": 11.071199417114258, + "learning_rate": 2.336401098901099e-05, + "loss": 0.206, + "step": 19391 + }, + { + "epoch": 53.27472527472528, + "grad_norm": 13.774271965026855, + "learning_rate": 2.3362637362637363e-05, + "loss": 0.2483, + "step": 19392 + }, + { + "epoch": 53.277472527472526, + "grad_norm": 21.341928482055664, + "learning_rate": 2.336126373626374e-05, + "loss": 0.5952, + "step": 19393 + }, + { + "epoch": 53.28021978021978, + "grad_norm": 18.970430374145508, + "learning_rate": 2.3359890109890113e-05, + "loss": 0.4584, + "step": 19394 + }, + { + "epoch": 53.282967032967036, + "grad_norm": 16.6019229888916, + "learning_rate": 2.3358516483516483e-05, + "loss": 0.3896, + "step": 19395 + }, + { + "epoch": 53.285714285714285, + "grad_norm": 25.123537063598633, + "learning_rate": 2.3357142857142857e-05, + "loss": 0.9128, + "step": 19396 + }, + { + "epoch": 53.28846153846154, + "grad_norm": 6.165321350097656, + "learning_rate": 2.335576923076923e-05, + "loss": 0.0659, + "step": 19397 + }, + { + "epoch": 53.29120879120879, + "grad_norm": 14.833915710449219, + "learning_rate": 2.3354395604395603e-05, + "loss": 0.2444, + "step": 19398 + }, + { + "epoch": 53.293956043956044, + "grad_norm": 16.604896545410156, + "learning_rate": 2.335302197802198e-05, + "loss": 0.3486, + "step": 19399 + }, + { + "epoch": 53.2967032967033, + "grad_norm": 11.452661514282227, + "learning_rate": 2.3351648351648354e-05, + "loss": 0.1845, + "step": 19400 + }, + { + "epoch": 53.29945054945055, + "grad_norm": 15.431777000427246, + "learning_rate": 2.3350274725274727e-05, + "loss": 0.2936, + "step": 19401 + }, + { + "epoch": 53.3021978021978, + "grad_norm": 7.025035381317139, + "learning_rate": 2.33489010989011e-05, + "loss": 0.1223, + "step": 19402 + }, + { + "epoch": 53.30494505494506, + "grad_norm": 11.927851676940918, + "learning_rate": 2.3347527472527474e-05, + "loss": 0.2571, + "step": 19403 + }, + { + "epoch": 53.30769230769231, + "grad_norm": 19.038633346557617, + "learning_rate": 2.3346153846153847e-05, + "loss": 0.6493, + "step": 19404 + }, + { + "epoch": 53.31043956043956, + "grad_norm": 19.005050659179688, + "learning_rate": 2.334478021978022e-05, + "loss": 0.6689, + "step": 19405 + }, + { + "epoch": 53.31318681318681, + "grad_norm": 22.98634147644043, + "learning_rate": 2.3343406593406594e-05, + "loss": 0.4346, + "step": 19406 + }, + { + "epoch": 53.315934065934066, + "grad_norm": 22.134546279907227, + "learning_rate": 2.3342032967032968e-05, + "loss": 0.459, + "step": 19407 + }, + { + "epoch": 53.31868131868132, + "grad_norm": 16.859882354736328, + "learning_rate": 2.334065934065934e-05, + "loss": 0.4093, + "step": 19408 + }, + { + "epoch": 53.32142857142857, + "grad_norm": 7.3740234375, + "learning_rate": 2.3339285714285718e-05, + "loss": 0.0858, + "step": 19409 + }, + { + "epoch": 53.324175824175825, + "grad_norm": 11.736681938171387, + "learning_rate": 2.3337912087912088e-05, + "loss": 0.131, + "step": 19410 + }, + { + "epoch": 53.32692307692308, + "grad_norm": 5.607022762298584, + "learning_rate": 2.333653846153846e-05, + "loss": 0.0516, + "step": 19411 + }, + { + "epoch": 53.32967032967033, + "grad_norm": 12.203369140625, + "learning_rate": 2.3335164835164835e-05, + "loss": 0.404, + "step": 19412 + }, + { + "epoch": 53.332417582417584, + "grad_norm": 14.855114936828613, + "learning_rate": 2.3333791208791208e-05, + "loss": 0.3383, + "step": 19413 + }, + { + "epoch": 53.33516483516483, + "grad_norm": 19.19879913330078, + "learning_rate": 2.3332417582417585e-05, + "loss": 0.7319, + "step": 19414 + }, + { + "epoch": 53.33791208791209, + "grad_norm": 12.461050987243652, + "learning_rate": 2.3331043956043958e-05, + "loss": 0.3513, + "step": 19415 + }, + { + "epoch": 53.34065934065934, + "grad_norm": 14.931273460388184, + "learning_rate": 2.332967032967033e-05, + "loss": 0.2948, + "step": 19416 + }, + { + "epoch": 53.34340659340659, + "grad_norm": 10.892258644104004, + "learning_rate": 2.3328296703296705e-05, + "loss": 0.2345, + "step": 19417 + }, + { + "epoch": 53.34615384615385, + "grad_norm": 7.317004203796387, + "learning_rate": 2.332692307692308e-05, + "loss": 0.1386, + "step": 19418 + }, + { + "epoch": 53.3489010989011, + "grad_norm": 12.06054973602295, + "learning_rate": 2.3325549450549452e-05, + "loss": 0.3618, + "step": 19419 + }, + { + "epoch": 53.35164835164835, + "grad_norm": 10.541081428527832, + "learning_rate": 2.3324175824175825e-05, + "loss": 0.0686, + "step": 19420 + }, + { + "epoch": 53.354395604395606, + "grad_norm": 8.436896324157715, + "learning_rate": 2.33228021978022e-05, + "loss": 0.3115, + "step": 19421 + }, + { + "epoch": 53.357142857142854, + "grad_norm": 6.838674545288086, + "learning_rate": 2.3321428571428572e-05, + "loss": 0.0922, + "step": 19422 + }, + { + "epoch": 53.35989010989011, + "grad_norm": 21.098848342895508, + "learning_rate": 2.3320054945054945e-05, + "loss": 0.2219, + "step": 19423 + }, + { + "epoch": 53.362637362637365, + "grad_norm": 10.58763599395752, + "learning_rate": 2.3318681318681322e-05, + "loss": 0.2596, + "step": 19424 + }, + { + "epoch": 53.36538461538461, + "grad_norm": 20.554540634155273, + "learning_rate": 2.3317307692307692e-05, + "loss": 0.3165, + "step": 19425 + }, + { + "epoch": 53.36813186813187, + "grad_norm": 2.8978066444396973, + "learning_rate": 2.3315934065934066e-05, + "loss": 0.0536, + "step": 19426 + }, + { + "epoch": 53.370879120879124, + "grad_norm": 9.718721389770508, + "learning_rate": 2.331456043956044e-05, + "loss": 0.1741, + "step": 19427 + }, + { + "epoch": 53.37362637362637, + "grad_norm": 4.236981391906738, + "learning_rate": 2.3313186813186812e-05, + "loss": 0.0689, + "step": 19428 + }, + { + "epoch": 53.37637362637363, + "grad_norm": 28.096393585205078, + "learning_rate": 2.331181318681319e-05, + "loss": 0.8368, + "step": 19429 + }, + { + "epoch": 53.379120879120876, + "grad_norm": 13.711629867553711, + "learning_rate": 2.3310439560439563e-05, + "loss": 0.156, + "step": 19430 + }, + { + "epoch": 53.38186813186813, + "grad_norm": 22.23424530029297, + "learning_rate": 2.3309065934065936e-05, + "loss": 0.3482, + "step": 19431 + }, + { + "epoch": 53.38461538461539, + "grad_norm": 11.848043441772461, + "learning_rate": 2.330769230769231e-05, + "loss": 0.2413, + "step": 19432 + }, + { + "epoch": 53.387362637362635, + "grad_norm": 4.99733304977417, + "learning_rate": 2.3306318681318683e-05, + "loss": 0.0784, + "step": 19433 + }, + { + "epoch": 53.39010989010989, + "grad_norm": 5.691721439361572, + "learning_rate": 2.3304945054945056e-05, + "loss": 0.0845, + "step": 19434 + }, + { + "epoch": 53.392857142857146, + "grad_norm": 12.512608528137207, + "learning_rate": 2.330357142857143e-05, + "loss": 0.2333, + "step": 19435 + }, + { + "epoch": 53.395604395604394, + "grad_norm": 17.114295959472656, + "learning_rate": 2.3302197802197803e-05, + "loss": 0.2758, + "step": 19436 + }, + { + "epoch": 53.39835164835165, + "grad_norm": 12.056940078735352, + "learning_rate": 2.3300824175824176e-05, + "loss": 0.1792, + "step": 19437 + }, + { + "epoch": 53.4010989010989, + "grad_norm": 20.6877498626709, + "learning_rate": 2.329945054945055e-05, + "loss": 0.5815, + "step": 19438 + }, + { + "epoch": 53.40384615384615, + "grad_norm": 16.107730865478516, + "learning_rate": 2.3298076923076927e-05, + "loss": 0.3376, + "step": 19439 + }, + { + "epoch": 53.40659340659341, + "grad_norm": 12.66400146484375, + "learning_rate": 2.3296703296703297e-05, + "loss": 0.1985, + "step": 19440 + }, + { + "epoch": 53.40934065934066, + "grad_norm": 13.242690086364746, + "learning_rate": 2.329532967032967e-05, + "loss": 0.1533, + "step": 19441 + }, + { + "epoch": 53.41208791208791, + "grad_norm": 17.777992248535156, + "learning_rate": 2.3293956043956044e-05, + "loss": 0.4511, + "step": 19442 + }, + { + "epoch": 53.41483516483517, + "grad_norm": 20.708724975585938, + "learning_rate": 2.3292582417582417e-05, + "loss": 0.526, + "step": 19443 + }, + { + "epoch": 53.417582417582416, + "grad_norm": 7.776358127593994, + "learning_rate": 2.3291208791208794e-05, + "loss": 0.0966, + "step": 19444 + }, + { + "epoch": 53.42032967032967, + "grad_norm": 9.484382629394531, + "learning_rate": 2.3289835164835167e-05, + "loss": 0.156, + "step": 19445 + }, + { + "epoch": 53.42307692307692, + "grad_norm": 11.561200141906738, + "learning_rate": 2.328846153846154e-05, + "loss": 0.262, + "step": 19446 + }, + { + "epoch": 53.425824175824175, + "grad_norm": 15.390542030334473, + "learning_rate": 2.3287087912087914e-05, + "loss": 0.4093, + "step": 19447 + }, + { + "epoch": 53.42857142857143, + "grad_norm": 10.46164321899414, + "learning_rate": 2.3285714285714287e-05, + "loss": 0.1776, + "step": 19448 + }, + { + "epoch": 53.43131868131868, + "grad_norm": 10.595138549804688, + "learning_rate": 2.328434065934066e-05, + "loss": 0.2904, + "step": 19449 + }, + { + "epoch": 53.434065934065934, + "grad_norm": 12.705229759216309, + "learning_rate": 2.3282967032967034e-05, + "loss": 0.3501, + "step": 19450 + }, + { + "epoch": 53.43681318681319, + "grad_norm": 16.30388832092285, + "learning_rate": 2.3281593406593408e-05, + "loss": 0.3145, + "step": 19451 + }, + { + "epoch": 53.43956043956044, + "grad_norm": 13.001652717590332, + "learning_rate": 2.328021978021978e-05, + "loss": 0.3041, + "step": 19452 + }, + { + "epoch": 53.44230769230769, + "grad_norm": 11.34698486328125, + "learning_rate": 2.3278846153846154e-05, + "loss": 0.1563, + "step": 19453 + }, + { + "epoch": 53.44505494505494, + "grad_norm": 16.000993728637695, + "learning_rate": 2.327747252747253e-05, + "loss": 0.2586, + "step": 19454 + }, + { + "epoch": 53.4478021978022, + "grad_norm": 6.82144832611084, + "learning_rate": 2.32760989010989e-05, + "loss": 0.0811, + "step": 19455 + }, + { + "epoch": 53.45054945054945, + "grad_norm": 4.126291751861572, + "learning_rate": 2.3274725274725275e-05, + "loss": 0.0493, + "step": 19456 + }, + { + "epoch": 53.4532967032967, + "grad_norm": 5.833370208740234, + "learning_rate": 2.3273351648351648e-05, + "loss": 0.0744, + "step": 19457 + }, + { + "epoch": 53.456043956043956, + "grad_norm": 3.301378011703491, + "learning_rate": 2.327197802197802e-05, + "loss": 0.0422, + "step": 19458 + }, + { + "epoch": 53.45879120879121, + "grad_norm": 5.347893238067627, + "learning_rate": 2.3270604395604398e-05, + "loss": 0.1096, + "step": 19459 + }, + { + "epoch": 53.46153846153846, + "grad_norm": 17.83223533630371, + "learning_rate": 2.326923076923077e-05, + "loss": 0.4212, + "step": 19460 + }, + { + "epoch": 53.464285714285715, + "grad_norm": 12.790715217590332, + "learning_rate": 2.3267857142857145e-05, + "loss": 0.2547, + "step": 19461 + }, + { + "epoch": 53.467032967032964, + "grad_norm": 4.1875996589660645, + "learning_rate": 2.326648351648352e-05, + "loss": 0.0576, + "step": 19462 + }, + { + "epoch": 53.46978021978022, + "grad_norm": 13.342825889587402, + "learning_rate": 2.3265109890109892e-05, + "loss": 0.2888, + "step": 19463 + }, + { + "epoch": 53.472527472527474, + "grad_norm": 13.254536628723145, + "learning_rate": 2.3263736263736265e-05, + "loss": 0.277, + "step": 19464 + }, + { + "epoch": 53.47527472527472, + "grad_norm": 12.556955337524414, + "learning_rate": 2.326236263736264e-05, + "loss": 0.1374, + "step": 19465 + }, + { + "epoch": 53.47802197802198, + "grad_norm": 6.045800685882568, + "learning_rate": 2.3260989010989012e-05, + "loss": 0.1357, + "step": 19466 + }, + { + "epoch": 53.48076923076923, + "grad_norm": 19.455810546875, + "learning_rate": 2.3259615384615385e-05, + "loss": 0.4398, + "step": 19467 + }, + { + "epoch": 53.48351648351648, + "grad_norm": 9.609800338745117, + "learning_rate": 2.325824175824176e-05, + "loss": 0.1327, + "step": 19468 + }, + { + "epoch": 53.48626373626374, + "grad_norm": 8.97556209564209, + "learning_rate": 2.3256868131868136e-05, + "loss": 0.1726, + "step": 19469 + }, + { + "epoch": 53.489010989010985, + "grad_norm": 9.53205394744873, + "learning_rate": 2.3255494505494506e-05, + "loss": 0.2578, + "step": 19470 + }, + { + "epoch": 53.49175824175824, + "grad_norm": 13.435023307800293, + "learning_rate": 2.325412087912088e-05, + "loss": 0.2395, + "step": 19471 + }, + { + "epoch": 53.494505494505496, + "grad_norm": 18.63973617553711, + "learning_rate": 2.3252747252747253e-05, + "loss": 0.7081, + "step": 19472 + }, + { + "epoch": 53.497252747252745, + "grad_norm": 11.18991470336914, + "learning_rate": 2.3251373626373626e-05, + "loss": 0.1584, + "step": 19473 + }, + { + "epoch": 53.5, + "grad_norm": 10.207634925842285, + "learning_rate": 2.3250000000000003e-05, + "loss": 0.2258, + "step": 19474 + }, + { + "epoch": 53.502747252747255, + "grad_norm": 12.150629043579102, + "learning_rate": 2.3248626373626376e-05, + "loss": 0.1448, + "step": 19475 + }, + { + "epoch": 53.505494505494504, + "grad_norm": 14.835129737854004, + "learning_rate": 2.324725274725275e-05, + "loss": 0.2306, + "step": 19476 + }, + { + "epoch": 53.50824175824176, + "grad_norm": 4.8513503074646, + "learning_rate": 2.3245879120879123e-05, + "loss": 0.0608, + "step": 19477 + }, + { + "epoch": 53.51098901098901, + "grad_norm": 14.184273719787598, + "learning_rate": 2.3244505494505493e-05, + "loss": 0.3616, + "step": 19478 + }, + { + "epoch": 53.51373626373626, + "grad_norm": 20.510475158691406, + "learning_rate": 2.324313186813187e-05, + "loss": 0.4592, + "step": 19479 + }, + { + "epoch": 53.51648351648352, + "grad_norm": 8.63718032836914, + "learning_rate": 2.3241758241758243e-05, + "loss": 0.1541, + "step": 19480 + }, + { + "epoch": 53.51923076923077, + "grad_norm": 14.573838233947754, + "learning_rate": 2.3240384615384617e-05, + "loss": 0.3073, + "step": 19481 + }, + { + "epoch": 53.52197802197802, + "grad_norm": 11.503837585449219, + "learning_rate": 2.323901098901099e-05, + "loss": 0.3544, + "step": 19482 + }, + { + "epoch": 53.52472527472528, + "grad_norm": 4.451159477233887, + "learning_rate": 2.3237637362637363e-05, + "loss": 0.1045, + "step": 19483 + }, + { + "epoch": 53.527472527472526, + "grad_norm": 12.934407234191895, + "learning_rate": 2.323626373626374e-05, + "loss": 0.2402, + "step": 19484 + }, + { + "epoch": 53.53021978021978, + "grad_norm": 8.74117374420166, + "learning_rate": 2.323489010989011e-05, + "loss": 0.1058, + "step": 19485 + }, + { + "epoch": 53.532967032967036, + "grad_norm": 13.347821235656738, + "learning_rate": 2.3233516483516484e-05, + "loss": 0.3536, + "step": 19486 + }, + { + "epoch": 53.535714285714285, + "grad_norm": 10.521247863769531, + "learning_rate": 2.3232142857142857e-05, + "loss": 0.1626, + "step": 19487 + }, + { + "epoch": 53.53846153846154, + "grad_norm": 7.709282875061035, + "learning_rate": 2.323076923076923e-05, + "loss": 0.0933, + "step": 19488 + }, + { + "epoch": 53.54120879120879, + "grad_norm": 16.641138076782227, + "learning_rate": 2.3229395604395607e-05, + "loss": 0.4521, + "step": 19489 + }, + { + "epoch": 53.543956043956044, + "grad_norm": 8.71290397644043, + "learning_rate": 2.322802197802198e-05, + "loss": 0.1917, + "step": 19490 + }, + { + "epoch": 53.5467032967033, + "grad_norm": 7.729944705963135, + "learning_rate": 2.3226648351648354e-05, + "loss": 0.1848, + "step": 19491 + }, + { + "epoch": 53.54945054945055, + "grad_norm": 12.99168872833252, + "learning_rate": 2.3225274725274727e-05, + "loss": 0.1679, + "step": 19492 + }, + { + "epoch": 53.5521978021978, + "grad_norm": 20.4588565826416, + "learning_rate": 2.3223901098901097e-05, + "loss": 0.4768, + "step": 19493 + }, + { + "epoch": 53.55494505494506, + "grad_norm": 11.926543235778809, + "learning_rate": 2.3222527472527474e-05, + "loss": 0.243, + "step": 19494 + }, + { + "epoch": 53.55769230769231, + "grad_norm": 11.240939140319824, + "learning_rate": 2.3221153846153848e-05, + "loss": 0.1684, + "step": 19495 + }, + { + "epoch": 53.56043956043956, + "grad_norm": 14.897553443908691, + "learning_rate": 2.321978021978022e-05, + "loss": 0.4369, + "step": 19496 + }, + { + "epoch": 53.56318681318681, + "grad_norm": 18.584928512573242, + "learning_rate": 2.3218406593406594e-05, + "loss": 0.8415, + "step": 19497 + }, + { + "epoch": 53.565934065934066, + "grad_norm": 16.64015007019043, + "learning_rate": 2.3217032967032968e-05, + "loss": 0.4456, + "step": 19498 + }, + { + "epoch": 53.56868131868132, + "grad_norm": 9.617024421691895, + "learning_rate": 2.3215659340659345e-05, + "loss": 0.1455, + "step": 19499 + }, + { + "epoch": 53.57142857142857, + "grad_norm": 6.309029579162598, + "learning_rate": 2.3214285714285715e-05, + "loss": 0.1296, + "step": 19500 + }, + { + "epoch": 53.574175824175825, + "grad_norm": 19.93242645263672, + "learning_rate": 2.3212912087912088e-05, + "loss": 0.2736, + "step": 19501 + }, + { + "epoch": 53.57692307692308, + "grad_norm": 14.676382064819336, + "learning_rate": 2.321153846153846e-05, + "loss": 0.2589, + "step": 19502 + }, + { + "epoch": 53.57967032967033, + "grad_norm": 7.085570335388184, + "learning_rate": 2.3210164835164835e-05, + "loss": 0.145, + "step": 19503 + }, + { + "epoch": 53.582417582417584, + "grad_norm": 5.645542621612549, + "learning_rate": 2.320879120879121e-05, + "loss": 0.0959, + "step": 19504 + }, + { + "epoch": 53.58516483516483, + "grad_norm": 21.59342384338379, + "learning_rate": 2.3207417582417585e-05, + "loss": 0.6446, + "step": 19505 + }, + { + "epoch": 53.58791208791209, + "grad_norm": 15.956955909729004, + "learning_rate": 2.320604395604396e-05, + "loss": 0.5002, + "step": 19506 + }, + { + "epoch": 53.59065934065934, + "grad_norm": 12.874652862548828, + "learning_rate": 2.3204670329670332e-05, + "loss": 0.2772, + "step": 19507 + }, + { + "epoch": 53.59340659340659, + "grad_norm": 12.5469331741333, + "learning_rate": 2.3203296703296702e-05, + "loss": 0.1814, + "step": 19508 + }, + { + "epoch": 53.59615384615385, + "grad_norm": 16.87381362915039, + "learning_rate": 2.3201923076923075e-05, + "loss": 0.57, + "step": 19509 + }, + { + "epoch": 53.5989010989011, + "grad_norm": 5.420272350311279, + "learning_rate": 2.3200549450549452e-05, + "loss": 0.1001, + "step": 19510 + }, + { + "epoch": 53.60164835164835, + "grad_norm": 22.23198699951172, + "learning_rate": 2.3199175824175826e-05, + "loss": 0.7641, + "step": 19511 + }, + { + "epoch": 53.604395604395606, + "grad_norm": 12.169456481933594, + "learning_rate": 2.31978021978022e-05, + "loss": 0.3179, + "step": 19512 + }, + { + "epoch": 53.607142857142854, + "grad_norm": 10.364243507385254, + "learning_rate": 2.3196428571428572e-05, + "loss": 0.1466, + "step": 19513 + }, + { + "epoch": 53.60989010989011, + "grad_norm": 13.029598236083984, + "learning_rate": 2.3195054945054946e-05, + "loss": 0.3229, + "step": 19514 + }, + { + "epoch": 53.612637362637365, + "grad_norm": 7.303340435028076, + "learning_rate": 2.319368131868132e-05, + "loss": 0.0806, + "step": 19515 + }, + { + "epoch": 53.61538461538461, + "grad_norm": 17.422399520874023, + "learning_rate": 2.3192307692307693e-05, + "loss": 0.316, + "step": 19516 + }, + { + "epoch": 53.61813186813187, + "grad_norm": 7.7850751876831055, + "learning_rate": 2.3190934065934066e-05, + "loss": 0.1442, + "step": 19517 + }, + { + "epoch": 53.620879120879124, + "grad_norm": 10.880043029785156, + "learning_rate": 2.318956043956044e-05, + "loss": 0.2186, + "step": 19518 + }, + { + "epoch": 53.62362637362637, + "grad_norm": 17.798091888427734, + "learning_rate": 2.3188186813186813e-05, + "loss": 0.4571, + "step": 19519 + }, + { + "epoch": 53.62637362637363, + "grad_norm": 18.4384708404541, + "learning_rate": 2.318681318681319e-05, + "loss": 0.6282, + "step": 19520 + }, + { + "epoch": 53.629120879120876, + "grad_norm": 6.555112361907959, + "learning_rate": 2.3185439560439563e-05, + "loss": 0.1254, + "step": 19521 + }, + { + "epoch": 53.63186813186813, + "grad_norm": 14.55063533782959, + "learning_rate": 2.3184065934065936e-05, + "loss": 0.3578, + "step": 19522 + }, + { + "epoch": 53.63461538461539, + "grad_norm": 8.174334526062012, + "learning_rate": 2.3182692307692306e-05, + "loss": 0.1899, + "step": 19523 + }, + { + "epoch": 53.637362637362635, + "grad_norm": 11.210183143615723, + "learning_rate": 2.318131868131868e-05, + "loss": 0.2358, + "step": 19524 + }, + { + "epoch": 53.64010989010989, + "grad_norm": 6.613368988037109, + "learning_rate": 2.3179945054945057e-05, + "loss": 0.0742, + "step": 19525 + }, + { + "epoch": 53.642857142857146, + "grad_norm": 5.164886474609375, + "learning_rate": 2.317857142857143e-05, + "loss": 0.0755, + "step": 19526 + }, + { + "epoch": 53.645604395604394, + "grad_norm": 10.556096076965332, + "learning_rate": 2.3177197802197803e-05, + "loss": 0.2257, + "step": 19527 + }, + { + "epoch": 53.64835164835165, + "grad_norm": 9.749178886413574, + "learning_rate": 2.3175824175824177e-05, + "loss": 0.1541, + "step": 19528 + }, + { + "epoch": 53.6510989010989, + "grad_norm": 16.324247360229492, + "learning_rate": 2.317445054945055e-05, + "loss": 0.3781, + "step": 19529 + }, + { + "epoch": 53.65384615384615, + "grad_norm": 12.66707706451416, + "learning_rate": 2.3173076923076924e-05, + "loss": 0.1607, + "step": 19530 + }, + { + "epoch": 53.65659340659341, + "grad_norm": 16.121225357055664, + "learning_rate": 2.3171703296703297e-05, + "loss": 0.3858, + "step": 19531 + }, + { + "epoch": 53.65934065934066, + "grad_norm": 4.673761367797852, + "learning_rate": 2.317032967032967e-05, + "loss": 0.0406, + "step": 19532 + }, + { + "epoch": 53.66208791208791, + "grad_norm": 14.718804359436035, + "learning_rate": 2.3168956043956044e-05, + "loss": 0.2513, + "step": 19533 + }, + { + "epoch": 53.66483516483517, + "grad_norm": 14.482436180114746, + "learning_rate": 2.3167582417582417e-05, + "loss": 0.2351, + "step": 19534 + }, + { + "epoch": 53.667582417582416, + "grad_norm": 7.349738121032715, + "learning_rate": 2.3166208791208794e-05, + "loss": 0.1753, + "step": 19535 + }, + { + "epoch": 53.67032967032967, + "grad_norm": 13.385551452636719, + "learning_rate": 2.3164835164835167e-05, + "loss": 0.3232, + "step": 19536 + }, + { + "epoch": 53.67307692307692, + "grad_norm": 11.47114372253418, + "learning_rate": 2.316346153846154e-05, + "loss": 0.2767, + "step": 19537 + }, + { + "epoch": 53.675824175824175, + "grad_norm": 5.450510025024414, + "learning_rate": 2.316208791208791e-05, + "loss": 0.0663, + "step": 19538 + }, + { + "epoch": 53.67857142857143, + "grad_norm": 6.611950874328613, + "learning_rate": 2.3160714285714284e-05, + "loss": 0.1112, + "step": 19539 + }, + { + "epoch": 53.68131868131868, + "grad_norm": 21.147184371948242, + "learning_rate": 2.315934065934066e-05, + "loss": 0.4944, + "step": 19540 + }, + { + "epoch": 53.684065934065934, + "grad_norm": 16.812530517578125, + "learning_rate": 2.3157967032967035e-05, + "loss": 0.6221, + "step": 19541 + }, + { + "epoch": 53.68681318681319, + "grad_norm": 12.783252716064453, + "learning_rate": 2.3156593406593408e-05, + "loss": 0.2425, + "step": 19542 + }, + { + "epoch": 53.68956043956044, + "grad_norm": 11.821377754211426, + "learning_rate": 2.315521978021978e-05, + "loss": 0.2382, + "step": 19543 + }, + { + "epoch": 53.69230769230769, + "grad_norm": 13.70172119140625, + "learning_rate": 2.3153846153846155e-05, + "loss": 0.348, + "step": 19544 + }, + { + "epoch": 53.69505494505494, + "grad_norm": 14.265668869018555, + "learning_rate": 2.3152472527472528e-05, + "loss": 0.294, + "step": 19545 + }, + { + "epoch": 53.6978021978022, + "grad_norm": 7.728470325469971, + "learning_rate": 2.31510989010989e-05, + "loss": 0.1947, + "step": 19546 + }, + { + "epoch": 53.70054945054945, + "grad_norm": 7.120547771453857, + "learning_rate": 2.3149725274725275e-05, + "loss": 0.1878, + "step": 19547 + }, + { + "epoch": 53.7032967032967, + "grad_norm": 4.768463611602783, + "learning_rate": 2.314835164835165e-05, + "loss": 0.0778, + "step": 19548 + }, + { + "epoch": 53.706043956043956, + "grad_norm": 14.502087593078613, + "learning_rate": 2.3146978021978022e-05, + "loss": 0.3169, + "step": 19549 + }, + { + "epoch": 53.70879120879121, + "grad_norm": 15.529905319213867, + "learning_rate": 2.31456043956044e-05, + "loss": 0.3368, + "step": 19550 + }, + { + "epoch": 53.71153846153846, + "grad_norm": 9.2931489944458, + "learning_rate": 2.3144230769230772e-05, + "loss": 0.1551, + "step": 19551 + }, + { + "epoch": 53.714285714285715, + "grad_norm": 5.300297260284424, + "learning_rate": 2.3142857142857145e-05, + "loss": 0.0741, + "step": 19552 + }, + { + "epoch": 53.717032967032964, + "grad_norm": 14.667277336120605, + "learning_rate": 2.3141483516483515e-05, + "loss": 0.3065, + "step": 19553 + }, + { + "epoch": 53.71978021978022, + "grad_norm": 14.5742769241333, + "learning_rate": 2.314010989010989e-05, + "loss": 0.3079, + "step": 19554 + }, + { + "epoch": 53.722527472527474, + "grad_norm": 2.957068920135498, + "learning_rate": 2.3138736263736266e-05, + "loss": 0.0493, + "step": 19555 + }, + { + "epoch": 53.72527472527472, + "grad_norm": 10.219381332397461, + "learning_rate": 2.313736263736264e-05, + "loss": 0.2336, + "step": 19556 + }, + { + "epoch": 53.72802197802198, + "grad_norm": 14.895413398742676, + "learning_rate": 2.3135989010989012e-05, + "loss": 0.2387, + "step": 19557 + }, + { + "epoch": 53.73076923076923, + "grad_norm": 13.39799690246582, + "learning_rate": 2.3134615384615386e-05, + "loss": 0.485, + "step": 19558 + }, + { + "epoch": 53.73351648351648, + "grad_norm": 11.03882884979248, + "learning_rate": 2.313324175824176e-05, + "loss": 0.1722, + "step": 19559 + }, + { + "epoch": 53.73626373626374, + "grad_norm": 6.021623134613037, + "learning_rate": 2.3131868131868133e-05, + "loss": 0.0687, + "step": 19560 + }, + { + "epoch": 53.73901098901099, + "grad_norm": 9.885271072387695, + "learning_rate": 2.3130494505494506e-05, + "loss": 0.1723, + "step": 19561 + }, + { + "epoch": 53.74175824175824, + "grad_norm": 19.749242782592773, + "learning_rate": 2.312912087912088e-05, + "loss": 0.5298, + "step": 19562 + }, + { + "epoch": 53.744505494505496, + "grad_norm": 12.250164985656738, + "learning_rate": 2.3127747252747253e-05, + "loss": 0.2749, + "step": 19563 + }, + { + "epoch": 53.747252747252745, + "grad_norm": 3.313488721847534, + "learning_rate": 2.3126373626373626e-05, + "loss": 0.0393, + "step": 19564 + }, + { + "epoch": 53.75, + "grad_norm": 14.371302604675293, + "learning_rate": 2.3125000000000003e-05, + "loss": 0.3553, + "step": 19565 + }, + { + "epoch": 53.752747252747255, + "grad_norm": 12.464251518249512, + "learning_rate": 2.3123626373626376e-05, + "loss": 0.2467, + "step": 19566 + }, + { + "epoch": 53.755494505494504, + "grad_norm": 14.31704330444336, + "learning_rate": 2.312225274725275e-05, + "loss": 0.5668, + "step": 19567 + }, + { + "epoch": 53.75824175824176, + "grad_norm": 14.587592124938965, + "learning_rate": 2.312087912087912e-05, + "loss": 0.1057, + "step": 19568 + }, + { + "epoch": 53.76098901098901, + "grad_norm": 11.30687427520752, + "learning_rate": 2.3119505494505493e-05, + "loss": 0.3187, + "step": 19569 + }, + { + "epoch": 53.76373626373626, + "grad_norm": 7.243239879608154, + "learning_rate": 2.311813186813187e-05, + "loss": 0.1459, + "step": 19570 + }, + { + "epoch": 53.76648351648352, + "grad_norm": 7.9678568840026855, + "learning_rate": 2.3116758241758243e-05, + "loss": 0.1971, + "step": 19571 + }, + { + "epoch": 53.76923076923077, + "grad_norm": 12.309248924255371, + "learning_rate": 2.3115384615384617e-05, + "loss": 0.1893, + "step": 19572 + }, + { + "epoch": 53.77197802197802, + "grad_norm": 14.76831340789795, + "learning_rate": 2.311401098901099e-05, + "loss": 0.3102, + "step": 19573 + }, + { + "epoch": 53.77472527472528, + "grad_norm": 3.8332483768463135, + "learning_rate": 2.3112637362637364e-05, + "loss": 0.0482, + "step": 19574 + }, + { + "epoch": 53.777472527472526, + "grad_norm": 24.618921279907227, + "learning_rate": 2.3111263736263737e-05, + "loss": 0.4718, + "step": 19575 + }, + { + "epoch": 53.78021978021978, + "grad_norm": 21.4176025390625, + "learning_rate": 2.310989010989011e-05, + "loss": 0.418, + "step": 19576 + }, + { + "epoch": 53.782967032967036, + "grad_norm": 9.888481140136719, + "learning_rate": 2.3108516483516484e-05, + "loss": 0.242, + "step": 19577 + }, + { + "epoch": 53.785714285714285, + "grad_norm": 16.16104507446289, + "learning_rate": 2.3107142857142857e-05, + "loss": 0.406, + "step": 19578 + }, + { + "epoch": 53.78846153846154, + "grad_norm": 20.911945343017578, + "learning_rate": 2.310576923076923e-05, + "loss": 0.523, + "step": 19579 + }, + { + "epoch": 53.79120879120879, + "grad_norm": 21.360538482666016, + "learning_rate": 2.3104395604395608e-05, + "loss": 0.6367, + "step": 19580 + }, + { + "epoch": 53.793956043956044, + "grad_norm": 8.12405014038086, + "learning_rate": 2.310302197802198e-05, + "loss": 0.2512, + "step": 19581 + }, + { + "epoch": 53.7967032967033, + "grad_norm": 9.567903518676758, + "learning_rate": 2.3101648351648354e-05, + "loss": 0.1813, + "step": 19582 + }, + { + "epoch": 53.79945054945055, + "grad_norm": 10.62686538696289, + "learning_rate": 2.3100274725274724e-05, + "loss": 0.2791, + "step": 19583 + }, + { + "epoch": 53.8021978021978, + "grad_norm": 4.03197717666626, + "learning_rate": 2.3098901098901098e-05, + "loss": 0.0344, + "step": 19584 + }, + { + "epoch": 53.80494505494506, + "grad_norm": 10.692760467529297, + "learning_rate": 2.3097527472527475e-05, + "loss": 0.3851, + "step": 19585 + }, + { + "epoch": 53.80769230769231, + "grad_norm": 3.8799774646759033, + "learning_rate": 2.3096153846153848e-05, + "loss": 0.0464, + "step": 19586 + }, + { + "epoch": 53.81043956043956, + "grad_norm": 4.036396503448486, + "learning_rate": 2.309478021978022e-05, + "loss": 0.0458, + "step": 19587 + }, + { + "epoch": 53.81318681318681, + "grad_norm": 7.2493977546691895, + "learning_rate": 2.3093406593406595e-05, + "loss": 0.2008, + "step": 19588 + }, + { + "epoch": 53.815934065934066, + "grad_norm": 8.224125862121582, + "learning_rate": 2.3092032967032968e-05, + "loss": 0.1276, + "step": 19589 + }, + { + "epoch": 53.81868131868132, + "grad_norm": 4.431105613708496, + "learning_rate": 2.309065934065934e-05, + "loss": 0.0426, + "step": 19590 + }, + { + "epoch": 53.82142857142857, + "grad_norm": 11.821046829223633, + "learning_rate": 2.3089285714285715e-05, + "loss": 0.2517, + "step": 19591 + }, + { + "epoch": 53.824175824175825, + "grad_norm": 12.072432518005371, + "learning_rate": 2.308791208791209e-05, + "loss": 0.4181, + "step": 19592 + }, + { + "epoch": 53.82692307692308, + "grad_norm": 4.9617600440979, + "learning_rate": 2.3086538461538462e-05, + "loss": 0.091, + "step": 19593 + }, + { + "epoch": 53.82967032967033, + "grad_norm": 9.512008666992188, + "learning_rate": 2.3085164835164835e-05, + "loss": 0.2627, + "step": 19594 + }, + { + "epoch": 53.832417582417584, + "grad_norm": 9.545098304748535, + "learning_rate": 2.3083791208791212e-05, + "loss": 0.1342, + "step": 19595 + }, + { + "epoch": 53.83516483516483, + "grad_norm": 4.949690818786621, + "learning_rate": 2.3082417582417585e-05, + "loss": 0.0742, + "step": 19596 + }, + { + "epoch": 53.83791208791209, + "grad_norm": 16.82962417602539, + "learning_rate": 2.308104395604396e-05, + "loss": 0.6473, + "step": 19597 + }, + { + "epoch": 53.84065934065934, + "grad_norm": 8.578327178955078, + "learning_rate": 2.307967032967033e-05, + "loss": 0.2087, + "step": 19598 + }, + { + "epoch": 53.84340659340659, + "grad_norm": 17.409629821777344, + "learning_rate": 2.3078296703296702e-05, + "loss": 0.5608, + "step": 19599 + }, + { + "epoch": 53.84615384615385, + "grad_norm": 15.053035736083984, + "learning_rate": 2.307692307692308e-05, + "loss": 0.7043, + "step": 19600 + }, + { + "epoch": 53.8489010989011, + "grad_norm": 10.136604309082031, + "learning_rate": 2.3075549450549452e-05, + "loss": 0.1716, + "step": 19601 + }, + { + "epoch": 53.85164835164835, + "grad_norm": 12.18415641784668, + "learning_rate": 2.3074175824175826e-05, + "loss": 0.1916, + "step": 19602 + }, + { + "epoch": 53.854395604395606, + "grad_norm": 10.250717163085938, + "learning_rate": 2.30728021978022e-05, + "loss": 0.2533, + "step": 19603 + }, + { + "epoch": 53.857142857142854, + "grad_norm": 7.871006965637207, + "learning_rate": 2.3071428571428573e-05, + "loss": 0.2053, + "step": 19604 + }, + { + "epoch": 53.85989010989011, + "grad_norm": 3.6225767135620117, + "learning_rate": 2.3070054945054946e-05, + "loss": 0.0578, + "step": 19605 + }, + { + "epoch": 53.862637362637365, + "grad_norm": 13.074932098388672, + "learning_rate": 2.306868131868132e-05, + "loss": 0.2279, + "step": 19606 + }, + { + "epoch": 53.86538461538461, + "grad_norm": 14.576324462890625, + "learning_rate": 2.3067307692307693e-05, + "loss": 0.2493, + "step": 19607 + }, + { + "epoch": 53.86813186813187, + "grad_norm": 11.632287979125977, + "learning_rate": 2.3065934065934066e-05, + "loss": 0.4544, + "step": 19608 + }, + { + "epoch": 53.870879120879124, + "grad_norm": 4.269611358642578, + "learning_rate": 2.306456043956044e-05, + "loss": 0.0861, + "step": 19609 + }, + { + "epoch": 53.87362637362637, + "grad_norm": 10.952216148376465, + "learning_rate": 2.3063186813186817e-05, + "loss": 0.2231, + "step": 19610 + }, + { + "epoch": 53.87637362637363, + "grad_norm": 5.631906509399414, + "learning_rate": 2.306181318681319e-05, + "loss": 0.0689, + "step": 19611 + }, + { + "epoch": 53.879120879120876, + "grad_norm": 23.0963077545166, + "learning_rate": 2.3060439560439563e-05, + "loss": 1.0564, + "step": 19612 + }, + { + "epoch": 53.88186813186813, + "grad_norm": 8.154989242553711, + "learning_rate": 2.3059065934065933e-05, + "loss": 0.1144, + "step": 19613 + }, + { + "epoch": 53.88461538461539, + "grad_norm": 9.464012145996094, + "learning_rate": 2.3057692307692307e-05, + "loss": 0.193, + "step": 19614 + }, + { + "epoch": 53.887362637362635, + "grad_norm": 11.253856658935547, + "learning_rate": 2.3056318681318684e-05, + "loss": 0.1411, + "step": 19615 + }, + { + "epoch": 53.89010989010989, + "grad_norm": 11.144749641418457, + "learning_rate": 2.3054945054945057e-05, + "loss": 0.1905, + "step": 19616 + }, + { + "epoch": 53.892857142857146, + "grad_norm": 4.822149276733398, + "learning_rate": 2.305357142857143e-05, + "loss": 0.1692, + "step": 19617 + }, + { + "epoch": 53.895604395604394, + "grad_norm": 18.453975677490234, + "learning_rate": 2.3052197802197804e-05, + "loss": 0.3317, + "step": 19618 + }, + { + "epoch": 53.89835164835165, + "grad_norm": 6.791236877441406, + "learning_rate": 2.3050824175824177e-05, + "loss": 0.1016, + "step": 19619 + }, + { + "epoch": 53.9010989010989, + "grad_norm": 7.673166275024414, + "learning_rate": 2.304945054945055e-05, + "loss": 0.1096, + "step": 19620 + }, + { + "epoch": 53.90384615384615, + "grad_norm": 14.190988540649414, + "learning_rate": 2.3048076923076924e-05, + "loss": 0.3591, + "step": 19621 + }, + { + "epoch": 53.90659340659341, + "grad_norm": 5.933078289031982, + "learning_rate": 2.3046703296703297e-05, + "loss": 0.1026, + "step": 19622 + }, + { + "epoch": 53.90934065934066, + "grad_norm": 5.419654846191406, + "learning_rate": 2.304532967032967e-05, + "loss": 0.1517, + "step": 19623 + }, + { + "epoch": 53.91208791208791, + "grad_norm": 17.473737716674805, + "learning_rate": 2.3043956043956044e-05, + "loss": 0.353, + "step": 19624 + }, + { + "epoch": 53.91483516483517, + "grad_norm": 5.987090587615967, + "learning_rate": 2.3042582417582418e-05, + "loss": 0.0618, + "step": 19625 + }, + { + "epoch": 53.917582417582416, + "grad_norm": 15.523479461669922, + "learning_rate": 2.3041208791208794e-05, + "loss": 0.3148, + "step": 19626 + }, + { + "epoch": 53.92032967032967, + "grad_norm": 5.469784259796143, + "learning_rate": 2.3039835164835168e-05, + "loss": 0.1026, + "step": 19627 + }, + { + "epoch": 53.92307692307692, + "grad_norm": 10.457878112792969, + "learning_rate": 2.3038461538461538e-05, + "loss": 0.217, + "step": 19628 + }, + { + "epoch": 53.925824175824175, + "grad_norm": 23.72292709350586, + "learning_rate": 2.303708791208791e-05, + "loss": 0.7182, + "step": 19629 + }, + { + "epoch": 53.92857142857143, + "grad_norm": 8.568513870239258, + "learning_rate": 2.3035714285714285e-05, + "loss": 0.153, + "step": 19630 + }, + { + "epoch": 53.93131868131868, + "grad_norm": 7.388199329376221, + "learning_rate": 2.303434065934066e-05, + "loss": 0.1339, + "step": 19631 + }, + { + "epoch": 53.934065934065934, + "grad_norm": 5.63380241394043, + "learning_rate": 2.3032967032967035e-05, + "loss": 0.1721, + "step": 19632 + }, + { + "epoch": 53.93681318681319, + "grad_norm": 5.954336643218994, + "learning_rate": 2.3031593406593408e-05, + "loss": 0.0536, + "step": 19633 + }, + { + "epoch": 53.93956043956044, + "grad_norm": 15.556428909301758, + "learning_rate": 2.303021978021978e-05, + "loss": 0.3236, + "step": 19634 + }, + { + "epoch": 53.94230769230769, + "grad_norm": 19.69835090637207, + "learning_rate": 2.3028846153846155e-05, + "loss": 0.6337, + "step": 19635 + }, + { + "epoch": 53.94505494505494, + "grad_norm": 3.3879053592681885, + "learning_rate": 2.302747252747253e-05, + "loss": 0.0387, + "step": 19636 + }, + { + "epoch": 53.9478021978022, + "grad_norm": 10.456121444702148, + "learning_rate": 2.3026098901098902e-05, + "loss": 0.221, + "step": 19637 + }, + { + "epoch": 53.95054945054945, + "grad_norm": 15.741288185119629, + "learning_rate": 2.3024725274725275e-05, + "loss": 0.51, + "step": 19638 + }, + { + "epoch": 53.9532967032967, + "grad_norm": 12.663880348205566, + "learning_rate": 2.302335164835165e-05, + "loss": 0.349, + "step": 19639 + }, + { + "epoch": 53.956043956043956, + "grad_norm": 15.604386329650879, + "learning_rate": 2.3021978021978022e-05, + "loss": 0.2711, + "step": 19640 + }, + { + "epoch": 53.95879120879121, + "grad_norm": 5.251253604888916, + "learning_rate": 2.30206043956044e-05, + "loss": 0.0785, + "step": 19641 + }, + { + "epoch": 53.96153846153846, + "grad_norm": 5.775916576385498, + "learning_rate": 2.3019230769230772e-05, + "loss": 0.1466, + "step": 19642 + }, + { + "epoch": 53.964285714285715, + "grad_norm": 12.805776596069336, + "learning_rate": 2.3017857142857142e-05, + "loss": 0.4414, + "step": 19643 + }, + { + "epoch": 53.967032967032964, + "grad_norm": 14.358232498168945, + "learning_rate": 2.3016483516483516e-05, + "loss": 0.1785, + "step": 19644 + }, + { + "epoch": 53.96978021978022, + "grad_norm": 13.44998836517334, + "learning_rate": 2.301510989010989e-05, + "loss": 0.21, + "step": 19645 + }, + { + "epoch": 53.972527472527474, + "grad_norm": 8.706524848937988, + "learning_rate": 2.3013736263736266e-05, + "loss": 0.1823, + "step": 19646 + }, + { + "epoch": 53.97527472527472, + "grad_norm": 20.260251998901367, + "learning_rate": 2.301236263736264e-05, + "loss": 0.4918, + "step": 19647 + }, + { + "epoch": 53.97802197802198, + "grad_norm": 7.837832927703857, + "learning_rate": 2.3010989010989013e-05, + "loss": 0.2349, + "step": 19648 + }, + { + "epoch": 53.98076923076923, + "grad_norm": 12.723467826843262, + "learning_rate": 2.3009615384615386e-05, + "loss": 0.2769, + "step": 19649 + }, + { + "epoch": 53.98351648351648, + "grad_norm": 5.2035675048828125, + "learning_rate": 2.300824175824176e-05, + "loss": 0.0874, + "step": 19650 + }, + { + "epoch": 53.98626373626374, + "grad_norm": 14.343923568725586, + "learning_rate": 2.3006868131868133e-05, + "loss": 0.2296, + "step": 19651 + }, + { + "epoch": 53.98901098901099, + "grad_norm": 21.267166137695312, + "learning_rate": 2.3005494505494506e-05, + "loss": 0.5787, + "step": 19652 + }, + { + "epoch": 53.99175824175824, + "grad_norm": 4.875126361846924, + "learning_rate": 2.300412087912088e-05, + "loss": 0.0439, + "step": 19653 + }, + { + "epoch": 53.994505494505496, + "grad_norm": 8.539813041687012, + "learning_rate": 2.3002747252747253e-05, + "loss": 0.1898, + "step": 19654 + }, + { + "epoch": 53.997252747252745, + "grad_norm": 9.741239547729492, + "learning_rate": 2.3001373626373627e-05, + "loss": 0.1094, + "step": 19655 + }, + { + "epoch": 54.0, + "grad_norm": 40.786163330078125, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.4333, + "step": 19656 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.7727272727272727, + "eval_f1": 0.7788709052901752, + "eval_f1_DuraRiadoRio_64x64": 0.7777777777777778, + "eval_f1_Mole_64x64": 0.7647058823529411, + "eval_f1_Quebrado_64x64": 0.7938931297709924, + "eval_f1_RiadoRio_64x64": 0.6683673469387755, + "eval_f1_RioFechado_64x64": 0.8896103896103896, + "eval_loss": 0.8459255695343018, + "eval_precision": 0.8255965968292773, + "eval_precision_DuraRiadoRio_64x64": 0.9074074074074074, + "eval_precision_Mole_64x64": 0.9680851063829787, + "eval_precision_Quebrado_64x64": 0.8813559322033898, + "eval_precision_RiadoRio_64x64": 0.5458333333333333, + "eval_precision_RioFechado_64x64": 0.8253012048192772, + "eval_recall": 0.7722706119759494, + "eval_recall_DuraRiadoRio_64x64": 0.6805555555555556, + "eval_recall_Mole_64x64": 0.6319444444444444, + "eval_recall_Quebrado_64x64": 0.7222222222222222, + "eval_recall_RiadoRio_64x64": 0.8618421052631579, + "eval_recall_RioFechado_64x64": 0.9647887323943662, + "eval_runtime": 1.9186, + "eval_samples_per_second": 378.406, + "eval_steps_per_second": 23.976, + "step": 19656 + }, + { + "epoch": 54.002747252747255, + "grad_norm": 13.108843803405762, + "learning_rate": 2.2998626373626377e-05, + "loss": 0.3798, + "step": 19657 + }, + { + "epoch": 54.005494505494504, + "grad_norm": 5.417426109313965, + "learning_rate": 2.2997252747252747e-05, + "loss": 0.0842, + "step": 19658 + }, + { + "epoch": 54.00824175824176, + "grad_norm": 9.73273754119873, + "learning_rate": 2.299587912087912e-05, + "loss": 0.2798, + "step": 19659 + }, + { + "epoch": 54.010989010989015, + "grad_norm": 22.173202514648438, + "learning_rate": 2.2994505494505494e-05, + "loss": 0.4569, + "step": 19660 + }, + { + "epoch": 54.01373626373626, + "grad_norm": 12.117536544799805, + "learning_rate": 2.299313186813187e-05, + "loss": 0.1457, + "step": 19661 + }, + { + "epoch": 54.01648351648352, + "grad_norm": 20.98447608947754, + "learning_rate": 2.2991758241758244e-05, + "loss": 0.9649, + "step": 19662 + }, + { + "epoch": 54.01923076923077, + "grad_norm": 9.026176452636719, + "learning_rate": 2.2990384615384617e-05, + "loss": 0.1264, + "step": 19663 + }, + { + "epoch": 54.02197802197802, + "grad_norm": 18.6612491607666, + "learning_rate": 2.298901098901099e-05, + "loss": 0.4139, + "step": 19664 + }, + { + "epoch": 54.02472527472528, + "grad_norm": 6.148875713348389, + "learning_rate": 2.2987637362637364e-05, + "loss": 0.2006, + "step": 19665 + }, + { + "epoch": 54.027472527472526, + "grad_norm": 17.394189834594727, + "learning_rate": 2.2986263736263737e-05, + "loss": 0.3608, + "step": 19666 + }, + { + "epoch": 54.03021978021978, + "grad_norm": 6.884737968444824, + "learning_rate": 2.298489010989011e-05, + "loss": 0.13, + "step": 19667 + }, + { + "epoch": 54.032967032967036, + "grad_norm": 11.8524169921875, + "learning_rate": 2.2983516483516484e-05, + "loss": 0.268, + "step": 19668 + }, + { + "epoch": 54.035714285714285, + "grad_norm": 14.271881103515625, + "learning_rate": 2.2982142857142858e-05, + "loss": 0.2572, + "step": 19669 + }, + { + "epoch": 54.03846153846154, + "grad_norm": 24.310894012451172, + "learning_rate": 2.298076923076923e-05, + "loss": 0.6203, + "step": 19670 + }, + { + "epoch": 54.04120879120879, + "grad_norm": 7.1117262840271, + "learning_rate": 2.2979395604395608e-05, + "loss": 0.2009, + "step": 19671 + }, + { + "epoch": 54.043956043956044, + "grad_norm": 8.80926513671875, + "learning_rate": 2.297802197802198e-05, + "loss": 0.2134, + "step": 19672 + }, + { + "epoch": 54.0467032967033, + "grad_norm": 10.5132474899292, + "learning_rate": 2.297664835164835e-05, + "loss": 0.2213, + "step": 19673 + }, + { + "epoch": 54.04945054945055, + "grad_norm": 10.364642143249512, + "learning_rate": 2.2975274725274725e-05, + "loss": 0.2059, + "step": 19674 + }, + { + "epoch": 54.0521978021978, + "grad_norm": 12.47547721862793, + "learning_rate": 2.2973901098901098e-05, + "loss": 0.3312, + "step": 19675 + }, + { + "epoch": 54.05494505494506, + "grad_norm": 20.74677276611328, + "learning_rate": 2.2972527472527475e-05, + "loss": 0.7012, + "step": 19676 + }, + { + "epoch": 54.05769230769231, + "grad_norm": 3.4421637058258057, + "learning_rate": 2.297115384615385e-05, + "loss": 0.0661, + "step": 19677 + }, + { + "epoch": 54.06043956043956, + "grad_norm": 10.418168067932129, + "learning_rate": 2.2969780219780222e-05, + "loss": 0.2399, + "step": 19678 + }, + { + "epoch": 54.06318681318681, + "grad_norm": 3.7391130924224854, + "learning_rate": 2.2968406593406595e-05, + "loss": 0.0685, + "step": 19679 + }, + { + "epoch": 54.065934065934066, + "grad_norm": 16.977365493774414, + "learning_rate": 2.296703296703297e-05, + "loss": 0.4945, + "step": 19680 + }, + { + "epoch": 54.06868131868132, + "grad_norm": 9.922544479370117, + "learning_rate": 2.2965659340659342e-05, + "loss": 0.2513, + "step": 19681 + }, + { + "epoch": 54.07142857142857, + "grad_norm": 9.668861389160156, + "learning_rate": 2.2964285714285715e-05, + "loss": 0.2436, + "step": 19682 + }, + { + "epoch": 54.074175824175825, + "grad_norm": 7.770256996154785, + "learning_rate": 2.296291208791209e-05, + "loss": 0.1739, + "step": 19683 + }, + { + "epoch": 54.07692307692308, + "grad_norm": 12.810309410095215, + "learning_rate": 2.2961538461538462e-05, + "loss": 0.2908, + "step": 19684 + }, + { + "epoch": 54.07967032967033, + "grad_norm": 4.72307825088501, + "learning_rate": 2.2960164835164836e-05, + "loss": 0.057, + "step": 19685 + }, + { + "epoch": 54.082417582417584, + "grad_norm": 21.42058563232422, + "learning_rate": 2.2958791208791212e-05, + "loss": 0.4805, + "step": 19686 + }, + { + "epoch": 54.08516483516483, + "grad_norm": 12.977818489074707, + "learning_rate": 2.2957417582417586e-05, + "loss": 0.2416, + "step": 19687 + }, + { + "epoch": 54.08791208791209, + "grad_norm": 16.552942276000977, + "learning_rate": 2.2956043956043956e-05, + "loss": 0.3458, + "step": 19688 + }, + { + "epoch": 54.09065934065934, + "grad_norm": 11.567282676696777, + "learning_rate": 2.295467032967033e-05, + "loss": 0.2453, + "step": 19689 + }, + { + "epoch": 54.09340659340659, + "grad_norm": 16.91756820678711, + "learning_rate": 2.2953296703296703e-05, + "loss": 0.3408, + "step": 19690 + }, + { + "epoch": 54.09615384615385, + "grad_norm": 4.334353446960449, + "learning_rate": 2.295192307692308e-05, + "loss": 0.0872, + "step": 19691 + }, + { + "epoch": 54.0989010989011, + "grad_norm": 10.260764122009277, + "learning_rate": 2.2950549450549453e-05, + "loss": 0.2153, + "step": 19692 + }, + { + "epoch": 54.10164835164835, + "grad_norm": 6.6150054931640625, + "learning_rate": 2.2949175824175826e-05, + "loss": 0.1898, + "step": 19693 + }, + { + "epoch": 54.104395604395606, + "grad_norm": 8.993846893310547, + "learning_rate": 2.29478021978022e-05, + "loss": 0.2266, + "step": 19694 + }, + { + "epoch": 54.107142857142854, + "grad_norm": 12.62185287475586, + "learning_rate": 2.2946428571428573e-05, + "loss": 0.354, + "step": 19695 + }, + { + "epoch": 54.10989010989011, + "grad_norm": 10.543901443481445, + "learning_rate": 2.2945054945054946e-05, + "loss": 0.2149, + "step": 19696 + }, + { + "epoch": 54.112637362637365, + "grad_norm": 18.848495483398438, + "learning_rate": 2.294368131868132e-05, + "loss": 0.388, + "step": 19697 + }, + { + "epoch": 54.11538461538461, + "grad_norm": 18.498825073242188, + "learning_rate": 2.2942307692307693e-05, + "loss": 0.3932, + "step": 19698 + }, + { + "epoch": 54.11813186813187, + "grad_norm": 5.072368144989014, + "learning_rate": 2.2940934065934067e-05, + "loss": 0.0697, + "step": 19699 + }, + { + "epoch": 54.120879120879124, + "grad_norm": 5.186223030090332, + "learning_rate": 2.293956043956044e-05, + "loss": 0.0884, + "step": 19700 + }, + { + "epoch": 54.12362637362637, + "grad_norm": 6.727209091186523, + "learning_rate": 2.2938186813186817e-05, + "loss": 0.0952, + "step": 19701 + }, + { + "epoch": 54.12637362637363, + "grad_norm": 14.342072486877441, + "learning_rate": 2.2936813186813187e-05, + "loss": 0.4645, + "step": 19702 + }, + { + "epoch": 54.129120879120876, + "grad_norm": 13.302997589111328, + "learning_rate": 2.293543956043956e-05, + "loss": 0.244, + "step": 19703 + }, + { + "epoch": 54.13186813186813, + "grad_norm": 11.647723197937012, + "learning_rate": 2.2934065934065934e-05, + "loss": 0.2783, + "step": 19704 + }, + { + "epoch": 54.13461538461539, + "grad_norm": 21.110008239746094, + "learning_rate": 2.2932692307692307e-05, + "loss": 0.5465, + "step": 19705 + }, + { + "epoch": 54.137362637362635, + "grad_norm": 2.2458581924438477, + "learning_rate": 2.2931318681318684e-05, + "loss": 0.0263, + "step": 19706 + }, + { + "epoch": 54.14010989010989, + "grad_norm": 14.640364646911621, + "learning_rate": 2.2929945054945057e-05, + "loss": 0.2234, + "step": 19707 + }, + { + "epoch": 54.142857142857146, + "grad_norm": 11.378094673156738, + "learning_rate": 2.292857142857143e-05, + "loss": 0.2844, + "step": 19708 + }, + { + "epoch": 54.145604395604394, + "grad_norm": 11.206945419311523, + "learning_rate": 2.2927197802197804e-05, + "loss": 0.2099, + "step": 19709 + }, + { + "epoch": 54.14835164835165, + "grad_norm": 16.742948532104492, + "learning_rate": 2.2925824175824178e-05, + "loss": 0.4217, + "step": 19710 + }, + { + "epoch": 54.1510989010989, + "grad_norm": 15.578031539916992, + "learning_rate": 2.292445054945055e-05, + "loss": 0.3121, + "step": 19711 + }, + { + "epoch": 54.15384615384615, + "grad_norm": 8.848109245300293, + "learning_rate": 2.2923076923076924e-05, + "loss": 0.2905, + "step": 19712 + }, + { + "epoch": 54.15659340659341, + "grad_norm": 15.36806583404541, + "learning_rate": 2.2921703296703298e-05, + "loss": 0.4244, + "step": 19713 + }, + { + "epoch": 54.15934065934066, + "grad_norm": 7.029635429382324, + "learning_rate": 2.292032967032967e-05, + "loss": 0.0932, + "step": 19714 + }, + { + "epoch": 54.16208791208791, + "grad_norm": 12.036737442016602, + "learning_rate": 2.2918956043956045e-05, + "loss": 0.2734, + "step": 19715 + }, + { + "epoch": 54.16483516483517, + "grad_norm": 18.822444915771484, + "learning_rate": 2.291758241758242e-05, + "loss": 0.3493, + "step": 19716 + }, + { + "epoch": 54.167582417582416, + "grad_norm": 9.565349578857422, + "learning_rate": 2.291620879120879e-05, + "loss": 0.1545, + "step": 19717 + }, + { + "epoch": 54.17032967032967, + "grad_norm": 12.507043838500977, + "learning_rate": 2.2914835164835165e-05, + "loss": 0.4265, + "step": 19718 + }, + { + "epoch": 54.17307692307692, + "grad_norm": 8.54942512512207, + "learning_rate": 2.2913461538461538e-05, + "loss": 0.0911, + "step": 19719 + }, + { + "epoch": 54.175824175824175, + "grad_norm": 9.948203086853027, + "learning_rate": 2.291208791208791e-05, + "loss": 0.1276, + "step": 19720 + }, + { + "epoch": 54.17857142857143, + "grad_norm": 8.634347915649414, + "learning_rate": 2.291071428571429e-05, + "loss": 0.1444, + "step": 19721 + }, + { + "epoch": 54.18131868131868, + "grad_norm": 4.432876110076904, + "learning_rate": 2.2909340659340662e-05, + "loss": 0.0738, + "step": 19722 + }, + { + "epoch": 54.184065934065934, + "grad_norm": 13.900527954101562, + "learning_rate": 2.2907967032967035e-05, + "loss": 0.2355, + "step": 19723 + }, + { + "epoch": 54.18681318681319, + "grad_norm": 13.604238510131836, + "learning_rate": 2.290659340659341e-05, + "loss": 0.3695, + "step": 19724 + }, + { + "epoch": 54.18956043956044, + "grad_norm": 6.42220401763916, + "learning_rate": 2.2905219780219782e-05, + "loss": 0.1165, + "step": 19725 + }, + { + "epoch": 54.19230769230769, + "grad_norm": 7.305419921875, + "learning_rate": 2.2903846153846155e-05, + "loss": 0.209, + "step": 19726 + }, + { + "epoch": 54.19505494505494, + "grad_norm": 6.805840492248535, + "learning_rate": 2.290247252747253e-05, + "loss": 0.1175, + "step": 19727 + }, + { + "epoch": 54.1978021978022, + "grad_norm": 13.45985221862793, + "learning_rate": 2.2901098901098902e-05, + "loss": 0.3575, + "step": 19728 + }, + { + "epoch": 54.20054945054945, + "grad_norm": 8.669570922851562, + "learning_rate": 2.2899725274725276e-05, + "loss": 0.1688, + "step": 19729 + }, + { + "epoch": 54.2032967032967, + "grad_norm": 15.473824501037598, + "learning_rate": 2.289835164835165e-05, + "loss": 0.1933, + "step": 19730 + }, + { + "epoch": 54.206043956043956, + "grad_norm": 5.209771156311035, + "learning_rate": 2.2896978021978026e-05, + "loss": 0.095, + "step": 19731 + }, + { + "epoch": 54.20879120879121, + "grad_norm": 16.31710433959961, + "learning_rate": 2.2895604395604396e-05, + "loss": 0.4965, + "step": 19732 + }, + { + "epoch": 54.21153846153846, + "grad_norm": 11.435563087463379, + "learning_rate": 2.289423076923077e-05, + "loss": 0.2524, + "step": 19733 + }, + { + "epoch": 54.214285714285715, + "grad_norm": 15.177458763122559, + "learning_rate": 2.2892857142857143e-05, + "loss": 0.2162, + "step": 19734 + }, + { + "epoch": 54.217032967032964, + "grad_norm": 7.3102240562438965, + "learning_rate": 2.2891483516483516e-05, + "loss": 0.1793, + "step": 19735 + }, + { + "epoch": 54.21978021978022, + "grad_norm": 1.3994488716125488, + "learning_rate": 2.289010989010989e-05, + "loss": 0.0274, + "step": 19736 + }, + { + "epoch": 54.222527472527474, + "grad_norm": 13.324252128601074, + "learning_rate": 2.2888736263736266e-05, + "loss": 0.2873, + "step": 19737 + }, + { + "epoch": 54.22527472527472, + "grad_norm": 6.694637298583984, + "learning_rate": 2.288736263736264e-05, + "loss": 0.1688, + "step": 19738 + }, + { + "epoch": 54.22802197802198, + "grad_norm": 5.320511341094971, + "learning_rate": 2.2885989010989013e-05, + "loss": 0.0611, + "step": 19739 + }, + { + "epoch": 54.23076923076923, + "grad_norm": 9.09218692779541, + "learning_rate": 2.2884615384615387e-05, + "loss": 0.141, + "step": 19740 + }, + { + "epoch": 54.23351648351648, + "grad_norm": 22.217161178588867, + "learning_rate": 2.2883241758241757e-05, + "loss": 0.5942, + "step": 19741 + }, + { + "epoch": 54.23626373626374, + "grad_norm": 9.272605895996094, + "learning_rate": 2.2881868131868133e-05, + "loss": 0.1346, + "step": 19742 + }, + { + "epoch": 54.239010989010985, + "grad_norm": 16.817224502563477, + "learning_rate": 2.2880494505494507e-05, + "loss": 0.5076, + "step": 19743 + }, + { + "epoch": 54.24175824175824, + "grad_norm": 6.249318599700928, + "learning_rate": 2.287912087912088e-05, + "loss": 0.1387, + "step": 19744 + }, + { + "epoch": 54.244505494505496, + "grad_norm": 22.27103614807129, + "learning_rate": 2.2877747252747254e-05, + "loss": 0.363, + "step": 19745 + }, + { + "epoch": 54.247252747252745, + "grad_norm": 9.335182189941406, + "learning_rate": 2.2876373626373627e-05, + "loss": 0.1422, + "step": 19746 + }, + { + "epoch": 54.25, + "grad_norm": 6.317578315734863, + "learning_rate": 2.2875e-05, + "loss": 0.2351, + "step": 19747 + }, + { + "epoch": 54.252747252747255, + "grad_norm": 11.805876731872559, + "learning_rate": 2.2873626373626374e-05, + "loss": 0.2321, + "step": 19748 + }, + { + "epoch": 54.255494505494504, + "grad_norm": 11.415255546569824, + "learning_rate": 2.2872252747252747e-05, + "loss": 0.1441, + "step": 19749 + }, + { + "epoch": 54.25824175824176, + "grad_norm": 4.673081398010254, + "learning_rate": 2.287087912087912e-05, + "loss": 0.0886, + "step": 19750 + }, + { + "epoch": 54.260989010989015, + "grad_norm": 19.71013069152832, + "learning_rate": 2.2869505494505494e-05, + "loss": 0.4378, + "step": 19751 + }, + { + "epoch": 54.26373626373626, + "grad_norm": 10.818811416625977, + "learning_rate": 2.286813186813187e-05, + "loss": 0.3402, + "step": 19752 + }, + { + "epoch": 54.26648351648352, + "grad_norm": 10.54887580871582, + "learning_rate": 2.2866758241758244e-05, + "loss": 0.2711, + "step": 19753 + }, + { + "epoch": 54.26923076923077, + "grad_norm": 3.884188652038574, + "learning_rate": 2.2865384615384618e-05, + "loss": 0.0402, + "step": 19754 + }, + { + "epoch": 54.27197802197802, + "grad_norm": 4.691375255584717, + "learning_rate": 2.286401098901099e-05, + "loss": 0.0877, + "step": 19755 + }, + { + "epoch": 54.27472527472528, + "grad_norm": 17.997970581054688, + "learning_rate": 2.286263736263736e-05, + "loss": 0.381, + "step": 19756 + }, + { + "epoch": 54.277472527472526, + "grad_norm": 18.858131408691406, + "learning_rate": 2.2861263736263738e-05, + "loss": 0.4599, + "step": 19757 + }, + { + "epoch": 54.28021978021978, + "grad_norm": 14.431075096130371, + "learning_rate": 2.285989010989011e-05, + "loss": 0.2281, + "step": 19758 + }, + { + "epoch": 54.282967032967036, + "grad_norm": 10.624580383300781, + "learning_rate": 2.2858516483516485e-05, + "loss": 0.2217, + "step": 19759 + }, + { + "epoch": 54.285714285714285, + "grad_norm": 10.03775405883789, + "learning_rate": 2.2857142857142858e-05, + "loss": 0.1256, + "step": 19760 + }, + { + "epoch": 54.28846153846154, + "grad_norm": 11.946695327758789, + "learning_rate": 2.285576923076923e-05, + "loss": 0.2118, + "step": 19761 + }, + { + "epoch": 54.29120879120879, + "grad_norm": 8.429134368896484, + "learning_rate": 2.2854395604395605e-05, + "loss": 0.204, + "step": 19762 + }, + { + "epoch": 54.293956043956044, + "grad_norm": 19.155868530273438, + "learning_rate": 2.2853021978021978e-05, + "loss": 0.5761, + "step": 19763 + }, + { + "epoch": 54.2967032967033, + "grad_norm": 18.232038497924805, + "learning_rate": 2.285164835164835e-05, + "loss": 0.5836, + "step": 19764 + }, + { + "epoch": 54.29945054945055, + "grad_norm": 6.570925712585449, + "learning_rate": 2.2850274725274725e-05, + "loss": 0.1393, + "step": 19765 + }, + { + "epoch": 54.3021978021978, + "grad_norm": 11.068401336669922, + "learning_rate": 2.28489010989011e-05, + "loss": 0.3657, + "step": 19766 + }, + { + "epoch": 54.30494505494506, + "grad_norm": 10.501200675964355, + "learning_rate": 2.2847527472527475e-05, + "loss": 0.1795, + "step": 19767 + }, + { + "epoch": 54.30769230769231, + "grad_norm": 12.133774757385254, + "learning_rate": 2.284615384615385e-05, + "loss": 0.3815, + "step": 19768 + }, + { + "epoch": 54.31043956043956, + "grad_norm": 7.549869537353516, + "learning_rate": 2.2844780219780222e-05, + "loss": 0.0984, + "step": 19769 + }, + { + "epoch": 54.31318681318681, + "grad_norm": 14.433341026306152, + "learning_rate": 2.2843406593406596e-05, + "loss": 0.2002, + "step": 19770 + }, + { + "epoch": 54.315934065934066, + "grad_norm": 11.132698059082031, + "learning_rate": 2.2842032967032966e-05, + "loss": 0.1123, + "step": 19771 + }, + { + "epoch": 54.31868131868132, + "grad_norm": 11.962472915649414, + "learning_rate": 2.2840659340659342e-05, + "loss": 0.2329, + "step": 19772 + }, + { + "epoch": 54.32142857142857, + "grad_norm": 12.943350791931152, + "learning_rate": 2.2839285714285716e-05, + "loss": 0.3636, + "step": 19773 + }, + { + "epoch": 54.324175824175825, + "grad_norm": 13.934409141540527, + "learning_rate": 2.283791208791209e-05, + "loss": 0.2763, + "step": 19774 + }, + { + "epoch": 54.32692307692308, + "grad_norm": 12.554722785949707, + "learning_rate": 2.2836538461538463e-05, + "loss": 0.1547, + "step": 19775 + }, + { + "epoch": 54.32967032967033, + "grad_norm": 12.066786766052246, + "learning_rate": 2.2835164835164836e-05, + "loss": 0.3745, + "step": 19776 + }, + { + "epoch": 54.332417582417584, + "grad_norm": 5.581332206726074, + "learning_rate": 2.283379120879121e-05, + "loss": 0.124, + "step": 19777 + }, + { + "epoch": 54.33516483516483, + "grad_norm": 10.765070915222168, + "learning_rate": 2.2832417582417583e-05, + "loss": 0.1934, + "step": 19778 + }, + { + "epoch": 54.33791208791209, + "grad_norm": 13.104795455932617, + "learning_rate": 2.2831043956043956e-05, + "loss": 0.2052, + "step": 19779 + }, + { + "epoch": 54.34065934065934, + "grad_norm": 13.516090393066406, + "learning_rate": 2.282967032967033e-05, + "loss": 0.2892, + "step": 19780 + }, + { + "epoch": 54.34340659340659, + "grad_norm": 13.266388893127441, + "learning_rate": 2.2828296703296703e-05, + "loss": 0.2963, + "step": 19781 + }, + { + "epoch": 54.34615384615385, + "grad_norm": 9.781482696533203, + "learning_rate": 2.282692307692308e-05, + "loss": 0.2659, + "step": 19782 + }, + { + "epoch": 54.3489010989011, + "grad_norm": 13.961837768554688, + "learning_rate": 2.2825549450549453e-05, + "loss": 0.2183, + "step": 19783 + }, + { + "epoch": 54.35164835164835, + "grad_norm": 9.523433685302734, + "learning_rate": 2.2824175824175827e-05, + "loss": 0.1977, + "step": 19784 + }, + { + "epoch": 54.354395604395606, + "grad_norm": 12.974194526672363, + "learning_rate": 2.28228021978022e-05, + "loss": 0.3956, + "step": 19785 + }, + { + "epoch": 54.357142857142854, + "grad_norm": 15.307950973510742, + "learning_rate": 2.282142857142857e-05, + "loss": 0.3443, + "step": 19786 + }, + { + "epoch": 54.35989010989011, + "grad_norm": 6.277981758117676, + "learning_rate": 2.2820054945054947e-05, + "loss": 0.0854, + "step": 19787 + }, + { + "epoch": 54.362637362637365, + "grad_norm": 7.469692707061768, + "learning_rate": 2.281868131868132e-05, + "loss": 0.1483, + "step": 19788 + }, + { + "epoch": 54.36538461538461, + "grad_norm": 7.2509050369262695, + "learning_rate": 2.2817307692307694e-05, + "loss": 0.1095, + "step": 19789 + }, + { + "epoch": 54.36813186813187, + "grad_norm": 13.869060516357422, + "learning_rate": 2.2815934065934067e-05, + "loss": 0.2883, + "step": 19790 + }, + { + "epoch": 54.370879120879124, + "grad_norm": 5.101729393005371, + "learning_rate": 2.281456043956044e-05, + "loss": 0.0991, + "step": 19791 + }, + { + "epoch": 54.37362637362637, + "grad_norm": 9.932638168334961, + "learning_rate": 2.2813186813186814e-05, + "loss": 0.2994, + "step": 19792 + }, + { + "epoch": 54.37637362637363, + "grad_norm": 10.750699996948242, + "learning_rate": 2.2811813186813187e-05, + "loss": 0.3593, + "step": 19793 + }, + { + "epoch": 54.379120879120876, + "grad_norm": 8.738545417785645, + "learning_rate": 2.281043956043956e-05, + "loss": 0.1965, + "step": 19794 + }, + { + "epoch": 54.38186813186813, + "grad_norm": 13.547008514404297, + "learning_rate": 2.2809065934065934e-05, + "loss": 0.2217, + "step": 19795 + }, + { + "epoch": 54.38461538461539, + "grad_norm": 10.558009147644043, + "learning_rate": 2.2807692307692307e-05, + "loss": 0.3387, + "step": 19796 + }, + { + "epoch": 54.387362637362635, + "grad_norm": 13.143993377685547, + "learning_rate": 2.2806318681318684e-05, + "loss": 0.1942, + "step": 19797 + }, + { + "epoch": 54.39010989010989, + "grad_norm": 6.418910026550293, + "learning_rate": 2.2804945054945058e-05, + "loss": 0.1014, + "step": 19798 + }, + { + "epoch": 54.392857142857146, + "grad_norm": 5.724279403686523, + "learning_rate": 2.280357142857143e-05, + "loss": 0.0803, + "step": 19799 + }, + { + "epoch": 54.395604395604394, + "grad_norm": 22.26668930053711, + "learning_rate": 2.28021978021978e-05, + "loss": 0.5231, + "step": 19800 + }, + { + "epoch": 54.39835164835165, + "grad_norm": 22.42786979675293, + "learning_rate": 2.2800824175824175e-05, + "loss": 0.583, + "step": 19801 + }, + { + "epoch": 54.4010989010989, + "grad_norm": 19.8629093170166, + "learning_rate": 2.279945054945055e-05, + "loss": 0.4475, + "step": 19802 + }, + { + "epoch": 54.40384615384615, + "grad_norm": 4.651464939117432, + "learning_rate": 2.2798076923076925e-05, + "loss": 0.0974, + "step": 19803 + }, + { + "epoch": 54.40659340659341, + "grad_norm": 11.585283279418945, + "learning_rate": 2.2796703296703298e-05, + "loss": 0.2775, + "step": 19804 + }, + { + "epoch": 54.40934065934066, + "grad_norm": 8.246038436889648, + "learning_rate": 2.279532967032967e-05, + "loss": 0.1874, + "step": 19805 + }, + { + "epoch": 54.41208791208791, + "grad_norm": 6.109096050262451, + "learning_rate": 2.2793956043956045e-05, + "loss": 0.0991, + "step": 19806 + }, + { + "epoch": 54.41483516483517, + "grad_norm": 7.2437849044799805, + "learning_rate": 2.279258241758242e-05, + "loss": 0.1757, + "step": 19807 + }, + { + "epoch": 54.417582417582416, + "grad_norm": 4.056634902954102, + "learning_rate": 2.2791208791208792e-05, + "loss": 0.0772, + "step": 19808 + }, + { + "epoch": 54.42032967032967, + "grad_norm": 4.010351181030273, + "learning_rate": 2.2789835164835165e-05, + "loss": 0.0772, + "step": 19809 + }, + { + "epoch": 54.42307692307692, + "grad_norm": 22.577293395996094, + "learning_rate": 2.278846153846154e-05, + "loss": 0.4613, + "step": 19810 + }, + { + "epoch": 54.425824175824175, + "grad_norm": 13.885085105895996, + "learning_rate": 2.2787087912087912e-05, + "loss": 0.528, + "step": 19811 + }, + { + "epoch": 54.42857142857143, + "grad_norm": 14.017327308654785, + "learning_rate": 2.278571428571429e-05, + "loss": 0.4653, + "step": 19812 + }, + { + "epoch": 54.43131868131868, + "grad_norm": 8.341876029968262, + "learning_rate": 2.2784340659340662e-05, + "loss": 0.1299, + "step": 19813 + }, + { + "epoch": 54.434065934065934, + "grad_norm": 6.146503448486328, + "learning_rate": 2.2782967032967036e-05, + "loss": 0.1037, + "step": 19814 + }, + { + "epoch": 54.43681318681319, + "grad_norm": 5.239022254943848, + "learning_rate": 2.2781593406593406e-05, + "loss": 0.1008, + "step": 19815 + }, + { + "epoch": 54.43956043956044, + "grad_norm": 9.915140151977539, + "learning_rate": 2.278021978021978e-05, + "loss": 0.1826, + "step": 19816 + }, + { + "epoch": 54.44230769230769, + "grad_norm": 10.63260555267334, + "learning_rate": 2.2778846153846156e-05, + "loss": 0.1386, + "step": 19817 + }, + { + "epoch": 54.44505494505494, + "grad_norm": 18.466176986694336, + "learning_rate": 2.277747252747253e-05, + "loss": 0.429, + "step": 19818 + }, + { + "epoch": 54.4478021978022, + "grad_norm": 9.628042221069336, + "learning_rate": 2.2776098901098903e-05, + "loss": 0.1764, + "step": 19819 + }, + { + "epoch": 54.45054945054945, + "grad_norm": 8.667115211486816, + "learning_rate": 2.2774725274725276e-05, + "loss": 0.1738, + "step": 19820 + }, + { + "epoch": 54.4532967032967, + "grad_norm": 10.266148567199707, + "learning_rate": 2.277335164835165e-05, + "loss": 0.1686, + "step": 19821 + }, + { + "epoch": 54.456043956043956, + "grad_norm": 10.292766571044922, + "learning_rate": 2.2771978021978023e-05, + "loss": 0.205, + "step": 19822 + }, + { + "epoch": 54.45879120879121, + "grad_norm": 15.49247932434082, + "learning_rate": 2.2770604395604396e-05, + "loss": 0.1961, + "step": 19823 + }, + { + "epoch": 54.46153846153846, + "grad_norm": 2.1072585582733154, + "learning_rate": 2.276923076923077e-05, + "loss": 0.0317, + "step": 19824 + }, + { + "epoch": 54.464285714285715, + "grad_norm": 16.741594314575195, + "learning_rate": 2.2767857142857143e-05, + "loss": 0.5282, + "step": 19825 + }, + { + "epoch": 54.467032967032964, + "grad_norm": 5.233275890350342, + "learning_rate": 2.2766483516483516e-05, + "loss": 0.0933, + "step": 19826 + }, + { + "epoch": 54.46978021978022, + "grad_norm": 2.7849717140197754, + "learning_rate": 2.2765109890109893e-05, + "loss": 0.0428, + "step": 19827 + }, + { + "epoch": 54.472527472527474, + "grad_norm": 2.047226667404175, + "learning_rate": 2.2763736263736267e-05, + "loss": 0.0232, + "step": 19828 + }, + { + "epoch": 54.47527472527472, + "grad_norm": 9.787874221801758, + "learning_rate": 2.276236263736264e-05, + "loss": 0.1474, + "step": 19829 + }, + { + "epoch": 54.47802197802198, + "grad_norm": 25.918956756591797, + "learning_rate": 2.276098901098901e-05, + "loss": 0.7747, + "step": 19830 + }, + { + "epoch": 54.48076923076923, + "grad_norm": 18.60532569885254, + "learning_rate": 2.2759615384615383e-05, + "loss": 0.5065, + "step": 19831 + }, + { + "epoch": 54.48351648351648, + "grad_norm": 9.870258331298828, + "learning_rate": 2.275824175824176e-05, + "loss": 0.1085, + "step": 19832 + }, + { + "epoch": 54.48626373626374, + "grad_norm": 15.793325424194336, + "learning_rate": 2.2756868131868134e-05, + "loss": 0.3521, + "step": 19833 + }, + { + "epoch": 54.489010989010985, + "grad_norm": 16.480745315551758, + "learning_rate": 2.2755494505494507e-05, + "loss": 0.4261, + "step": 19834 + }, + { + "epoch": 54.49175824175824, + "grad_norm": 16.658994674682617, + "learning_rate": 2.275412087912088e-05, + "loss": 0.5081, + "step": 19835 + }, + { + "epoch": 54.494505494505496, + "grad_norm": 25.09890365600586, + "learning_rate": 2.2752747252747254e-05, + "loss": 0.57, + "step": 19836 + }, + { + "epoch": 54.497252747252745, + "grad_norm": 6.394185543060303, + "learning_rate": 2.2751373626373627e-05, + "loss": 0.1328, + "step": 19837 + }, + { + "epoch": 54.5, + "grad_norm": 6.542237281799316, + "learning_rate": 2.275e-05, + "loss": 0.1481, + "step": 19838 + }, + { + "epoch": 54.502747252747255, + "grad_norm": 8.424688339233398, + "learning_rate": 2.2748626373626374e-05, + "loss": 0.127, + "step": 19839 + }, + { + "epoch": 54.505494505494504, + "grad_norm": 8.179460525512695, + "learning_rate": 2.2747252747252748e-05, + "loss": 0.2818, + "step": 19840 + }, + { + "epoch": 54.50824175824176, + "grad_norm": 19.007905960083008, + "learning_rate": 2.274587912087912e-05, + "loss": 0.405, + "step": 19841 + }, + { + "epoch": 54.51098901098901, + "grad_norm": 11.424601554870605, + "learning_rate": 2.2744505494505498e-05, + "loss": 0.1739, + "step": 19842 + }, + { + "epoch": 54.51373626373626, + "grad_norm": 15.982396125793457, + "learning_rate": 2.274313186813187e-05, + "loss": 0.4407, + "step": 19843 + }, + { + "epoch": 54.51648351648352, + "grad_norm": 7.858180522918701, + "learning_rate": 2.2741758241758245e-05, + "loss": 0.1883, + "step": 19844 + }, + { + "epoch": 54.51923076923077, + "grad_norm": 6.013335227966309, + "learning_rate": 2.2740384615384615e-05, + "loss": 0.055, + "step": 19845 + }, + { + "epoch": 54.52197802197802, + "grad_norm": 17.15586280822754, + "learning_rate": 2.2739010989010988e-05, + "loss": 0.2306, + "step": 19846 + }, + { + "epoch": 54.52472527472528, + "grad_norm": 12.225740432739258, + "learning_rate": 2.2737637362637365e-05, + "loss": 0.1996, + "step": 19847 + }, + { + "epoch": 54.527472527472526, + "grad_norm": 12.666350364685059, + "learning_rate": 2.2736263736263738e-05, + "loss": 0.1386, + "step": 19848 + }, + { + "epoch": 54.53021978021978, + "grad_norm": 10.169088363647461, + "learning_rate": 2.273489010989011e-05, + "loss": 0.2903, + "step": 19849 + }, + { + "epoch": 54.532967032967036, + "grad_norm": 16.367006301879883, + "learning_rate": 2.2733516483516485e-05, + "loss": 0.3198, + "step": 19850 + }, + { + "epoch": 54.535714285714285, + "grad_norm": 10.561260223388672, + "learning_rate": 2.273214285714286e-05, + "loss": 0.2286, + "step": 19851 + }, + { + "epoch": 54.53846153846154, + "grad_norm": 24.949079513549805, + "learning_rate": 2.2730769230769232e-05, + "loss": 0.6325, + "step": 19852 + }, + { + "epoch": 54.54120879120879, + "grad_norm": 13.375439643859863, + "learning_rate": 2.2729395604395605e-05, + "loss": 0.3078, + "step": 19853 + }, + { + "epoch": 54.543956043956044, + "grad_norm": 7.374717712402344, + "learning_rate": 2.272802197802198e-05, + "loss": 0.0915, + "step": 19854 + }, + { + "epoch": 54.5467032967033, + "grad_norm": 4.015837669372559, + "learning_rate": 2.2726648351648352e-05, + "loss": 0.0554, + "step": 19855 + }, + { + "epoch": 54.54945054945055, + "grad_norm": 11.984766960144043, + "learning_rate": 2.2725274725274725e-05, + "loss": 0.2135, + "step": 19856 + }, + { + "epoch": 54.5521978021978, + "grad_norm": 16.67888832092285, + "learning_rate": 2.27239010989011e-05, + "loss": 0.4236, + "step": 19857 + }, + { + "epoch": 54.55494505494506, + "grad_norm": 15.70793628692627, + "learning_rate": 2.2722527472527476e-05, + "loss": 0.4052, + "step": 19858 + }, + { + "epoch": 54.55769230769231, + "grad_norm": 3.5968353748321533, + "learning_rate": 2.272115384615385e-05, + "loss": 0.0465, + "step": 19859 + }, + { + "epoch": 54.56043956043956, + "grad_norm": 27.420921325683594, + "learning_rate": 2.271978021978022e-05, + "loss": 1.0941, + "step": 19860 + }, + { + "epoch": 54.56318681318681, + "grad_norm": 3.5889885425567627, + "learning_rate": 2.2718406593406592e-05, + "loss": 0.07, + "step": 19861 + }, + { + "epoch": 54.565934065934066, + "grad_norm": 2.139336109161377, + "learning_rate": 2.2717032967032966e-05, + "loss": 0.0337, + "step": 19862 + }, + { + "epoch": 54.56868131868132, + "grad_norm": 4.203522205352783, + "learning_rate": 2.2715659340659343e-05, + "loss": 0.052, + "step": 19863 + }, + { + "epoch": 54.57142857142857, + "grad_norm": 19.53085708618164, + "learning_rate": 2.2714285714285716e-05, + "loss": 0.5112, + "step": 19864 + }, + { + "epoch": 54.574175824175825, + "grad_norm": 5.11387825012207, + "learning_rate": 2.271291208791209e-05, + "loss": 0.0928, + "step": 19865 + }, + { + "epoch": 54.57692307692308, + "grad_norm": 6.773833274841309, + "learning_rate": 2.2711538461538463e-05, + "loss": 0.1167, + "step": 19866 + }, + { + "epoch": 54.57967032967033, + "grad_norm": 9.566202163696289, + "learning_rate": 2.2710164835164836e-05, + "loss": 0.1055, + "step": 19867 + }, + { + "epoch": 54.582417582417584, + "grad_norm": 7.923607349395752, + "learning_rate": 2.270879120879121e-05, + "loss": 0.1194, + "step": 19868 + }, + { + "epoch": 54.58516483516483, + "grad_norm": 12.275766372680664, + "learning_rate": 2.2707417582417583e-05, + "loss": 0.2513, + "step": 19869 + }, + { + "epoch": 54.58791208791209, + "grad_norm": 6.83163595199585, + "learning_rate": 2.2706043956043957e-05, + "loss": 0.0793, + "step": 19870 + }, + { + "epoch": 54.59065934065934, + "grad_norm": 16.704740524291992, + "learning_rate": 2.270467032967033e-05, + "loss": 0.4036, + "step": 19871 + }, + { + "epoch": 54.59340659340659, + "grad_norm": 7.124237060546875, + "learning_rate": 2.2703296703296703e-05, + "loss": 0.0761, + "step": 19872 + }, + { + "epoch": 54.59615384615385, + "grad_norm": 17.83411407470703, + "learning_rate": 2.270192307692308e-05, + "loss": 0.3544, + "step": 19873 + }, + { + "epoch": 54.5989010989011, + "grad_norm": 17.42542266845703, + "learning_rate": 2.2700549450549454e-05, + "loss": 0.412, + "step": 19874 + }, + { + "epoch": 54.60164835164835, + "grad_norm": 14.263014793395996, + "learning_rate": 2.2699175824175824e-05, + "loss": 0.2963, + "step": 19875 + }, + { + "epoch": 54.604395604395606, + "grad_norm": 9.495633125305176, + "learning_rate": 2.2697802197802197e-05, + "loss": 0.2774, + "step": 19876 + }, + { + "epoch": 54.607142857142854, + "grad_norm": 10.13625717163086, + "learning_rate": 2.269642857142857e-05, + "loss": 0.282, + "step": 19877 + }, + { + "epoch": 54.60989010989011, + "grad_norm": 16.19798469543457, + "learning_rate": 2.2695054945054947e-05, + "loss": 0.2305, + "step": 19878 + }, + { + "epoch": 54.612637362637365, + "grad_norm": 12.8865327835083, + "learning_rate": 2.269368131868132e-05, + "loss": 0.2549, + "step": 19879 + }, + { + "epoch": 54.61538461538461, + "grad_norm": 3.6151232719421387, + "learning_rate": 2.2692307692307694e-05, + "loss": 0.0606, + "step": 19880 + }, + { + "epoch": 54.61813186813187, + "grad_norm": 9.644023895263672, + "learning_rate": 2.2690934065934067e-05, + "loss": 0.2091, + "step": 19881 + }, + { + "epoch": 54.620879120879124, + "grad_norm": 2.948667287826538, + "learning_rate": 2.268956043956044e-05, + "loss": 0.0374, + "step": 19882 + }, + { + "epoch": 54.62362637362637, + "grad_norm": 17.342844009399414, + "learning_rate": 2.2688186813186814e-05, + "loss": 0.4588, + "step": 19883 + }, + { + "epoch": 54.62637362637363, + "grad_norm": 13.327256202697754, + "learning_rate": 2.2686813186813188e-05, + "loss": 0.2053, + "step": 19884 + }, + { + "epoch": 54.629120879120876, + "grad_norm": 16.251323699951172, + "learning_rate": 2.268543956043956e-05, + "loss": 0.351, + "step": 19885 + }, + { + "epoch": 54.63186813186813, + "grad_norm": 14.139864921569824, + "learning_rate": 2.2684065934065934e-05, + "loss": 0.3111, + "step": 19886 + }, + { + "epoch": 54.63461538461539, + "grad_norm": 10.468443870544434, + "learning_rate": 2.2682692307692308e-05, + "loss": 0.2791, + "step": 19887 + }, + { + "epoch": 54.637362637362635, + "grad_norm": 8.850645065307617, + "learning_rate": 2.2681318681318685e-05, + "loss": 0.2864, + "step": 19888 + }, + { + "epoch": 54.64010989010989, + "grad_norm": 11.625781059265137, + "learning_rate": 2.2679945054945058e-05, + "loss": 0.1721, + "step": 19889 + }, + { + "epoch": 54.642857142857146, + "grad_norm": 6.286090850830078, + "learning_rate": 2.2678571428571428e-05, + "loss": 0.1695, + "step": 19890 + }, + { + "epoch": 54.645604395604394, + "grad_norm": 10.738725662231445, + "learning_rate": 2.26771978021978e-05, + "loss": 0.2037, + "step": 19891 + }, + { + "epoch": 54.64835164835165, + "grad_norm": 18.942819595336914, + "learning_rate": 2.2675824175824175e-05, + "loss": 0.4802, + "step": 19892 + }, + { + "epoch": 54.6510989010989, + "grad_norm": 2.1833908557891846, + "learning_rate": 2.267445054945055e-05, + "loss": 0.0278, + "step": 19893 + }, + { + "epoch": 54.65384615384615, + "grad_norm": 9.42632007598877, + "learning_rate": 2.2673076923076925e-05, + "loss": 0.2101, + "step": 19894 + }, + { + "epoch": 54.65659340659341, + "grad_norm": 6.484274387359619, + "learning_rate": 2.26717032967033e-05, + "loss": 0.1368, + "step": 19895 + }, + { + "epoch": 54.65934065934066, + "grad_norm": 12.060338973999023, + "learning_rate": 2.2670329670329672e-05, + "loss": 0.3699, + "step": 19896 + }, + { + "epoch": 54.66208791208791, + "grad_norm": 11.55284309387207, + "learning_rate": 2.2668956043956045e-05, + "loss": 0.2799, + "step": 19897 + }, + { + "epoch": 54.66483516483517, + "grad_norm": 5.599869251251221, + "learning_rate": 2.266758241758242e-05, + "loss": 0.1624, + "step": 19898 + }, + { + "epoch": 54.667582417582416, + "grad_norm": 9.499484062194824, + "learning_rate": 2.2666208791208792e-05, + "loss": 0.2522, + "step": 19899 + }, + { + "epoch": 54.67032967032967, + "grad_norm": 3.7061150074005127, + "learning_rate": 2.2664835164835165e-05, + "loss": 0.0538, + "step": 19900 + }, + { + "epoch": 54.67307692307692, + "grad_norm": 2.5282833576202393, + "learning_rate": 2.266346153846154e-05, + "loss": 0.0385, + "step": 19901 + }, + { + "epoch": 54.675824175824175, + "grad_norm": 9.619766235351562, + "learning_rate": 2.2662087912087912e-05, + "loss": 0.2145, + "step": 19902 + }, + { + "epoch": 54.67857142857143, + "grad_norm": 17.48895263671875, + "learning_rate": 2.266071428571429e-05, + "loss": 0.3869, + "step": 19903 + }, + { + "epoch": 54.68131868131868, + "grad_norm": 8.792716026306152, + "learning_rate": 2.2659340659340663e-05, + "loss": 0.1523, + "step": 19904 + }, + { + "epoch": 54.684065934065934, + "grad_norm": 23.843158721923828, + "learning_rate": 2.2657967032967033e-05, + "loss": 0.3123, + "step": 19905 + }, + { + "epoch": 54.68681318681319, + "grad_norm": 6.116918563842773, + "learning_rate": 2.2656593406593406e-05, + "loss": 0.0931, + "step": 19906 + }, + { + "epoch": 54.68956043956044, + "grad_norm": 14.67346477508545, + "learning_rate": 2.265521978021978e-05, + "loss": 0.3496, + "step": 19907 + }, + { + "epoch": 54.69230769230769, + "grad_norm": 14.81666088104248, + "learning_rate": 2.2653846153846156e-05, + "loss": 0.5071, + "step": 19908 + }, + { + "epoch": 54.69505494505494, + "grad_norm": 15.29410457611084, + "learning_rate": 2.265247252747253e-05, + "loss": 0.3324, + "step": 19909 + }, + { + "epoch": 54.6978021978022, + "grad_norm": 11.367461204528809, + "learning_rate": 2.2651098901098903e-05, + "loss": 0.1659, + "step": 19910 + }, + { + "epoch": 54.70054945054945, + "grad_norm": 17.157238006591797, + "learning_rate": 2.2649725274725276e-05, + "loss": 0.3755, + "step": 19911 + }, + { + "epoch": 54.7032967032967, + "grad_norm": 6.175615310668945, + "learning_rate": 2.264835164835165e-05, + "loss": 0.0692, + "step": 19912 + }, + { + "epoch": 54.706043956043956, + "grad_norm": 9.852126121520996, + "learning_rate": 2.2646978021978023e-05, + "loss": 0.1843, + "step": 19913 + }, + { + "epoch": 54.70879120879121, + "grad_norm": 11.453102111816406, + "learning_rate": 2.2645604395604397e-05, + "loss": 0.2316, + "step": 19914 + }, + { + "epoch": 54.71153846153846, + "grad_norm": 10.491124153137207, + "learning_rate": 2.264423076923077e-05, + "loss": 0.1302, + "step": 19915 + }, + { + "epoch": 54.714285714285715, + "grad_norm": 14.271133422851562, + "learning_rate": 2.2642857142857143e-05, + "loss": 0.312, + "step": 19916 + }, + { + "epoch": 54.717032967032964, + "grad_norm": 11.276853561401367, + "learning_rate": 2.2641483516483517e-05, + "loss": 0.3447, + "step": 19917 + }, + { + "epoch": 54.71978021978022, + "grad_norm": 14.553375244140625, + "learning_rate": 2.2640109890109894e-05, + "loss": 0.2921, + "step": 19918 + }, + { + "epoch": 54.722527472527474, + "grad_norm": 5.641767978668213, + "learning_rate": 2.2638736263736267e-05, + "loss": 0.1665, + "step": 19919 + }, + { + "epoch": 54.72527472527472, + "grad_norm": 11.240760803222656, + "learning_rate": 2.2637362637362637e-05, + "loss": 0.1655, + "step": 19920 + }, + { + "epoch": 54.72802197802198, + "grad_norm": 3.983335018157959, + "learning_rate": 2.263598901098901e-05, + "loss": 0.0501, + "step": 19921 + }, + { + "epoch": 54.73076923076923, + "grad_norm": 7.3552117347717285, + "learning_rate": 2.2634615384615384e-05, + "loss": 0.1248, + "step": 19922 + }, + { + "epoch": 54.73351648351648, + "grad_norm": 16.176301956176758, + "learning_rate": 2.263324175824176e-05, + "loss": 0.3502, + "step": 19923 + }, + { + "epoch": 54.73626373626374, + "grad_norm": 9.576239585876465, + "learning_rate": 2.2631868131868134e-05, + "loss": 0.3808, + "step": 19924 + }, + { + "epoch": 54.73901098901099, + "grad_norm": 11.99679183959961, + "learning_rate": 2.2630494505494507e-05, + "loss": 0.2154, + "step": 19925 + }, + { + "epoch": 54.74175824175824, + "grad_norm": 11.872748374938965, + "learning_rate": 2.262912087912088e-05, + "loss": 0.4428, + "step": 19926 + }, + { + "epoch": 54.744505494505496, + "grad_norm": 10.068696022033691, + "learning_rate": 2.2627747252747254e-05, + "loss": 0.2248, + "step": 19927 + }, + { + "epoch": 54.747252747252745, + "grad_norm": 12.416476249694824, + "learning_rate": 2.2626373626373628e-05, + "loss": 0.1886, + "step": 19928 + }, + { + "epoch": 54.75, + "grad_norm": 31.4835147857666, + "learning_rate": 2.2625e-05, + "loss": 0.9385, + "step": 19929 + }, + { + "epoch": 54.752747252747255, + "grad_norm": 4.488339424133301, + "learning_rate": 2.2623626373626374e-05, + "loss": 0.0624, + "step": 19930 + }, + { + "epoch": 54.755494505494504, + "grad_norm": 15.909893989562988, + "learning_rate": 2.2622252747252748e-05, + "loss": 0.2499, + "step": 19931 + }, + { + "epoch": 54.75824175824176, + "grad_norm": 18.153467178344727, + "learning_rate": 2.262087912087912e-05, + "loss": 0.3453, + "step": 19932 + }, + { + "epoch": 54.76098901098901, + "grad_norm": 10.20889663696289, + "learning_rate": 2.2619505494505498e-05, + "loss": 0.1176, + "step": 19933 + }, + { + "epoch": 54.76373626373626, + "grad_norm": 13.311198234558105, + "learning_rate": 2.261813186813187e-05, + "loss": 0.36, + "step": 19934 + }, + { + "epoch": 54.76648351648352, + "grad_norm": 3.8034133911132812, + "learning_rate": 2.261675824175824e-05, + "loss": 0.0566, + "step": 19935 + }, + { + "epoch": 54.76923076923077, + "grad_norm": 4.718552112579346, + "learning_rate": 2.2615384615384615e-05, + "loss": 0.0631, + "step": 19936 + }, + { + "epoch": 54.77197802197802, + "grad_norm": 9.475602149963379, + "learning_rate": 2.261401098901099e-05, + "loss": 0.1193, + "step": 19937 + }, + { + "epoch": 54.77472527472528, + "grad_norm": 9.225343704223633, + "learning_rate": 2.2612637362637365e-05, + "loss": 0.2821, + "step": 19938 + }, + { + "epoch": 54.777472527472526, + "grad_norm": 14.60794734954834, + "learning_rate": 2.261126373626374e-05, + "loss": 0.27, + "step": 19939 + }, + { + "epoch": 54.78021978021978, + "grad_norm": 15.494441986083984, + "learning_rate": 2.2609890109890112e-05, + "loss": 0.4555, + "step": 19940 + }, + { + "epoch": 54.782967032967036, + "grad_norm": 13.96882438659668, + "learning_rate": 2.2608516483516485e-05, + "loss": 0.2313, + "step": 19941 + }, + { + "epoch": 54.785714285714285, + "grad_norm": 10.000102043151855, + "learning_rate": 2.260714285714286e-05, + "loss": 0.1168, + "step": 19942 + }, + { + "epoch": 54.78846153846154, + "grad_norm": 12.84593677520752, + "learning_rate": 2.2605769230769232e-05, + "loss": 0.2711, + "step": 19943 + }, + { + "epoch": 54.79120879120879, + "grad_norm": 11.396896362304688, + "learning_rate": 2.2604395604395606e-05, + "loss": 0.2549, + "step": 19944 + }, + { + "epoch": 54.793956043956044, + "grad_norm": 2.9529061317443848, + "learning_rate": 2.260302197802198e-05, + "loss": 0.0429, + "step": 19945 + }, + { + "epoch": 54.7967032967033, + "grad_norm": 9.14526653289795, + "learning_rate": 2.2601648351648352e-05, + "loss": 0.1978, + "step": 19946 + }, + { + "epoch": 54.79945054945055, + "grad_norm": 16.753999710083008, + "learning_rate": 2.2600274725274726e-05, + "loss": 0.2887, + "step": 19947 + }, + { + "epoch": 54.8021978021978, + "grad_norm": 4.9510579109191895, + "learning_rate": 2.2598901098901103e-05, + "loss": 0.111, + "step": 19948 + }, + { + "epoch": 54.80494505494506, + "grad_norm": 8.8284273147583, + "learning_rate": 2.2597527472527476e-05, + "loss": 0.1275, + "step": 19949 + }, + { + "epoch": 54.80769230769231, + "grad_norm": 13.604314804077148, + "learning_rate": 2.2596153846153846e-05, + "loss": 0.2134, + "step": 19950 + }, + { + "epoch": 54.81043956043956, + "grad_norm": 9.610274314880371, + "learning_rate": 2.259478021978022e-05, + "loss": 0.248, + "step": 19951 + }, + { + "epoch": 54.81318681318681, + "grad_norm": 3.4035420417785645, + "learning_rate": 2.2593406593406593e-05, + "loss": 0.0531, + "step": 19952 + }, + { + "epoch": 54.815934065934066, + "grad_norm": 10.704390525817871, + "learning_rate": 2.259203296703297e-05, + "loss": 0.0975, + "step": 19953 + }, + { + "epoch": 54.81868131868132, + "grad_norm": 25.970535278320312, + "learning_rate": 2.2590659340659343e-05, + "loss": 0.8645, + "step": 19954 + }, + { + "epoch": 54.82142857142857, + "grad_norm": 8.13542652130127, + "learning_rate": 2.2589285714285716e-05, + "loss": 0.0985, + "step": 19955 + }, + { + "epoch": 54.824175824175825, + "grad_norm": 6.543105125427246, + "learning_rate": 2.258791208791209e-05, + "loss": 0.1782, + "step": 19956 + }, + { + "epoch": 54.82692307692308, + "grad_norm": 22.196208953857422, + "learning_rate": 2.2586538461538463e-05, + "loss": 0.5025, + "step": 19957 + }, + { + "epoch": 54.82967032967033, + "grad_norm": 5.277854919433594, + "learning_rate": 2.2585164835164837e-05, + "loss": 0.0946, + "step": 19958 + }, + { + "epoch": 54.832417582417584, + "grad_norm": 18.608339309692383, + "learning_rate": 2.258379120879121e-05, + "loss": 0.4305, + "step": 19959 + }, + { + "epoch": 54.83516483516483, + "grad_norm": 16.781063079833984, + "learning_rate": 2.2582417582417583e-05, + "loss": 0.4314, + "step": 19960 + }, + { + "epoch": 54.83791208791209, + "grad_norm": 8.027655601501465, + "learning_rate": 2.2581043956043957e-05, + "loss": 0.1904, + "step": 19961 + }, + { + "epoch": 54.84065934065934, + "grad_norm": 6.261066436767578, + "learning_rate": 2.257967032967033e-05, + "loss": 0.0889, + "step": 19962 + }, + { + "epoch": 54.84340659340659, + "grad_norm": 12.360565185546875, + "learning_rate": 2.2578296703296707e-05, + "loss": 0.1943, + "step": 19963 + }, + { + "epoch": 54.84615384615385, + "grad_norm": 11.493009567260742, + "learning_rate": 2.257692307692308e-05, + "loss": 0.162, + "step": 19964 + }, + { + "epoch": 54.8489010989011, + "grad_norm": 9.956096649169922, + "learning_rate": 2.257554945054945e-05, + "loss": 0.2512, + "step": 19965 + }, + { + "epoch": 54.85164835164835, + "grad_norm": 15.099093437194824, + "learning_rate": 2.2574175824175824e-05, + "loss": 0.2374, + "step": 19966 + }, + { + "epoch": 54.854395604395606, + "grad_norm": 6.716700077056885, + "learning_rate": 2.2572802197802197e-05, + "loss": 0.1858, + "step": 19967 + }, + { + "epoch": 54.857142857142854, + "grad_norm": 16.63340950012207, + "learning_rate": 2.257142857142857e-05, + "loss": 0.3302, + "step": 19968 + }, + { + "epoch": 54.85989010989011, + "grad_norm": 8.224905967712402, + "learning_rate": 2.2570054945054948e-05, + "loss": 0.1677, + "step": 19969 + }, + { + "epoch": 54.862637362637365, + "grad_norm": 15.399970054626465, + "learning_rate": 2.256868131868132e-05, + "loss": 0.1919, + "step": 19970 + }, + { + "epoch": 54.86538461538461, + "grad_norm": 17.272628784179688, + "learning_rate": 2.2567307692307694e-05, + "loss": 0.4928, + "step": 19971 + }, + { + "epoch": 54.86813186813187, + "grad_norm": 19.25285530090332, + "learning_rate": 2.2565934065934068e-05, + "loss": 0.3346, + "step": 19972 + }, + { + "epoch": 54.870879120879124, + "grad_norm": 13.367525100708008, + "learning_rate": 2.2564560439560438e-05, + "loss": 0.2073, + "step": 19973 + }, + { + "epoch": 54.87362637362637, + "grad_norm": 2.7826671600341797, + "learning_rate": 2.2563186813186815e-05, + "loss": 0.0279, + "step": 19974 + }, + { + "epoch": 54.87637362637363, + "grad_norm": 2.1033852100372314, + "learning_rate": 2.2561813186813188e-05, + "loss": 0.0247, + "step": 19975 + }, + { + "epoch": 54.879120879120876, + "grad_norm": 13.739387512207031, + "learning_rate": 2.256043956043956e-05, + "loss": 0.2573, + "step": 19976 + }, + { + "epoch": 54.88186813186813, + "grad_norm": 13.057433128356934, + "learning_rate": 2.2559065934065935e-05, + "loss": 0.3841, + "step": 19977 + }, + { + "epoch": 54.88461538461539, + "grad_norm": 14.90308666229248, + "learning_rate": 2.2557692307692308e-05, + "loss": 0.4699, + "step": 19978 + }, + { + "epoch": 54.887362637362635, + "grad_norm": 10.013655662536621, + "learning_rate": 2.2556318681318685e-05, + "loss": 0.4301, + "step": 19979 + }, + { + "epoch": 54.89010989010989, + "grad_norm": 12.278475761413574, + "learning_rate": 2.2554945054945055e-05, + "loss": 0.2155, + "step": 19980 + }, + { + "epoch": 54.892857142857146, + "grad_norm": 16.046297073364258, + "learning_rate": 2.255357142857143e-05, + "loss": 0.4675, + "step": 19981 + }, + { + "epoch": 54.895604395604394, + "grad_norm": 11.093070030212402, + "learning_rate": 2.2552197802197802e-05, + "loss": 0.2625, + "step": 19982 + }, + { + "epoch": 54.89835164835165, + "grad_norm": 15.410033226013184, + "learning_rate": 2.2550824175824175e-05, + "loss": 0.2205, + "step": 19983 + }, + { + "epoch": 54.9010989010989, + "grad_norm": 9.42733383178711, + "learning_rate": 2.2549450549450552e-05, + "loss": 0.1957, + "step": 19984 + }, + { + "epoch": 54.90384615384615, + "grad_norm": 2.436617136001587, + "learning_rate": 2.2548076923076925e-05, + "loss": 0.0449, + "step": 19985 + }, + { + "epoch": 54.90659340659341, + "grad_norm": 9.98674488067627, + "learning_rate": 2.25467032967033e-05, + "loss": 0.1344, + "step": 19986 + }, + { + "epoch": 54.90934065934066, + "grad_norm": 2.5275466442108154, + "learning_rate": 2.2545329670329672e-05, + "loss": 0.0258, + "step": 19987 + }, + { + "epoch": 54.91208791208791, + "grad_norm": 18.99232292175293, + "learning_rate": 2.2543956043956042e-05, + "loss": 0.6144, + "step": 19988 + }, + { + "epoch": 54.91483516483517, + "grad_norm": 8.584887504577637, + "learning_rate": 2.254258241758242e-05, + "loss": 0.1972, + "step": 19989 + }, + { + "epoch": 54.917582417582416, + "grad_norm": 11.462909698486328, + "learning_rate": 2.2541208791208792e-05, + "loss": 0.2348, + "step": 19990 + }, + { + "epoch": 54.92032967032967, + "grad_norm": 12.944129943847656, + "learning_rate": 2.2539835164835166e-05, + "loss": 0.3767, + "step": 19991 + }, + { + "epoch": 54.92307692307692, + "grad_norm": 10.004154205322266, + "learning_rate": 2.253846153846154e-05, + "loss": 0.2296, + "step": 19992 + }, + { + "epoch": 54.925824175824175, + "grad_norm": 8.170270919799805, + "learning_rate": 2.2537087912087913e-05, + "loss": 0.1224, + "step": 19993 + }, + { + "epoch": 54.92857142857143, + "grad_norm": 9.794715881347656, + "learning_rate": 2.253571428571429e-05, + "loss": 0.1326, + "step": 19994 + }, + { + "epoch": 54.93131868131868, + "grad_norm": 13.309144973754883, + "learning_rate": 2.253434065934066e-05, + "loss": 0.2315, + "step": 19995 + }, + { + "epoch": 54.934065934065934, + "grad_norm": 16.6457576751709, + "learning_rate": 2.2532967032967033e-05, + "loss": 0.4087, + "step": 19996 + }, + { + "epoch": 54.93681318681319, + "grad_norm": 8.961655616760254, + "learning_rate": 2.2531593406593406e-05, + "loss": 0.1405, + "step": 19997 + }, + { + "epoch": 54.93956043956044, + "grad_norm": 15.38498306274414, + "learning_rate": 2.253021978021978e-05, + "loss": 0.2632, + "step": 19998 + }, + { + "epoch": 54.94230769230769, + "grad_norm": 17.668500900268555, + "learning_rate": 2.2528846153846156e-05, + "loss": 0.3146, + "step": 19999 + }, + { + "epoch": 54.94505494505494, + "grad_norm": 16.319246292114258, + "learning_rate": 2.252747252747253e-05, + "loss": 0.3654, + "step": 20000 + }, + { + "epoch": 54.9478021978022, + "grad_norm": 7.185103893280029, + "learning_rate": 2.2526098901098903e-05, + "loss": 0.0991, + "step": 20001 + }, + { + "epoch": 54.95054945054945, + "grad_norm": 14.787500381469727, + "learning_rate": 2.2524725274725277e-05, + "loss": 0.3274, + "step": 20002 + }, + { + "epoch": 54.9532967032967, + "grad_norm": 3.60487961769104, + "learning_rate": 2.2523351648351647e-05, + "loss": 0.0713, + "step": 20003 + }, + { + "epoch": 54.956043956043956, + "grad_norm": 10.637804985046387, + "learning_rate": 2.2521978021978024e-05, + "loss": 0.1184, + "step": 20004 + }, + { + "epoch": 54.95879120879121, + "grad_norm": 18.20148468017578, + "learning_rate": 2.2520604395604397e-05, + "loss": 0.2784, + "step": 20005 + }, + { + "epoch": 54.96153846153846, + "grad_norm": 3.198545217514038, + "learning_rate": 2.251923076923077e-05, + "loss": 0.0675, + "step": 20006 + }, + { + "epoch": 54.964285714285715, + "grad_norm": 5.522922039031982, + "learning_rate": 2.2517857142857144e-05, + "loss": 0.0995, + "step": 20007 + }, + { + "epoch": 54.967032967032964, + "grad_norm": 15.107490539550781, + "learning_rate": 2.2516483516483517e-05, + "loss": 0.3432, + "step": 20008 + }, + { + "epoch": 54.96978021978022, + "grad_norm": 19.862836837768555, + "learning_rate": 2.251510989010989e-05, + "loss": 0.8392, + "step": 20009 + }, + { + "epoch": 54.972527472527474, + "grad_norm": 12.055734634399414, + "learning_rate": 2.2513736263736264e-05, + "loss": 0.3256, + "step": 20010 + }, + { + "epoch": 54.97527472527472, + "grad_norm": 21.66120719909668, + "learning_rate": 2.2512362637362637e-05, + "loss": 0.4981, + "step": 20011 + }, + { + "epoch": 54.97802197802198, + "grad_norm": 15.52125072479248, + "learning_rate": 2.251098901098901e-05, + "loss": 0.5095, + "step": 20012 + }, + { + "epoch": 54.98076923076923, + "grad_norm": 4.594795227050781, + "learning_rate": 2.2509615384615384e-05, + "loss": 0.0482, + "step": 20013 + }, + { + "epoch": 54.98351648351648, + "grad_norm": 9.34169864654541, + "learning_rate": 2.250824175824176e-05, + "loss": 0.1324, + "step": 20014 + }, + { + "epoch": 54.98626373626374, + "grad_norm": 24.772655487060547, + "learning_rate": 2.2506868131868134e-05, + "loss": 0.5815, + "step": 20015 + }, + { + "epoch": 54.98901098901099, + "grad_norm": 15.45530891418457, + "learning_rate": 2.2505494505494508e-05, + "loss": 0.3479, + "step": 20016 + }, + { + "epoch": 54.99175824175824, + "grad_norm": 20.674327850341797, + "learning_rate": 2.250412087912088e-05, + "loss": 0.4214, + "step": 20017 + }, + { + "epoch": 54.994505494505496, + "grad_norm": 10.628227233886719, + "learning_rate": 2.250274725274725e-05, + "loss": 0.1356, + "step": 20018 + }, + { + "epoch": 54.997252747252745, + "grad_norm": 24.372243881225586, + "learning_rate": 2.2501373626373628e-05, + "loss": 0.7776, + "step": 20019 + }, + { + "epoch": 55.0, + "grad_norm": 14.4086332321167, + "learning_rate": 2.25e-05, + "loss": 0.135, + "step": 20020 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.7534435261707989, + "eval_f1": 0.7553588161534017, + "eval_f1_DuraRiadoRio_64x64": 0.7171052631578947, + "eval_f1_Mole_64x64": 0.6285714285714286, + "eval_f1_Quebrado_64x64": 0.8398576512455516, + "eval_f1_RiadoRio_64x64": 0.6526315789473685, + "eval_f1_RioFechado_64x64": 0.9386281588447654, + "eval_loss": 0.9281495809555054, + "eval_precision": 0.8098772961397817, + "eval_precision_DuraRiadoRio_64x64": 0.68125, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.8613138686131386, + "eval_precision_RiadoRio_64x64": 0.543859649122807, + "eval_precision_RioFechado_64x64": 0.9629629629629629, + "eval_recall": 0.7532009307305824, + "eval_recall_DuraRiadoRio_64x64": 0.7569444444444444, + "eval_recall_Mole_64x64": 0.4583333333333333, + "eval_recall_Quebrado_64x64": 0.8194444444444444, + "eval_recall_RiadoRio_64x64": 0.8157894736842105, + "eval_recall_RioFechado_64x64": 0.9154929577464789, + "eval_runtime": 1.7755, + "eval_samples_per_second": 408.902, + "eval_steps_per_second": 25.908, + "step": 20020 + }, + { + "epoch": 55.002747252747255, + "grad_norm": 2.7861781120300293, + "learning_rate": 2.2498626373626375e-05, + "loss": 0.0592, + "step": 20021 + }, + { + "epoch": 55.005494505494504, + "grad_norm": 10.546143531799316, + "learning_rate": 2.2497252747252748e-05, + "loss": 0.3062, + "step": 20022 + }, + { + "epoch": 55.00824175824176, + "grad_norm": 16.296493530273438, + "learning_rate": 2.249587912087912e-05, + "loss": 0.1862, + "step": 20023 + }, + { + "epoch": 55.010989010989015, + "grad_norm": 22.013690948486328, + "learning_rate": 2.2494505494505495e-05, + "loss": 0.325, + "step": 20024 + }, + { + "epoch": 55.01373626373626, + "grad_norm": 20.06365203857422, + "learning_rate": 2.249313186813187e-05, + "loss": 0.629, + "step": 20025 + }, + { + "epoch": 55.01648351648352, + "grad_norm": 16.01090431213379, + "learning_rate": 2.2491758241758242e-05, + "loss": 0.3763, + "step": 20026 + }, + { + "epoch": 55.01923076923077, + "grad_norm": 12.242069244384766, + "learning_rate": 2.2490384615384615e-05, + "loss": 0.3775, + "step": 20027 + }, + { + "epoch": 55.02197802197802, + "grad_norm": 12.31730842590332, + "learning_rate": 2.248901098901099e-05, + "loss": 0.24, + "step": 20028 + }, + { + "epoch": 55.02472527472528, + "grad_norm": 11.388115882873535, + "learning_rate": 2.2487637362637365e-05, + "loss": 0.2333, + "step": 20029 + }, + { + "epoch": 55.027472527472526, + "grad_norm": 3.949357748031616, + "learning_rate": 2.248626373626374e-05, + "loss": 0.1152, + "step": 20030 + }, + { + "epoch": 55.03021978021978, + "grad_norm": 5.3969807624816895, + "learning_rate": 2.2484890109890112e-05, + "loss": 0.0675, + "step": 20031 + }, + { + "epoch": 55.032967032967036, + "grad_norm": 2.838465452194214, + "learning_rate": 2.2483516483516486e-05, + "loss": 0.1006, + "step": 20032 + }, + { + "epoch": 55.035714285714285, + "grad_norm": 13.52285099029541, + "learning_rate": 2.2482142857142856e-05, + "loss": 0.2917, + "step": 20033 + }, + { + "epoch": 55.03846153846154, + "grad_norm": 13.264091491699219, + "learning_rate": 2.2480769230769233e-05, + "loss": 0.376, + "step": 20034 + }, + { + "epoch": 55.04120879120879, + "grad_norm": 10.131998062133789, + "learning_rate": 2.2479395604395606e-05, + "loss": 0.2696, + "step": 20035 + }, + { + "epoch": 55.043956043956044, + "grad_norm": 18.228851318359375, + "learning_rate": 2.247802197802198e-05, + "loss": 0.3607, + "step": 20036 + }, + { + "epoch": 55.0467032967033, + "grad_norm": 14.1784086227417, + "learning_rate": 2.2476648351648353e-05, + "loss": 0.3125, + "step": 20037 + }, + { + "epoch": 55.04945054945055, + "grad_norm": 10.748835563659668, + "learning_rate": 2.2475274725274726e-05, + "loss": 0.4167, + "step": 20038 + }, + { + "epoch": 55.0521978021978, + "grad_norm": 29.148029327392578, + "learning_rate": 2.24739010989011e-05, + "loss": 0.6026, + "step": 20039 + }, + { + "epoch": 55.05494505494506, + "grad_norm": 1.0587458610534668, + "learning_rate": 2.2472527472527473e-05, + "loss": 0.0122, + "step": 20040 + }, + { + "epoch": 55.05769230769231, + "grad_norm": 18.115234375, + "learning_rate": 2.2471153846153846e-05, + "loss": 0.2773, + "step": 20041 + }, + { + "epoch": 55.06043956043956, + "grad_norm": 7.2697906494140625, + "learning_rate": 2.246978021978022e-05, + "loss": 0.1446, + "step": 20042 + }, + { + "epoch": 55.06318681318681, + "grad_norm": 14.11567211151123, + "learning_rate": 2.2468406593406593e-05, + "loss": 0.2768, + "step": 20043 + }, + { + "epoch": 55.065934065934066, + "grad_norm": 4.588924407958984, + "learning_rate": 2.246703296703297e-05, + "loss": 0.0988, + "step": 20044 + }, + { + "epoch": 55.06868131868132, + "grad_norm": 6.794522762298584, + "learning_rate": 2.2465659340659343e-05, + "loss": 0.0829, + "step": 20045 + }, + { + "epoch": 55.07142857142857, + "grad_norm": 4.126847267150879, + "learning_rate": 2.2464285714285717e-05, + "loss": 0.0671, + "step": 20046 + }, + { + "epoch": 55.074175824175825, + "grad_norm": 11.359652519226074, + "learning_rate": 2.246291208791209e-05, + "loss": 0.2608, + "step": 20047 + }, + { + "epoch": 55.07692307692308, + "grad_norm": 15.759852409362793, + "learning_rate": 2.246153846153846e-05, + "loss": 0.4633, + "step": 20048 + }, + { + "epoch": 55.07967032967033, + "grad_norm": 18.818490982055664, + "learning_rate": 2.2460164835164837e-05, + "loss": 0.3824, + "step": 20049 + }, + { + "epoch": 55.082417582417584, + "grad_norm": 1.1468617916107178, + "learning_rate": 2.245879120879121e-05, + "loss": 0.0185, + "step": 20050 + }, + { + "epoch": 55.08516483516483, + "grad_norm": 10.584589004516602, + "learning_rate": 2.2457417582417584e-05, + "loss": 0.1946, + "step": 20051 + }, + { + "epoch": 55.08791208791209, + "grad_norm": 10.028575897216797, + "learning_rate": 2.2456043956043957e-05, + "loss": 0.1161, + "step": 20052 + }, + { + "epoch": 55.09065934065934, + "grad_norm": 3.433288097381592, + "learning_rate": 2.245467032967033e-05, + "loss": 0.0665, + "step": 20053 + }, + { + "epoch": 55.09340659340659, + "grad_norm": 16.65023422241211, + "learning_rate": 2.2453296703296704e-05, + "loss": 0.4293, + "step": 20054 + }, + { + "epoch": 55.09615384615385, + "grad_norm": 16.900014877319336, + "learning_rate": 2.2451923076923077e-05, + "loss": 0.4471, + "step": 20055 + }, + { + "epoch": 55.0989010989011, + "grad_norm": 9.91728687286377, + "learning_rate": 2.245054945054945e-05, + "loss": 0.1853, + "step": 20056 + }, + { + "epoch": 55.10164835164835, + "grad_norm": 0.8869559168815613, + "learning_rate": 2.2449175824175824e-05, + "loss": 0.011, + "step": 20057 + }, + { + "epoch": 55.104395604395606, + "grad_norm": 7.164640426635742, + "learning_rate": 2.2447802197802198e-05, + "loss": 0.0988, + "step": 20058 + }, + { + "epoch": 55.107142857142854, + "grad_norm": 5.335972785949707, + "learning_rate": 2.2446428571428574e-05, + "loss": 0.06, + "step": 20059 + }, + { + "epoch": 55.10989010989011, + "grad_norm": 12.740659713745117, + "learning_rate": 2.2445054945054948e-05, + "loss": 0.2182, + "step": 20060 + }, + { + "epoch": 55.112637362637365, + "grad_norm": 10.683259010314941, + "learning_rate": 2.244368131868132e-05, + "loss": 0.3369, + "step": 20061 + }, + { + "epoch": 55.11538461538461, + "grad_norm": 11.580731391906738, + "learning_rate": 2.2442307692307695e-05, + "loss": 0.225, + "step": 20062 + }, + { + "epoch": 55.11813186813187, + "grad_norm": 13.712615966796875, + "learning_rate": 2.2440934065934065e-05, + "loss": 0.4517, + "step": 20063 + }, + { + "epoch": 55.120879120879124, + "grad_norm": 10.126985549926758, + "learning_rate": 2.243956043956044e-05, + "loss": 0.1204, + "step": 20064 + }, + { + "epoch": 55.12362637362637, + "grad_norm": 6.650039196014404, + "learning_rate": 2.2438186813186815e-05, + "loss": 0.0916, + "step": 20065 + }, + { + "epoch": 55.12637362637363, + "grad_norm": 4.238334655761719, + "learning_rate": 2.2436813186813188e-05, + "loss": 0.0774, + "step": 20066 + }, + { + "epoch": 55.129120879120876, + "grad_norm": 31.21590232849121, + "learning_rate": 2.2435439560439562e-05, + "loss": 0.7218, + "step": 20067 + }, + { + "epoch": 55.13186813186813, + "grad_norm": 8.729840278625488, + "learning_rate": 2.2434065934065935e-05, + "loss": 0.2649, + "step": 20068 + }, + { + "epoch": 55.13461538461539, + "grad_norm": 1.8048152923583984, + "learning_rate": 2.243269230769231e-05, + "loss": 0.0358, + "step": 20069 + }, + { + "epoch": 55.137362637362635, + "grad_norm": 3.4393301010131836, + "learning_rate": 2.2431318681318682e-05, + "loss": 0.0706, + "step": 20070 + }, + { + "epoch": 55.14010989010989, + "grad_norm": 4.242778301239014, + "learning_rate": 2.2429945054945055e-05, + "loss": 0.0989, + "step": 20071 + }, + { + "epoch": 55.142857142857146, + "grad_norm": 19.73601531982422, + "learning_rate": 2.242857142857143e-05, + "loss": 0.7844, + "step": 20072 + }, + { + "epoch": 55.145604395604394, + "grad_norm": 29.76709747314453, + "learning_rate": 2.2427197802197802e-05, + "loss": 0.5195, + "step": 20073 + }, + { + "epoch": 55.14835164835165, + "grad_norm": 22.799535751342773, + "learning_rate": 2.242582417582418e-05, + "loss": 0.8798, + "step": 20074 + }, + { + "epoch": 55.1510989010989, + "grad_norm": 10.903341293334961, + "learning_rate": 2.2424450549450552e-05, + "loss": 0.316, + "step": 20075 + }, + { + "epoch": 55.15384615384615, + "grad_norm": 11.016861915588379, + "learning_rate": 2.2423076923076926e-05, + "loss": 0.2529, + "step": 20076 + }, + { + "epoch": 55.15659340659341, + "grad_norm": 17.438648223876953, + "learning_rate": 2.24217032967033e-05, + "loss": 0.4314, + "step": 20077 + }, + { + "epoch": 55.15934065934066, + "grad_norm": 15.244573593139648, + "learning_rate": 2.242032967032967e-05, + "loss": 0.3878, + "step": 20078 + }, + { + "epoch": 55.16208791208791, + "grad_norm": 15.234251976013184, + "learning_rate": 2.2418956043956043e-05, + "loss": 0.3608, + "step": 20079 + }, + { + "epoch": 55.16483516483517, + "grad_norm": 8.011555671691895, + "learning_rate": 2.241758241758242e-05, + "loss": 0.07, + "step": 20080 + }, + { + "epoch": 55.167582417582416, + "grad_norm": 23.92238998413086, + "learning_rate": 2.2416208791208793e-05, + "loss": 0.72, + "step": 20081 + }, + { + "epoch": 55.17032967032967, + "grad_norm": 14.518209457397461, + "learning_rate": 2.2414835164835166e-05, + "loss": 0.4472, + "step": 20082 + }, + { + "epoch": 55.17307692307692, + "grad_norm": 11.051025390625, + "learning_rate": 2.241346153846154e-05, + "loss": 0.3216, + "step": 20083 + }, + { + "epoch": 55.175824175824175, + "grad_norm": 12.621585845947266, + "learning_rate": 2.2412087912087913e-05, + "loss": 0.2883, + "step": 20084 + }, + { + "epoch": 55.17857142857143, + "grad_norm": 9.430534362792969, + "learning_rate": 2.2410714285714286e-05, + "loss": 0.1916, + "step": 20085 + }, + { + "epoch": 55.18131868131868, + "grad_norm": 15.012057304382324, + "learning_rate": 2.240934065934066e-05, + "loss": 0.2253, + "step": 20086 + }, + { + "epoch": 55.184065934065934, + "grad_norm": 13.190191268920898, + "learning_rate": 2.2407967032967033e-05, + "loss": 0.3602, + "step": 20087 + }, + { + "epoch": 55.18681318681319, + "grad_norm": 7.406408309936523, + "learning_rate": 2.2406593406593407e-05, + "loss": 0.1327, + "step": 20088 + }, + { + "epoch": 55.18956043956044, + "grad_norm": 13.929277420043945, + "learning_rate": 2.240521978021978e-05, + "loss": 0.3248, + "step": 20089 + }, + { + "epoch": 55.19230769230769, + "grad_norm": 23.346149444580078, + "learning_rate": 2.2403846153846157e-05, + "loss": 0.4456, + "step": 20090 + }, + { + "epoch": 55.19505494505494, + "grad_norm": 12.317490577697754, + "learning_rate": 2.240247252747253e-05, + "loss": 0.2836, + "step": 20091 + }, + { + "epoch": 55.1978021978022, + "grad_norm": 13.382002830505371, + "learning_rate": 2.2401098901098904e-05, + "loss": 0.1076, + "step": 20092 + }, + { + "epoch": 55.20054945054945, + "grad_norm": 2.9276857376098633, + "learning_rate": 2.2399725274725274e-05, + "loss": 0.0582, + "step": 20093 + }, + { + "epoch": 55.2032967032967, + "grad_norm": 19.34796905517578, + "learning_rate": 2.2398351648351647e-05, + "loss": 0.3796, + "step": 20094 + }, + { + "epoch": 55.206043956043956, + "grad_norm": 6.655725955963135, + "learning_rate": 2.2396978021978024e-05, + "loss": 0.1389, + "step": 20095 + }, + { + "epoch": 55.20879120879121, + "grad_norm": 7.917072772979736, + "learning_rate": 2.2395604395604397e-05, + "loss": 0.0983, + "step": 20096 + }, + { + "epoch": 55.21153846153846, + "grad_norm": 7.651446342468262, + "learning_rate": 2.239423076923077e-05, + "loss": 0.1555, + "step": 20097 + }, + { + "epoch": 55.214285714285715, + "grad_norm": 10.39647102355957, + "learning_rate": 2.2392857142857144e-05, + "loss": 0.188, + "step": 20098 + }, + { + "epoch": 55.217032967032964, + "grad_norm": 1.3024832010269165, + "learning_rate": 2.2391483516483517e-05, + "loss": 0.0273, + "step": 20099 + }, + { + "epoch": 55.21978021978022, + "grad_norm": 3.250929594039917, + "learning_rate": 2.239010989010989e-05, + "loss": 0.0499, + "step": 20100 + }, + { + "epoch": 55.222527472527474, + "grad_norm": 9.158048629760742, + "learning_rate": 2.2388736263736264e-05, + "loss": 0.193, + "step": 20101 + }, + { + "epoch": 55.22527472527472, + "grad_norm": 2.137831449508667, + "learning_rate": 2.2387362637362638e-05, + "loss": 0.0267, + "step": 20102 + }, + { + "epoch": 55.22802197802198, + "grad_norm": 11.787321090698242, + "learning_rate": 2.238598901098901e-05, + "loss": 0.2761, + "step": 20103 + }, + { + "epoch": 55.23076923076923, + "grad_norm": 15.748810768127441, + "learning_rate": 2.2384615384615385e-05, + "loss": 0.2815, + "step": 20104 + }, + { + "epoch": 55.23351648351648, + "grad_norm": 33.18798065185547, + "learning_rate": 2.238324175824176e-05, + "loss": 0.858, + "step": 20105 + }, + { + "epoch": 55.23626373626374, + "grad_norm": 14.895081520080566, + "learning_rate": 2.2381868131868135e-05, + "loss": 0.3388, + "step": 20106 + }, + { + "epoch": 55.239010989010985, + "grad_norm": 7.538338661193848, + "learning_rate": 2.2380494505494505e-05, + "loss": 0.1727, + "step": 20107 + }, + { + "epoch": 55.24175824175824, + "grad_norm": 10.865545272827148, + "learning_rate": 2.2379120879120878e-05, + "loss": 0.2294, + "step": 20108 + }, + { + "epoch": 55.244505494505496, + "grad_norm": 4.595924377441406, + "learning_rate": 2.237774725274725e-05, + "loss": 0.0834, + "step": 20109 + }, + { + "epoch": 55.247252747252745, + "grad_norm": 21.446535110473633, + "learning_rate": 2.237637362637363e-05, + "loss": 0.3668, + "step": 20110 + }, + { + "epoch": 55.25, + "grad_norm": 7.517575740814209, + "learning_rate": 2.2375000000000002e-05, + "loss": 0.0741, + "step": 20111 + }, + { + "epoch": 55.252747252747255, + "grad_norm": 10.3418550491333, + "learning_rate": 2.2373626373626375e-05, + "loss": 0.2032, + "step": 20112 + }, + { + "epoch": 55.255494505494504, + "grad_norm": 8.151692390441895, + "learning_rate": 2.237225274725275e-05, + "loss": 0.1017, + "step": 20113 + }, + { + "epoch": 55.25824175824176, + "grad_norm": 16.790807723999023, + "learning_rate": 2.2370879120879122e-05, + "loss": 0.5117, + "step": 20114 + }, + { + "epoch": 55.260989010989015, + "grad_norm": 1.3888729810714722, + "learning_rate": 2.2369505494505495e-05, + "loss": 0.0148, + "step": 20115 + }, + { + "epoch": 55.26373626373626, + "grad_norm": 12.573725700378418, + "learning_rate": 2.236813186813187e-05, + "loss": 0.2847, + "step": 20116 + }, + { + "epoch": 55.26648351648352, + "grad_norm": 20.992103576660156, + "learning_rate": 2.2366758241758242e-05, + "loss": 0.4059, + "step": 20117 + }, + { + "epoch": 55.26923076923077, + "grad_norm": 16.9646053314209, + "learning_rate": 2.2365384615384616e-05, + "loss": 0.308, + "step": 20118 + }, + { + "epoch": 55.27197802197802, + "grad_norm": 11.324530601501465, + "learning_rate": 2.236401098901099e-05, + "loss": 0.197, + "step": 20119 + }, + { + "epoch": 55.27472527472528, + "grad_norm": 2.8121392726898193, + "learning_rate": 2.2362637362637366e-05, + "loss": 0.0271, + "step": 20120 + }, + { + "epoch": 55.277472527472526, + "grad_norm": 5.345125675201416, + "learning_rate": 2.236126373626374e-05, + "loss": 0.0535, + "step": 20121 + }, + { + "epoch": 55.28021978021978, + "grad_norm": 14.365279197692871, + "learning_rate": 2.235989010989011e-05, + "loss": 0.6084, + "step": 20122 + }, + { + "epoch": 55.282967032967036, + "grad_norm": 11.075822830200195, + "learning_rate": 2.2358516483516483e-05, + "loss": 0.3696, + "step": 20123 + }, + { + "epoch": 55.285714285714285, + "grad_norm": 6.944527626037598, + "learning_rate": 2.2357142857142856e-05, + "loss": 0.1388, + "step": 20124 + }, + { + "epoch": 55.28846153846154, + "grad_norm": 17.1961612701416, + "learning_rate": 2.2355769230769233e-05, + "loss": 0.2609, + "step": 20125 + }, + { + "epoch": 55.29120879120879, + "grad_norm": 10.764350891113281, + "learning_rate": 2.2354395604395606e-05, + "loss": 0.2903, + "step": 20126 + }, + { + "epoch": 55.293956043956044, + "grad_norm": 17.390718460083008, + "learning_rate": 2.235302197802198e-05, + "loss": 0.3121, + "step": 20127 + }, + { + "epoch": 55.2967032967033, + "grad_norm": 13.45843505859375, + "learning_rate": 2.2351648351648353e-05, + "loss": 0.5541, + "step": 20128 + }, + { + "epoch": 55.29945054945055, + "grad_norm": 19.212779998779297, + "learning_rate": 2.2350274725274726e-05, + "loss": 0.3224, + "step": 20129 + }, + { + "epoch": 55.3021978021978, + "grad_norm": 10.258258819580078, + "learning_rate": 2.23489010989011e-05, + "loss": 0.2003, + "step": 20130 + }, + { + "epoch": 55.30494505494506, + "grad_norm": 23.435277938842773, + "learning_rate": 2.2347527472527473e-05, + "loss": 0.4005, + "step": 20131 + }, + { + "epoch": 55.30769230769231, + "grad_norm": 9.590045928955078, + "learning_rate": 2.2346153846153847e-05, + "loss": 0.1803, + "step": 20132 + }, + { + "epoch": 55.31043956043956, + "grad_norm": 7.670976638793945, + "learning_rate": 2.234478021978022e-05, + "loss": 0.1789, + "step": 20133 + }, + { + "epoch": 55.31318681318681, + "grad_norm": 4.7054243087768555, + "learning_rate": 2.2343406593406594e-05, + "loss": 0.0585, + "step": 20134 + }, + { + "epoch": 55.315934065934066, + "grad_norm": 11.270759582519531, + "learning_rate": 2.234203296703297e-05, + "loss": 0.1969, + "step": 20135 + }, + { + "epoch": 55.31868131868132, + "grad_norm": 10.127490043640137, + "learning_rate": 2.2340659340659344e-05, + "loss": 0.1504, + "step": 20136 + }, + { + "epoch": 55.32142857142857, + "grad_norm": 7.806005001068115, + "learning_rate": 2.2339285714285714e-05, + "loss": 0.0808, + "step": 20137 + }, + { + "epoch": 55.324175824175825, + "grad_norm": 11.347439765930176, + "learning_rate": 2.2337912087912087e-05, + "loss": 0.1887, + "step": 20138 + }, + { + "epoch": 55.32692307692308, + "grad_norm": 10.25540828704834, + "learning_rate": 2.233653846153846e-05, + "loss": 0.1143, + "step": 20139 + }, + { + "epoch": 55.32967032967033, + "grad_norm": 17.417531967163086, + "learning_rate": 2.2335164835164837e-05, + "loss": 0.4195, + "step": 20140 + }, + { + "epoch": 55.332417582417584, + "grad_norm": 4.912485122680664, + "learning_rate": 2.233379120879121e-05, + "loss": 0.0933, + "step": 20141 + }, + { + "epoch": 55.33516483516483, + "grad_norm": 6.901172161102295, + "learning_rate": 2.2332417582417584e-05, + "loss": 0.1272, + "step": 20142 + }, + { + "epoch": 55.33791208791209, + "grad_norm": 12.046040534973145, + "learning_rate": 2.2331043956043958e-05, + "loss": 0.4119, + "step": 20143 + }, + { + "epoch": 55.34065934065934, + "grad_norm": 2.6218984127044678, + "learning_rate": 2.232967032967033e-05, + "loss": 0.0391, + "step": 20144 + }, + { + "epoch": 55.34340659340659, + "grad_norm": 19.05937385559082, + "learning_rate": 2.2328296703296704e-05, + "loss": 0.6139, + "step": 20145 + }, + { + "epoch": 55.34615384615385, + "grad_norm": 15.19542407989502, + "learning_rate": 2.2326923076923078e-05, + "loss": 0.3361, + "step": 20146 + }, + { + "epoch": 55.3489010989011, + "grad_norm": 10.924115180969238, + "learning_rate": 2.232554945054945e-05, + "loss": 0.1575, + "step": 20147 + }, + { + "epoch": 55.35164835164835, + "grad_norm": 9.58711051940918, + "learning_rate": 2.2324175824175825e-05, + "loss": 0.1343, + "step": 20148 + }, + { + "epoch": 55.354395604395606, + "grad_norm": 6.826705455780029, + "learning_rate": 2.2322802197802198e-05, + "loss": 0.1084, + "step": 20149 + }, + { + "epoch": 55.357142857142854, + "grad_norm": 12.176713943481445, + "learning_rate": 2.2321428571428575e-05, + "loss": 0.2118, + "step": 20150 + }, + { + "epoch": 55.35989010989011, + "grad_norm": 6.055109024047852, + "learning_rate": 2.2320054945054948e-05, + "loss": 0.0665, + "step": 20151 + }, + { + "epoch": 55.362637362637365, + "grad_norm": 6.392271518707275, + "learning_rate": 2.2318681318681318e-05, + "loss": 0.156, + "step": 20152 + }, + { + "epoch": 55.36538461538461, + "grad_norm": 4.474880695343018, + "learning_rate": 2.231730769230769e-05, + "loss": 0.089, + "step": 20153 + }, + { + "epoch": 55.36813186813187, + "grad_norm": 14.730582237243652, + "learning_rate": 2.2315934065934065e-05, + "loss": 0.3388, + "step": 20154 + }, + { + "epoch": 55.370879120879124, + "grad_norm": 5.507211208343506, + "learning_rate": 2.2314560439560442e-05, + "loss": 0.1976, + "step": 20155 + }, + { + "epoch": 55.37362637362637, + "grad_norm": 5.949891567230225, + "learning_rate": 2.2313186813186815e-05, + "loss": 0.094, + "step": 20156 + }, + { + "epoch": 55.37637362637363, + "grad_norm": 7.892771244049072, + "learning_rate": 2.231181318681319e-05, + "loss": 0.1857, + "step": 20157 + }, + { + "epoch": 55.379120879120876, + "grad_norm": 10.327802658081055, + "learning_rate": 2.2310439560439562e-05, + "loss": 0.2123, + "step": 20158 + }, + { + "epoch": 55.38186813186813, + "grad_norm": 12.663850784301758, + "learning_rate": 2.2309065934065935e-05, + "loss": 0.2502, + "step": 20159 + }, + { + "epoch": 55.38461538461539, + "grad_norm": 8.101231575012207, + "learning_rate": 2.230769230769231e-05, + "loss": 0.3074, + "step": 20160 + }, + { + "epoch": 55.387362637362635, + "grad_norm": 11.500267028808594, + "learning_rate": 2.2306318681318682e-05, + "loss": 0.2274, + "step": 20161 + }, + { + "epoch": 55.39010989010989, + "grad_norm": 9.05284595489502, + "learning_rate": 2.2304945054945056e-05, + "loss": 0.1684, + "step": 20162 + }, + { + "epoch": 55.392857142857146, + "grad_norm": 8.890227317810059, + "learning_rate": 2.230357142857143e-05, + "loss": 0.1815, + "step": 20163 + }, + { + "epoch": 55.395604395604394, + "grad_norm": 10.681441307067871, + "learning_rate": 2.2302197802197802e-05, + "loss": 0.144, + "step": 20164 + }, + { + "epoch": 55.39835164835165, + "grad_norm": 12.00524616241455, + "learning_rate": 2.230082417582418e-05, + "loss": 0.2566, + "step": 20165 + }, + { + "epoch": 55.4010989010989, + "grad_norm": 25.089990615844727, + "learning_rate": 2.2299450549450553e-05, + "loss": 0.4953, + "step": 20166 + }, + { + "epoch": 55.40384615384615, + "grad_norm": 17.85256004333496, + "learning_rate": 2.2298076923076923e-05, + "loss": 0.6087, + "step": 20167 + }, + { + "epoch": 55.40659340659341, + "grad_norm": 24.25424575805664, + "learning_rate": 2.2296703296703296e-05, + "loss": 0.8394, + "step": 20168 + }, + { + "epoch": 55.40934065934066, + "grad_norm": 16.07660675048828, + "learning_rate": 2.229532967032967e-05, + "loss": 0.3475, + "step": 20169 + }, + { + "epoch": 55.41208791208791, + "grad_norm": 19.546510696411133, + "learning_rate": 2.2293956043956046e-05, + "loss": 0.7349, + "step": 20170 + }, + { + "epoch": 55.41483516483517, + "grad_norm": 9.98843765258789, + "learning_rate": 2.229258241758242e-05, + "loss": 0.2307, + "step": 20171 + }, + { + "epoch": 55.417582417582416, + "grad_norm": 15.44804859161377, + "learning_rate": 2.2291208791208793e-05, + "loss": 0.1938, + "step": 20172 + }, + { + "epoch": 55.42032967032967, + "grad_norm": 28.69257354736328, + "learning_rate": 2.2289835164835167e-05, + "loss": 0.5028, + "step": 20173 + }, + { + "epoch": 55.42307692307692, + "grad_norm": 7.03068733215332, + "learning_rate": 2.228846153846154e-05, + "loss": 0.1063, + "step": 20174 + }, + { + "epoch": 55.425824175824175, + "grad_norm": 9.608613967895508, + "learning_rate": 2.2287087912087913e-05, + "loss": 0.1403, + "step": 20175 + }, + { + "epoch": 55.42857142857143, + "grad_norm": 14.909031867980957, + "learning_rate": 2.2285714285714287e-05, + "loss": 0.3056, + "step": 20176 + }, + { + "epoch": 55.43131868131868, + "grad_norm": 9.816656112670898, + "learning_rate": 2.228434065934066e-05, + "loss": 0.1427, + "step": 20177 + }, + { + "epoch": 55.434065934065934, + "grad_norm": 8.300603866577148, + "learning_rate": 2.2282967032967034e-05, + "loss": 0.1775, + "step": 20178 + }, + { + "epoch": 55.43681318681319, + "grad_norm": 20.163110733032227, + "learning_rate": 2.2281593406593407e-05, + "loss": 0.4404, + "step": 20179 + }, + { + "epoch": 55.43956043956044, + "grad_norm": 17.326427459716797, + "learning_rate": 2.2280219780219784e-05, + "loss": 0.1958, + "step": 20180 + }, + { + "epoch": 55.44230769230769, + "grad_norm": 20.46475601196289, + "learning_rate": 2.2278846153846157e-05, + "loss": 0.4157, + "step": 20181 + }, + { + "epoch": 55.44505494505494, + "grad_norm": 8.467131614685059, + "learning_rate": 2.2277472527472527e-05, + "loss": 0.2145, + "step": 20182 + }, + { + "epoch": 55.4478021978022, + "grad_norm": 12.647027969360352, + "learning_rate": 2.22760989010989e-05, + "loss": 0.1218, + "step": 20183 + }, + { + "epoch": 55.45054945054945, + "grad_norm": 10.339282989501953, + "learning_rate": 2.2274725274725274e-05, + "loss": 0.1564, + "step": 20184 + }, + { + "epoch": 55.4532967032967, + "grad_norm": 2.5818874835968018, + "learning_rate": 2.227335164835165e-05, + "loss": 0.0392, + "step": 20185 + }, + { + "epoch": 55.456043956043956, + "grad_norm": 12.939711570739746, + "learning_rate": 2.2271978021978024e-05, + "loss": 0.2139, + "step": 20186 + }, + { + "epoch": 55.45879120879121, + "grad_norm": 9.882997512817383, + "learning_rate": 2.2270604395604398e-05, + "loss": 0.1062, + "step": 20187 + }, + { + "epoch": 55.46153846153846, + "grad_norm": 3.8685755729675293, + "learning_rate": 2.226923076923077e-05, + "loss": 0.0623, + "step": 20188 + }, + { + "epoch": 55.464285714285715, + "grad_norm": 6.703677177429199, + "learning_rate": 2.2267857142857144e-05, + "loss": 0.1121, + "step": 20189 + }, + { + "epoch": 55.467032967032964, + "grad_norm": 16.837459564208984, + "learning_rate": 2.2266483516483518e-05, + "loss": 0.7418, + "step": 20190 + }, + { + "epoch": 55.46978021978022, + "grad_norm": 15.841227531433105, + "learning_rate": 2.226510989010989e-05, + "loss": 0.4695, + "step": 20191 + }, + { + "epoch": 55.472527472527474, + "grad_norm": 11.106653213500977, + "learning_rate": 2.2263736263736265e-05, + "loss": 0.3339, + "step": 20192 + }, + { + "epoch": 55.47527472527472, + "grad_norm": 14.054227828979492, + "learning_rate": 2.2262362637362638e-05, + "loss": 0.1797, + "step": 20193 + }, + { + "epoch": 55.47802197802198, + "grad_norm": 21.269460678100586, + "learning_rate": 2.226098901098901e-05, + "loss": 0.4516, + "step": 20194 + }, + { + "epoch": 55.48076923076923, + "grad_norm": 10.723540306091309, + "learning_rate": 2.2259615384615385e-05, + "loss": 0.1494, + "step": 20195 + }, + { + "epoch": 55.48351648351648, + "grad_norm": 8.707746505737305, + "learning_rate": 2.225824175824176e-05, + "loss": 0.2503, + "step": 20196 + }, + { + "epoch": 55.48626373626374, + "grad_norm": 16.436813354492188, + "learning_rate": 2.2256868131868132e-05, + "loss": 0.2512, + "step": 20197 + }, + { + "epoch": 55.489010989010985, + "grad_norm": 20.315773010253906, + "learning_rate": 2.2255494505494505e-05, + "loss": 0.4285, + "step": 20198 + }, + { + "epoch": 55.49175824175824, + "grad_norm": 22.99977684020996, + "learning_rate": 2.225412087912088e-05, + "loss": 0.6367, + "step": 20199 + }, + { + "epoch": 55.494505494505496, + "grad_norm": 11.561213493347168, + "learning_rate": 2.2252747252747252e-05, + "loss": 0.1561, + "step": 20200 + }, + { + "epoch": 55.497252747252745, + "grad_norm": 12.29718017578125, + "learning_rate": 2.225137362637363e-05, + "loss": 0.2571, + "step": 20201 + }, + { + "epoch": 55.5, + "grad_norm": 12.650742530822754, + "learning_rate": 2.2250000000000002e-05, + "loss": 0.4264, + "step": 20202 + }, + { + "epoch": 55.502747252747255, + "grad_norm": 20.14077377319336, + "learning_rate": 2.2248626373626376e-05, + "loss": 0.363, + "step": 20203 + }, + { + "epoch": 55.505494505494504, + "grad_norm": 13.898582458496094, + "learning_rate": 2.224725274725275e-05, + "loss": 0.3633, + "step": 20204 + }, + { + "epoch": 55.50824175824176, + "grad_norm": 10.659960746765137, + "learning_rate": 2.224587912087912e-05, + "loss": 0.1956, + "step": 20205 + }, + { + "epoch": 55.51098901098901, + "grad_norm": 1.8633288145065308, + "learning_rate": 2.2244505494505496e-05, + "loss": 0.0266, + "step": 20206 + }, + { + "epoch": 55.51373626373626, + "grad_norm": 9.342824935913086, + "learning_rate": 2.224313186813187e-05, + "loss": 0.192, + "step": 20207 + }, + { + "epoch": 55.51648351648352, + "grad_norm": 18.040515899658203, + "learning_rate": 2.2241758241758243e-05, + "loss": 0.3553, + "step": 20208 + }, + { + "epoch": 55.51923076923077, + "grad_norm": 7.386098861694336, + "learning_rate": 2.2240384615384616e-05, + "loss": 0.122, + "step": 20209 + }, + { + "epoch": 55.52197802197802, + "grad_norm": 10.004693984985352, + "learning_rate": 2.223901098901099e-05, + "loss": 0.1471, + "step": 20210 + }, + { + "epoch": 55.52472527472528, + "grad_norm": 7.447484493255615, + "learning_rate": 2.2237637362637366e-05, + "loss": 0.1072, + "step": 20211 + }, + { + "epoch": 55.527472527472526, + "grad_norm": 8.411388397216797, + "learning_rate": 2.2236263736263736e-05, + "loss": 0.1306, + "step": 20212 + }, + { + "epoch": 55.53021978021978, + "grad_norm": 1.386163353919983, + "learning_rate": 2.223489010989011e-05, + "loss": 0.019, + "step": 20213 + }, + { + "epoch": 55.532967032967036, + "grad_norm": 5.466763496398926, + "learning_rate": 2.2233516483516483e-05, + "loss": 0.101, + "step": 20214 + }, + { + "epoch": 55.535714285714285, + "grad_norm": 20.025508880615234, + "learning_rate": 2.2232142857142856e-05, + "loss": 0.3297, + "step": 20215 + }, + { + "epoch": 55.53846153846154, + "grad_norm": 7.033435344696045, + "learning_rate": 2.2230769230769233e-05, + "loss": 0.1228, + "step": 20216 + }, + { + "epoch": 55.54120879120879, + "grad_norm": 8.171299934387207, + "learning_rate": 2.2229395604395607e-05, + "loss": 0.294, + "step": 20217 + }, + { + "epoch": 55.543956043956044, + "grad_norm": 24.925676345825195, + "learning_rate": 2.222802197802198e-05, + "loss": 0.403, + "step": 20218 + }, + { + "epoch": 55.5467032967033, + "grad_norm": 15.680191993713379, + "learning_rate": 2.2226648351648353e-05, + "loss": 0.3659, + "step": 20219 + }, + { + "epoch": 55.54945054945055, + "grad_norm": 16.549470901489258, + "learning_rate": 2.2225274725274723e-05, + "loss": 0.3766, + "step": 20220 + }, + { + "epoch": 55.5521978021978, + "grad_norm": 19.2696533203125, + "learning_rate": 2.22239010989011e-05, + "loss": 0.6668, + "step": 20221 + }, + { + "epoch": 55.55494505494506, + "grad_norm": 11.34326457977295, + "learning_rate": 2.2222527472527474e-05, + "loss": 0.16, + "step": 20222 + }, + { + "epoch": 55.55769230769231, + "grad_norm": 9.92624282836914, + "learning_rate": 2.2221153846153847e-05, + "loss": 0.2811, + "step": 20223 + }, + { + "epoch": 55.56043956043956, + "grad_norm": 9.246306419372559, + "learning_rate": 2.221978021978022e-05, + "loss": 0.1497, + "step": 20224 + }, + { + "epoch": 55.56318681318681, + "grad_norm": 17.137487411499023, + "learning_rate": 2.2218406593406594e-05, + "loss": 0.5976, + "step": 20225 + }, + { + "epoch": 55.565934065934066, + "grad_norm": 10.300196647644043, + "learning_rate": 2.221703296703297e-05, + "loss": 0.2725, + "step": 20226 + }, + { + "epoch": 55.56868131868132, + "grad_norm": 0.9524913430213928, + "learning_rate": 2.221565934065934e-05, + "loss": 0.0126, + "step": 20227 + }, + { + "epoch": 55.57142857142857, + "grad_norm": 8.158421516418457, + "learning_rate": 2.2214285714285714e-05, + "loss": 0.19, + "step": 20228 + }, + { + "epoch": 55.574175824175825, + "grad_norm": 13.262748718261719, + "learning_rate": 2.2212912087912087e-05, + "loss": 0.3966, + "step": 20229 + }, + { + "epoch": 55.57692307692308, + "grad_norm": 11.150341033935547, + "learning_rate": 2.221153846153846e-05, + "loss": 0.3043, + "step": 20230 + }, + { + "epoch": 55.57967032967033, + "grad_norm": 12.913182258605957, + "learning_rate": 2.2210164835164838e-05, + "loss": 0.5849, + "step": 20231 + }, + { + "epoch": 55.582417582417584, + "grad_norm": 25.212215423583984, + "learning_rate": 2.220879120879121e-05, + "loss": 0.6833, + "step": 20232 + }, + { + "epoch": 55.58516483516483, + "grad_norm": 2.7911641597747803, + "learning_rate": 2.2207417582417585e-05, + "loss": 0.0461, + "step": 20233 + }, + { + "epoch": 55.58791208791209, + "grad_norm": 17.73571014404297, + "learning_rate": 2.2206043956043958e-05, + "loss": 0.1919, + "step": 20234 + }, + { + "epoch": 55.59065934065934, + "grad_norm": 8.253787994384766, + "learning_rate": 2.2204670329670328e-05, + "loss": 0.29, + "step": 20235 + }, + { + "epoch": 55.59340659340659, + "grad_norm": 7.855238437652588, + "learning_rate": 2.2203296703296705e-05, + "loss": 0.1944, + "step": 20236 + }, + { + "epoch": 55.59615384615385, + "grad_norm": 9.0349760055542, + "learning_rate": 2.2201923076923078e-05, + "loss": 0.1565, + "step": 20237 + }, + { + "epoch": 55.5989010989011, + "grad_norm": 10.113581657409668, + "learning_rate": 2.220054945054945e-05, + "loss": 0.3378, + "step": 20238 + }, + { + "epoch": 55.60164835164835, + "grad_norm": 14.487327575683594, + "learning_rate": 2.2199175824175825e-05, + "loss": 0.2422, + "step": 20239 + }, + { + "epoch": 55.604395604395606, + "grad_norm": 14.507657051086426, + "learning_rate": 2.21978021978022e-05, + "loss": 0.2075, + "step": 20240 + }, + { + "epoch": 55.607142857142854, + "grad_norm": 9.137202262878418, + "learning_rate": 2.2196428571428575e-05, + "loss": 0.2336, + "step": 20241 + }, + { + "epoch": 55.60989010989011, + "grad_norm": 6.337575912475586, + "learning_rate": 2.2195054945054945e-05, + "loss": 0.0692, + "step": 20242 + }, + { + "epoch": 55.612637362637365, + "grad_norm": 13.741827011108398, + "learning_rate": 2.219368131868132e-05, + "loss": 0.2953, + "step": 20243 + }, + { + "epoch": 55.61538461538461, + "grad_norm": 4.571641445159912, + "learning_rate": 2.2192307692307692e-05, + "loss": 0.0826, + "step": 20244 + }, + { + "epoch": 55.61813186813187, + "grad_norm": 13.374031066894531, + "learning_rate": 2.2190934065934065e-05, + "loss": 0.2384, + "step": 20245 + }, + { + "epoch": 55.620879120879124, + "grad_norm": 12.540128707885742, + "learning_rate": 2.2189560439560442e-05, + "loss": 0.2679, + "step": 20246 + }, + { + "epoch": 55.62362637362637, + "grad_norm": 11.372919082641602, + "learning_rate": 2.2188186813186816e-05, + "loss": 0.2987, + "step": 20247 + }, + { + "epoch": 55.62637362637363, + "grad_norm": 8.661827087402344, + "learning_rate": 2.218681318681319e-05, + "loss": 0.1384, + "step": 20248 + }, + { + "epoch": 55.629120879120876, + "grad_norm": 17.50924301147461, + "learning_rate": 2.2185439560439562e-05, + "loss": 0.4736, + "step": 20249 + }, + { + "epoch": 55.63186813186813, + "grad_norm": 4.867043972015381, + "learning_rate": 2.2184065934065932e-05, + "loss": 0.0349, + "step": 20250 + }, + { + "epoch": 55.63461538461539, + "grad_norm": 21.51772117614746, + "learning_rate": 2.218269230769231e-05, + "loss": 0.563, + "step": 20251 + }, + { + "epoch": 55.637362637362635, + "grad_norm": 8.150840759277344, + "learning_rate": 2.2181318681318683e-05, + "loss": 0.1845, + "step": 20252 + }, + { + "epoch": 55.64010989010989, + "grad_norm": 18.494630813598633, + "learning_rate": 2.2179945054945056e-05, + "loss": 0.6596, + "step": 20253 + }, + { + "epoch": 55.642857142857146, + "grad_norm": 1.7281935214996338, + "learning_rate": 2.217857142857143e-05, + "loss": 0.0291, + "step": 20254 + }, + { + "epoch": 55.645604395604394, + "grad_norm": 21.616178512573242, + "learning_rate": 2.2177197802197803e-05, + "loss": 0.6184, + "step": 20255 + }, + { + "epoch": 55.64835164835165, + "grad_norm": 16.383813858032227, + "learning_rate": 2.217582417582418e-05, + "loss": 0.2895, + "step": 20256 + }, + { + "epoch": 55.6510989010989, + "grad_norm": 7.9700117111206055, + "learning_rate": 2.217445054945055e-05, + "loss": 0.1135, + "step": 20257 + }, + { + "epoch": 55.65384615384615, + "grad_norm": 10.105178833007812, + "learning_rate": 2.2173076923076923e-05, + "loss": 0.2168, + "step": 20258 + }, + { + "epoch": 55.65659340659341, + "grad_norm": 3.574148654937744, + "learning_rate": 2.2171703296703296e-05, + "loss": 0.0517, + "step": 20259 + }, + { + "epoch": 55.65934065934066, + "grad_norm": 28.900333404541016, + "learning_rate": 2.217032967032967e-05, + "loss": 0.8376, + "step": 20260 + }, + { + "epoch": 55.66208791208791, + "grad_norm": 17.234540939331055, + "learning_rate": 2.2168956043956047e-05, + "loss": 0.2519, + "step": 20261 + }, + { + "epoch": 55.66483516483517, + "grad_norm": 2.6695170402526855, + "learning_rate": 2.216758241758242e-05, + "loss": 0.0758, + "step": 20262 + }, + { + "epoch": 55.667582417582416, + "grad_norm": 11.400832176208496, + "learning_rate": 2.2166208791208793e-05, + "loss": 0.1424, + "step": 20263 + }, + { + "epoch": 55.67032967032967, + "grad_norm": 3.0405354499816895, + "learning_rate": 2.2164835164835167e-05, + "loss": 0.0358, + "step": 20264 + }, + { + "epoch": 55.67307692307692, + "grad_norm": 9.714640617370605, + "learning_rate": 2.2163461538461537e-05, + "loss": 0.163, + "step": 20265 + }, + { + "epoch": 55.675824175824175, + "grad_norm": 11.842623710632324, + "learning_rate": 2.2162087912087914e-05, + "loss": 0.2557, + "step": 20266 + }, + { + "epoch": 55.67857142857143, + "grad_norm": 16.833316802978516, + "learning_rate": 2.2160714285714287e-05, + "loss": 0.2384, + "step": 20267 + }, + { + "epoch": 55.68131868131868, + "grad_norm": 6.063568115234375, + "learning_rate": 2.215934065934066e-05, + "loss": 0.1502, + "step": 20268 + }, + { + "epoch": 55.684065934065934, + "grad_norm": 19.204906463623047, + "learning_rate": 2.2157967032967034e-05, + "loss": 0.2716, + "step": 20269 + }, + { + "epoch": 55.68681318681319, + "grad_norm": 18.64080238342285, + "learning_rate": 2.2156593406593407e-05, + "loss": 0.3677, + "step": 20270 + }, + { + "epoch": 55.68956043956044, + "grad_norm": 16.863370895385742, + "learning_rate": 2.2155219780219784e-05, + "loss": 0.4963, + "step": 20271 + }, + { + "epoch": 55.69230769230769, + "grad_norm": 17.277912139892578, + "learning_rate": 2.2153846153846154e-05, + "loss": 0.3836, + "step": 20272 + }, + { + "epoch": 55.69505494505494, + "grad_norm": 8.257615089416504, + "learning_rate": 2.2152472527472528e-05, + "loss": 0.1691, + "step": 20273 + }, + { + "epoch": 55.6978021978022, + "grad_norm": 29.007152557373047, + "learning_rate": 2.21510989010989e-05, + "loss": 0.9507, + "step": 20274 + }, + { + "epoch": 55.70054945054945, + "grad_norm": 8.225473403930664, + "learning_rate": 2.2149725274725274e-05, + "loss": 0.0755, + "step": 20275 + }, + { + "epoch": 55.7032967032967, + "grad_norm": 4.662810325622559, + "learning_rate": 2.214835164835165e-05, + "loss": 0.0555, + "step": 20276 + }, + { + "epoch": 55.706043956043956, + "grad_norm": 16.97707748413086, + "learning_rate": 2.2146978021978025e-05, + "loss": 0.3504, + "step": 20277 + }, + { + "epoch": 55.70879120879121, + "grad_norm": 5.1409783363342285, + "learning_rate": 2.2145604395604398e-05, + "loss": 0.0947, + "step": 20278 + }, + { + "epoch": 55.71153846153846, + "grad_norm": 22.342973709106445, + "learning_rate": 2.214423076923077e-05, + "loss": 0.5922, + "step": 20279 + }, + { + "epoch": 55.714285714285715, + "grad_norm": 16.599773406982422, + "learning_rate": 2.214285714285714e-05, + "loss": 0.4846, + "step": 20280 + }, + { + "epoch": 55.717032967032964, + "grad_norm": 11.807156562805176, + "learning_rate": 2.2141483516483518e-05, + "loss": 0.2376, + "step": 20281 + }, + { + "epoch": 55.71978021978022, + "grad_norm": 3.0930066108703613, + "learning_rate": 2.214010989010989e-05, + "loss": 0.0521, + "step": 20282 + }, + { + "epoch": 55.722527472527474, + "grad_norm": 9.110613822937012, + "learning_rate": 2.2138736263736265e-05, + "loss": 0.2299, + "step": 20283 + }, + { + "epoch": 55.72527472527472, + "grad_norm": 15.620722770690918, + "learning_rate": 2.213736263736264e-05, + "loss": 0.2851, + "step": 20284 + }, + { + "epoch": 55.72802197802198, + "grad_norm": 18.959463119506836, + "learning_rate": 2.2135989010989012e-05, + "loss": 0.5209, + "step": 20285 + }, + { + "epoch": 55.73076923076923, + "grad_norm": 15.720550537109375, + "learning_rate": 2.213461538461539e-05, + "loss": 0.4356, + "step": 20286 + }, + { + "epoch": 55.73351648351648, + "grad_norm": 9.65719223022461, + "learning_rate": 2.213324175824176e-05, + "loss": 0.1428, + "step": 20287 + }, + { + "epoch": 55.73626373626374, + "grad_norm": 24.228004455566406, + "learning_rate": 2.2131868131868132e-05, + "loss": 0.6725, + "step": 20288 + }, + { + "epoch": 55.73901098901099, + "grad_norm": 9.953912734985352, + "learning_rate": 2.2130494505494505e-05, + "loss": 0.1213, + "step": 20289 + }, + { + "epoch": 55.74175824175824, + "grad_norm": 16.09248161315918, + "learning_rate": 2.212912087912088e-05, + "loss": 0.3727, + "step": 20290 + }, + { + "epoch": 55.744505494505496, + "grad_norm": 5.59065580368042, + "learning_rate": 2.2127747252747256e-05, + "loss": 0.0666, + "step": 20291 + }, + { + "epoch": 55.747252747252745, + "grad_norm": 7.37005615234375, + "learning_rate": 2.212637362637363e-05, + "loss": 0.0857, + "step": 20292 + }, + { + "epoch": 55.75, + "grad_norm": 5.546945571899414, + "learning_rate": 2.2125000000000002e-05, + "loss": 0.1344, + "step": 20293 + }, + { + "epoch": 55.752747252747255, + "grad_norm": 11.650533676147461, + "learning_rate": 2.2123626373626376e-05, + "loss": 0.2703, + "step": 20294 + }, + { + "epoch": 55.755494505494504, + "grad_norm": 17.210525512695312, + "learning_rate": 2.2122252747252746e-05, + "loss": 0.7366, + "step": 20295 + }, + { + "epoch": 55.75824175824176, + "grad_norm": 6.727545738220215, + "learning_rate": 2.2120879120879123e-05, + "loss": 0.1611, + "step": 20296 + }, + { + "epoch": 55.76098901098901, + "grad_norm": 22.201984405517578, + "learning_rate": 2.2119505494505496e-05, + "loss": 0.5534, + "step": 20297 + }, + { + "epoch": 55.76373626373626, + "grad_norm": 11.954832077026367, + "learning_rate": 2.211813186813187e-05, + "loss": 0.1701, + "step": 20298 + }, + { + "epoch": 55.76648351648352, + "grad_norm": 10.287105560302734, + "learning_rate": 2.2116758241758243e-05, + "loss": 0.1457, + "step": 20299 + }, + { + "epoch": 55.76923076923077, + "grad_norm": 13.249743461608887, + "learning_rate": 2.2115384615384616e-05, + "loss": 0.1697, + "step": 20300 + }, + { + "epoch": 55.77197802197802, + "grad_norm": 2.6968345642089844, + "learning_rate": 2.2114010989010993e-05, + "loss": 0.0283, + "step": 20301 + }, + { + "epoch": 55.77472527472528, + "grad_norm": 13.476539611816406, + "learning_rate": 2.2112637362637363e-05, + "loss": 0.3067, + "step": 20302 + }, + { + "epoch": 55.777472527472526, + "grad_norm": 21.801855087280273, + "learning_rate": 2.2111263736263737e-05, + "loss": 0.5542, + "step": 20303 + }, + { + "epoch": 55.78021978021978, + "grad_norm": 7.210568428039551, + "learning_rate": 2.210989010989011e-05, + "loss": 0.1036, + "step": 20304 + }, + { + "epoch": 55.782967032967036, + "grad_norm": 24.996505737304688, + "learning_rate": 2.2108516483516483e-05, + "loss": 0.6756, + "step": 20305 + }, + { + "epoch": 55.785714285714285, + "grad_norm": 12.769022941589355, + "learning_rate": 2.2107142857142857e-05, + "loss": 0.5585, + "step": 20306 + }, + { + "epoch": 55.78846153846154, + "grad_norm": 6.387930393218994, + "learning_rate": 2.2105769230769234e-05, + "loss": 0.1147, + "step": 20307 + }, + { + "epoch": 55.79120879120879, + "grad_norm": 21.685035705566406, + "learning_rate": 2.2104395604395607e-05, + "loss": 0.4407, + "step": 20308 + }, + { + "epoch": 55.793956043956044, + "grad_norm": 17.678926467895508, + "learning_rate": 2.210302197802198e-05, + "loss": 0.5535, + "step": 20309 + }, + { + "epoch": 55.7967032967033, + "grad_norm": 16.846946716308594, + "learning_rate": 2.210164835164835e-05, + "loss": 0.4124, + "step": 20310 + }, + { + "epoch": 55.79945054945055, + "grad_norm": 17.245153427124023, + "learning_rate": 2.2100274725274724e-05, + "loss": 0.3027, + "step": 20311 + }, + { + "epoch": 55.8021978021978, + "grad_norm": 15.703275680541992, + "learning_rate": 2.20989010989011e-05, + "loss": 0.4084, + "step": 20312 + }, + { + "epoch": 55.80494505494506, + "grad_norm": 10.430448532104492, + "learning_rate": 2.2097527472527474e-05, + "loss": 0.1742, + "step": 20313 + }, + { + "epoch": 55.80769230769231, + "grad_norm": 12.567242622375488, + "learning_rate": 2.2096153846153847e-05, + "loss": 0.1614, + "step": 20314 + }, + { + "epoch": 55.81043956043956, + "grad_norm": 9.262848854064941, + "learning_rate": 2.209478021978022e-05, + "loss": 0.0969, + "step": 20315 + }, + { + "epoch": 55.81318681318681, + "grad_norm": 17.32102394104004, + "learning_rate": 2.2093406593406594e-05, + "loss": 0.5476, + "step": 20316 + }, + { + "epoch": 55.815934065934066, + "grad_norm": 13.485241889953613, + "learning_rate": 2.2092032967032968e-05, + "loss": 0.3029, + "step": 20317 + }, + { + "epoch": 55.81868131868132, + "grad_norm": 12.808276176452637, + "learning_rate": 2.209065934065934e-05, + "loss": 0.2082, + "step": 20318 + }, + { + "epoch": 55.82142857142857, + "grad_norm": 8.84506893157959, + "learning_rate": 2.2089285714285714e-05, + "loss": 0.1437, + "step": 20319 + }, + { + "epoch": 55.824175824175825, + "grad_norm": 13.225021362304688, + "learning_rate": 2.2087912087912088e-05, + "loss": 0.15, + "step": 20320 + }, + { + "epoch": 55.82692307692308, + "grad_norm": 16.06754493713379, + "learning_rate": 2.208653846153846e-05, + "loss": 0.1647, + "step": 20321 + }, + { + "epoch": 55.82967032967033, + "grad_norm": 8.191829681396484, + "learning_rate": 2.2085164835164838e-05, + "loss": 0.0976, + "step": 20322 + }, + { + "epoch": 55.832417582417584, + "grad_norm": 6.04465389251709, + "learning_rate": 2.208379120879121e-05, + "loss": 0.1184, + "step": 20323 + }, + { + "epoch": 55.83516483516483, + "grad_norm": 20.65358543395996, + "learning_rate": 2.2082417582417585e-05, + "loss": 0.5661, + "step": 20324 + }, + { + "epoch": 55.83791208791209, + "grad_norm": 14.084939002990723, + "learning_rate": 2.2081043956043955e-05, + "loss": 0.4364, + "step": 20325 + }, + { + "epoch": 55.84065934065934, + "grad_norm": 12.71771240234375, + "learning_rate": 2.2079670329670328e-05, + "loss": 0.1557, + "step": 20326 + }, + { + "epoch": 55.84340659340659, + "grad_norm": 18.401182174682617, + "learning_rate": 2.2078296703296705e-05, + "loss": 0.5089, + "step": 20327 + }, + { + "epoch": 55.84615384615385, + "grad_norm": 16.619104385375977, + "learning_rate": 2.207692307692308e-05, + "loss": 0.4325, + "step": 20328 + }, + { + "epoch": 55.8489010989011, + "grad_norm": 4.157683372497559, + "learning_rate": 2.2075549450549452e-05, + "loss": 0.0358, + "step": 20329 + }, + { + "epoch": 55.85164835164835, + "grad_norm": 14.937071800231934, + "learning_rate": 2.2074175824175825e-05, + "loss": 0.1696, + "step": 20330 + }, + { + "epoch": 55.854395604395606, + "grad_norm": 16.235626220703125, + "learning_rate": 2.20728021978022e-05, + "loss": 0.1675, + "step": 20331 + }, + { + "epoch": 55.857142857142854, + "grad_norm": 14.372685432434082, + "learning_rate": 2.2071428571428572e-05, + "loss": 0.2207, + "step": 20332 + }, + { + "epoch": 55.85989010989011, + "grad_norm": 5.553684711456299, + "learning_rate": 2.2070054945054946e-05, + "loss": 0.1105, + "step": 20333 + }, + { + "epoch": 55.862637362637365, + "grad_norm": 17.499149322509766, + "learning_rate": 2.206868131868132e-05, + "loss": 0.4484, + "step": 20334 + }, + { + "epoch": 55.86538461538461, + "grad_norm": 16.781600952148438, + "learning_rate": 2.2067307692307692e-05, + "loss": 0.2425, + "step": 20335 + }, + { + "epoch": 55.86813186813187, + "grad_norm": 3.785217761993408, + "learning_rate": 2.2065934065934066e-05, + "loss": 0.0614, + "step": 20336 + }, + { + "epoch": 55.870879120879124, + "grad_norm": 4.4849853515625, + "learning_rate": 2.2064560439560443e-05, + "loss": 0.0576, + "step": 20337 + }, + { + "epoch": 55.87362637362637, + "grad_norm": 17.286182403564453, + "learning_rate": 2.2063186813186816e-05, + "loss": 0.2645, + "step": 20338 + }, + { + "epoch": 55.87637362637363, + "grad_norm": 3.9271743297576904, + "learning_rate": 2.206181318681319e-05, + "loss": 0.0588, + "step": 20339 + }, + { + "epoch": 55.879120879120876, + "grad_norm": 16.09429931640625, + "learning_rate": 2.206043956043956e-05, + "loss": 0.4493, + "step": 20340 + }, + { + "epoch": 55.88186813186813, + "grad_norm": 15.361113548278809, + "learning_rate": 2.2059065934065933e-05, + "loss": 0.4186, + "step": 20341 + }, + { + "epoch": 55.88461538461539, + "grad_norm": 17.72338104248047, + "learning_rate": 2.205769230769231e-05, + "loss": 0.4838, + "step": 20342 + }, + { + "epoch": 55.887362637362635, + "grad_norm": 11.134072303771973, + "learning_rate": 2.2056318681318683e-05, + "loss": 0.4096, + "step": 20343 + }, + { + "epoch": 55.89010989010989, + "grad_norm": 21.98605728149414, + "learning_rate": 2.2054945054945056e-05, + "loss": 0.5158, + "step": 20344 + }, + { + "epoch": 55.892857142857146, + "grad_norm": 4.865828037261963, + "learning_rate": 2.205357142857143e-05, + "loss": 0.0861, + "step": 20345 + }, + { + "epoch": 55.895604395604394, + "grad_norm": 29.09000587463379, + "learning_rate": 2.2052197802197803e-05, + "loss": 1.1171, + "step": 20346 + }, + { + "epoch": 55.89835164835165, + "grad_norm": 11.260576248168945, + "learning_rate": 2.2050824175824177e-05, + "loss": 0.1851, + "step": 20347 + }, + { + "epoch": 55.9010989010989, + "grad_norm": 12.370796203613281, + "learning_rate": 2.204945054945055e-05, + "loss": 0.12, + "step": 20348 + }, + { + "epoch": 55.90384615384615, + "grad_norm": 13.239632606506348, + "learning_rate": 2.2048076923076923e-05, + "loss": 0.2072, + "step": 20349 + }, + { + "epoch": 55.90659340659341, + "grad_norm": 14.54155158996582, + "learning_rate": 2.2046703296703297e-05, + "loss": 0.2815, + "step": 20350 + }, + { + "epoch": 55.90934065934066, + "grad_norm": 10.144981384277344, + "learning_rate": 2.204532967032967e-05, + "loss": 0.1362, + "step": 20351 + }, + { + "epoch": 55.91208791208791, + "grad_norm": 5.6551833152771, + "learning_rate": 2.2043956043956047e-05, + "loss": 0.0855, + "step": 20352 + }, + { + "epoch": 55.91483516483517, + "grad_norm": 15.989283561706543, + "learning_rate": 2.204258241758242e-05, + "loss": 0.2305, + "step": 20353 + }, + { + "epoch": 55.917582417582416, + "grad_norm": 18.56899070739746, + "learning_rate": 2.2041208791208794e-05, + "loss": 0.5571, + "step": 20354 + }, + { + "epoch": 55.92032967032967, + "grad_norm": 17.828109741210938, + "learning_rate": 2.2039835164835164e-05, + "loss": 0.3259, + "step": 20355 + }, + { + "epoch": 55.92307692307692, + "grad_norm": 3.159944772720337, + "learning_rate": 2.2038461538461537e-05, + "loss": 0.0408, + "step": 20356 + }, + { + "epoch": 55.925824175824175, + "grad_norm": 13.482501029968262, + "learning_rate": 2.2037087912087914e-05, + "loss": 0.3447, + "step": 20357 + }, + { + "epoch": 55.92857142857143, + "grad_norm": 13.5242280960083, + "learning_rate": 2.2035714285714287e-05, + "loss": 0.4509, + "step": 20358 + }, + { + "epoch": 55.93131868131868, + "grad_norm": 10.985610008239746, + "learning_rate": 2.203434065934066e-05, + "loss": 0.2092, + "step": 20359 + }, + { + "epoch": 55.934065934065934, + "grad_norm": 15.104921340942383, + "learning_rate": 2.2032967032967034e-05, + "loss": 0.3441, + "step": 20360 + }, + { + "epoch": 55.93681318681319, + "grad_norm": 19.19339942932129, + "learning_rate": 2.2031593406593408e-05, + "loss": 0.4067, + "step": 20361 + }, + { + "epoch": 55.93956043956044, + "grad_norm": 16.781726837158203, + "learning_rate": 2.203021978021978e-05, + "loss": 0.4149, + "step": 20362 + }, + { + "epoch": 55.94230769230769, + "grad_norm": 13.98698616027832, + "learning_rate": 2.2028846153846154e-05, + "loss": 0.334, + "step": 20363 + }, + { + "epoch": 55.94505494505494, + "grad_norm": 24.975936889648438, + "learning_rate": 2.2027472527472528e-05, + "loss": 0.4103, + "step": 20364 + }, + { + "epoch": 55.9478021978022, + "grad_norm": 15.548754692077637, + "learning_rate": 2.20260989010989e-05, + "loss": 0.2362, + "step": 20365 + }, + { + "epoch": 55.95054945054945, + "grad_norm": 26.91980743408203, + "learning_rate": 2.2024725274725275e-05, + "loss": 0.8685, + "step": 20366 + }, + { + "epoch": 55.9532967032967, + "grad_norm": 9.523125648498535, + "learning_rate": 2.202335164835165e-05, + "loss": 0.1519, + "step": 20367 + }, + { + "epoch": 55.956043956043956, + "grad_norm": 9.332290649414062, + "learning_rate": 2.2021978021978025e-05, + "loss": 0.1941, + "step": 20368 + }, + { + "epoch": 55.95879120879121, + "grad_norm": 10.560296058654785, + "learning_rate": 2.20206043956044e-05, + "loss": 0.1427, + "step": 20369 + }, + { + "epoch": 55.96153846153846, + "grad_norm": 6.433248043060303, + "learning_rate": 2.201923076923077e-05, + "loss": 0.0597, + "step": 20370 + }, + { + "epoch": 55.964285714285715, + "grad_norm": 8.975213050842285, + "learning_rate": 2.2017857142857142e-05, + "loss": 0.1321, + "step": 20371 + }, + { + "epoch": 55.967032967032964, + "grad_norm": 13.317404747009277, + "learning_rate": 2.201648351648352e-05, + "loss": 0.224, + "step": 20372 + }, + { + "epoch": 55.96978021978022, + "grad_norm": 13.50349235534668, + "learning_rate": 2.2015109890109892e-05, + "loss": 0.3456, + "step": 20373 + }, + { + "epoch": 55.972527472527474, + "grad_norm": 16.824752807617188, + "learning_rate": 2.2013736263736265e-05, + "loss": 0.284, + "step": 20374 + }, + { + "epoch": 55.97527472527472, + "grad_norm": 17.719797134399414, + "learning_rate": 2.201236263736264e-05, + "loss": 0.4412, + "step": 20375 + }, + { + "epoch": 55.97802197802198, + "grad_norm": 12.745661735534668, + "learning_rate": 2.2010989010989012e-05, + "loss": 0.1893, + "step": 20376 + }, + { + "epoch": 55.98076923076923, + "grad_norm": 16.880290985107422, + "learning_rate": 2.2009615384615386e-05, + "loss": 0.3667, + "step": 20377 + }, + { + "epoch": 55.98351648351648, + "grad_norm": 14.045761108398438, + "learning_rate": 2.200824175824176e-05, + "loss": 0.4199, + "step": 20378 + }, + { + "epoch": 55.98626373626374, + "grad_norm": 6.525393009185791, + "learning_rate": 2.2006868131868132e-05, + "loss": 0.0976, + "step": 20379 + }, + { + "epoch": 55.98901098901099, + "grad_norm": 9.949772834777832, + "learning_rate": 2.2005494505494506e-05, + "loss": 0.1874, + "step": 20380 + }, + { + "epoch": 55.99175824175824, + "grad_norm": 19.376785278320312, + "learning_rate": 2.200412087912088e-05, + "loss": 0.441, + "step": 20381 + }, + { + "epoch": 55.994505494505496, + "grad_norm": 3.5302658081054688, + "learning_rate": 2.2002747252747256e-05, + "loss": 0.0437, + "step": 20382 + }, + { + "epoch": 55.997252747252745, + "grad_norm": 6.390885353088379, + "learning_rate": 2.200137362637363e-05, + "loss": 0.0871, + "step": 20383 + }, + { + "epoch": 56.0, + "grad_norm": 87.43122100830078, + "learning_rate": 2.2000000000000003e-05, + "loss": 1.4615, + "step": 20384 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.7245179063360881, + "eval_f1": 0.7098017617690348, + "eval_f1_DuraRiadoRio_64x64": 0.6912751677852349, + "eval_f1_Mole_64x64": 0.5, + "eval_f1_Quebrado_64x64": 0.8673139158576052, + "eval_f1_RiadoRio_64x64": 0.6202898550724638, + "eval_f1_RioFechado_64x64": 0.8701298701298701, + "eval_loss": 1.5939826965332031, + "eval_precision": 0.7685170883385505, + "eval_precision_DuraRiadoRio_64x64": 0.6688311688311688, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.8121212121212121, + "eval_precision_RiadoRio_64x64": 0.5544041450777202, + "eval_precision_RioFechado_64x64": 0.8072289156626506, + "eval_recall": 0.725355201383741, + "eval_recall_DuraRiadoRio_64x64": 0.7152777777777778, + "eval_recall_Mole_64x64": 0.3333333333333333, + "eval_recall_Quebrado_64x64": 0.9305555555555556, + "eval_recall_RiadoRio_64x64": 0.7039473684210527, + "eval_recall_RioFechado_64x64": 0.9436619718309859, + "eval_runtime": 1.7442, + "eval_samples_per_second": 416.24, + "eval_steps_per_second": 26.373, + "step": 20384 + }, + { + "epoch": 56.002747252747255, + "grad_norm": 11.661470413208008, + "learning_rate": 2.1998626373626373e-05, + "loss": 0.2158, + "step": 20385 + }, + { + "epoch": 56.005494505494504, + "grad_norm": 11.673572540283203, + "learning_rate": 2.1997252747252746e-05, + "loss": 0.1844, + "step": 20386 + }, + { + "epoch": 56.00824175824176, + "grad_norm": 10.469215393066406, + "learning_rate": 2.1995879120879123e-05, + "loss": 0.2125, + "step": 20387 + }, + { + "epoch": 56.010989010989015, + "grad_norm": 14.362874031066895, + "learning_rate": 2.1994505494505496e-05, + "loss": 0.2822, + "step": 20388 + }, + { + "epoch": 56.01373626373626, + "grad_norm": 29.698514938354492, + "learning_rate": 2.199313186813187e-05, + "loss": 0.6869, + "step": 20389 + }, + { + "epoch": 56.01648351648352, + "grad_norm": 15.61575984954834, + "learning_rate": 2.1991758241758243e-05, + "loss": 0.3091, + "step": 20390 + }, + { + "epoch": 56.01923076923077, + "grad_norm": 19.342357635498047, + "learning_rate": 2.1990384615384617e-05, + "loss": 0.1468, + "step": 20391 + }, + { + "epoch": 56.02197802197802, + "grad_norm": 11.060755729675293, + "learning_rate": 2.198901098901099e-05, + "loss": 0.1815, + "step": 20392 + }, + { + "epoch": 56.02472527472528, + "grad_norm": 17.224430084228516, + "learning_rate": 2.1987637362637363e-05, + "loss": 0.2842, + "step": 20393 + }, + { + "epoch": 56.027472527472526, + "grad_norm": 5.661207675933838, + "learning_rate": 2.1986263736263737e-05, + "loss": 0.1008, + "step": 20394 + }, + { + "epoch": 56.03021978021978, + "grad_norm": 24.604970932006836, + "learning_rate": 2.198489010989011e-05, + "loss": 0.7225, + "step": 20395 + }, + { + "epoch": 56.032967032967036, + "grad_norm": 22.32269287109375, + "learning_rate": 2.1983516483516484e-05, + "loss": 0.4389, + "step": 20396 + }, + { + "epoch": 56.035714285714285, + "grad_norm": 4.753910541534424, + "learning_rate": 2.198214285714286e-05, + "loss": 0.0509, + "step": 20397 + }, + { + "epoch": 56.03846153846154, + "grad_norm": 7.750998497009277, + "learning_rate": 2.1980769230769234e-05, + "loss": 0.2097, + "step": 20398 + }, + { + "epoch": 56.04120879120879, + "grad_norm": 12.867147445678711, + "learning_rate": 2.1979395604395607e-05, + "loss": 0.4272, + "step": 20399 + }, + { + "epoch": 56.043956043956044, + "grad_norm": 8.65624713897705, + "learning_rate": 2.1978021978021977e-05, + "loss": 0.1667, + "step": 20400 + }, + { + "epoch": 56.0467032967033, + "grad_norm": 9.542662620544434, + "learning_rate": 2.197664835164835e-05, + "loss": 0.1638, + "step": 20401 + }, + { + "epoch": 56.04945054945055, + "grad_norm": 4.851617336273193, + "learning_rate": 2.1975274725274728e-05, + "loss": 0.0719, + "step": 20402 + }, + { + "epoch": 56.0521978021978, + "grad_norm": 21.881044387817383, + "learning_rate": 2.19739010989011e-05, + "loss": 0.6426, + "step": 20403 + }, + { + "epoch": 56.05494505494506, + "grad_norm": 9.78710651397705, + "learning_rate": 2.1972527472527474e-05, + "loss": 0.1889, + "step": 20404 + }, + { + "epoch": 56.05769230769231, + "grad_norm": 12.98044490814209, + "learning_rate": 2.1971153846153848e-05, + "loss": 0.242, + "step": 20405 + }, + { + "epoch": 56.06043956043956, + "grad_norm": 13.5075101852417, + "learning_rate": 2.196978021978022e-05, + "loss": 0.424, + "step": 20406 + }, + { + "epoch": 56.06318681318681, + "grad_norm": 15.32955265045166, + "learning_rate": 2.1968406593406595e-05, + "loss": 0.3315, + "step": 20407 + }, + { + "epoch": 56.065934065934066, + "grad_norm": 9.499900817871094, + "learning_rate": 2.1967032967032968e-05, + "loss": 0.1499, + "step": 20408 + }, + { + "epoch": 56.06868131868132, + "grad_norm": 13.64212417602539, + "learning_rate": 2.196565934065934e-05, + "loss": 0.2239, + "step": 20409 + }, + { + "epoch": 56.07142857142857, + "grad_norm": 16.8433895111084, + "learning_rate": 2.1964285714285715e-05, + "loss": 0.2653, + "step": 20410 + }, + { + "epoch": 56.074175824175825, + "grad_norm": 5.424464702606201, + "learning_rate": 2.1962912087912088e-05, + "loss": 0.1078, + "step": 20411 + }, + { + "epoch": 56.07692307692308, + "grad_norm": 24.14310073852539, + "learning_rate": 2.1961538461538465e-05, + "loss": 0.8156, + "step": 20412 + }, + { + "epoch": 56.07967032967033, + "grad_norm": 10.449336051940918, + "learning_rate": 2.196016483516484e-05, + "loss": 0.1935, + "step": 20413 + }, + { + "epoch": 56.082417582417584, + "grad_norm": 9.29896354675293, + "learning_rate": 2.195879120879121e-05, + "loss": 0.2792, + "step": 20414 + }, + { + "epoch": 56.08516483516483, + "grad_norm": 13.335912704467773, + "learning_rate": 2.1957417582417582e-05, + "loss": 0.336, + "step": 20415 + }, + { + "epoch": 56.08791208791209, + "grad_norm": 13.385791778564453, + "learning_rate": 2.1956043956043955e-05, + "loss": 0.3371, + "step": 20416 + }, + { + "epoch": 56.09065934065934, + "grad_norm": 17.82276725769043, + "learning_rate": 2.1954670329670332e-05, + "loss": 0.3963, + "step": 20417 + }, + { + "epoch": 56.09340659340659, + "grad_norm": 6.95558500289917, + "learning_rate": 2.1953296703296705e-05, + "loss": 0.1307, + "step": 20418 + }, + { + "epoch": 56.09615384615385, + "grad_norm": 6.699656009674072, + "learning_rate": 2.195192307692308e-05, + "loss": 0.1049, + "step": 20419 + }, + { + "epoch": 56.0989010989011, + "grad_norm": 8.349184036254883, + "learning_rate": 2.1950549450549452e-05, + "loss": 0.1094, + "step": 20420 + }, + { + "epoch": 56.10164835164835, + "grad_norm": 11.596208572387695, + "learning_rate": 2.1949175824175826e-05, + "loss": 0.191, + "step": 20421 + }, + { + "epoch": 56.104395604395606, + "grad_norm": 10.611139297485352, + "learning_rate": 2.19478021978022e-05, + "loss": 0.2483, + "step": 20422 + }, + { + "epoch": 56.107142857142854, + "grad_norm": 10.412015914916992, + "learning_rate": 2.1946428571428572e-05, + "loss": 0.251, + "step": 20423 + }, + { + "epoch": 56.10989010989011, + "grad_norm": 17.633642196655273, + "learning_rate": 2.1945054945054946e-05, + "loss": 0.495, + "step": 20424 + }, + { + "epoch": 56.112637362637365, + "grad_norm": 12.522193908691406, + "learning_rate": 2.194368131868132e-05, + "loss": 0.183, + "step": 20425 + }, + { + "epoch": 56.11538461538461, + "grad_norm": 11.563241004943848, + "learning_rate": 2.1942307692307693e-05, + "loss": 0.1606, + "step": 20426 + }, + { + "epoch": 56.11813186813187, + "grad_norm": 18.07427215576172, + "learning_rate": 2.1940934065934066e-05, + "loss": 0.3306, + "step": 20427 + }, + { + "epoch": 56.120879120879124, + "grad_norm": 5.434615135192871, + "learning_rate": 2.1939560439560443e-05, + "loss": 0.1406, + "step": 20428 + }, + { + "epoch": 56.12362637362637, + "grad_norm": 7.188358783721924, + "learning_rate": 2.1938186813186813e-05, + "loss": 0.158, + "step": 20429 + }, + { + "epoch": 56.12637362637363, + "grad_norm": 16.484050750732422, + "learning_rate": 2.1936813186813186e-05, + "loss": 0.4621, + "step": 20430 + }, + { + "epoch": 56.129120879120876, + "grad_norm": 19.02333641052246, + "learning_rate": 2.193543956043956e-05, + "loss": 0.4938, + "step": 20431 + }, + { + "epoch": 56.13186813186813, + "grad_norm": 4.503971099853516, + "learning_rate": 2.1934065934065933e-05, + "loss": 0.0836, + "step": 20432 + }, + { + "epoch": 56.13461538461539, + "grad_norm": 9.9730863571167, + "learning_rate": 2.193269230769231e-05, + "loss": 0.1266, + "step": 20433 + }, + { + "epoch": 56.137362637362635, + "grad_norm": 12.142363548278809, + "learning_rate": 2.1931318681318683e-05, + "loss": 0.2138, + "step": 20434 + }, + { + "epoch": 56.14010989010989, + "grad_norm": 9.205821990966797, + "learning_rate": 2.1929945054945057e-05, + "loss": 0.243, + "step": 20435 + }, + { + "epoch": 56.142857142857146, + "grad_norm": 9.30040454864502, + "learning_rate": 2.192857142857143e-05, + "loss": 0.1872, + "step": 20436 + }, + { + "epoch": 56.145604395604394, + "grad_norm": 2.9797470569610596, + "learning_rate": 2.1927197802197804e-05, + "loss": 0.0531, + "step": 20437 + }, + { + "epoch": 56.14835164835165, + "grad_norm": 15.841530799865723, + "learning_rate": 2.1925824175824177e-05, + "loss": 0.2667, + "step": 20438 + }, + { + "epoch": 56.1510989010989, + "grad_norm": 9.991480827331543, + "learning_rate": 2.192445054945055e-05, + "loss": 0.1693, + "step": 20439 + }, + { + "epoch": 56.15384615384615, + "grad_norm": 1.155312418937683, + "learning_rate": 2.1923076923076924e-05, + "loss": 0.02, + "step": 20440 + }, + { + "epoch": 56.15659340659341, + "grad_norm": 7.2940287590026855, + "learning_rate": 2.1921703296703297e-05, + "loss": 0.106, + "step": 20441 + }, + { + "epoch": 56.15934065934066, + "grad_norm": 16.702743530273438, + "learning_rate": 2.192032967032967e-05, + "loss": 0.2837, + "step": 20442 + }, + { + "epoch": 56.16208791208791, + "grad_norm": 14.74158000946045, + "learning_rate": 2.1918956043956047e-05, + "loss": 0.2966, + "step": 20443 + }, + { + "epoch": 56.16483516483517, + "grad_norm": 3.3961989879608154, + "learning_rate": 2.1917582417582417e-05, + "loss": 0.0342, + "step": 20444 + }, + { + "epoch": 56.167582417582416, + "grad_norm": 13.514933586120605, + "learning_rate": 2.191620879120879e-05, + "loss": 0.2735, + "step": 20445 + }, + { + "epoch": 56.17032967032967, + "grad_norm": 7.976495742797852, + "learning_rate": 2.1914835164835164e-05, + "loss": 0.2346, + "step": 20446 + }, + { + "epoch": 56.17307692307692, + "grad_norm": 12.429985046386719, + "learning_rate": 2.1913461538461538e-05, + "loss": 0.2723, + "step": 20447 + }, + { + "epoch": 56.175824175824175, + "grad_norm": 6.6288042068481445, + "learning_rate": 2.1912087912087914e-05, + "loss": 0.1245, + "step": 20448 + }, + { + "epoch": 56.17857142857143, + "grad_norm": 13.118917465209961, + "learning_rate": 2.1910714285714288e-05, + "loss": 0.1431, + "step": 20449 + }, + { + "epoch": 56.18131868131868, + "grad_norm": 4.5378618240356445, + "learning_rate": 2.190934065934066e-05, + "loss": 0.0877, + "step": 20450 + }, + { + "epoch": 56.184065934065934, + "grad_norm": 16.57266616821289, + "learning_rate": 2.1907967032967035e-05, + "loss": 0.3179, + "step": 20451 + }, + { + "epoch": 56.18681318681319, + "grad_norm": 13.761143684387207, + "learning_rate": 2.1906593406593408e-05, + "loss": 0.5243, + "step": 20452 + }, + { + "epoch": 56.18956043956044, + "grad_norm": 3.0969772338867188, + "learning_rate": 2.190521978021978e-05, + "loss": 0.0346, + "step": 20453 + }, + { + "epoch": 56.19230769230769, + "grad_norm": 16.365638732910156, + "learning_rate": 2.1903846153846155e-05, + "loss": 0.382, + "step": 20454 + }, + { + "epoch": 56.19505494505494, + "grad_norm": 8.025681495666504, + "learning_rate": 2.1902472527472528e-05, + "loss": 0.1062, + "step": 20455 + }, + { + "epoch": 56.1978021978022, + "grad_norm": 20.158252716064453, + "learning_rate": 2.19010989010989e-05, + "loss": 0.3002, + "step": 20456 + }, + { + "epoch": 56.20054945054945, + "grad_norm": 26.412548065185547, + "learning_rate": 2.1899725274725275e-05, + "loss": 0.849, + "step": 20457 + }, + { + "epoch": 56.2032967032967, + "grad_norm": 16.473983764648438, + "learning_rate": 2.1898351648351652e-05, + "loss": 0.4482, + "step": 20458 + }, + { + "epoch": 56.206043956043956, + "grad_norm": 12.209548950195312, + "learning_rate": 2.1896978021978022e-05, + "loss": 0.1795, + "step": 20459 + }, + { + "epoch": 56.20879120879121, + "grad_norm": 8.347341537475586, + "learning_rate": 2.1895604395604395e-05, + "loss": 0.1501, + "step": 20460 + }, + { + "epoch": 56.21153846153846, + "grad_norm": 14.01703929901123, + "learning_rate": 2.189423076923077e-05, + "loss": 0.3445, + "step": 20461 + }, + { + "epoch": 56.214285714285715, + "grad_norm": 7.4445013999938965, + "learning_rate": 2.1892857142857142e-05, + "loss": 0.1458, + "step": 20462 + }, + { + "epoch": 56.217032967032964, + "grad_norm": 19.558813095092773, + "learning_rate": 2.189148351648352e-05, + "loss": 0.3734, + "step": 20463 + }, + { + "epoch": 56.21978021978022, + "grad_norm": 10.64888858795166, + "learning_rate": 2.1890109890109892e-05, + "loss": 0.1536, + "step": 20464 + }, + { + "epoch": 56.222527472527474, + "grad_norm": 9.964476585388184, + "learning_rate": 2.1888736263736266e-05, + "loss": 0.3216, + "step": 20465 + }, + { + "epoch": 56.22527472527472, + "grad_norm": 11.321090698242188, + "learning_rate": 2.188736263736264e-05, + "loss": 0.2307, + "step": 20466 + }, + { + "epoch": 56.22802197802198, + "grad_norm": 19.416301727294922, + "learning_rate": 2.1885989010989013e-05, + "loss": 0.2577, + "step": 20467 + }, + { + "epoch": 56.23076923076923, + "grad_norm": 15.764161109924316, + "learning_rate": 2.1884615384615386e-05, + "loss": 0.5896, + "step": 20468 + }, + { + "epoch": 56.23351648351648, + "grad_norm": 3.9037346839904785, + "learning_rate": 2.188324175824176e-05, + "loss": 0.072, + "step": 20469 + }, + { + "epoch": 56.23626373626374, + "grad_norm": 15.09659481048584, + "learning_rate": 2.1881868131868133e-05, + "loss": 0.4077, + "step": 20470 + }, + { + "epoch": 56.239010989010985, + "grad_norm": 2.6378355026245117, + "learning_rate": 2.1880494505494506e-05, + "loss": 0.0357, + "step": 20471 + }, + { + "epoch": 56.24175824175824, + "grad_norm": 17.47872543334961, + "learning_rate": 2.187912087912088e-05, + "loss": 0.4026, + "step": 20472 + }, + { + "epoch": 56.244505494505496, + "grad_norm": 6.175570487976074, + "learning_rate": 2.1877747252747256e-05, + "loss": 0.0935, + "step": 20473 + }, + { + "epoch": 56.247252747252745, + "grad_norm": 18.504072189331055, + "learning_rate": 2.1876373626373626e-05, + "loss": 0.3992, + "step": 20474 + }, + { + "epoch": 56.25, + "grad_norm": 11.949176788330078, + "learning_rate": 2.1875e-05, + "loss": 0.3068, + "step": 20475 + }, + { + "epoch": 56.252747252747255, + "grad_norm": 18.635459899902344, + "learning_rate": 2.1873626373626373e-05, + "loss": 0.4974, + "step": 20476 + }, + { + "epoch": 56.255494505494504, + "grad_norm": 7.324606895446777, + "learning_rate": 2.1872252747252747e-05, + "loss": 0.1008, + "step": 20477 + }, + { + "epoch": 56.25824175824176, + "grad_norm": 9.619196891784668, + "learning_rate": 2.1870879120879123e-05, + "loss": 0.2256, + "step": 20478 + }, + { + "epoch": 56.260989010989015, + "grad_norm": 10.031052589416504, + "learning_rate": 2.1869505494505497e-05, + "loss": 0.1645, + "step": 20479 + }, + { + "epoch": 56.26373626373626, + "grad_norm": 10.081768989562988, + "learning_rate": 2.186813186813187e-05, + "loss": 0.221, + "step": 20480 + }, + { + "epoch": 56.26648351648352, + "grad_norm": 7.597236156463623, + "learning_rate": 2.1866758241758244e-05, + "loss": 0.1106, + "step": 20481 + }, + { + "epoch": 56.26923076923077, + "grad_norm": 19.33091163635254, + "learning_rate": 2.1865384615384617e-05, + "loss": 0.3591, + "step": 20482 + }, + { + "epoch": 56.27197802197802, + "grad_norm": 6.703240871429443, + "learning_rate": 2.186401098901099e-05, + "loss": 0.0952, + "step": 20483 + }, + { + "epoch": 56.27472527472528, + "grad_norm": 17.212158203125, + "learning_rate": 2.1862637362637364e-05, + "loss": 0.4155, + "step": 20484 + }, + { + "epoch": 56.277472527472526, + "grad_norm": 10.27936840057373, + "learning_rate": 2.1861263736263737e-05, + "loss": 0.1893, + "step": 20485 + }, + { + "epoch": 56.28021978021978, + "grad_norm": 15.136892318725586, + "learning_rate": 2.185989010989011e-05, + "loss": 0.5854, + "step": 20486 + }, + { + "epoch": 56.282967032967036, + "grad_norm": 7.163331031799316, + "learning_rate": 2.1858516483516484e-05, + "loss": 0.1267, + "step": 20487 + }, + { + "epoch": 56.285714285714285, + "grad_norm": 8.036703109741211, + "learning_rate": 2.185714285714286e-05, + "loss": 0.2197, + "step": 20488 + }, + { + "epoch": 56.28846153846154, + "grad_norm": 13.817264556884766, + "learning_rate": 2.185576923076923e-05, + "loss": 0.3469, + "step": 20489 + }, + { + "epoch": 56.29120879120879, + "grad_norm": 8.502406120300293, + "learning_rate": 2.1854395604395604e-05, + "loss": 0.1125, + "step": 20490 + }, + { + "epoch": 56.293956043956044, + "grad_norm": 14.942338943481445, + "learning_rate": 2.1853021978021978e-05, + "loss": 0.2772, + "step": 20491 + }, + { + "epoch": 56.2967032967033, + "grad_norm": 5.883149147033691, + "learning_rate": 2.185164835164835e-05, + "loss": 0.0449, + "step": 20492 + }, + { + "epoch": 56.29945054945055, + "grad_norm": 8.892627716064453, + "learning_rate": 2.1850274725274728e-05, + "loss": 0.3953, + "step": 20493 + }, + { + "epoch": 56.3021978021978, + "grad_norm": 11.649029731750488, + "learning_rate": 2.18489010989011e-05, + "loss": 0.497, + "step": 20494 + }, + { + "epoch": 56.30494505494506, + "grad_norm": 5.237294673919678, + "learning_rate": 2.1847527472527475e-05, + "loss": 0.1038, + "step": 20495 + }, + { + "epoch": 56.30769230769231, + "grad_norm": 10.301164627075195, + "learning_rate": 2.1846153846153848e-05, + "loss": 0.1963, + "step": 20496 + }, + { + "epoch": 56.31043956043956, + "grad_norm": 9.509157180786133, + "learning_rate": 2.184478021978022e-05, + "loss": 0.0757, + "step": 20497 + }, + { + "epoch": 56.31318681318681, + "grad_norm": 16.07227325439453, + "learning_rate": 2.1843406593406595e-05, + "loss": 0.2672, + "step": 20498 + }, + { + "epoch": 56.315934065934066, + "grad_norm": 6.6231770515441895, + "learning_rate": 2.184203296703297e-05, + "loss": 0.1589, + "step": 20499 + }, + { + "epoch": 56.31868131868132, + "grad_norm": 18.785070419311523, + "learning_rate": 2.1840659340659342e-05, + "loss": 0.4003, + "step": 20500 + }, + { + "epoch": 56.32142857142857, + "grad_norm": 10.06466293334961, + "learning_rate": 2.1839285714285715e-05, + "loss": 0.2622, + "step": 20501 + }, + { + "epoch": 56.324175824175825, + "grad_norm": 19.810606002807617, + "learning_rate": 2.183791208791209e-05, + "loss": 0.6307, + "step": 20502 + }, + { + "epoch": 56.32692307692308, + "grad_norm": 13.423431396484375, + "learning_rate": 2.1836538461538465e-05, + "loss": 0.2244, + "step": 20503 + }, + { + "epoch": 56.32967032967033, + "grad_norm": 17.201271057128906, + "learning_rate": 2.1835164835164835e-05, + "loss": 0.4466, + "step": 20504 + }, + { + "epoch": 56.332417582417584, + "grad_norm": 13.293134689331055, + "learning_rate": 2.183379120879121e-05, + "loss": 0.2149, + "step": 20505 + }, + { + "epoch": 56.33516483516483, + "grad_norm": 6.410712242126465, + "learning_rate": 2.1832417582417582e-05, + "loss": 0.0901, + "step": 20506 + }, + { + "epoch": 56.33791208791209, + "grad_norm": 13.302204132080078, + "learning_rate": 2.1831043956043956e-05, + "loss": 0.2783, + "step": 20507 + }, + { + "epoch": 56.34065934065934, + "grad_norm": 20.356346130371094, + "learning_rate": 2.1829670329670332e-05, + "loss": 0.6589, + "step": 20508 + }, + { + "epoch": 56.34340659340659, + "grad_norm": 6.610915184020996, + "learning_rate": 2.1828296703296706e-05, + "loss": 0.0871, + "step": 20509 + }, + { + "epoch": 56.34615384615385, + "grad_norm": 10.020873069763184, + "learning_rate": 2.182692307692308e-05, + "loss": 0.1073, + "step": 20510 + }, + { + "epoch": 56.3489010989011, + "grad_norm": 9.153464317321777, + "learning_rate": 2.1825549450549453e-05, + "loss": 0.1578, + "step": 20511 + }, + { + "epoch": 56.35164835164835, + "grad_norm": 4.71173095703125, + "learning_rate": 2.1824175824175823e-05, + "loss": 0.0793, + "step": 20512 + }, + { + "epoch": 56.354395604395606, + "grad_norm": 2.7832515239715576, + "learning_rate": 2.18228021978022e-05, + "loss": 0.035, + "step": 20513 + }, + { + "epoch": 56.357142857142854, + "grad_norm": 11.296903610229492, + "learning_rate": 2.1821428571428573e-05, + "loss": 0.1515, + "step": 20514 + }, + { + "epoch": 56.35989010989011, + "grad_norm": 12.891979217529297, + "learning_rate": 2.1820054945054946e-05, + "loss": 0.2338, + "step": 20515 + }, + { + "epoch": 56.362637362637365, + "grad_norm": 12.03161907196045, + "learning_rate": 2.181868131868132e-05, + "loss": 0.3212, + "step": 20516 + }, + { + "epoch": 56.36538461538461, + "grad_norm": 15.190939903259277, + "learning_rate": 2.1817307692307693e-05, + "loss": 0.1977, + "step": 20517 + }, + { + "epoch": 56.36813186813187, + "grad_norm": 7.779228687286377, + "learning_rate": 2.181593406593407e-05, + "loss": 0.2275, + "step": 20518 + }, + { + "epoch": 56.370879120879124, + "grad_norm": 5.877505302429199, + "learning_rate": 2.181456043956044e-05, + "loss": 0.0698, + "step": 20519 + }, + { + "epoch": 56.37362637362637, + "grad_norm": 15.479856491088867, + "learning_rate": 2.1813186813186813e-05, + "loss": 0.3225, + "step": 20520 + }, + { + "epoch": 56.37637362637363, + "grad_norm": 11.579113006591797, + "learning_rate": 2.1811813186813187e-05, + "loss": 0.2055, + "step": 20521 + }, + { + "epoch": 56.379120879120876, + "grad_norm": 3.999035358428955, + "learning_rate": 2.181043956043956e-05, + "loss": 0.0509, + "step": 20522 + }, + { + "epoch": 56.38186813186813, + "grad_norm": 9.422196388244629, + "learning_rate": 2.1809065934065937e-05, + "loss": 0.1058, + "step": 20523 + }, + { + "epoch": 56.38461538461539, + "grad_norm": 15.302231788635254, + "learning_rate": 2.180769230769231e-05, + "loss": 0.2262, + "step": 20524 + }, + { + "epoch": 56.387362637362635, + "grad_norm": 18.378719329833984, + "learning_rate": 2.1806318681318684e-05, + "loss": 0.5431, + "step": 20525 + }, + { + "epoch": 56.39010989010989, + "grad_norm": 6.7855939865112305, + "learning_rate": 2.1804945054945057e-05, + "loss": 0.1002, + "step": 20526 + }, + { + "epoch": 56.392857142857146, + "grad_norm": 8.427595138549805, + "learning_rate": 2.1803571428571427e-05, + "loss": 0.1472, + "step": 20527 + }, + { + "epoch": 56.395604395604394, + "grad_norm": 4.533182621002197, + "learning_rate": 2.1802197802197804e-05, + "loss": 0.0756, + "step": 20528 + }, + { + "epoch": 56.39835164835165, + "grad_norm": 10.422220230102539, + "learning_rate": 2.1800824175824177e-05, + "loss": 0.1062, + "step": 20529 + }, + { + "epoch": 56.4010989010989, + "grad_norm": 12.315130233764648, + "learning_rate": 2.179945054945055e-05, + "loss": 0.1573, + "step": 20530 + }, + { + "epoch": 56.40384615384615, + "grad_norm": 5.520082473754883, + "learning_rate": 2.1798076923076924e-05, + "loss": 0.0805, + "step": 20531 + }, + { + "epoch": 56.40659340659341, + "grad_norm": 11.598511695861816, + "learning_rate": 2.1796703296703298e-05, + "loss": 0.1473, + "step": 20532 + }, + { + "epoch": 56.40934065934066, + "grad_norm": 12.101561546325684, + "learning_rate": 2.179532967032967e-05, + "loss": 0.3022, + "step": 20533 + }, + { + "epoch": 56.41208791208791, + "grad_norm": 6.23471212387085, + "learning_rate": 2.1793956043956044e-05, + "loss": 0.1514, + "step": 20534 + }, + { + "epoch": 56.41483516483517, + "grad_norm": 5.685014247894287, + "learning_rate": 2.1792582417582418e-05, + "loss": 0.0658, + "step": 20535 + }, + { + "epoch": 56.417582417582416, + "grad_norm": 6.330519676208496, + "learning_rate": 2.179120879120879e-05, + "loss": 0.1968, + "step": 20536 + }, + { + "epoch": 56.42032967032967, + "grad_norm": 8.697829246520996, + "learning_rate": 2.1789835164835165e-05, + "loss": 0.1887, + "step": 20537 + }, + { + "epoch": 56.42307692307692, + "grad_norm": 3.0172410011291504, + "learning_rate": 2.1788461538461538e-05, + "loss": 0.0351, + "step": 20538 + }, + { + "epoch": 56.425824175824175, + "grad_norm": 14.048355102539062, + "learning_rate": 2.1787087912087915e-05, + "loss": 0.2169, + "step": 20539 + }, + { + "epoch": 56.42857142857143, + "grad_norm": 12.060132026672363, + "learning_rate": 2.1785714285714288e-05, + "loss": 0.2682, + "step": 20540 + }, + { + "epoch": 56.43131868131868, + "grad_norm": 19.156417846679688, + "learning_rate": 2.178434065934066e-05, + "loss": 0.3968, + "step": 20541 + }, + { + "epoch": 56.434065934065934, + "grad_norm": 7.307631015777588, + "learning_rate": 2.178296703296703e-05, + "loss": 0.0776, + "step": 20542 + }, + { + "epoch": 56.43681318681319, + "grad_norm": 7.498539924621582, + "learning_rate": 2.1781593406593405e-05, + "loss": 0.0997, + "step": 20543 + }, + { + "epoch": 56.43956043956044, + "grad_norm": 23.281099319458008, + "learning_rate": 2.1780219780219782e-05, + "loss": 0.5406, + "step": 20544 + }, + { + "epoch": 56.44230769230769, + "grad_norm": 21.5228271484375, + "learning_rate": 2.1778846153846155e-05, + "loss": 0.4292, + "step": 20545 + }, + { + "epoch": 56.44505494505494, + "grad_norm": 18.868873596191406, + "learning_rate": 2.177747252747253e-05, + "loss": 0.3033, + "step": 20546 + }, + { + "epoch": 56.4478021978022, + "grad_norm": 10.78105640411377, + "learning_rate": 2.1776098901098902e-05, + "loss": 0.2366, + "step": 20547 + }, + { + "epoch": 56.45054945054945, + "grad_norm": 7.651068687438965, + "learning_rate": 2.1774725274725275e-05, + "loss": 0.1713, + "step": 20548 + }, + { + "epoch": 56.4532967032967, + "grad_norm": 17.39504623413086, + "learning_rate": 2.177335164835165e-05, + "loss": 0.5045, + "step": 20549 + }, + { + "epoch": 56.456043956043956, + "grad_norm": 18.468276977539062, + "learning_rate": 2.1771978021978022e-05, + "loss": 0.3279, + "step": 20550 + }, + { + "epoch": 56.45879120879121, + "grad_norm": 3.42453670501709, + "learning_rate": 2.1770604395604396e-05, + "loss": 0.0521, + "step": 20551 + }, + { + "epoch": 56.46153846153846, + "grad_norm": 7.784600257873535, + "learning_rate": 2.176923076923077e-05, + "loss": 0.1612, + "step": 20552 + }, + { + "epoch": 56.464285714285715, + "grad_norm": 11.263214111328125, + "learning_rate": 2.1767857142857142e-05, + "loss": 0.2337, + "step": 20553 + }, + { + "epoch": 56.467032967032964, + "grad_norm": 8.386625289916992, + "learning_rate": 2.176648351648352e-05, + "loss": 0.167, + "step": 20554 + }, + { + "epoch": 56.46978021978022, + "grad_norm": 19.094358444213867, + "learning_rate": 2.1765109890109893e-05, + "loss": 0.5934, + "step": 20555 + }, + { + "epoch": 56.472527472527474, + "grad_norm": 17.31082534790039, + "learning_rate": 2.1763736263736266e-05, + "loss": 0.2278, + "step": 20556 + }, + { + "epoch": 56.47527472527472, + "grad_norm": 10.818962097167969, + "learning_rate": 2.1762362637362636e-05, + "loss": 0.2528, + "step": 20557 + }, + { + "epoch": 56.47802197802198, + "grad_norm": 9.628527641296387, + "learning_rate": 2.176098901098901e-05, + "loss": 0.2941, + "step": 20558 + }, + { + "epoch": 56.48076923076923, + "grad_norm": 0.6838502287864685, + "learning_rate": 2.1759615384615386e-05, + "loss": 0.0119, + "step": 20559 + }, + { + "epoch": 56.48351648351648, + "grad_norm": 16.30062484741211, + "learning_rate": 2.175824175824176e-05, + "loss": 0.2979, + "step": 20560 + }, + { + "epoch": 56.48626373626374, + "grad_norm": 9.21385383605957, + "learning_rate": 2.1756868131868133e-05, + "loss": 0.2266, + "step": 20561 + }, + { + "epoch": 56.489010989010985, + "grad_norm": 7.926863670349121, + "learning_rate": 2.1755494505494507e-05, + "loss": 0.1688, + "step": 20562 + }, + { + "epoch": 56.49175824175824, + "grad_norm": 14.071703910827637, + "learning_rate": 2.175412087912088e-05, + "loss": 0.3554, + "step": 20563 + }, + { + "epoch": 56.494505494505496, + "grad_norm": 8.599052429199219, + "learning_rate": 2.1752747252747253e-05, + "loss": 0.1592, + "step": 20564 + }, + { + "epoch": 56.497252747252745, + "grad_norm": 7.385944366455078, + "learning_rate": 2.1751373626373627e-05, + "loss": 0.0599, + "step": 20565 + }, + { + "epoch": 56.5, + "grad_norm": 17.368289947509766, + "learning_rate": 2.175e-05, + "loss": 0.4184, + "step": 20566 + }, + { + "epoch": 56.502747252747255, + "grad_norm": 12.670226097106934, + "learning_rate": 2.1748626373626374e-05, + "loss": 0.2756, + "step": 20567 + }, + { + "epoch": 56.505494505494504, + "grad_norm": 18.085346221923828, + "learning_rate": 2.1747252747252747e-05, + "loss": 0.4974, + "step": 20568 + }, + { + "epoch": 56.50824175824176, + "grad_norm": 4.777430057525635, + "learning_rate": 2.1745879120879124e-05, + "loss": 0.0918, + "step": 20569 + }, + { + "epoch": 56.51098901098901, + "grad_norm": 10.563117027282715, + "learning_rate": 2.1744505494505497e-05, + "loss": 0.1537, + "step": 20570 + }, + { + "epoch": 56.51373626373626, + "grad_norm": 12.857935905456543, + "learning_rate": 2.174313186813187e-05, + "loss": 0.3339, + "step": 20571 + }, + { + "epoch": 56.51648351648352, + "grad_norm": 7.085626125335693, + "learning_rate": 2.174175824175824e-05, + "loss": 0.159, + "step": 20572 + }, + { + "epoch": 56.51923076923077, + "grad_norm": 21.942529678344727, + "learning_rate": 2.1740384615384614e-05, + "loss": 0.4205, + "step": 20573 + }, + { + "epoch": 56.52197802197802, + "grad_norm": 8.510370254516602, + "learning_rate": 2.173901098901099e-05, + "loss": 0.1012, + "step": 20574 + }, + { + "epoch": 56.52472527472528, + "grad_norm": 12.29086685180664, + "learning_rate": 2.1737637362637364e-05, + "loss": 0.2667, + "step": 20575 + }, + { + "epoch": 56.527472527472526, + "grad_norm": 6.155073642730713, + "learning_rate": 2.1736263736263738e-05, + "loss": 0.1028, + "step": 20576 + }, + { + "epoch": 56.53021978021978, + "grad_norm": 13.156487464904785, + "learning_rate": 2.173489010989011e-05, + "loss": 0.3322, + "step": 20577 + }, + { + "epoch": 56.532967032967036, + "grad_norm": 6.0393218994140625, + "learning_rate": 2.1733516483516484e-05, + "loss": 0.0696, + "step": 20578 + }, + { + "epoch": 56.535714285714285, + "grad_norm": 22.948402404785156, + "learning_rate": 2.1732142857142858e-05, + "loss": 0.554, + "step": 20579 + }, + { + "epoch": 56.53846153846154, + "grad_norm": 10.322452545166016, + "learning_rate": 2.173076923076923e-05, + "loss": 0.2379, + "step": 20580 + }, + { + "epoch": 56.54120879120879, + "grad_norm": 14.379350662231445, + "learning_rate": 2.1729395604395605e-05, + "loss": 0.4417, + "step": 20581 + }, + { + "epoch": 56.543956043956044, + "grad_norm": 8.219653129577637, + "learning_rate": 2.1728021978021978e-05, + "loss": 0.34, + "step": 20582 + }, + { + "epoch": 56.5467032967033, + "grad_norm": 8.63028621673584, + "learning_rate": 2.172664835164835e-05, + "loss": 0.1411, + "step": 20583 + }, + { + "epoch": 56.54945054945055, + "grad_norm": 11.257124900817871, + "learning_rate": 2.1725274725274728e-05, + "loss": 0.2654, + "step": 20584 + }, + { + "epoch": 56.5521978021978, + "grad_norm": 6.401762962341309, + "learning_rate": 2.17239010989011e-05, + "loss": 0.0862, + "step": 20585 + }, + { + "epoch": 56.55494505494506, + "grad_norm": 3.6798973083496094, + "learning_rate": 2.1722527472527475e-05, + "loss": 0.0535, + "step": 20586 + }, + { + "epoch": 56.55769230769231, + "grad_norm": 10.276047706604004, + "learning_rate": 2.1721153846153845e-05, + "loss": 0.1376, + "step": 20587 + }, + { + "epoch": 56.56043956043956, + "grad_norm": 3.515375852584839, + "learning_rate": 2.171978021978022e-05, + "loss": 0.0662, + "step": 20588 + }, + { + "epoch": 56.56318681318681, + "grad_norm": 14.255114555358887, + "learning_rate": 2.1718406593406595e-05, + "loss": 0.4344, + "step": 20589 + }, + { + "epoch": 56.565934065934066, + "grad_norm": 12.779199600219727, + "learning_rate": 2.171703296703297e-05, + "loss": 0.2732, + "step": 20590 + }, + { + "epoch": 56.56868131868132, + "grad_norm": 7.994617938995361, + "learning_rate": 2.1715659340659342e-05, + "loss": 0.1279, + "step": 20591 + }, + { + "epoch": 56.57142857142857, + "grad_norm": 19.561866760253906, + "learning_rate": 2.1714285714285715e-05, + "loss": 0.2661, + "step": 20592 + }, + { + "epoch": 56.574175824175825, + "grad_norm": 7.608867168426514, + "learning_rate": 2.171291208791209e-05, + "loss": 0.2649, + "step": 20593 + }, + { + "epoch": 56.57692307692308, + "grad_norm": 21.595312118530273, + "learning_rate": 2.1711538461538462e-05, + "loss": 0.542, + "step": 20594 + }, + { + "epoch": 56.57967032967033, + "grad_norm": 12.62947940826416, + "learning_rate": 2.1710164835164836e-05, + "loss": 0.3492, + "step": 20595 + }, + { + "epoch": 56.582417582417584, + "grad_norm": 11.410636901855469, + "learning_rate": 2.170879120879121e-05, + "loss": 0.2539, + "step": 20596 + }, + { + "epoch": 56.58516483516483, + "grad_norm": 10.118799209594727, + "learning_rate": 2.1707417582417583e-05, + "loss": 0.1838, + "step": 20597 + }, + { + "epoch": 56.58791208791209, + "grad_norm": 14.629266738891602, + "learning_rate": 2.1706043956043956e-05, + "loss": 0.196, + "step": 20598 + }, + { + "epoch": 56.59065934065934, + "grad_norm": 26.835506439208984, + "learning_rate": 2.1704670329670333e-05, + "loss": 0.7586, + "step": 20599 + }, + { + "epoch": 56.59340659340659, + "grad_norm": 10.759028434753418, + "learning_rate": 2.1703296703296706e-05, + "loss": 0.227, + "step": 20600 + }, + { + "epoch": 56.59615384615385, + "grad_norm": 14.14804744720459, + "learning_rate": 2.170192307692308e-05, + "loss": 0.3807, + "step": 20601 + }, + { + "epoch": 56.5989010989011, + "grad_norm": 14.630586624145508, + "learning_rate": 2.170054945054945e-05, + "loss": 0.1932, + "step": 20602 + }, + { + "epoch": 56.60164835164835, + "grad_norm": 9.817155838012695, + "learning_rate": 2.1699175824175823e-05, + "loss": 0.1585, + "step": 20603 + }, + { + "epoch": 56.604395604395606, + "grad_norm": 7.351921081542969, + "learning_rate": 2.16978021978022e-05, + "loss": 0.1954, + "step": 20604 + }, + { + "epoch": 56.607142857142854, + "grad_norm": 5.3415727615356445, + "learning_rate": 2.1696428571428573e-05, + "loss": 0.067, + "step": 20605 + }, + { + "epoch": 56.60989010989011, + "grad_norm": 8.648150444030762, + "learning_rate": 2.1695054945054947e-05, + "loss": 0.204, + "step": 20606 + }, + { + "epoch": 56.612637362637365, + "grad_norm": 4.738077640533447, + "learning_rate": 2.169368131868132e-05, + "loss": 0.1484, + "step": 20607 + }, + { + "epoch": 56.61538461538461, + "grad_norm": 8.179367065429688, + "learning_rate": 2.1692307692307693e-05, + "loss": 0.2091, + "step": 20608 + }, + { + "epoch": 56.61813186813187, + "grad_norm": 3.0767621994018555, + "learning_rate": 2.1690934065934067e-05, + "loss": 0.0383, + "step": 20609 + }, + { + "epoch": 56.620879120879124, + "grad_norm": 16.294795989990234, + "learning_rate": 2.168956043956044e-05, + "loss": 0.2737, + "step": 20610 + }, + { + "epoch": 56.62362637362637, + "grad_norm": 7.105094909667969, + "learning_rate": 2.1688186813186814e-05, + "loss": 0.1286, + "step": 20611 + }, + { + "epoch": 56.62637362637363, + "grad_norm": 15.873282432556152, + "learning_rate": 2.1686813186813187e-05, + "loss": 0.4845, + "step": 20612 + }, + { + "epoch": 56.629120879120876, + "grad_norm": 10.367969512939453, + "learning_rate": 2.168543956043956e-05, + "loss": 0.2407, + "step": 20613 + }, + { + "epoch": 56.63186813186813, + "grad_norm": 11.594762802124023, + "learning_rate": 2.1684065934065937e-05, + "loss": 0.2711, + "step": 20614 + }, + { + "epoch": 56.63461538461539, + "grad_norm": 3.8953964710235596, + "learning_rate": 2.168269230769231e-05, + "loss": 0.0562, + "step": 20615 + }, + { + "epoch": 56.637362637362635, + "grad_norm": 14.485404968261719, + "learning_rate": 2.1681318681318684e-05, + "loss": 0.3254, + "step": 20616 + }, + { + "epoch": 56.64010989010989, + "grad_norm": 20.823928833007812, + "learning_rate": 2.1679945054945054e-05, + "loss": 0.5122, + "step": 20617 + }, + { + "epoch": 56.642857142857146, + "grad_norm": 12.785611152648926, + "learning_rate": 2.1678571428571427e-05, + "loss": 0.3209, + "step": 20618 + }, + { + "epoch": 56.645604395604394, + "grad_norm": 14.083996772766113, + "learning_rate": 2.1677197802197804e-05, + "loss": 0.2812, + "step": 20619 + }, + { + "epoch": 56.64835164835165, + "grad_norm": 11.487710952758789, + "learning_rate": 2.1675824175824178e-05, + "loss": 0.2549, + "step": 20620 + }, + { + "epoch": 56.6510989010989, + "grad_norm": 9.18851375579834, + "learning_rate": 2.167445054945055e-05, + "loss": 0.134, + "step": 20621 + }, + { + "epoch": 56.65384615384615, + "grad_norm": 15.819784164428711, + "learning_rate": 2.1673076923076924e-05, + "loss": 0.6185, + "step": 20622 + }, + { + "epoch": 56.65659340659341, + "grad_norm": 14.29548168182373, + "learning_rate": 2.1671703296703298e-05, + "loss": 0.2252, + "step": 20623 + }, + { + "epoch": 56.65934065934066, + "grad_norm": 11.972600936889648, + "learning_rate": 2.167032967032967e-05, + "loss": 0.3245, + "step": 20624 + }, + { + "epoch": 56.66208791208791, + "grad_norm": 11.360203742980957, + "learning_rate": 2.1668956043956045e-05, + "loss": 0.3731, + "step": 20625 + }, + { + "epoch": 56.66483516483517, + "grad_norm": 12.09188461303711, + "learning_rate": 2.1667582417582418e-05, + "loss": 0.183, + "step": 20626 + }, + { + "epoch": 56.667582417582416, + "grad_norm": 5.246795654296875, + "learning_rate": 2.166620879120879e-05, + "loss": 0.0983, + "step": 20627 + }, + { + "epoch": 56.67032967032967, + "grad_norm": 12.367679595947266, + "learning_rate": 2.1664835164835165e-05, + "loss": 0.1499, + "step": 20628 + }, + { + "epoch": 56.67307692307692, + "grad_norm": 8.854913711547852, + "learning_rate": 2.1663461538461542e-05, + "loss": 0.1486, + "step": 20629 + }, + { + "epoch": 56.675824175824175, + "grad_norm": 10.766033172607422, + "learning_rate": 2.1662087912087915e-05, + "loss": 0.2291, + "step": 20630 + }, + { + "epoch": 56.67857142857143, + "grad_norm": 7.1598052978515625, + "learning_rate": 2.166071428571429e-05, + "loss": 0.1345, + "step": 20631 + }, + { + "epoch": 56.68131868131868, + "grad_norm": 8.369820594787598, + "learning_rate": 2.165934065934066e-05, + "loss": 0.2039, + "step": 20632 + }, + { + "epoch": 56.684065934065934, + "grad_norm": 20.350313186645508, + "learning_rate": 2.1657967032967032e-05, + "loss": 1.0524, + "step": 20633 + }, + { + "epoch": 56.68681318681319, + "grad_norm": 11.711255073547363, + "learning_rate": 2.165659340659341e-05, + "loss": 0.1406, + "step": 20634 + }, + { + "epoch": 56.68956043956044, + "grad_norm": 8.89236831665039, + "learning_rate": 2.1655219780219782e-05, + "loss": 0.1563, + "step": 20635 + }, + { + "epoch": 56.69230769230769, + "grad_norm": 4.2724199295043945, + "learning_rate": 2.1653846153846156e-05, + "loss": 0.0767, + "step": 20636 + }, + { + "epoch": 56.69505494505494, + "grad_norm": 13.108914375305176, + "learning_rate": 2.165247252747253e-05, + "loss": 0.2921, + "step": 20637 + }, + { + "epoch": 56.6978021978022, + "grad_norm": 20.541006088256836, + "learning_rate": 2.1651098901098902e-05, + "loss": 0.5019, + "step": 20638 + }, + { + "epoch": 56.70054945054945, + "grad_norm": 9.156716346740723, + "learning_rate": 2.1649725274725276e-05, + "loss": 0.1602, + "step": 20639 + }, + { + "epoch": 56.7032967032967, + "grad_norm": 12.940093994140625, + "learning_rate": 2.164835164835165e-05, + "loss": 0.2772, + "step": 20640 + }, + { + "epoch": 56.706043956043956, + "grad_norm": 14.185381889343262, + "learning_rate": 2.1646978021978023e-05, + "loss": 0.3426, + "step": 20641 + }, + { + "epoch": 56.70879120879121, + "grad_norm": 15.10166072845459, + "learning_rate": 2.1645604395604396e-05, + "loss": 0.6172, + "step": 20642 + }, + { + "epoch": 56.71153846153846, + "grad_norm": 16.21605110168457, + "learning_rate": 2.164423076923077e-05, + "loss": 0.3305, + "step": 20643 + }, + { + "epoch": 56.714285714285715, + "grad_norm": 13.168818473815918, + "learning_rate": 2.1642857142857146e-05, + "loss": 0.269, + "step": 20644 + }, + { + "epoch": 56.717032967032964, + "grad_norm": 7.050699710845947, + "learning_rate": 2.164148351648352e-05, + "loss": 0.1094, + "step": 20645 + }, + { + "epoch": 56.71978021978022, + "grad_norm": 9.910106658935547, + "learning_rate": 2.1640109890109893e-05, + "loss": 0.1201, + "step": 20646 + }, + { + "epoch": 56.722527472527474, + "grad_norm": 8.545320510864258, + "learning_rate": 2.1638736263736263e-05, + "loss": 0.1159, + "step": 20647 + }, + { + "epoch": 56.72527472527472, + "grad_norm": 7.2613348960876465, + "learning_rate": 2.1637362637362636e-05, + "loss": 0.122, + "step": 20648 + }, + { + "epoch": 56.72802197802198, + "grad_norm": 9.069523811340332, + "learning_rate": 2.163598901098901e-05, + "loss": 0.1152, + "step": 20649 + }, + { + "epoch": 56.73076923076923, + "grad_norm": 14.527963638305664, + "learning_rate": 2.1634615384615387e-05, + "loss": 0.2826, + "step": 20650 + }, + { + "epoch": 56.73351648351648, + "grad_norm": 12.559576988220215, + "learning_rate": 2.163324175824176e-05, + "loss": 0.1682, + "step": 20651 + }, + { + "epoch": 56.73626373626374, + "grad_norm": 14.766145706176758, + "learning_rate": 2.1631868131868133e-05, + "loss": 0.2029, + "step": 20652 + }, + { + "epoch": 56.73901098901099, + "grad_norm": 9.527703285217285, + "learning_rate": 2.1630494505494507e-05, + "loss": 0.1975, + "step": 20653 + }, + { + "epoch": 56.74175824175824, + "grad_norm": 13.712837219238281, + "learning_rate": 2.162912087912088e-05, + "loss": 0.2622, + "step": 20654 + }, + { + "epoch": 56.744505494505496, + "grad_norm": 5.020475387573242, + "learning_rate": 2.1627747252747254e-05, + "loss": 0.1234, + "step": 20655 + }, + { + "epoch": 56.747252747252745, + "grad_norm": 13.838006973266602, + "learning_rate": 2.1626373626373627e-05, + "loss": 0.2666, + "step": 20656 + }, + { + "epoch": 56.75, + "grad_norm": 15.30631160736084, + "learning_rate": 2.1625e-05, + "loss": 0.257, + "step": 20657 + }, + { + "epoch": 56.752747252747255, + "grad_norm": 15.156966209411621, + "learning_rate": 2.1623626373626374e-05, + "loss": 0.3043, + "step": 20658 + }, + { + "epoch": 56.755494505494504, + "grad_norm": 22.635356903076172, + "learning_rate": 2.1622252747252747e-05, + "loss": 0.4291, + "step": 20659 + }, + { + "epoch": 56.75824175824176, + "grad_norm": 8.645182609558105, + "learning_rate": 2.1620879120879124e-05, + "loss": 0.1205, + "step": 20660 + }, + { + "epoch": 56.76098901098901, + "grad_norm": 5.838006973266602, + "learning_rate": 2.1619505494505497e-05, + "loss": 0.0915, + "step": 20661 + }, + { + "epoch": 56.76373626373626, + "grad_norm": 8.172836303710938, + "learning_rate": 2.1618131868131868e-05, + "loss": 0.207, + "step": 20662 + }, + { + "epoch": 56.76648351648352, + "grad_norm": 11.898177146911621, + "learning_rate": 2.161675824175824e-05, + "loss": 0.314, + "step": 20663 + }, + { + "epoch": 56.76923076923077, + "grad_norm": 9.16483211517334, + "learning_rate": 2.1615384615384614e-05, + "loss": 0.0979, + "step": 20664 + }, + { + "epoch": 56.77197802197802, + "grad_norm": 18.93508529663086, + "learning_rate": 2.161401098901099e-05, + "loss": 0.6944, + "step": 20665 + }, + { + "epoch": 56.77472527472528, + "grad_norm": 13.266305923461914, + "learning_rate": 2.1612637362637365e-05, + "loss": 0.2908, + "step": 20666 + }, + { + "epoch": 56.777472527472526, + "grad_norm": 4.694851875305176, + "learning_rate": 2.1611263736263738e-05, + "loss": 0.0523, + "step": 20667 + }, + { + "epoch": 56.78021978021978, + "grad_norm": 6.927103042602539, + "learning_rate": 2.160989010989011e-05, + "loss": 0.1455, + "step": 20668 + }, + { + "epoch": 56.782967032967036, + "grad_norm": 10.059932708740234, + "learning_rate": 2.1608516483516485e-05, + "loss": 0.15, + "step": 20669 + }, + { + "epoch": 56.785714285714285, + "grad_norm": 13.731667518615723, + "learning_rate": 2.1607142857142858e-05, + "loss": 0.1224, + "step": 20670 + }, + { + "epoch": 56.78846153846154, + "grad_norm": 16.834928512573242, + "learning_rate": 2.160576923076923e-05, + "loss": 0.3085, + "step": 20671 + }, + { + "epoch": 56.79120879120879, + "grad_norm": 11.165824890136719, + "learning_rate": 2.1604395604395605e-05, + "loss": 0.3282, + "step": 20672 + }, + { + "epoch": 56.793956043956044, + "grad_norm": 5.8518805503845215, + "learning_rate": 2.160302197802198e-05, + "loss": 0.1472, + "step": 20673 + }, + { + "epoch": 56.7967032967033, + "grad_norm": 14.4041109085083, + "learning_rate": 2.1601648351648352e-05, + "loss": 0.3441, + "step": 20674 + }, + { + "epoch": 56.79945054945055, + "grad_norm": 5.623892784118652, + "learning_rate": 2.160027472527473e-05, + "loss": 0.0672, + "step": 20675 + }, + { + "epoch": 56.8021978021978, + "grad_norm": 7.228303909301758, + "learning_rate": 2.1598901098901102e-05, + "loss": 0.1083, + "step": 20676 + }, + { + "epoch": 56.80494505494506, + "grad_norm": 13.904135704040527, + "learning_rate": 2.1597527472527472e-05, + "loss": 0.3981, + "step": 20677 + }, + { + "epoch": 56.80769230769231, + "grad_norm": 9.913392066955566, + "learning_rate": 2.1596153846153845e-05, + "loss": 0.3375, + "step": 20678 + }, + { + "epoch": 56.81043956043956, + "grad_norm": 7.117776870727539, + "learning_rate": 2.159478021978022e-05, + "loss": 0.0874, + "step": 20679 + }, + { + "epoch": 56.81318681318681, + "grad_norm": 9.806801795959473, + "learning_rate": 2.1593406593406596e-05, + "loss": 0.1844, + "step": 20680 + }, + { + "epoch": 56.815934065934066, + "grad_norm": 11.832067489624023, + "learning_rate": 2.159203296703297e-05, + "loss": 0.1619, + "step": 20681 + }, + { + "epoch": 56.81868131868132, + "grad_norm": 8.222466468811035, + "learning_rate": 2.1590659340659342e-05, + "loss": 0.3005, + "step": 20682 + }, + { + "epoch": 56.82142857142857, + "grad_norm": 10.658186912536621, + "learning_rate": 2.1589285714285716e-05, + "loss": 0.1948, + "step": 20683 + }, + { + "epoch": 56.824175824175825, + "grad_norm": 5.53383207321167, + "learning_rate": 2.158791208791209e-05, + "loss": 0.107, + "step": 20684 + }, + { + "epoch": 56.82692307692308, + "grad_norm": 6.086572647094727, + "learning_rate": 2.1586538461538463e-05, + "loss": 0.1093, + "step": 20685 + }, + { + "epoch": 56.82967032967033, + "grad_norm": 18.132421493530273, + "learning_rate": 2.1585164835164836e-05, + "loss": 0.4878, + "step": 20686 + }, + { + "epoch": 56.832417582417584, + "grad_norm": 17.798931121826172, + "learning_rate": 2.158379120879121e-05, + "loss": 0.4732, + "step": 20687 + }, + { + "epoch": 56.83516483516483, + "grad_norm": 14.708456039428711, + "learning_rate": 2.1582417582417583e-05, + "loss": 0.4758, + "step": 20688 + }, + { + "epoch": 56.83791208791209, + "grad_norm": 9.909454345703125, + "learning_rate": 2.1581043956043956e-05, + "loss": 0.1667, + "step": 20689 + }, + { + "epoch": 56.84065934065934, + "grad_norm": 15.181023597717285, + "learning_rate": 2.1579670329670333e-05, + "loss": 0.1955, + "step": 20690 + }, + { + "epoch": 56.84340659340659, + "grad_norm": 9.814021110534668, + "learning_rate": 2.1578296703296706e-05, + "loss": 0.2122, + "step": 20691 + }, + { + "epoch": 56.84615384615385, + "grad_norm": 5.2184600830078125, + "learning_rate": 2.1576923076923076e-05, + "loss": 0.0876, + "step": 20692 + }, + { + "epoch": 56.8489010989011, + "grad_norm": 13.386149406433105, + "learning_rate": 2.157554945054945e-05, + "loss": 0.4263, + "step": 20693 + }, + { + "epoch": 56.85164835164835, + "grad_norm": 4.6440839767456055, + "learning_rate": 2.1574175824175823e-05, + "loss": 0.0674, + "step": 20694 + }, + { + "epoch": 56.854395604395606, + "grad_norm": 14.21632194519043, + "learning_rate": 2.15728021978022e-05, + "loss": 0.1645, + "step": 20695 + }, + { + "epoch": 56.857142857142854, + "grad_norm": 5.6566667556762695, + "learning_rate": 2.1571428571428574e-05, + "loss": 0.0805, + "step": 20696 + }, + { + "epoch": 56.85989010989011, + "grad_norm": 7.6326518058776855, + "learning_rate": 2.1570054945054947e-05, + "loss": 0.1098, + "step": 20697 + }, + { + "epoch": 56.862637362637365, + "grad_norm": 10.833407402038574, + "learning_rate": 2.156868131868132e-05, + "loss": 0.1984, + "step": 20698 + }, + { + "epoch": 56.86538461538461, + "grad_norm": 13.700441360473633, + "learning_rate": 2.1567307692307694e-05, + "loss": 0.2434, + "step": 20699 + }, + { + "epoch": 56.86813186813187, + "grad_norm": 13.644707679748535, + "learning_rate": 2.1565934065934067e-05, + "loss": 0.3086, + "step": 20700 + }, + { + "epoch": 56.870879120879124, + "grad_norm": 10.229071617126465, + "learning_rate": 2.156456043956044e-05, + "loss": 0.0961, + "step": 20701 + }, + { + "epoch": 56.87362637362637, + "grad_norm": 18.371431350708008, + "learning_rate": 2.1563186813186814e-05, + "loss": 0.7911, + "step": 20702 + }, + { + "epoch": 56.87637362637363, + "grad_norm": 15.166738510131836, + "learning_rate": 2.1561813186813187e-05, + "loss": 0.282, + "step": 20703 + }, + { + "epoch": 56.879120879120876, + "grad_norm": 17.519458770751953, + "learning_rate": 2.156043956043956e-05, + "loss": 0.3265, + "step": 20704 + }, + { + "epoch": 56.88186813186813, + "grad_norm": 6.516539573669434, + "learning_rate": 2.1559065934065938e-05, + "loss": 0.1632, + "step": 20705 + }, + { + "epoch": 56.88461538461539, + "grad_norm": 13.655458450317383, + "learning_rate": 2.155769230769231e-05, + "loss": 0.2985, + "step": 20706 + }, + { + "epoch": 56.887362637362635, + "grad_norm": 15.586039543151855, + "learning_rate": 2.155631868131868e-05, + "loss": 0.3468, + "step": 20707 + }, + { + "epoch": 56.89010989010989, + "grad_norm": 21.865062713623047, + "learning_rate": 2.1554945054945054e-05, + "loss": 0.3406, + "step": 20708 + }, + { + "epoch": 56.892857142857146, + "grad_norm": 11.439767837524414, + "learning_rate": 2.1553571428571428e-05, + "loss": 0.2603, + "step": 20709 + }, + { + "epoch": 56.895604395604394, + "grad_norm": 21.286659240722656, + "learning_rate": 2.1552197802197805e-05, + "loss": 0.2935, + "step": 20710 + }, + { + "epoch": 56.89835164835165, + "grad_norm": 8.928285598754883, + "learning_rate": 2.1550824175824178e-05, + "loss": 0.2304, + "step": 20711 + }, + { + "epoch": 56.9010989010989, + "grad_norm": 6.109483242034912, + "learning_rate": 2.154945054945055e-05, + "loss": 0.1331, + "step": 20712 + }, + { + "epoch": 56.90384615384615, + "grad_norm": 20.320280075073242, + "learning_rate": 2.1548076923076925e-05, + "loss": 0.4298, + "step": 20713 + }, + { + "epoch": 56.90659340659341, + "grad_norm": 11.056055068969727, + "learning_rate": 2.1546703296703298e-05, + "loss": 0.1203, + "step": 20714 + }, + { + "epoch": 56.90934065934066, + "grad_norm": 5.700597286224365, + "learning_rate": 2.154532967032967e-05, + "loss": 0.0533, + "step": 20715 + }, + { + "epoch": 56.91208791208791, + "grad_norm": 13.852243423461914, + "learning_rate": 2.1543956043956045e-05, + "loss": 0.2106, + "step": 20716 + }, + { + "epoch": 56.91483516483517, + "grad_norm": 20.516891479492188, + "learning_rate": 2.154258241758242e-05, + "loss": 0.6982, + "step": 20717 + }, + { + "epoch": 56.917582417582416, + "grad_norm": 18.73042106628418, + "learning_rate": 2.1541208791208792e-05, + "loss": 0.2962, + "step": 20718 + }, + { + "epoch": 56.92032967032967, + "grad_norm": 6.068623065948486, + "learning_rate": 2.1539835164835165e-05, + "loss": 0.0917, + "step": 20719 + }, + { + "epoch": 56.92307692307692, + "grad_norm": 11.119922637939453, + "learning_rate": 2.1538461538461542e-05, + "loss": 0.3078, + "step": 20720 + }, + { + "epoch": 56.925824175824175, + "grad_norm": 9.001398086547852, + "learning_rate": 2.1537087912087915e-05, + "loss": 0.2077, + "step": 20721 + }, + { + "epoch": 56.92857142857143, + "grad_norm": 4.733592987060547, + "learning_rate": 2.1535714285714285e-05, + "loss": 0.0594, + "step": 20722 + }, + { + "epoch": 56.93131868131868, + "grad_norm": 27.49359893798828, + "learning_rate": 2.153434065934066e-05, + "loss": 0.9616, + "step": 20723 + }, + { + "epoch": 56.934065934065934, + "grad_norm": 7.412086009979248, + "learning_rate": 2.1532967032967032e-05, + "loss": 0.0929, + "step": 20724 + }, + { + "epoch": 56.93681318681319, + "grad_norm": 12.497015953063965, + "learning_rate": 2.153159340659341e-05, + "loss": 0.2278, + "step": 20725 + }, + { + "epoch": 56.93956043956044, + "grad_norm": 9.110107421875, + "learning_rate": 2.1530219780219782e-05, + "loss": 0.2317, + "step": 20726 + }, + { + "epoch": 56.94230769230769, + "grad_norm": 18.313640594482422, + "learning_rate": 2.1528846153846156e-05, + "loss": 1.0281, + "step": 20727 + }, + { + "epoch": 56.94505494505494, + "grad_norm": 16.985301971435547, + "learning_rate": 2.152747252747253e-05, + "loss": 0.8486, + "step": 20728 + }, + { + "epoch": 56.9478021978022, + "grad_norm": 8.351471900939941, + "learning_rate": 2.1526098901098903e-05, + "loss": 0.2572, + "step": 20729 + }, + { + "epoch": 56.95054945054945, + "grad_norm": 24.170568466186523, + "learning_rate": 2.1524725274725276e-05, + "loss": 0.7632, + "step": 20730 + }, + { + "epoch": 56.9532967032967, + "grad_norm": 14.005046844482422, + "learning_rate": 2.152335164835165e-05, + "loss": 0.2615, + "step": 20731 + }, + { + "epoch": 56.956043956043956, + "grad_norm": 10.118786811828613, + "learning_rate": 2.1521978021978023e-05, + "loss": 0.2602, + "step": 20732 + }, + { + "epoch": 56.95879120879121, + "grad_norm": 17.833717346191406, + "learning_rate": 2.1520604395604396e-05, + "loss": 0.868, + "step": 20733 + }, + { + "epoch": 56.96153846153846, + "grad_norm": 24.460914611816406, + "learning_rate": 2.151923076923077e-05, + "loss": 0.7632, + "step": 20734 + }, + { + "epoch": 56.964285714285715, + "grad_norm": 12.98448657989502, + "learning_rate": 2.1517857142857147e-05, + "loss": 0.3049, + "step": 20735 + }, + { + "epoch": 56.967032967032964, + "grad_norm": 13.237268447875977, + "learning_rate": 2.1516483516483517e-05, + "loss": 0.3013, + "step": 20736 + }, + { + "epoch": 56.96978021978022, + "grad_norm": 13.437952041625977, + "learning_rate": 2.151510989010989e-05, + "loss": 0.1599, + "step": 20737 + }, + { + "epoch": 56.972527472527474, + "grad_norm": 6.918842792510986, + "learning_rate": 2.1513736263736263e-05, + "loss": 0.101, + "step": 20738 + }, + { + "epoch": 56.97527472527472, + "grad_norm": 19.45890998840332, + "learning_rate": 2.1512362637362637e-05, + "loss": 0.3302, + "step": 20739 + }, + { + "epoch": 56.97802197802198, + "grad_norm": 25.659088134765625, + "learning_rate": 2.1510989010989014e-05, + "loss": 0.7501, + "step": 20740 + }, + { + "epoch": 56.98076923076923, + "grad_norm": 16.767484664916992, + "learning_rate": 2.1509615384615387e-05, + "loss": 0.2368, + "step": 20741 + }, + { + "epoch": 56.98351648351648, + "grad_norm": 9.248617172241211, + "learning_rate": 2.150824175824176e-05, + "loss": 0.226, + "step": 20742 + }, + { + "epoch": 56.98626373626374, + "grad_norm": 6.711560249328613, + "learning_rate": 2.1506868131868134e-05, + "loss": 0.0759, + "step": 20743 + }, + { + "epoch": 56.98901098901099, + "grad_norm": 14.731999397277832, + "learning_rate": 2.1505494505494507e-05, + "loss": 0.3169, + "step": 20744 + }, + { + "epoch": 56.99175824175824, + "grad_norm": 7.73817253112793, + "learning_rate": 2.150412087912088e-05, + "loss": 0.1282, + "step": 20745 + }, + { + "epoch": 56.994505494505496, + "grad_norm": 20.884933471679688, + "learning_rate": 2.1502747252747254e-05, + "loss": 0.8712, + "step": 20746 + }, + { + "epoch": 56.997252747252745, + "grad_norm": 11.249496459960938, + "learning_rate": 2.1501373626373627e-05, + "loss": 0.2259, + "step": 20747 + }, + { + "epoch": 57.0, + "grad_norm": 49.069129943847656, + "learning_rate": 2.15e-05, + "loss": 0.6157, + "step": 20748 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.78099173553719, + "eval_f1": 0.7738869816538578, + "eval_f1_DuraRiadoRio_64x64": 0.7349081364829396, + "eval_f1_Mole_64x64": 0.8669201520912547, + "eval_f1_Quebrado_64x64": 0.842809364548495, + "eval_f1_RiadoRio_64x64": 0.4890829694323144, + "eval_f1_RioFechado_64x64": 0.9357142857142857, + "eval_loss": 0.7882454991340637, + "eval_precision": 0.8076303616506779, + "eval_precision_DuraRiadoRio_64x64": 0.5907172995780591, + "eval_precision_Mole_64x64": 0.957983193277311, + "eval_precision_Quebrado_64x64": 0.8129032258064516, + "eval_precision_RiadoRio_64x64": 0.7272727272727273, + "eval_precision_RioFechado_64x64": 0.9492753623188406, + "eval_recall": 0.7859690305576146, + "eval_recall_DuraRiadoRio_64x64": 0.9722222222222222, + "eval_recall_Mole_64x64": 0.7916666666666666, + "eval_recall_Quebrado_64x64": 0.875, + "eval_recall_RiadoRio_64x64": 0.3684210526315789, + "eval_recall_RioFechado_64x64": 0.9225352112676056, + "eval_runtime": 1.9326, + "eval_samples_per_second": 375.656, + "eval_steps_per_second": 23.802, + "step": 20748 + }, + { + "epoch": 57.002747252747255, + "grad_norm": 10.643946647644043, + "learning_rate": 2.1498626373626374e-05, + "loss": 0.1749, + "step": 20749 + }, + { + "epoch": 57.005494505494504, + "grad_norm": 23.298219680786133, + "learning_rate": 2.149725274725275e-05, + "loss": 0.5251, + "step": 20750 + }, + { + "epoch": 57.00824175824176, + "grad_norm": 7.5627031326293945, + "learning_rate": 2.149587912087912e-05, + "loss": 0.1449, + "step": 20751 + }, + { + "epoch": 57.010989010989015, + "grad_norm": 8.48692798614502, + "learning_rate": 2.1494505494505494e-05, + "loss": 0.224, + "step": 20752 + }, + { + "epoch": 57.01373626373626, + "grad_norm": 18.369007110595703, + "learning_rate": 2.1493131868131868e-05, + "loss": 0.9187, + "step": 20753 + }, + { + "epoch": 57.01648351648352, + "grad_norm": 6.865860462188721, + "learning_rate": 2.149175824175824e-05, + "loss": 0.1836, + "step": 20754 + }, + { + "epoch": 57.01923076923077, + "grad_norm": 6.706496715545654, + "learning_rate": 2.1490384615384618e-05, + "loss": 0.0868, + "step": 20755 + }, + { + "epoch": 57.02197802197802, + "grad_norm": 11.447575569152832, + "learning_rate": 2.148901098901099e-05, + "loss": 0.3833, + "step": 20756 + }, + { + "epoch": 57.02472527472528, + "grad_norm": 13.747493743896484, + "learning_rate": 2.1487637362637365e-05, + "loss": 0.348, + "step": 20757 + }, + { + "epoch": 57.027472527472526, + "grad_norm": 9.791083335876465, + "learning_rate": 2.1486263736263738e-05, + "loss": 0.2057, + "step": 20758 + }, + { + "epoch": 57.03021978021978, + "grad_norm": 6.4246134757995605, + "learning_rate": 2.148489010989011e-05, + "loss": 0.0997, + "step": 20759 + }, + { + "epoch": 57.032967032967036, + "grad_norm": 6.405653476715088, + "learning_rate": 2.1483516483516482e-05, + "loss": 0.0943, + "step": 20760 + }, + { + "epoch": 57.035714285714285, + "grad_norm": 11.54202651977539, + "learning_rate": 2.148214285714286e-05, + "loss": 0.3829, + "step": 20761 + }, + { + "epoch": 57.03846153846154, + "grad_norm": 9.520417213439941, + "learning_rate": 2.1480769230769232e-05, + "loss": 0.2488, + "step": 20762 + }, + { + "epoch": 57.04120879120879, + "grad_norm": 4.348824501037598, + "learning_rate": 2.1479395604395605e-05, + "loss": 0.0559, + "step": 20763 + }, + { + "epoch": 57.043956043956044, + "grad_norm": 8.01176643371582, + "learning_rate": 2.147802197802198e-05, + "loss": 0.1402, + "step": 20764 + }, + { + "epoch": 57.0467032967033, + "grad_norm": 10.034958839416504, + "learning_rate": 2.1476648351648352e-05, + "loss": 0.2659, + "step": 20765 + }, + { + "epoch": 57.04945054945055, + "grad_norm": 10.211974143981934, + "learning_rate": 2.1475274725274726e-05, + "loss": 0.1586, + "step": 20766 + }, + { + "epoch": 57.0521978021978, + "grad_norm": 3.448599100112915, + "learning_rate": 2.14739010989011e-05, + "loss": 0.099, + "step": 20767 + }, + { + "epoch": 57.05494505494506, + "grad_norm": 7.917337894439697, + "learning_rate": 2.1472527472527472e-05, + "loss": 0.0655, + "step": 20768 + }, + { + "epoch": 57.05769230769231, + "grad_norm": 17.448701858520508, + "learning_rate": 2.1471153846153846e-05, + "loss": 0.7809, + "step": 20769 + }, + { + "epoch": 57.06043956043956, + "grad_norm": 1.8055164813995361, + "learning_rate": 2.146978021978022e-05, + "loss": 0.027, + "step": 20770 + }, + { + "epoch": 57.06318681318681, + "grad_norm": 16.129919052124023, + "learning_rate": 2.1468406593406596e-05, + "loss": 0.4816, + "step": 20771 + }, + { + "epoch": 57.065934065934066, + "grad_norm": 11.642960548400879, + "learning_rate": 2.146703296703297e-05, + "loss": 0.2914, + "step": 20772 + }, + { + "epoch": 57.06868131868132, + "grad_norm": 10.834624290466309, + "learning_rate": 2.1465659340659343e-05, + "loss": 0.214, + "step": 20773 + }, + { + "epoch": 57.07142857142857, + "grad_norm": 6.806394577026367, + "learning_rate": 2.1464285714285716e-05, + "loss": 0.1451, + "step": 20774 + }, + { + "epoch": 57.074175824175825, + "grad_norm": 13.806235313415527, + "learning_rate": 2.1462912087912086e-05, + "loss": 0.3055, + "step": 20775 + }, + { + "epoch": 57.07692307692308, + "grad_norm": 18.421863555908203, + "learning_rate": 2.1461538461538463e-05, + "loss": 0.4813, + "step": 20776 + }, + { + "epoch": 57.07967032967033, + "grad_norm": 9.074856758117676, + "learning_rate": 2.1460164835164836e-05, + "loss": 0.1844, + "step": 20777 + }, + { + "epoch": 57.082417582417584, + "grad_norm": 7.8018107414245605, + "learning_rate": 2.145879120879121e-05, + "loss": 0.1797, + "step": 20778 + }, + { + "epoch": 57.08516483516483, + "grad_norm": 8.20492935180664, + "learning_rate": 2.1457417582417583e-05, + "loss": 0.1224, + "step": 20779 + }, + { + "epoch": 57.08791208791209, + "grad_norm": 18.851633071899414, + "learning_rate": 2.1456043956043957e-05, + "loss": 0.3821, + "step": 20780 + }, + { + "epoch": 57.09065934065934, + "grad_norm": 8.287276268005371, + "learning_rate": 2.145467032967033e-05, + "loss": 0.1927, + "step": 20781 + }, + { + "epoch": 57.09340659340659, + "grad_norm": 7.511949062347412, + "learning_rate": 2.1453296703296703e-05, + "loss": 0.1664, + "step": 20782 + }, + { + "epoch": 57.09615384615385, + "grad_norm": 8.057616233825684, + "learning_rate": 2.1451923076923077e-05, + "loss": 0.1884, + "step": 20783 + }, + { + "epoch": 57.0989010989011, + "grad_norm": 9.479972839355469, + "learning_rate": 2.145054945054945e-05, + "loss": 0.2586, + "step": 20784 + }, + { + "epoch": 57.10164835164835, + "grad_norm": 16.855512619018555, + "learning_rate": 2.1449175824175824e-05, + "loss": 0.2995, + "step": 20785 + }, + { + "epoch": 57.104395604395606, + "grad_norm": 23.183429718017578, + "learning_rate": 2.14478021978022e-05, + "loss": 0.2882, + "step": 20786 + }, + { + "epoch": 57.107142857142854, + "grad_norm": 10.1356201171875, + "learning_rate": 2.1446428571428574e-05, + "loss": 0.1492, + "step": 20787 + }, + { + "epoch": 57.10989010989011, + "grad_norm": 9.94720458984375, + "learning_rate": 2.1445054945054947e-05, + "loss": 0.1464, + "step": 20788 + }, + { + "epoch": 57.112637362637365, + "grad_norm": 18.273229598999023, + "learning_rate": 2.144368131868132e-05, + "loss": 0.2222, + "step": 20789 + }, + { + "epoch": 57.11538461538461, + "grad_norm": 10.720489501953125, + "learning_rate": 2.144230769230769e-05, + "loss": 0.1886, + "step": 20790 + }, + { + "epoch": 57.11813186813187, + "grad_norm": 11.868756294250488, + "learning_rate": 2.1440934065934067e-05, + "loss": 0.2297, + "step": 20791 + }, + { + "epoch": 57.120879120879124, + "grad_norm": 9.057994842529297, + "learning_rate": 2.143956043956044e-05, + "loss": 0.2106, + "step": 20792 + }, + { + "epoch": 57.12362637362637, + "grad_norm": 9.274876594543457, + "learning_rate": 2.1438186813186814e-05, + "loss": 0.181, + "step": 20793 + }, + { + "epoch": 57.12637362637363, + "grad_norm": 16.873607635498047, + "learning_rate": 2.1436813186813188e-05, + "loss": 0.3047, + "step": 20794 + }, + { + "epoch": 57.129120879120876, + "grad_norm": 2.5872321128845215, + "learning_rate": 2.143543956043956e-05, + "loss": 0.0484, + "step": 20795 + }, + { + "epoch": 57.13186813186813, + "grad_norm": 10.108600616455078, + "learning_rate": 2.1434065934065935e-05, + "loss": 0.1854, + "step": 20796 + }, + { + "epoch": 57.13461538461539, + "grad_norm": 21.855417251586914, + "learning_rate": 2.1432692307692308e-05, + "loss": 0.8066, + "step": 20797 + }, + { + "epoch": 57.137362637362635, + "grad_norm": 7.92315673828125, + "learning_rate": 2.143131868131868e-05, + "loss": 0.0866, + "step": 20798 + }, + { + "epoch": 57.14010989010989, + "grad_norm": 27.281532287597656, + "learning_rate": 2.1429945054945055e-05, + "loss": 0.7462, + "step": 20799 + }, + { + "epoch": 57.142857142857146, + "grad_norm": 22.645427703857422, + "learning_rate": 2.1428571428571428e-05, + "loss": 0.6758, + "step": 20800 + }, + { + "epoch": 57.145604395604394, + "grad_norm": 14.389395713806152, + "learning_rate": 2.1427197802197805e-05, + "loss": 0.4036, + "step": 20801 + }, + { + "epoch": 57.14835164835165, + "grad_norm": 11.832274436950684, + "learning_rate": 2.142582417582418e-05, + "loss": 0.2403, + "step": 20802 + }, + { + "epoch": 57.1510989010989, + "grad_norm": 7.064041614532471, + "learning_rate": 2.1424450549450552e-05, + "loss": 0.1411, + "step": 20803 + }, + { + "epoch": 57.15384615384615, + "grad_norm": 2.899568796157837, + "learning_rate": 2.1423076923076925e-05, + "loss": 0.0514, + "step": 20804 + }, + { + "epoch": 57.15659340659341, + "grad_norm": 10.817153930664062, + "learning_rate": 2.1421703296703295e-05, + "loss": 0.1777, + "step": 20805 + }, + { + "epoch": 57.15934065934066, + "grad_norm": 18.302978515625, + "learning_rate": 2.1420329670329672e-05, + "loss": 0.2792, + "step": 20806 + }, + { + "epoch": 57.16208791208791, + "grad_norm": 18.55962562561035, + "learning_rate": 2.1418956043956045e-05, + "loss": 0.3826, + "step": 20807 + }, + { + "epoch": 57.16483516483517, + "grad_norm": 12.485085487365723, + "learning_rate": 2.141758241758242e-05, + "loss": 0.2153, + "step": 20808 + }, + { + "epoch": 57.167582417582416, + "grad_norm": 17.484590530395508, + "learning_rate": 2.1416208791208792e-05, + "loss": 0.4065, + "step": 20809 + }, + { + "epoch": 57.17032967032967, + "grad_norm": 20.494775772094727, + "learning_rate": 2.1414835164835166e-05, + "loss": 0.3933, + "step": 20810 + }, + { + "epoch": 57.17307692307692, + "grad_norm": 6.0118913650512695, + "learning_rate": 2.141346153846154e-05, + "loss": 0.1235, + "step": 20811 + }, + { + "epoch": 57.175824175824175, + "grad_norm": 6.9983015060424805, + "learning_rate": 2.1412087912087912e-05, + "loss": 0.1106, + "step": 20812 + }, + { + "epoch": 57.17857142857143, + "grad_norm": 8.274529457092285, + "learning_rate": 2.1410714285714286e-05, + "loss": 0.112, + "step": 20813 + }, + { + "epoch": 57.18131868131868, + "grad_norm": 10.729879379272461, + "learning_rate": 2.140934065934066e-05, + "loss": 0.2568, + "step": 20814 + }, + { + "epoch": 57.184065934065934, + "grad_norm": 17.546628952026367, + "learning_rate": 2.1407967032967033e-05, + "loss": 0.3269, + "step": 20815 + }, + { + "epoch": 57.18681318681319, + "grad_norm": 10.592278480529785, + "learning_rate": 2.140659340659341e-05, + "loss": 0.1296, + "step": 20816 + }, + { + "epoch": 57.18956043956044, + "grad_norm": 14.056941032409668, + "learning_rate": 2.1405219780219783e-05, + "loss": 0.2269, + "step": 20817 + }, + { + "epoch": 57.19230769230769, + "grad_norm": 8.98094654083252, + "learning_rate": 2.1403846153846156e-05, + "loss": 0.208, + "step": 20818 + }, + { + "epoch": 57.19505494505494, + "grad_norm": 16.377887725830078, + "learning_rate": 2.1402472527472526e-05, + "loss": 0.1832, + "step": 20819 + }, + { + "epoch": 57.1978021978022, + "grad_norm": 15.511544227600098, + "learning_rate": 2.14010989010989e-05, + "loss": 0.3179, + "step": 20820 + }, + { + "epoch": 57.20054945054945, + "grad_norm": 8.48464584350586, + "learning_rate": 2.1399725274725276e-05, + "loss": 0.1185, + "step": 20821 + }, + { + "epoch": 57.2032967032967, + "grad_norm": 15.09656810760498, + "learning_rate": 2.139835164835165e-05, + "loss": 0.541, + "step": 20822 + }, + { + "epoch": 57.206043956043956, + "grad_norm": 6.563318252563477, + "learning_rate": 2.1396978021978023e-05, + "loss": 0.1205, + "step": 20823 + }, + { + "epoch": 57.20879120879121, + "grad_norm": 9.490930557250977, + "learning_rate": 2.1395604395604397e-05, + "loss": 0.1903, + "step": 20824 + }, + { + "epoch": 57.21153846153846, + "grad_norm": 13.834318161010742, + "learning_rate": 2.139423076923077e-05, + "loss": 0.2325, + "step": 20825 + }, + { + "epoch": 57.214285714285715, + "grad_norm": 7.155730247497559, + "learning_rate": 2.1392857142857143e-05, + "loss": 0.1166, + "step": 20826 + }, + { + "epoch": 57.217032967032964, + "grad_norm": 9.46716022491455, + "learning_rate": 2.1391483516483517e-05, + "loss": 0.2006, + "step": 20827 + }, + { + "epoch": 57.21978021978022, + "grad_norm": 3.195300340652466, + "learning_rate": 2.139010989010989e-05, + "loss": 0.043, + "step": 20828 + }, + { + "epoch": 57.222527472527474, + "grad_norm": 8.66077709197998, + "learning_rate": 2.1388736263736264e-05, + "loss": 0.1281, + "step": 20829 + }, + { + "epoch": 57.22527472527472, + "grad_norm": 14.167412757873535, + "learning_rate": 2.1387362637362637e-05, + "loss": 0.2265, + "step": 20830 + }, + { + "epoch": 57.22802197802198, + "grad_norm": 9.589435577392578, + "learning_rate": 2.1385989010989014e-05, + "loss": 0.1331, + "step": 20831 + }, + { + "epoch": 57.23076923076923, + "grad_norm": 16.104190826416016, + "learning_rate": 2.1384615384615387e-05, + "loss": 0.2383, + "step": 20832 + }, + { + "epoch": 57.23351648351648, + "grad_norm": 11.137697219848633, + "learning_rate": 2.138324175824176e-05, + "loss": 0.2123, + "step": 20833 + }, + { + "epoch": 57.23626373626374, + "grad_norm": 13.386996269226074, + "learning_rate": 2.138186813186813e-05, + "loss": 0.2771, + "step": 20834 + }, + { + "epoch": 57.239010989010985, + "grad_norm": 4.8798089027404785, + "learning_rate": 2.1380494505494504e-05, + "loss": 0.0528, + "step": 20835 + }, + { + "epoch": 57.24175824175824, + "grad_norm": 8.488372802734375, + "learning_rate": 2.137912087912088e-05, + "loss": 0.166, + "step": 20836 + }, + { + "epoch": 57.244505494505496, + "grad_norm": 15.466032028198242, + "learning_rate": 2.1377747252747254e-05, + "loss": 0.2272, + "step": 20837 + }, + { + "epoch": 57.247252747252745, + "grad_norm": 16.3323917388916, + "learning_rate": 2.1376373626373628e-05, + "loss": 0.3391, + "step": 20838 + }, + { + "epoch": 57.25, + "grad_norm": 15.137934684753418, + "learning_rate": 2.1375e-05, + "loss": 0.269, + "step": 20839 + }, + { + "epoch": 57.252747252747255, + "grad_norm": 16.2868709564209, + "learning_rate": 2.1373626373626375e-05, + "loss": 0.2189, + "step": 20840 + }, + { + "epoch": 57.255494505494504, + "grad_norm": 21.885478973388672, + "learning_rate": 2.1372252747252748e-05, + "loss": 0.3084, + "step": 20841 + }, + { + "epoch": 57.25824175824176, + "grad_norm": 19.137619018554688, + "learning_rate": 2.137087912087912e-05, + "loss": 0.4407, + "step": 20842 + }, + { + "epoch": 57.260989010989015, + "grad_norm": 14.5281343460083, + "learning_rate": 2.1369505494505495e-05, + "loss": 0.5292, + "step": 20843 + }, + { + "epoch": 57.26373626373626, + "grad_norm": 5.70026159286499, + "learning_rate": 2.1368131868131868e-05, + "loss": 0.0704, + "step": 20844 + }, + { + "epoch": 57.26648351648352, + "grad_norm": 6.965985298156738, + "learning_rate": 2.136675824175824e-05, + "loss": 0.1124, + "step": 20845 + }, + { + "epoch": 57.26923076923077, + "grad_norm": 9.558815956115723, + "learning_rate": 2.136538461538462e-05, + "loss": 0.1217, + "step": 20846 + }, + { + "epoch": 57.27197802197802, + "grad_norm": 9.66978931427002, + "learning_rate": 2.1364010989010992e-05, + "loss": 0.1639, + "step": 20847 + }, + { + "epoch": 57.27472527472528, + "grad_norm": 22.155628204345703, + "learning_rate": 2.1362637362637365e-05, + "loss": 0.4152, + "step": 20848 + }, + { + "epoch": 57.277472527472526, + "grad_norm": 4.270803928375244, + "learning_rate": 2.1361263736263735e-05, + "loss": 0.1239, + "step": 20849 + }, + { + "epoch": 57.28021978021978, + "grad_norm": 8.450334548950195, + "learning_rate": 2.135989010989011e-05, + "loss": 0.114, + "step": 20850 + }, + { + "epoch": 57.282967032967036, + "grad_norm": 9.078875541687012, + "learning_rate": 2.1358516483516485e-05, + "loss": 0.2565, + "step": 20851 + }, + { + "epoch": 57.285714285714285, + "grad_norm": 14.989062309265137, + "learning_rate": 2.135714285714286e-05, + "loss": 0.3426, + "step": 20852 + }, + { + "epoch": 57.28846153846154, + "grad_norm": 16.948976516723633, + "learning_rate": 2.1355769230769232e-05, + "loss": 0.3141, + "step": 20853 + }, + { + "epoch": 57.29120879120879, + "grad_norm": 14.014410018920898, + "learning_rate": 2.1354395604395606e-05, + "loss": 0.4612, + "step": 20854 + }, + { + "epoch": 57.293956043956044, + "grad_norm": 23.93646812438965, + "learning_rate": 2.135302197802198e-05, + "loss": 0.7885, + "step": 20855 + }, + { + "epoch": 57.2967032967033, + "grad_norm": 24.605972290039062, + "learning_rate": 2.1351648351648352e-05, + "loss": 0.267, + "step": 20856 + }, + { + "epoch": 57.29945054945055, + "grad_norm": 9.07959270477295, + "learning_rate": 2.1350274725274726e-05, + "loss": 0.2022, + "step": 20857 + }, + { + "epoch": 57.3021978021978, + "grad_norm": 9.075948715209961, + "learning_rate": 2.13489010989011e-05, + "loss": 0.1989, + "step": 20858 + }, + { + "epoch": 57.30494505494506, + "grad_norm": 23.737327575683594, + "learning_rate": 2.1347527472527473e-05, + "loss": 1.2742, + "step": 20859 + }, + { + "epoch": 57.30769230769231, + "grad_norm": 18.71664047241211, + "learning_rate": 2.1346153846153846e-05, + "loss": 0.4152, + "step": 20860 + }, + { + "epoch": 57.31043956043956, + "grad_norm": 11.7527437210083, + "learning_rate": 2.1344780219780223e-05, + "loss": 0.3142, + "step": 20861 + }, + { + "epoch": 57.31318681318681, + "grad_norm": 6.4231977462768555, + "learning_rate": 2.1343406593406596e-05, + "loss": 0.1759, + "step": 20862 + }, + { + "epoch": 57.315934065934066, + "grad_norm": 16.939271926879883, + "learning_rate": 2.134203296703297e-05, + "loss": 0.5073, + "step": 20863 + }, + { + "epoch": 57.31868131868132, + "grad_norm": 12.629474639892578, + "learning_rate": 2.134065934065934e-05, + "loss": 0.378, + "step": 20864 + }, + { + "epoch": 57.32142857142857, + "grad_norm": 7.410614967346191, + "learning_rate": 2.1339285714285713e-05, + "loss": 0.1295, + "step": 20865 + }, + { + "epoch": 57.324175824175825, + "grad_norm": 15.026054382324219, + "learning_rate": 2.133791208791209e-05, + "loss": 0.2251, + "step": 20866 + }, + { + "epoch": 57.32692307692308, + "grad_norm": 10.170732498168945, + "learning_rate": 2.1336538461538463e-05, + "loss": 0.1905, + "step": 20867 + }, + { + "epoch": 57.32967032967033, + "grad_norm": 18.837465286254883, + "learning_rate": 2.1335164835164837e-05, + "loss": 0.8287, + "step": 20868 + }, + { + "epoch": 57.332417582417584, + "grad_norm": 15.979187965393066, + "learning_rate": 2.133379120879121e-05, + "loss": 0.2865, + "step": 20869 + }, + { + "epoch": 57.33516483516483, + "grad_norm": 10.917628288269043, + "learning_rate": 2.1332417582417584e-05, + "loss": 0.2022, + "step": 20870 + }, + { + "epoch": 57.33791208791209, + "grad_norm": 6.586331844329834, + "learning_rate": 2.1331043956043957e-05, + "loss": 0.083, + "step": 20871 + }, + { + "epoch": 57.34065934065934, + "grad_norm": 19.247276306152344, + "learning_rate": 2.132967032967033e-05, + "loss": 0.5461, + "step": 20872 + }, + { + "epoch": 57.34340659340659, + "grad_norm": 4.535218715667725, + "learning_rate": 2.1328296703296704e-05, + "loss": 0.1016, + "step": 20873 + }, + { + "epoch": 57.34615384615385, + "grad_norm": 6.565176486968994, + "learning_rate": 2.1326923076923077e-05, + "loss": 0.1046, + "step": 20874 + }, + { + "epoch": 57.3489010989011, + "grad_norm": 11.764959335327148, + "learning_rate": 2.132554945054945e-05, + "loss": 0.1469, + "step": 20875 + }, + { + "epoch": 57.35164835164835, + "grad_norm": 12.721912384033203, + "learning_rate": 2.1324175824175824e-05, + "loss": 0.1283, + "step": 20876 + }, + { + "epoch": 57.354395604395606, + "grad_norm": 16.48980712890625, + "learning_rate": 2.13228021978022e-05, + "loss": 0.3043, + "step": 20877 + }, + { + "epoch": 57.357142857142854, + "grad_norm": 15.792420387268066, + "learning_rate": 2.1321428571428574e-05, + "loss": 0.4113, + "step": 20878 + }, + { + "epoch": 57.35989010989011, + "grad_norm": 15.769087791442871, + "learning_rate": 2.1320054945054944e-05, + "loss": 0.2319, + "step": 20879 + }, + { + "epoch": 57.362637362637365, + "grad_norm": 18.101726531982422, + "learning_rate": 2.1318681318681318e-05, + "loss": 0.43, + "step": 20880 + }, + { + "epoch": 57.36538461538461, + "grad_norm": 12.477861404418945, + "learning_rate": 2.131730769230769e-05, + "loss": 0.174, + "step": 20881 + }, + { + "epoch": 57.36813186813187, + "grad_norm": 13.948829650878906, + "learning_rate": 2.1315934065934068e-05, + "loss": 0.1405, + "step": 20882 + }, + { + "epoch": 57.370879120879124, + "grad_norm": 6.047832012176514, + "learning_rate": 2.131456043956044e-05, + "loss": 0.0859, + "step": 20883 + }, + { + "epoch": 57.37362637362637, + "grad_norm": 6.184541702270508, + "learning_rate": 2.1313186813186815e-05, + "loss": 0.1207, + "step": 20884 + }, + { + "epoch": 57.37637362637363, + "grad_norm": 14.414044380187988, + "learning_rate": 2.1311813186813188e-05, + "loss": 0.1722, + "step": 20885 + }, + { + "epoch": 57.379120879120876, + "grad_norm": 8.398429870605469, + "learning_rate": 2.131043956043956e-05, + "loss": 0.1272, + "step": 20886 + }, + { + "epoch": 57.38186813186813, + "grad_norm": 10.847068786621094, + "learning_rate": 2.1309065934065935e-05, + "loss": 0.2125, + "step": 20887 + }, + { + "epoch": 57.38461538461539, + "grad_norm": 13.914730072021484, + "learning_rate": 2.1307692307692308e-05, + "loss": 0.1967, + "step": 20888 + }, + { + "epoch": 57.387362637362635, + "grad_norm": 8.715319633483887, + "learning_rate": 2.130631868131868e-05, + "loss": 0.0805, + "step": 20889 + }, + { + "epoch": 57.39010989010989, + "grad_norm": 8.925630569458008, + "learning_rate": 2.1304945054945055e-05, + "loss": 0.2189, + "step": 20890 + }, + { + "epoch": 57.392857142857146, + "grad_norm": 10.760222434997559, + "learning_rate": 2.130357142857143e-05, + "loss": 0.2386, + "step": 20891 + }, + { + "epoch": 57.395604395604394, + "grad_norm": 8.360607147216797, + "learning_rate": 2.1302197802197805e-05, + "loss": 0.1488, + "step": 20892 + }, + { + "epoch": 57.39835164835165, + "grad_norm": 11.070107460021973, + "learning_rate": 2.130082417582418e-05, + "loss": 0.1584, + "step": 20893 + }, + { + "epoch": 57.4010989010989, + "grad_norm": 2.101426124572754, + "learning_rate": 2.129945054945055e-05, + "loss": 0.033, + "step": 20894 + }, + { + "epoch": 57.40384615384615, + "grad_norm": 16.277061462402344, + "learning_rate": 2.1298076923076922e-05, + "loss": 0.3319, + "step": 20895 + }, + { + "epoch": 57.40659340659341, + "grad_norm": 15.365156173706055, + "learning_rate": 2.1296703296703296e-05, + "loss": 0.3949, + "step": 20896 + }, + { + "epoch": 57.40934065934066, + "grad_norm": 14.405386924743652, + "learning_rate": 2.1295329670329672e-05, + "loss": 0.3886, + "step": 20897 + }, + { + "epoch": 57.41208791208791, + "grad_norm": 8.615596771240234, + "learning_rate": 2.1293956043956046e-05, + "loss": 0.2143, + "step": 20898 + }, + { + "epoch": 57.41483516483517, + "grad_norm": 8.584681510925293, + "learning_rate": 2.129258241758242e-05, + "loss": 0.1671, + "step": 20899 + }, + { + "epoch": 57.417582417582416, + "grad_norm": 14.397744178771973, + "learning_rate": 2.1291208791208793e-05, + "loss": 0.4101, + "step": 20900 + }, + { + "epoch": 57.42032967032967, + "grad_norm": 13.243278503417969, + "learning_rate": 2.1289835164835166e-05, + "loss": 0.1973, + "step": 20901 + }, + { + "epoch": 57.42307692307692, + "grad_norm": 7.258530616760254, + "learning_rate": 2.128846153846154e-05, + "loss": 0.1538, + "step": 20902 + }, + { + "epoch": 57.425824175824175, + "grad_norm": 17.87214469909668, + "learning_rate": 2.1287087912087913e-05, + "loss": 0.3362, + "step": 20903 + }, + { + "epoch": 57.42857142857143, + "grad_norm": 2.3916077613830566, + "learning_rate": 2.1285714285714286e-05, + "loss": 0.028, + "step": 20904 + }, + { + "epoch": 57.43131868131868, + "grad_norm": 9.660609245300293, + "learning_rate": 2.128434065934066e-05, + "loss": 0.2349, + "step": 20905 + }, + { + "epoch": 57.434065934065934, + "grad_norm": 10.885369300842285, + "learning_rate": 2.1282967032967033e-05, + "loss": 0.231, + "step": 20906 + }, + { + "epoch": 57.43681318681319, + "grad_norm": 17.535385131835938, + "learning_rate": 2.128159340659341e-05, + "loss": 0.4353, + "step": 20907 + }, + { + "epoch": 57.43956043956044, + "grad_norm": 7.021242618560791, + "learning_rate": 2.1280219780219783e-05, + "loss": 0.0723, + "step": 20908 + }, + { + "epoch": 57.44230769230769, + "grad_norm": 14.893985748291016, + "learning_rate": 2.1278846153846153e-05, + "loss": 0.2114, + "step": 20909 + }, + { + "epoch": 57.44505494505494, + "grad_norm": 11.533124923706055, + "learning_rate": 2.1277472527472527e-05, + "loss": 0.3651, + "step": 20910 + }, + { + "epoch": 57.4478021978022, + "grad_norm": 13.020153045654297, + "learning_rate": 2.12760989010989e-05, + "loss": 0.2326, + "step": 20911 + }, + { + "epoch": 57.45054945054945, + "grad_norm": 26.23487091064453, + "learning_rate": 2.1274725274725277e-05, + "loss": 0.9945, + "step": 20912 + }, + { + "epoch": 57.4532967032967, + "grad_norm": 5.830189228057861, + "learning_rate": 2.127335164835165e-05, + "loss": 0.0772, + "step": 20913 + }, + { + "epoch": 57.456043956043956, + "grad_norm": 10.964518547058105, + "learning_rate": 2.1271978021978024e-05, + "loss": 0.3233, + "step": 20914 + }, + { + "epoch": 57.45879120879121, + "grad_norm": 9.09908390045166, + "learning_rate": 2.1270604395604397e-05, + "loss": 0.1727, + "step": 20915 + }, + { + "epoch": 57.46153846153846, + "grad_norm": 4.152475357055664, + "learning_rate": 2.126923076923077e-05, + "loss": 0.0577, + "step": 20916 + }, + { + "epoch": 57.464285714285715, + "grad_norm": 7.175879001617432, + "learning_rate": 2.1267857142857144e-05, + "loss": 0.2295, + "step": 20917 + }, + { + "epoch": 57.467032967032964, + "grad_norm": 13.476211547851562, + "learning_rate": 2.1266483516483517e-05, + "loss": 0.3219, + "step": 20918 + }, + { + "epoch": 57.46978021978022, + "grad_norm": 6.717655181884766, + "learning_rate": 2.126510989010989e-05, + "loss": 0.1283, + "step": 20919 + }, + { + "epoch": 57.472527472527474, + "grad_norm": 10.544707298278809, + "learning_rate": 2.1263736263736264e-05, + "loss": 0.13, + "step": 20920 + }, + { + "epoch": 57.47527472527472, + "grad_norm": 3.3102526664733887, + "learning_rate": 2.1262362637362637e-05, + "loss": 0.0325, + "step": 20921 + }, + { + "epoch": 57.47802197802198, + "grad_norm": 2.8130760192871094, + "learning_rate": 2.1260989010989014e-05, + "loss": 0.052, + "step": 20922 + }, + { + "epoch": 57.48076923076923, + "grad_norm": 18.64915657043457, + "learning_rate": 2.1259615384615388e-05, + "loss": 0.3293, + "step": 20923 + }, + { + "epoch": 57.48351648351648, + "grad_norm": 8.838318824768066, + "learning_rate": 2.1258241758241758e-05, + "loss": 0.2155, + "step": 20924 + }, + { + "epoch": 57.48626373626374, + "grad_norm": 18.65176773071289, + "learning_rate": 2.125686813186813e-05, + "loss": 0.3937, + "step": 20925 + }, + { + "epoch": 57.489010989010985, + "grad_norm": 5.90160608291626, + "learning_rate": 2.1255494505494505e-05, + "loss": 0.1128, + "step": 20926 + }, + { + "epoch": 57.49175824175824, + "grad_norm": 1.6189110279083252, + "learning_rate": 2.125412087912088e-05, + "loss": 0.0192, + "step": 20927 + }, + { + "epoch": 57.494505494505496, + "grad_norm": 2.4261436462402344, + "learning_rate": 2.1252747252747255e-05, + "loss": 0.0498, + "step": 20928 + }, + { + "epoch": 57.497252747252745, + "grad_norm": 7.6263427734375, + "learning_rate": 2.1251373626373628e-05, + "loss": 0.1289, + "step": 20929 + }, + { + "epoch": 57.5, + "grad_norm": 12.635293006896973, + "learning_rate": 2.125e-05, + "loss": 0.2532, + "step": 20930 + }, + { + "epoch": 57.502747252747255, + "grad_norm": 22.545654296875, + "learning_rate": 2.1248626373626375e-05, + "loss": 0.7482, + "step": 20931 + }, + { + "epoch": 57.505494505494504, + "grad_norm": 10.545990943908691, + "learning_rate": 2.124725274725275e-05, + "loss": 0.2168, + "step": 20932 + }, + { + "epoch": 57.50824175824176, + "grad_norm": 6.909135818481445, + "learning_rate": 2.1245879120879122e-05, + "loss": 0.1521, + "step": 20933 + }, + { + "epoch": 57.51098901098901, + "grad_norm": 9.537689208984375, + "learning_rate": 2.1244505494505495e-05, + "loss": 0.1918, + "step": 20934 + }, + { + "epoch": 57.51373626373626, + "grad_norm": 8.232316017150879, + "learning_rate": 2.124313186813187e-05, + "loss": 0.1485, + "step": 20935 + }, + { + "epoch": 57.51648351648352, + "grad_norm": 9.24129581451416, + "learning_rate": 2.1241758241758242e-05, + "loss": 0.1257, + "step": 20936 + }, + { + "epoch": 57.51923076923077, + "grad_norm": 6.205121040344238, + "learning_rate": 2.124038461538462e-05, + "loss": 0.142, + "step": 20937 + }, + { + "epoch": 57.52197802197802, + "grad_norm": 2.3622629642486572, + "learning_rate": 2.1239010989010992e-05, + "loss": 0.0275, + "step": 20938 + }, + { + "epoch": 57.52472527472528, + "grad_norm": 8.651686668395996, + "learning_rate": 2.1237637362637362e-05, + "loss": 0.2361, + "step": 20939 + }, + { + "epoch": 57.527472527472526, + "grad_norm": 14.514582633972168, + "learning_rate": 2.1236263736263736e-05, + "loss": 0.3058, + "step": 20940 + }, + { + "epoch": 57.53021978021978, + "grad_norm": 12.160916328430176, + "learning_rate": 2.123489010989011e-05, + "loss": 0.2143, + "step": 20941 + }, + { + "epoch": 57.532967032967036, + "grad_norm": 9.06743335723877, + "learning_rate": 2.1233516483516486e-05, + "loss": 0.1034, + "step": 20942 + }, + { + "epoch": 57.535714285714285, + "grad_norm": 14.455875396728516, + "learning_rate": 2.123214285714286e-05, + "loss": 0.3151, + "step": 20943 + }, + { + "epoch": 57.53846153846154, + "grad_norm": 9.270553588867188, + "learning_rate": 2.1230769230769233e-05, + "loss": 0.1876, + "step": 20944 + }, + { + "epoch": 57.54120879120879, + "grad_norm": 14.200007438659668, + "learning_rate": 2.1229395604395606e-05, + "loss": 0.262, + "step": 20945 + }, + { + "epoch": 57.543956043956044, + "grad_norm": 11.36989688873291, + "learning_rate": 2.122802197802198e-05, + "loss": 0.1471, + "step": 20946 + }, + { + "epoch": 57.5467032967033, + "grad_norm": 5.266659259796143, + "learning_rate": 2.1226648351648353e-05, + "loss": 0.0496, + "step": 20947 + }, + { + "epoch": 57.54945054945055, + "grad_norm": 14.648004531860352, + "learning_rate": 2.1225274725274726e-05, + "loss": 0.3737, + "step": 20948 + }, + { + "epoch": 57.5521978021978, + "grad_norm": 22.34910774230957, + "learning_rate": 2.12239010989011e-05, + "loss": 0.561, + "step": 20949 + }, + { + "epoch": 57.55494505494506, + "grad_norm": 5.14698600769043, + "learning_rate": 2.1222527472527473e-05, + "loss": 0.0739, + "step": 20950 + }, + { + "epoch": 57.55769230769231, + "grad_norm": 7.767971515655518, + "learning_rate": 2.1221153846153846e-05, + "loss": 0.1173, + "step": 20951 + }, + { + "epoch": 57.56043956043956, + "grad_norm": 9.820987701416016, + "learning_rate": 2.1219780219780223e-05, + "loss": 0.0876, + "step": 20952 + }, + { + "epoch": 57.56318681318681, + "grad_norm": 11.860146522521973, + "learning_rate": 2.1218406593406597e-05, + "loss": 0.1768, + "step": 20953 + }, + { + "epoch": 57.565934065934066, + "grad_norm": 14.5281343460083, + "learning_rate": 2.1217032967032967e-05, + "loss": 0.4394, + "step": 20954 + }, + { + "epoch": 57.56868131868132, + "grad_norm": 8.84384536743164, + "learning_rate": 2.121565934065934e-05, + "loss": 0.1657, + "step": 20955 + }, + { + "epoch": 57.57142857142857, + "grad_norm": 8.111128807067871, + "learning_rate": 2.1214285714285713e-05, + "loss": 0.2772, + "step": 20956 + }, + { + "epoch": 57.574175824175825, + "grad_norm": 17.523324966430664, + "learning_rate": 2.121291208791209e-05, + "loss": 0.4318, + "step": 20957 + }, + { + "epoch": 57.57692307692308, + "grad_norm": 8.298068046569824, + "learning_rate": 2.1211538461538464e-05, + "loss": 0.1109, + "step": 20958 + }, + { + "epoch": 57.57967032967033, + "grad_norm": 12.053971290588379, + "learning_rate": 2.1210164835164837e-05, + "loss": 0.171, + "step": 20959 + }, + { + "epoch": 57.582417582417584, + "grad_norm": 17.444843292236328, + "learning_rate": 2.120879120879121e-05, + "loss": 0.2644, + "step": 20960 + }, + { + "epoch": 57.58516483516483, + "grad_norm": 1.180528998374939, + "learning_rate": 2.1207417582417584e-05, + "loss": 0.015, + "step": 20961 + }, + { + "epoch": 57.58791208791209, + "grad_norm": 19.672874450683594, + "learning_rate": 2.1206043956043957e-05, + "loss": 0.402, + "step": 20962 + }, + { + "epoch": 57.59065934065934, + "grad_norm": 15.219849586486816, + "learning_rate": 2.120467032967033e-05, + "loss": 0.3756, + "step": 20963 + }, + { + "epoch": 57.59340659340659, + "grad_norm": 10.997857093811035, + "learning_rate": 2.1203296703296704e-05, + "loss": 0.204, + "step": 20964 + }, + { + "epoch": 57.59615384615385, + "grad_norm": 3.0839684009552, + "learning_rate": 2.1201923076923078e-05, + "loss": 0.0495, + "step": 20965 + }, + { + "epoch": 57.5989010989011, + "grad_norm": 9.1471586227417, + "learning_rate": 2.120054945054945e-05, + "loss": 0.1135, + "step": 20966 + }, + { + "epoch": 57.60164835164835, + "grad_norm": 5.283059597015381, + "learning_rate": 2.1199175824175828e-05, + "loss": 0.0514, + "step": 20967 + }, + { + "epoch": 57.604395604395606, + "grad_norm": 10.300551414489746, + "learning_rate": 2.11978021978022e-05, + "loss": 0.2109, + "step": 20968 + }, + { + "epoch": 57.607142857142854, + "grad_norm": 12.415493965148926, + "learning_rate": 2.119642857142857e-05, + "loss": 0.1781, + "step": 20969 + }, + { + "epoch": 57.60989010989011, + "grad_norm": 10.152070999145508, + "learning_rate": 2.1195054945054945e-05, + "loss": 0.0993, + "step": 20970 + }, + { + "epoch": 57.612637362637365, + "grad_norm": 10.008942604064941, + "learning_rate": 2.1193681318681318e-05, + "loss": 0.1492, + "step": 20971 + }, + { + "epoch": 57.61538461538461, + "grad_norm": 8.782607078552246, + "learning_rate": 2.1192307692307695e-05, + "loss": 0.2558, + "step": 20972 + }, + { + "epoch": 57.61813186813187, + "grad_norm": 3.266016721725464, + "learning_rate": 2.1190934065934068e-05, + "loss": 0.0421, + "step": 20973 + }, + { + "epoch": 57.620879120879124, + "grad_norm": 22.737464904785156, + "learning_rate": 2.118956043956044e-05, + "loss": 0.5637, + "step": 20974 + }, + { + "epoch": 57.62362637362637, + "grad_norm": 8.804353713989258, + "learning_rate": 2.1188186813186815e-05, + "loss": 0.2238, + "step": 20975 + }, + { + "epoch": 57.62637362637363, + "grad_norm": 15.437352180480957, + "learning_rate": 2.118681318681319e-05, + "loss": 0.558, + "step": 20976 + }, + { + "epoch": 57.629120879120876, + "grad_norm": 7.593869209289551, + "learning_rate": 2.1185439560439562e-05, + "loss": 0.0867, + "step": 20977 + }, + { + "epoch": 57.63186813186813, + "grad_norm": 7.771308422088623, + "learning_rate": 2.1184065934065935e-05, + "loss": 0.0815, + "step": 20978 + }, + { + "epoch": 57.63461538461539, + "grad_norm": 8.592826843261719, + "learning_rate": 2.118269230769231e-05, + "loss": 0.1268, + "step": 20979 + }, + { + "epoch": 57.637362637362635, + "grad_norm": 26.628929138183594, + "learning_rate": 2.1181318681318682e-05, + "loss": 0.5547, + "step": 20980 + }, + { + "epoch": 57.64010989010989, + "grad_norm": 8.79945182800293, + "learning_rate": 2.1179945054945055e-05, + "loss": 0.1373, + "step": 20981 + }, + { + "epoch": 57.642857142857146, + "grad_norm": 16.536508560180664, + "learning_rate": 2.1178571428571432e-05, + "loss": 0.3888, + "step": 20982 + }, + { + "epoch": 57.645604395604394, + "grad_norm": 13.373210906982422, + "learning_rate": 2.1177197802197806e-05, + "loss": 0.2384, + "step": 20983 + }, + { + "epoch": 57.64835164835165, + "grad_norm": 23.441329956054688, + "learning_rate": 2.1175824175824176e-05, + "loss": 0.5701, + "step": 20984 + }, + { + "epoch": 57.6510989010989, + "grad_norm": 3.2624752521514893, + "learning_rate": 2.117445054945055e-05, + "loss": 0.0555, + "step": 20985 + }, + { + "epoch": 57.65384615384615, + "grad_norm": 17.839214324951172, + "learning_rate": 2.1173076923076922e-05, + "loss": 0.3188, + "step": 20986 + }, + { + "epoch": 57.65659340659341, + "grad_norm": 22.601604461669922, + "learning_rate": 2.1171703296703296e-05, + "loss": 0.3883, + "step": 20987 + }, + { + "epoch": 57.65934065934066, + "grad_norm": 7.891208648681641, + "learning_rate": 2.1170329670329673e-05, + "loss": 0.138, + "step": 20988 + }, + { + "epoch": 57.66208791208791, + "grad_norm": 20.9085693359375, + "learning_rate": 2.1168956043956046e-05, + "loss": 0.5732, + "step": 20989 + }, + { + "epoch": 57.66483516483517, + "grad_norm": 4.202314853668213, + "learning_rate": 2.116758241758242e-05, + "loss": 0.054, + "step": 20990 + }, + { + "epoch": 57.667582417582416, + "grad_norm": 11.922215461730957, + "learning_rate": 2.1166208791208793e-05, + "loss": 0.1687, + "step": 20991 + }, + { + "epoch": 57.67032967032967, + "grad_norm": 12.544877052307129, + "learning_rate": 2.1164835164835163e-05, + "loss": 0.3428, + "step": 20992 + }, + { + "epoch": 57.67307692307692, + "grad_norm": 7.992005348205566, + "learning_rate": 2.116346153846154e-05, + "loss": 0.1334, + "step": 20993 + }, + { + "epoch": 57.675824175824175, + "grad_norm": 18.485458374023438, + "learning_rate": 2.1162087912087913e-05, + "loss": 0.3859, + "step": 20994 + }, + { + "epoch": 57.67857142857143, + "grad_norm": 16.71990203857422, + "learning_rate": 2.1160714285714287e-05, + "loss": 0.4367, + "step": 20995 + }, + { + "epoch": 57.68131868131868, + "grad_norm": 9.109784126281738, + "learning_rate": 2.115934065934066e-05, + "loss": 0.149, + "step": 20996 + }, + { + "epoch": 57.684065934065934, + "grad_norm": 8.609833717346191, + "learning_rate": 2.1157967032967033e-05, + "loss": 0.2498, + "step": 20997 + }, + { + "epoch": 57.68681318681319, + "grad_norm": 6.419027805328369, + "learning_rate": 2.115659340659341e-05, + "loss": 0.2065, + "step": 20998 + }, + { + "epoch": 57.68956043956044, + "grad_norm": 14.347342491149902, + "learning_rate": 2.115521978021978e-05, + "loss": 0.2501, + "step": 20999 + }, + { + "epoch": 57.69230769230769, + "grad_norm": 8.560737609863281, + "learning_rate": 2.1153846153846154e-05, + "loss": 0.1828, + "step": 21000 + }, + { + "epoch": 57.69505494505494, + "grad_norm": 10.749784469604492, + "learning_rate": 2.1152472527472527e-05, + "loss": 0.0977, + "step": 21001 + }, + { + "epoch": 57.6978021978022, + "grad_norm": 13.592774391174316, + "learning_rate": 2.11510989010989e-05, + "loss": 0.3004, + "step": 21002 + }, + { + "epoch": 57.70054945054945, + "grad_norm": 14.755025863647461, + "learning_rate": 2.1149725274725277e-05, + "loss": 0.1921, + "step": 21003 + }, + { + "epoch": 57.7032967032967, + "grad_norm": 3.652935743331909, + "learning_rate": 2.114835164835165e-05, + "loss": 0.0534, + "step": 21004 + }, + { + "epoch": 57.706043956043956, + "grad_norm": 14.788104057312012, + "learning_rate": 2.1146978021978024e-05, + "loss": 0.4871, + "step": 21005 + }, + { + "epoch": 57.70879120879121, + "grad_norm": 6.71041202545166, + "learning_rate": 2.1145604395604397e-05, + "loss": 0.0671, + "step": 21006 + }, + { + "epoch": 57.71153846153846, + "grad_norm": 6.074024200439453, + "learning_rate": 2.1144230769230767e-05, + "loss": 0.079, + "step": 21007 + }, + { + "epoch": 57.714285714285715, + "grad_norm": 6.453563690185547, + "learning_rate": 2.1142857142857144e-05, + "loss": 0.1288, + "step": 21008 + }, + { + "epoch": 57.717032967032964, + "grad_norm": 11.016422271728516, + "learning_rate": 2.1141483516483518e-05, + "loss": 0.1796, + "step": 21009 + }, + { + "epoch": 57.71978021978022, + "grad_norm": 6.638143539428711, + "learning_rate": 2.114010989010989e-05, + "loss": 0.0996, + "step": 21010 + }, + { + "epoch": 57.722527472527474, + "grad_norm": 8.69453239440918, + "learning_rate": 2.1138736263736264e-05, + "loss": 0.1903, + "step": 21011 + }, + { + "epoch": 57.72527472527472, + "grad_norm": 1.2548803091049194, + "learning_rate": 2.1137362637362638e-05, + "loss": 0.0176, + "step": 21012 + }, + { + "epoch": 57.72802197802198, + "grad_norm": 4.196864128112793, + "learning_rate": 2.1135989010989015e-05, + "loss": 0.0699, + "step": 21013 + }, + { + "epoch": 57.73076923076923, + "grad_norm": 27.933094024658203, + "learning_rate": 2.1134615384615385e-05, + "loss": 0.6673, + "step": 21014 + }, + { + "epoch": 57.73351648351648, + "grad_norm": 7.960967540740967, + "learning_rate": 2.1133241758241758e-05, + "loss": 0.1105, + "step": 21015 + }, + { + "epoch": 57.73626373626374, + "grad_norm": 11.238478660583496, + "learning_rate": 2.113186813186813e-05, + "loss": 0.3451, + "step": 21016 + }, + { + "epoch": 57.73901098901099, + "grad_norm": 13.621893882751465, + "learning_rate": 2.1130494505494505e-05, + "loss": 0.2768, + "step": 21017 + }, + { + "epoch": 57.74175824175824, + "grad_norm": 3.4428136348724365, + "learning_rate": 2.112912087912088e-05, + "loss": 0.0564, + "step": 21018 + }, + { + "epoch": 57.744505494505496, + "grad_norm": 19.98436164855957, + "learning_rate": 2.1127747252747255e-05, + "loss": 0.7007, + "step": 21019 + }, + { + "epoch": 57.747252747252745, + "grad_norm": 14.149833679199219, + "learning_rate": 2.112637362637363e-05, + "loss": 0.3699, + "step": 21020 + }, + { + "epoch": 57.75, + "grad_norm": 7.745893478393555, + "learning_rate": 2.1125000000000002e-05, + "loss": 0.0958, + "step": 21021 + }, + { + "epoch": 57.752747252747255, + "grad_norm": 11.775774955749512, + "learning_rate": 2.1123626373626372e-05, + "loss": 0.2157, + "step": 21022 + }, + { + "epoch": 57.755494505494504, + "grad_norm": 20.9141788482666, + "learning_rate": 2.112225274725275e-05, + "loss": 0.4891, + "step": 21023 + }, + { + "epoch": 57.75824175824176, + "grad_norm": 21.7970027923584, + "learning_rate": 2.1120879120879122e-05, + "loss": 0.4995, + "step": 21024 + }, + { + "epoch": 57.76098901098901, + "grad_norm": 7.4752888679504395, + "learning_rate": 2.1119505494505496e-05, + "loss": 0.1509, + "step": 21025 + }, + { + "epoch": 57.76373626373626, + "grad_norm": 28.1782283782959, + "learning_rate": 2.111813186813187e-05, + "loss": 0.9879, + "step": 21026 + }, + { + "epoch": 57.76648351648352, + "grad_norm": 6.265590667724609, + "learning_rate": 2.1116758241758242e-05, + "loss": 0.1251, + "step": 21027 + }, + { + "epoch": 57.76923076923077, + "grad_norm": 14.329267501831055, + "learning_rate": 2.111538461538462e-05, + "loss": 0.6382, + "step": 21028 + }, + { + "epoch": 57.77197802197802, + "grad_norm": 8.386813163757324, + "learning_rate": 2.111401098901099e-05, + "loss": 0.1754, + "step": 21029 + }, + { + "epoch": 57.77472527472528, + "grad_norm": 2.4481875896453857, + "learning_rate": 2.1112637362637363e-05, + "loss": 0.045, + "step": 21030 + }, + { + "epoch": 57.777472527472526, + "grad_norm": 11.986095428466797, + "learning_rate": 2.1111263736263736e-05, + "loss": 0.224, + "step": 21031 + }, + { + "epoch": 57.78021978021978, + "grad_norm": 12.254015922546387, + "learning_rate": 2.110989010989011e-05, + "loss": 0.1518, + "step": 21032 + }, + { + "epoch": 57.782967032967036, + "grad_norm": 7.2396416664123535, + "learning_rate": 2.1108516483516486e-05, + "loss": 0.1242, + "step": 21033 + }, + { + "epoch": 57.785714285714285, + "grad_norm": 45.88474655151367, + "learning_rate": 2.110714285714286e-05, + "loss": 0.7918, + "step": 21034 + }, + { + "epoch": 57.78846153846154, + "grad_norm": 13.153777122497559, + "learning_rate": 2.1105769230769233e-05, + "loss": 0.2853, + "step": 21035 + }, + { + "epoch": 57.79120879120879, + "grad_norm": 17.461339950561523, + "learning_rate": 2.1104395604395606e-05, + "loss": 0.3255, + "step": 21036 + }, + { + "epoch": 57.793956043956044, + "grad_norm": 15.880232810974121, + "learning_rate": 2.1103021978021976e-05, + "loss": 0.5155, + "step": 21037 + }, + { + "epoch": 57.7967032967033, + "grad_norm": 6.191928863525391, + "learning_rate": 2.1101648351648353e-05, + "loss": 0.0836, + "step": 21038 + }, + { + "epoch": 57.79945054945055, + "grad_norm": 14.241604804992676, + "learning_rate": 2.1100274725274727e-05, + "loss": 0.2261, + "step": 21039 + }, + { + "epoch": 57.8021978021978, + "grad_norm": 12.539011001586914, + "learning_rate": 2.10989010989011e-05, + "loss": 0.1853, + "step": 21040 + }, + { + "epoch": 57.80494505494506, + "grad_norm": 19.49034309387207, + "learning_rate": 2.1097527472527473e-05, + "loss": 0.3394, + "step": 21041 + }, + { + "epoch": 57.80769230769231, + "grad_norm": 4.727754592895508, + "learning_rate": 2.1096153846153847e-05, + "loss": 0.077, + "step": 21042 + }, + { + "epoch": 57.81043956043956, + "grad_norm": 14.293152809143066, + "learning_rate": 2.109478021978022e-05, + "loss": 0.2401, + "step": 21043 + }, + { + "epoch": 57.81318681318681, + "grad_norm": 19.990921020507812, + "learning_rate": 2.1093406593406594e-05, + "loss": 0.6515, + "step": 21044 + }, + { + "epoch": 57.815934065934066, + "grad_norm": 11.147936820983887, + "learning_rate": 2.1092032967032967e-05, + "loss": 0.2402, + "step": 21045 + }, + { + "epoch": 57.81868131868132, + "grad_norm": 7.9694623947143555, + "learning_rate": 2.109065934065934e-05, + "loss": 0.1793, + "step": 21046 + }, + { + "epoch": 57.82142857142857, + "grad_norm": 27.491413116455078, + "learning_rate": 2.1089285714285714e-05, + "loss": 0.9939, + "step": 21047 + }, + { + "epoch": 57.824175824175825, + "grad_norm": 6.606257438659668, + "learning_rate": 2.108791208791209e-05, + "loss": 0.1211, + "step": 21048 + }, + { + "epoch": 57.82692307692308, + "grad_norm": 3.6981914043426514, + "learning_rate": 2.1086538461538464e-05, + "loss": 0.0728, + "step": 21049 + }, + { + "epoch": 57.82967032967033, + "grad_norm": 18.284997940063477, + "learning_rate": 2.1085164835164837e-05, + "loss": 0.5731, + "step": 21050 + }, + { + "epoch": 57.832417582417584, + "grad_norm": 10.776679039001465, + "learning_rate": 2.108379120879121e-05, + "loss": 0.1974, + "step": 21051 + }, + { + "epoch": 57.83516483516483, + "grad_norm": 14.756275177001953, + "learning_rate": 2.108241758241758e-05, + "loss": 0.4366, + "step": 21052 + }, + { + "epoch": 57.83791208791209, + "grad_norm": 14.581184387207031, + "learning_rate": 2.1081043956043958e-05, + "loss": 0.3393, + "step": 21053 + }, + { + "epoch": 57.84065934065934, + "grad_norm": 8.093246459960938, + "learning_rate": 2.107967032967033e-05, + "loss": 0.218, + "step": 21054 + }, + { + "epoch": 57.84340659340659, + "grad_norm": 5.440769672393799, + "learning_rate": 2.1078296703296704e-05, + "loss": 0.0927, + "step": 21055 + }, + { + "epoch": 57.84615384615385, + "grad_norm": 13.506505966186523, + "learning_rate": 2.1076923076923078e-05, + "loss": 0.3112, + "step": 21056 + }, + { + "epoch": 57.8489010989011, + "grad_norm": 15.784388542175293, + "learning_rate": 2.107554945054945e-05, + "loss": 0.3744, + "step": 21057 + }, + { + "epoch": 57.85164835164835, + "grad_norm": 10.148333549499512, + "learning_rate": 2.1074175824175825e-05, + "loss": 0.2527, + "step": 21058 + }, + { + "epoch": 57.854395604395606, + "grad_norm": 18.16516876220703, + "learning_rate": 2.1072802197802198e-05, + "loss": 0.6156, + "step": 21059 + }, + { + "epoch": 57.857142857142854, + "grad_norm": 16.66836166381836, + "learning_rate": 2.107142857142857e-05, + "loss": 0.4171, + "step": 21060 + }, + { + "epoch": 57.85989010989011, + "grad_norm": 15.180834770202637, + "learning_rate": 2.1070054945054945e-05, + "loss": 0.1616, + "step": 21061 + }, + { + "epoch": 57.862637362637365, + "grad_norm": 12.825955390930176, + "learning_rate": 2.106868131868132e-05, + "loss": 0.2041, + "step": 21062 + }, + { + "epoch": 57.86538461538461, + "grad_norm": 6.078322887420654, + "learning_rate": 2.1067307692307695e-05, + "loss": 0.0917, + "step": 21063 + }, + { + "epoch": 57.86813186813187, + "grad_norm": 12.430840492248535, + "learning_rate": 2.106593406593407e-05, + "loss": 0.2996, + "step": 21064 + }, + { + "epoch": 57.870879120879124, + "grad_norm": 8.38741397857666, + "learning_rate": 2.1064560439560442e-05, + "loss": 0.21, + "step": 21065 + }, + { + "epoch": 57.87362637362637, + "grad_norm": 8.492217063903809, + "learning_rate": 2.1063186813186815e-05, + "loss": 0.1175, + "step": 21066 + }, + { + "epoch": 57.87637362637363, + "grad_norm": 6.922939777374268, + "learning_rate": 2.1061813186813185e-05, + "loss": 0.1649, + "step": 21067 + }, + { + "epoch": 57.879120879120876, + "grad_norm": 5.739817142486572, + "learning_rate": 2.1060439560439562e-05, + "loss": 0.127, + "step": 21068 + }, + { + "epoch": 57.88186813186813, + "grad_norm": 10.640296936035156, + "learning_rate": 2.1059065934065936e-05, + "loss": 0.1578, + "step": 21069 + }, + { + "epoch": 57.88461538461539, + "grad_norm": 17.702123641967773, + "learning_rate": 2.105769230769231e-05, + "loss": 0.4862, + "step": 21070 + }, + { + "epoch": 57.887362637362635, + "grad_norm": 24.323884963989258, + "learning_rate": 2.1056318681318682e-05, + "loss": 0.6163, + "step": 21071 + }, + { + "epoch": 57.89010989010989, + "grad_norm": 6.07949161529541, + "learning_rate": 2.1054945054945056e-05, + "loss": 0.0625, + "step": 21072 + }, + { + "epoch": 57.892857142857146, + "grad_norm": 12.255069732666016, + "learning_rate": 2.105357142857143e-05, + "loss": 0.1127, + "step": 21073 + }, + { + "epoch": 57.895604395604394, + "grad_norm": 14.592077255249023, + "learning_rate": 2.1052197802197803e-05, + "loss": 0.3337, + "step": 21074 + }, + { + "epoch": 57.89835164835165, + "grad_norm": 12.733562469482422, + "learning_rate": 2.1050824175824176e-05, + "loss": 0.2112, + "step": 21075 + }, + { + "epoch": 57.9010989010989, + "grad_norm": 11.246756553649902, + "learning_rate": 2.104945054945055e-05, + "loss": 0.4709, + "step": 21076 + }, + { + "epoch": 57.90384615384615, + "grad_norm": 1.414878487586975, + "learning_rate": 2.1048076923076923e-05, + "loss": 0.0163, + "step": 21077 + }, + { + "epoch": 57.90659340659341, + "grad_norm": 6.408726215362549, + "learning_rate": 2.10467032967033e-05, + "loss": 0.1205, + "step": 21078 + }, + { + "epoch": 57.90934065934066, + "grad_norm": 15.965408325195312, + "learning_rate": 2.1045329670329673e-05, + "loss": 0.3724, + "step": 21079 + }, + { + "epoch": 57.91208791208791, + "grad_norm": 8.59010124206543, + "learning_rate": 2.1043956043956046e-05, + "loss": 0.1436, + "step": 21080 + }, + { + "epoch": 57.91483516483517, + "grad_norm": 10.056074142456055, + "learning_rate": 2.104258241758242e-05, + "loss": 0.1756, + "step": 21081 + }, + { + "epoch": 57.917582417582416, + "grad_norm": 8.09095287322998, + "learning_rate": 2.104120879120879e-05, + "loss": 0.0942, + "step": 21082 + }, + { + "epoch": 57.92032967032967, + "grad_norm": 2.2529847621917725, + "learning_rate": 2.1039835164835167e-05, + "loss": 0.0244, + "step": 21083 + }, + { + "epoch": 57.92307692307692, + "grad_norm": 10.783098220825195, + "learning_rate": 2.103846153846154e-05, + "loss": 0.2517, + "step": 21084 + }, + { + "epoch": 57.925824175824175, + "grad_norm": 6.837062358856201, + "learning_rate": 2.1037087912087913e-05, + "loss": 0.1037, + "step": 21085 + }, + { + "epoch": 57.92857142857143, + "grad_norm": 5.265026569366455, + "learning_rate": 2.1035714285714287e-05, + "loss": 0.0873, + "step": 21086 + }, + { + "epoch": 57.93131868131868, + "grad_norm": 13.107745170593262, + "learning_rate": 2.103434065934066e-05, + "loss": 0.2347, + "step": 21087 + }, + { + "epoch": 57.934065934065934, + "grad_norm": 8.964110374450684, + "learning_rate": 2.1032967032967034e-05, + "loss": 0.2396, + "step": 21088 + }, + { + "epoch": 57.93681318681319, + "grad_norm": 16.0654354095459, + "learning_rate": 2.1031593406593407e-05, + "loss": 0.3254, + "step": 21089 + }, + { + "epoch": 57.93956043956044, + "grad_norm": 4.4124016761779785, + "learning_rate": 2.103021978021978e-05, + "loss": 0.0958, + "step": 21090 + }, + { + "epoch": 57.94230769230769, + "grad_norm": 8.522360801696777, + "learning_rate": 2.1028846153846154e-05, + "loss": 0.1516, + "step": 21091 + }, + { + "epoch": 57.94505494505494, + "grad_norm": 2.891094446182251, + "learning_rate": 2.1027472527472527e-05, + "loss": 0.0391, + "step": 21092 + }, + { + "epoch": 57.9478021978022, + "grad_norm": 18.228595733642578, + "learning_rate": 2.1026098901098904e-05, + "loss": 0.4576, + "step": 21093 + }, + { + "epoch": 57.95054945054945, + "grad_norm": 13.35237979888916, + "learning_rate": 2.1024725274725278e-05, + "loss": 0.2896, + "step": 21094 + }, + { + "epoch": 57.9532967032967, + "grad_norm": 9.53065013885498, + "learning_rate": 2.102335164835165e-05, + "loss": 0.1293, + "step": 21095 + }, + { + "epoch": 57.956043956043956, + "grad_norm": 11.291141510009766, + "learning_rate": 2.1021978021978024e-05, + "loss": 0.2916, + "step": 21096 + }, + { + "epoch": 57.95879120879121, + "grad_norm": 20.486434936523438, + "learning_rate": 2.1020604395604394e-05, + "loss": 0.3733, + "step": 21097 + }, + { + "epoch": 57.96153846153846, + "grad_norm": 11.319015502929688, + "learning_rate": 2.101923076923077e-05, + "loss": 0.1256, + "step": 21098 + }, + { + "epoch": 57.964285714285715, + "grad_norm": 12.094730377197266, + "learning_rate": 2.1017857142857145e-05, + "loss": 0.2464, + "step": 21099 + }, + { + "epoch": 57.967032967032964, + "grad_norm": 11.14295482635498, + "learning_rate": 2.1016483516483518e-05, + "loss": 0.5231, + "step": 21100 + }, + { + "epoch": 57.96978021978022, + "grad_norm": 15.187357902526855, + "learning_rate": 2.101510989010989e-05, + "loss": 0.5014, + "step": 21101 + }, + { + "epoch": 57.972527472527474, + "grad_norm": 9.363349914550781, + "learning_rate": 2.1013736263736265e-05, + "loss": 0.1509, + "step": 21102 + }, + { + "epoch": 57.97527472527472, + "grad_norm": 13.089433670043945, + "learning_rate": 2.1012362637362638e-05, + "loss": 0.6068, + "step": 21103 + }, + { + "epoch": 57.97802197802198, + "grad_norm": 15.265290260314941, + "learning_rate": 2.101098901098901e-05, + "loss": 0.4371, + "step": 21104 + }, + { + "epoch": 57.98076923076923, + "grad_norm": 17.444494247436523, + "learning_rate": 2.1009615384615385e-05, + "loss": 0.2375, + "step": 21105 + }, + { + "epoch": 57.98351648351648, + "grad_norm": 6.320433139801025, + "learning_rate": 2.100824175824176e-05, + "loss": 0.0628, + "step": 21106 + }, + { + "epoch": 57.98626373626374, + "grad_norm": 5.448550224304199, + "learning_rate": 2.1006868131868132e-05, + "loss": 0.1053, + "step": 21107 + }, + { + "epoch": 57.98901098901099, + "grad_norm": 10.147231101989746, + "learning_rate": 2.1005494505494505e-05, + "loss": 0.1632, + "step": 21108 + }, + { + "epoch": 57.99175824175824, + "grad_norm": 20.346548080444336, + "learning_rate": 2.1004120879120882e-05, + "loss": 0.3749, + "step": 21109 + }, + { + "epoch": 57.994505494505496, + "grad_norm": 16.044265747070312, + "learning_rate": 2.1002747252747255e-05, + "loss": 0.3823, + "step": 21110 + }, + { + "epoch": 57.997252747252745, + "grad_norm": 5.74637508392334, + "learning_rate": 2.100137362637363e-05, + "loss": 0.0765, + "step": 21111 + }, + { + "epoch": 58.0, + "grad_norm": 26.296573638916016, + "learning_rate": 2.1e-05, + "loss": 0.2383, + "step": 21112 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.7465564738292011, + "eval_f1": 0.758928934403572, + "eval_f1_DuraRiadoRio_64x64": 0.7607843137254902, + "eval_f1_Mole_64x64": 0.7866108786610879, + "eval_f1_Quebrado_64x64": 0.7033898305084746, + "eval_f1_RiadoRio_64x64": 0.631578947368421, + "eval_f1_RioFechado_64x64": 0.9122807017543859, + "eval_loss": 1.1022169589996338, + "eval_precision": 0.8317645813069154, + "eval_precision_DuraRiadoRio_64x64": 0.8738738738738738, + "eval_precision_Mole_64x64": 0.9894736842105263, + "eval_precision_Quebrado_64x64": 0.9021739130434783, + "eval_precision_RiadoRio_64x64": 0.4842105263157895, + "eval_precision_RioFechado_64x64": 0.9090909090909091, + "eval_recall": 0.7452330944732724, + "eval_recall_DuraRiadoRio_64x64": 0.6736111111111112, + "eval_recall_Mole_64x64": 0.6527777777777778, + "eval_recall_Quebrado_64x64": 0.5763888888888888, + "eval_recall_RiadoRio_64x64": 0.9078947368421053, + "eval_recall_RioFechado_64x64": 0.9154929577464789, + "eval_runtime": 1.769, + "eval_samples_per_second": 410.411, + "eval_steps_per_second": 26.004, + "step": 21112 + }, + { + "epoch": 58.002747252747255, + "grad_norm": 14.82429027557373, + "learning_rate": 2.0998626373626372e-05, + "loss": 0.5953, + "step": 21113 + }, + { + "epoch": 58.005494505494504, + "grad_norm": 21.37146759033203, + "learning_rate": 2.099725274725275e-05, + "loss": 0.4382, + "step": 21114 + }, + { + "epoch": 58.00824175824176, + "grad_norm": 15.81020450592041, + "learning_rate": 2.0995879120879122e-05, + "loss": 0.4786, + "step": 21115 + }, + { + "epoch": 58.010989010989015, + "grad_norm": 10.06807804107666, + "learning_rate": 2.0994505494505496e-05, + "loss": 0.1689, + "step": 21116 + }, + { + "epoch": 58.01373626373626, + "grad_norm": 6.788633346557617, + "learning_rate": 2.099313186813187e-05, + "loss": 0.0674, + "step": 21117 + }, + { + "epoch": 58.01648351648352, + "grad_norm": 5.900306224822998, + "learning_rate": 2.0991758241758243e-05, + "loss": 0.1304, + "step": 21118 + }, + { + "epoch": 58.01923076923077, + "grad_norm": 10.994338035583496, + "learning_rate": 2.0990384615384616e-05, + "loss": 0.25, + "step": 21119 + }, + { + "epoch": 58.02197802197802, + "grad_norm": 7.328124523162842, + "learning_rate": 2.098901098901099e-05, + "loss": 0.1605, + "step": 21120 + }, + { + "epoch": 58.02472527472528, + "grad_norm": 8.779351234436035, + "learning_rate": 2.0987637362637363e-05, + "loss": 0.1506, + "step": 21121 + }, + { + "epoch": 58.027472527472526, + "grad_norm": 6.946207523345947, + "learning_rate": 2.0986263736263736e-05, + "loss": 0.1324, + "step": 21122 + }, + { + "epoch": 58.03021978021978, + "grad_norm": 17.40067481994629, + "learning_rate": 2.098489010989011e-05, + "loss": 0.3708, + "step": 21123 + }, + { + "epoch": 58.032967032967036, + "grad_norm": 14.657526016235352, + "learning_rate": 2.0983516483516486e-05, + "loss": 0.3812, + "step": 21124 + }, + { + "epoch": 58.035714285714285, + "grad_norm": 12.803153991699219, + "learning_rate": 2.098214285714286e-05, + "loss": 0.2977, + "step": 21125 + }, + { + "epoch": 58.03846153846154, + "grad_norm": 12.842000961303711, + "learning_rate": 2.0980769230769233e-05, + "loss": 0.1574, + "step": 21126 + }, + { + "epoch": 58.04120879120879, + "grad_norm": 12.268350601196289, + "learning_rate": 2.0979395604395603e-05, + "loss": 0.1626, + "step": 21127 + }, + { + "epoch": 58.043956043956044, + "grad_norm": 13.808501243591309, + "learning_rate": 2.0978021978021977e-05, + "loss": 0.1781, + "step": 21128 + }, + { + "epoch": 58.0467032967033, + "grad_norm": 4.331361293792725, + "learning_rate": 2.0976648351648354e-05, + "loss": 0.0501, + "step": 21129 + }, + { + "epoch": 58.04945054945055, + "grad_norm": 15.790761947631836, + "learning_rate": 2.0975274725274727e-05, + "loss": 0.4829, + "step": 21130 + }, + { + "epoch": 58.0521978021978, + "grad_norm": 12.330621719360352, + "learning_rate": 2.09739010989011e-05, + "loss": 0.2185, + "step": 21131 + }, + { + "epoch": 58.05494505494506, + "grad_norm": 5.886513710021973, + "learning_rate": 2.0972527472527474e-05, + "loss": 0.0942, + "step": 21132 + }, + { + "epoch": 58.05769230769231, + "grad_norm": 8.161736488342285, + "learning_rate": 2.0971153846153847e-05, + "loss": 0.1726, + "step": 21133 + }, + { + "epoch": 58.06043956043956, + "grad_norm": 14.15807056427002, + "learning_rate": 2.096978021978022e-05, + "loss": 0.2482, + "step": 21134 + }, + { + "epoch": 58.06318681318681, + "grad_norm": 12.3399658203125, + "learning_rate": 2.0968406593406594e-05, + "loss": 0.2332, + "step": 21135 + }, + { + "epoch": 58.065934065934066, + "grad_norm": 7.520970821380615, + "learning_rate": 2.0967032967032967e-05, + "loss": 0.1833, + "step": 21136 + }, + { + "epoch": 58.06868131868132, + "grad_norm": 10.810844421386719, + "learning_rate": 2.096565934065934e-05, + "loss": 0.3361, + "step": 21137 + }, + { + "epoch": 58.07142857142857, + "grad_norm": 14.612204551696777, + "learning_rate": 2.0964285714285714e-05, + "loss": 0.4497, + "step": 21138 + }, + { + "epoch": 58.074175824175825, + "grad_norm": 13.591508865356445, + "learning_rate": 2.096291208791209e-05, + "loss": 0.2311, + "step": 21139 + }, + { + "epoch": 58.07692307692308, + "grad_norm": 3.5224056243896484, + "learning_rate": 2.0961538461538464e-05, + "loss": 0.062, + "step": 21140 + }, + { + "epoch": 58.07967032967033, + "grad_norm": 16.753366470336914, + "learning_rate": 2.0960164835164834e-05, + "loss": 0.3215, + "step": 21141 + }, + { + "epoch": 58.082417582417584, + "grad_norm": 4.847593307495117, + "learning_rate": 2.0958791208791208e-05, + "loss": 0.0871, + "step": 21142 + }, + { + "epoch": 58.08516483516483, + "grad_norm": 21.07548713684082, + "learning_rate": 2.095741758241758e-05, + "loss": 0.7031, + "step": 21143 + }, + { + "epoch": 58.08791208791209, + "grad_norm": 16.18853187561035, + "learning_rate": 2.0956043956043958e-05, + "loss": 0.2951, + "step": 21144 + }, + { + "epoch": 58.09065934065934, + "grad_norm": 9.941478729248047, + "learning_rate": 2.095467032967033e-05, + "loss": 0.2636, + "step": 21145 + }, + { + "epoch": 58.09340659340659, + "grad_norm": 17.505128860473633, + "learning_rate": 2.0953296703296705e-05, + "loss": 0.2888, + "step": 21146 + }, + { + "epoch": 58.09615384615385, + "grad_norm": 5.977138996124268, + "learning_rate": 2.0951923076923078e-05, + "loss": 0.1415, + "step": 21147 + }, + { + "epoch": 58.0989010989011, + "grad_norm": 18.727855682373047, + "learning_rate": 2.095054945054945e-05, + "loss": 0.377, + "step": 21148 + }, + { + "epoch": 58.10164835164835, + "grad_norm": 12.31447696685791, + "learning_rate": 2.0949175824175825e-05, + "loss": 0.1962, + "step": 21149 + }, + { + "epoch": 58.104395604395606, + "grad_norm": 11.669434547424316, + "learning_rate": 2.09478021978022e-05, + "loss": 0.4438, + "step": 21150 + }, + { + "epoch": 58.107142857142854, + "grad_norm": 6.1803083419799805, + "learning_rate": 2.0946428571428572e-05, + "loss": 0.1117, + "step": 21151 + }, + { + "epoch": 58.10989010989011, + "grad_norm": 12.153003692626953, + "learning_rate": 2.0945054945054945e-05, + "loss": 0.1674, + "step": 21152 + }, + { + "epoch": 58.112637362637365, + "grad_norm": 16.92183494567871, + "learning_rate": 2.094368131868132e-05, + "loss": 0.4675, + "step": 21153 + }, + { + "epoch": 58.11538461538461, + "grad_norm": 6.864041328430176, + "learning_rate": 2.0942307692307695e-05, + "loss": 0.1771, + "step": 21154 + }, + { + "epoch": 58.11813186813187, + "grad_norm": 15.67847728729248, + "learning_rate": 2.094093406593407e-05, + "loss": 0.4697, + "step": 21155 + }, + { + "epoch": 58.120879120879124, + "grad_norm": 4.057333469390869, + "learning_rate": 2.093956043956044e-05, + "loss": 0.0533, + "step": 21156 + }, + { + "epoch": 58.12362637362637, + "grad_norm": 7.349490165710449, + "learning_rate": 2.0938186813186812e-05, + "loss": 0.0924, + "step": 21157 + }, + { + "epoch": 58.12637362637363, + "grad_norm": 8.766545295715332, + "learning_rate": 2.0936813186813186e-05, + "loss": 0.283, + "step": 21158 + }, + { + "epoch": 58.129120879120876, + "grad_norm": 5.0160369873046875, + "learning_rate": 2.0935439560439563e-05, + "loss": 0.0802, + "step": 21159 + }, + { + "epoch": 58.13186813186813, + "grad_norm": 4.27039098739624, + "learning_rate": 2.0934065934065936e-05, + "loss": 0.0439, + "step": 21160 + }, + { + "epoch": 58.13461538461539, + "grad_norm": 16.392595291137695, + "learning_rate": 2.093269230769231e-05, + "loss": 0.4463, + "step": 21161 + }, + { + "epoch": 58.137362637362635, + "grad_norm": 9.334599494934082, + "learning_rate": 2.0931318681318683e-05, + "loss": 0.153, + "step": 21162 + }, + { + "epoch": 58.14010989010989, + "grad_norm": 7.895910263061523, + "learning_rate": 2.0929945054945056e-05, + "loss": 0.1568, + "step": 21163 + }, + { + "epoch": 58.142857142857146, + "grad_norm": 8.623800277709961, + "learning_rate": 2.092857142857143e-05, + "loss": 0.1929, + "step": 21164 + }, + { + "epoch": 58.145604395604394, + "grad_norm": 7.2177629470825195, + "learning_rate": 2.0927197802197803e-05, + "loss": 0.1047, + "step": 21165 + }, + { + "epoch": 58.14835164835165, + "grad_norm": 18.126150131225586, + "learning_rate": 2.0925824175824176e-05, + "loss": 0.4035, + "step": 21166 + }, + { + "epoch": 58.1510989010989, + "grad_norm": 5.726022720336914, + "learning_rate": 2.092445054945055e-05, + "loss": 0.163, + "step": 21167 + }, + { + "epoch": 58.15384615384615, + "grad_norm": 4.108270168304443, + "learning_rate": 2.0923076923076923e-05, + "loss": 0.0774, + "step": 21168 + }, + { + "epoch": 58.15659340659341, + "grad_norm": 10.200430870056152, + "learning_rate": 2.09217032967033e-05, + "loss": 0.2969, + "step": 21169 + }, + { + "epoch": 58.15934065934066, + "grad_norm": 5.843600749969482, + "learning_rate": 2.0920329670329673e-05, + "loss": 0.1204, + "step": 21170 + }, + { + "epoch": 58.16208791208791, + "grad_norm": 6.5749921798706055, + "learning_rate": 2.0918956043956043e-05, + "loss": 0.1725, + "step": 21171 + }, + { + "epoch": 58.16483516483517, + "grad_norm": 7.085838317871094, + "learning_rate": 2.0917582417582417e-05, + "loss": 0.0782, + "step": 21172 + }, + { + "epoch": 58.167582417582416, + "grad_norm": 15.302714347839355, + "learning_rate": 2.091620879120879e-05, + "loss": 0.1962, + "step": 21173 + }, + { + "epoch": 58.17032967032967, + "grad_norm": 17.032230377197266, + "learning_rate": 2.0914835164835167e-05, + "loss": 0.3739, + "step": 21174 + }, + { + "epoch": 58.17307692307692, + "grad_norm": 13.045273780822754, + "learning_rate": 2.091346153846154e-05, + "loss": 0.1996, + "step": 21175 + }, + { + "epoch": 58.175824175824175, + "grad_norm": 9.118555068969727, + "learning_rate": 2.0912087912087914e-05, + "loss": 0.1451, + "step": 21176 + }, + { + "epoch": 58.17857142857143, + "grad_norm": 11.29891300201416, + "learning_rate": 2.0910714285714287e-05, + "loss": 0.1733, + "step": 21177 + }, + { + "epoch": 58.18131868131868, + "grad_norm": 8.475238800048828, + "learning_rate": 2.090934065934066e-05, + "loss": 0.1999, + "step": 21178 + }, + { + "epoch": 58.184065934065934, + "grad_norm": 2.7331249713897705, + "learning_rate": 2.0907967032967034e-05, + "loss": 0.0717, + "step": 21179 + }, + { + "epoch": 58.18681318681319, + "grad_norm": 21.452808380126953, + "learning_rate": 2.0906593406593407e-05, + "loss": 0.5671, + "step": 21180 + }, + { + "epoch": 58.18956043956044, + "grad_norm": 8.123008728027344, + "learning_rate": 2.090521978021978e-05, + "loss": 0.1113, + "step": 21181 + }, + { + "epoch": 58.19230769230769, + "grad_norm": 13.171468734741211, + "learning_rate": 2.0903846153846154e-05, + "loss": 0.2165, + "step": 21182 + }, + { + "epoch": 58.19505494505494, + "grad_norm": 13.76927375793457, + "learning_rate": 2.0902472527472528e-05, + "loss": 0.3138, + "step": 21183 + }, + { + "epoch": 58.1978021978022, + "grad_norm": 11.032389640808105, + "learning_rate": 2.0901098901098904e-05, + "loss": 0.2745, + "step": 21184 + }, + { + "epoch": 58.20054945054945, + "grad_norm": 4.151147365570068, + "learning_rate": 2.0899725274725278e-05, + "loss": 0.0835, + "step": 21185 + }, + { + "epoch": 58.2032967032967, + "grad_norm": 11.565237045288086, + "learning_rate": 2.0898351648351648e-05, + "loss": 0.2532, + "step": 21186 + }, + { + "epoch": 58.206043956043956, + "grad_norm": 6.227088928222656, + "learning_rate": 2.089697802197802e-05, + "loss": 0.0973, + "step": 21187 + }, + { + "epoch": 58.20879120879121, + "grad_norm": 14.168846130371094, + "learning_rate": 2.0895604395604395e-05, + "loss": 0.5283, + "step": 21188 + }, + { + "epoch": 58.21153846153846, + "grad_norm": 7.684508323669434, + "learning_rate": 2.089423076923077e-05, + "loss": 0.079, + "step": 21189 + }, + { + "epoch": 58.214285714285715, + "grad_norm": 13.778059005737305, + "learning_rate": 2.0892857142857145e-05, + "loss": 0.3214, + "step": 21190 + }, + { + "epoch": 58.217032967032964, + "grad_norm": 22.31880760192871, + "learning_rate": 2.0891483516483518e-05, + "loss": 0.4549, + "step": 21191 + }, + { + "epoch": 58.21978021978022, + "grad_norm": 15.016557693481445, + "learning_rate": 2.0890109890109892e-05, + "loss": 0.3252, + "step": 21192 + }, + { + "epoch": 58.222527472527474, + "grad_norm": 10.014910697937012, + "learning_rate": 2.0888736263736265e-05, + "loss": 0.1667, + "step": 21193 + }, + { + "epoch": 58.22527472527472, + "grad_norm": 10.680563926696777, + "learning_rate": 2.088736263736264e-05, + "loss": 0.1223, + "step": 21194 + }, + { + "epoch": 58.22802197802198, + "grad_norm": 5.811598300933838, + "learning_rate": 2.0885989010989012e-05, + "loss": 0.0802, + "step": 21195 + }, + { + "epoch": 58.23076923076923, + "grad_norm": 11.17593002319336, + "learning_rate": 2.0884615384615385e-05, + "loss": 0.247, + "step": 21196 + }, + { + "epoch": 58.23351648351648, + "grad_norm": 17.823558807373047, + "learning_rate": 2.088324175824176e-05, + "loss": 0.3547, + "step": 21197 + }, + { + "epoch": 58.23626373626374, + "grad_norm": 16.27003288269043, + "learning_rate": 2.0881868131868132e-05, + "loss": 0.488, + "step": 21198 + }, + { + "epoch": 58.239010989010985, + "grad_norm": 17.960254669189453, + "learning_rate": 2.088049450549451e-05, + "loss": 0.3763, + "step": 21199 + }, + { + "epoch": 58.24175824175824, + "grad_norm": 8.769163131713867, + "learning_rate": 2.0879120879120882e-05, + "loss": 0.1136, + "step": 21200 + }, + { + "epoch": 58.244505494505496, + "grad_norm": 14.675155639648438, + "learning_rate": 2.0877747252747252e-05, + "loss": 0.5527, + "step": 21201 + }, + { + "epoch": 58.247252747252745, + "grad_norm": 3.7449450492858887, + "learning_rate": 2.0876373626373626e-05, + "loss": 0.0675, + "step": 21202 + }, + { + "epoch": 58.25, + "grad_norm": 11.93841552734375, + "learning_rate": 2.0875e-05, + "loss": 0.4191, + "step": 21203 + }, + { + "epoch": 58.252747252747255, + "grad_norm": 18.86860466003418, + "learning_rate": 2.0873626373626376e-05, + "loss": 0.4435, + "step": 21204 + }, + { + "epoch": 58.255494505494504, + "grad_norm": 13.538381576538086, + "learning_rate": 2.087225274725275e-05, + "loss": 0.5126, + "step": 21205 + }, + { + "epoch": 58.25824175824176, + "grad_norm": 26.201833724975586, + "learning_rate": 2.0870879120879123e-05, + "loss": 0.6598, + "step": 21206 + }, + { + "epoch": 58.260989010989015, + "grad_norm": 15.69904613494873, + "learning_rate": 2.0869505494505496e-05, + "loss": 0.2868, + "step": 21207 + }, + { + "epoch": 58.26373626373626, + "grad_norm": 11.604634284973145, + "learning_rate": 2.086813186813187e-05, + "loss": 0.3077, + "step": 21208 + }, + { + "epoch": 58.26648351648352, + "grad_norm": 11.302730560302734, + "learning_rate": 2.0866758241758243e-05, + "loss": 0.2827, + "step": 21209 + }, + { + "epoch": 58.26923076923077, + "grad_norm": 17.183094024658203, + "learning_rate": 2.0865384615384616e-05, + "loss": 0.5994, + "step": 21210 + }, + { + "epoch": 58.27197802197802, + "grad_norm": 5.551140785217285, + "learning_rate": 2.086401098901099e-05, + "loss": 0.0972, + "step": 21211 + }, + { + "epoch": 58.27472527472528, + "grad_norm": 22.092727661132812, + "learning_rate": 2.0862637362637363e-05, + "loss": 0.4934, + "step": 21212 + }, + { + "epoch": 58.277472527472526, + "grad_norm": 17.698936462402344, + "learning_rate": 2.0861263736263737e-05, + "loss": 0.3718, + "step": 21213 + }, + { + "epoch": 58.28021978021978, + "grad_norm": 17.39859962463379, + "learning_rate": 2.0859890109890113e-05, + "loss": 0.2706, + "step": 21214 + }, + { + "epoch": 58.282967032967036, + "grad_norm": 10.994733810424805, + "learning_rate": 2.0858516483516487e-05, + "loss": 0.2385, + "step": 21215 + }, + { + "epoch": 58.285714285714285, + "grad_norm": 11.28054428100586, + "learning_rate": 2.0857142857142857e-05, + "loss": 0.137, + "step": 21216 + }, + { + "epoch": 58.28846153846154, + "grad_norm": 8.46297836303711, + "learning_rate": 2.085576923076923e-05, + "loss": 0.104, + "step": 21217 + }, + { + "epoch": 58.29120879120879, + "grad_norm": 10.32194709777832, + "learning_rate": 2.0854395604395604e-05, + "loss": 0.2003, + "step": 21218 + }, + { + "epoch": 58.293956043956044, + "grad_norm": 5.570769786834717, + "learning_rate": 2.0853021978021977e-05, + "loss": 0.1147, + "step": 21219 + }, + { + "epoch": 58.2967032967033, + "grad_norm": 21.779399871826172, + "learning_rate": 2.0851648351648354e-05, + "loss": 0.456, + "step": 21220 + }, + { + "epoch": 58.29945054945055, + "grad_norm": 10.724810600280762, + "learning_rate": 2.0850274725274727e-05, + "loss": 0.1899, + "step": 21221 + }, + { + "epoch": 58.3021978021978, + "grad_norm": 9.986578941345215, + "learning_rate": 2.08489010989011e-05, + "loss": 0.2369, + "step": 21222 + }, + { + "epoch": 58.30494505494506, + "grad_norm": 4.318263530731201, + "learning_rate": 2.0847527472527474e-05, + "loss": 0.0926, + "step": 21223 + }, + { + "epoch": 58.30769230769231, + "grad_norm": 10.904033660888672, + "learning_rate": 2.0846153846153844e-05, + "loss": 0.2011, + "step": 21224 + }, + { + "epoch": 58.31043956043956, + "grad_norm": 5.97221565246582, + "learning_rate": 2.084478021978022e-05, + "loss": 0.1043, + "step": 21225 + }, + { + "epoch": 58.31318681318681, + "grad_norm": 9.37247085571289, + "learning_rate": 2.0843406593406594e-05, + "loss": 0.0737, + "step": 21226 + }, + { + "epoch": 58.315934065934066, + "grad_norm": 2.870229482650757, + "learning_rate": 2.0842032967032968e-05, + "loss": 0.0489, + "step": 21227 + }, + { + "epoch": 58.31868131868132, + "grad_norm": 13.587063789367676, + "learning_rate": 2.084065934065934e-05, + "loss": 0.2446, + "step": 21228 + }, + { + "epoch": 58.32142857142857, + "grad_norm": 7.442914962768555, + "learning_rate": 2.0839285714285715e-05, + "loss": 0.15, + "step": 21229 + }, + { + "epoch": 58.324175824175825, + "grad_norm": 7.862902641296387, + "learning_rate": 2.083791208791209e-05, + "loss": 0.2111, + "step": 21230 + }, + { + "epoch": 58.32692307692308, + "grad_norm": 4.201696395874023, + "learning_rate": 2.083653846153846e-05, + "loss": 0.0707, + "step": 21231 + }, + { + "epoch": 58.32967032967033, + "grad_norm": 14.09264087677002, + "learning_rate": 2.0835164835164835e-05, + "loss": 0.4884, + "step": 21232 + }, + { + "epoch": 58.332417582417584, + "grad_norm": 15.300454139709473, + "learning_rate": 2.0833791208791208e-05, + "loss": 0.2343, + "step": 21233 + }, + { + "epoch": 58.33516483516483, + "grad_norm": 6.170152187347412, + "learning_rate": 2.083241758241758e-05, + "loss": 0.0949, + "step": 21234 + }, + { + "epoch": 58.33791208791209, + "grad_norm": 18.122392654418945, + "learning_rate": 2.083104395604396e-05, + "loss": 0.499, + "step": 21235 + }, + { + "epoch": 58.34065934065934, + "grad_norm": 18.187097549438477, + "learning_rate": 2.0829670329670332e-05, + "loss": 0.5956, + "step": 21236 + }, + { + "epoch": 58.34340659340659, + "grad_norm": 15.363722801208496, + "learning_rate": 2.0828296703296705e-05, + "loss": 0.4817, + "step": 21237 + }, + { + "epoch": 58.34615384615385, + "grad_norm": 9.215445518493652, + "learning_rate": 2.082692307692308e-05, + "loss": 0.1652, + "step": 21238 + }, + { + "epoch": 58.3489010989011, + "grad_norm": 16.615983963012695, + "learning_rate": 2.082554945054945e-05, + "loss": 0.4974, + "step": 21239 + }, + { + "epoch": 58.35164835164835, + "grad_norm": 22.043575286865234, + "learning_rate": 2.0824175824175825e-05, + "loss": 0.5916, + "step": 21240 + }, + { + "epoch": 58.354395604395606, + "grad_norm": 10.365732192993164, + "learning_rate": 2.08228021978022e-05, + "loss": 0.2671, + "step": 21241 + }, + { + "epoch": 58.357142857142854, + "grad_norm": 20.5135498046875, + "learning_rate": 2.0821428571428572e-05, + "loss": 0.3714, + "step": 21242 + }, + { + "epoch": 58.35989010989011, + "grad_norm": 9.276081085205078, + "learning_rate": 2.0820054945054946e-05, + "loss": 0.0876, + "step": 21243 + }, + { + "epoch": 58.362637362637365, + "grad_norm": 3.1437790393829346, + "learning_rate": 2.081868131868132e-05, + "loss": 0.0703, + "step": 21244 + }, + { + "epoch": 58.36538461538461, + "grad_norm": 23.646284103393555, + "learning_rate": 2.0817307692307696e-05, + "loss": 0.5456, + "step": 21245 + }, + { + "epoch": 58.36813186813187, + "grad_norm": 14.684576034545898, + "learning_rate": 2.0815934065934066e-05, + "loss": 0.2366, + "step": 21246 + }, + { + "epoch": 58.370879120879124, + "grad_norm": 7.833915710449219, + "learning_rate": 2.081456043956044e-05, + "loss": 0.1409, + "step": 21247 + }, + { + "epoch": 58.37362637362637, + "grad_norm": 21.804588317871094, + "learning_rate": 2.0813186813186813e-05, + "loss": 0.3511, + "step": 21248 + }, + { + "epoch": 58.37637362637363, + "grad_norm": 19.736896514892578, + "learning_rate": 2.0811813186813186e-05, + "loss": 0.2649, + "step": 21249 + }, + { + "epoch": 58.379120879120876, + "grad_norm": 10.998598098754883, + "learning_rate": 2.0810439560439563e-05, + "loss": 0.1137, + "step": 21250 + }, + { + "epoch": 58.38186813186813, + "grad_norm": 36.285804748535156, + "learning_rate": 2.0809065934065936e-05, + "loss": 0.7047, + "step": 21251 + }, + { + "epoch": 58.38461538461539, + "grad_norm": 6.862924575805664, + "learning_rate": 2.080769230769231e-05, + "loss": 0.0989, + "step": 21252 + }, + { + "epoch": 58.387362637362635, + "grad_norm": 6.550187587738037, + "learning_rate": 2.0806318681318683e-05, + "loss": 0.1943, + "step": 21253 + }, + { + "epoch": 58.39010989010989, + "grad_norm": 9.084762573242188, + "learning_rate": 2.0804945054945053e-05, + "loss": 0.1381, + "step": 21254 + }, + { + "epoch": 58.392857142857146, + "grad_norm": 7.4311347007751465, + "learning_rate": 2.080357142857143e-05, + "loss": 0.0979, + "step": 21255 + }, + { + "epoch": 58.395604395604394, + "grad_norm": 14.097243309020996, + "learning_rate": 2.0802197802197803e-05, + "loss": 0.205, + "step": 21256 + }, + { + "epoch": 58.39835164835165, + "grad_norm": 11.792062759399414, + "learning_rate": 2.0800824175824177e-05, + "loss": 0.3051, + "step": 21257 + }, + { + "epoch": 58.4010989010989, + "grad_norm": 5.786685943603516, + "learning_rate": 2.079945054945055e-05, + "loss": 0.0811, + "step": 21258 + }, + { + "epoch": 58.40384615384615, + "grad_norm": 27.403575897216797, + "learning_rate": 2.0798076923076924e-05, + "loss": 0.7633, + "step": 21259 + }, + { + "epoch": 58.40659340659341, + "grad_norm": 19.075931549072266, + "learning_rate": 2.07967032967033e-05, + "loss": 0.2189, + "step": 21260 + }, + { + "epoch": 58.40934065934066, + "grad_norm": 16.050418853759766, + "learning_rate": 2.079532967032967e-05, + "loss": 0.3382, + "step": 21261 + }, + { + "epoch": 58.41208791208791, + "grad_norm": 14.103612899780273, + "learning_rate": 2.0793956043956044e-05, + "loss": 0.2433, + "step": 21262 + }, + { + "epoch": 58.41483516483517, + "grad_norm": 11.71503734588623, + "learning_rate": 2.0792582417582417e-05, + "loss": 0.3357, + "step": 21263 + }, + { + "epoch": 58.417582417582416, + "grad_norm": 15.374391555786133, + "learning_rate": 2.079120879120879e-05, + "loss": 0.2057, + "step": 21264 + }, + { + "epoch": 58.42032967032967, + "grad_norm": 9.299361228942871, + "learning_rate": 2.0789835164835167e-05, + "loss": 0.0844, + "step": 21265 + }, + { + "epoch": 58.42307692307692, + "grad_norm": 9.234845161437988, + "learning_rate": 2.078846153846154e-05, + "loss": 0.1969, + "step": 21266 + }, + { + "epoch": 58.425824175824175, + "grad_norm": 9.366612434387207, + "learning_rate": 2.0787087912087914e-05, + "loss": 0.135, + "step": 21267 + }, + { + "epoch": 58.42857142857143, + "grad_norm": 2.3676345348358154, + "learning_rate": 2.0785714285714288e-05, + "loss": 0.0386, + "step": 21268 + }, + { + "epoch": 58.43131868131868, + "grad_norm": 27.32744789123535, + "learning_rate": 2.0784340659340658e-05, + "loss": 0.6211, + "step": 21269 + }, + { + "epoch": 58.434065934065934, + "grad_norm": 16.23390007019043, + "learning_rate": 2.0782967032967034e-05, + "loss": 0.437, + "step": 21270 + }, + { + "epoch": 58.43681318681319, + "grad_norm": 17.08909797668457, + "learning_rate": 2.0781593406593408e-05, + "loss": 0.4763, + "step": 21271 + }, + { + "epoch": 58.43956043956044, + "grad_norm": 6.484286308288574, + "learning_rate": 2.078021978021978e-05, + "loss": 0.1374, + "step": 21272 + }, + { + "epoch": 58.44230769230769, + "grad_norm": 1.489870548248291, + "learning_rate": 2.0778846153846155e-05, + "loss": 0.0159, + "step": 21273 + }, + { + "epoch": 58.44505494505494, + "grad_norm": 6.330508232116699, + "learning_rate": 2.0777472527472528e-05, + "loss": 0.1075, + "step": 21274 + }, + { + "epoch": 58.4478021978022, + "grad_norm": 24.46555519104004, + "learning_rate": 2.0776098901098905e-05, + "loss": 0.569, + "step": 21275 + }, + { + "epoch": 58.45054945054945, + "grad_norm": 28.253894805908203, + "learning_rate": 2.0774725274725275e-05, + "loss": 1.0359, + "step": 21276 + }, + { + "epoch": 58.4532967032967, + "grad_norm": 6.2637739181518555, + "learning_rate": 2.0773351648351648e-05, + "loss": 0.1373, + "step": 21277 + }, + { + "epoch": 58.456043956043956, + "grad_norm": 14.52346420288086, + "learning_rate": 2.077197802197802e-05, + "loss": 0.2866, + "step": 21278 + }, + { + "epoch": 58.45879120879121, + "grad_norm": 7.030330657958984, + "learning_rate": 2.0770604395604395e-05, + "loss": 0.1012, + "step": 21279 + }, + { + "epoch": 58.46153846153846, + "grad_norm": 24.593019485473633, + "learning_rate": 2.0769230769230772e-05, + "loss": 0.5957, + "step": 21280 + }, + { + "epoch": 58.464285714285715, + "grad_norm": 11.239850044250488, + "learning_rate": 2.0767857142857145e-05, + "loss": 0.2566, + "step": 21281 + }, + { + "epoch": 58.467032967032964, + "grad_norm": 4.267231464385986, + "learning_rate": 2.076648351648352e-05, + "loss": 0.0534, + "step": 21282 + }, + { + "epoch": 58.46978021978022, + "grad_norm": 12.840145111083984, + "learning_rate": 2.0765109890109892e-05, + "loss": 0.1816, + "step": 21283 + }, + { + "epoch": 58.472527472527474, + "grad_norm": 17.306983947753906, + "learning_rate": 2.0763736263736262e-05, + "loss": 0.5857, + "step": 21284 + }, + { + "epoch": 58.47527472527472, + "grad_norm": 10.698244094848633, + "learning_rate": 2.076236263736264e-05, + "loss": 0.4258, + "step": 21285 + }, + { + "epoch": 58.47802197802198, + "grad_norm": 9.752359390258789, + "learning_rate": 2.0760989010989012e-05, + "loss": 0.1334, + "step": 21286 + }, + { + "epoch": 58.48076923076923, + "grad_norm": 9.796360969543457, + "learning_rate": 2.0759615384615386e-05, + "loss": 0.1732, + "step": 21287 + }, + { + "epoch": 58.48351648351648, + "grad_norm": 9.930635452270508, + "learning_rate": 2.075824175824176e-05, + "loss": 0.283, + "step": 21288 + }, + { + "epoch": 58.48626373626374, + "grad_norm": 7.325092792510986, + "learning_rate": 2.0756868131868133e-05, + "loss": 0.1267, + "step": 21289 + }, + { + "epoch": 58.489010989010985, + "grad_norm": 10.619020462036133, + "learning_rate": 2.075549450549451e-05, + "loss": 0.2098, + "step": 21290 + }, + { + "epoch": 58.49175824175824, + "grad_norm": 15.741238594055176, + "learning_rate": 2.075412087912088e-05, + "loss": 0.3643, + "step": 21291 + }, + { + "epoch": 58.494505494505496, + "grad_norm": 5.381780624389648, + "learning_rate": 2.0752747252747253e-05, + "loss": 0.071, + "step": 21292 + }, + { + "epoch": 58.497252747252745, + "grad_norm": 8.04525375366211, + "learning_rate": 2.0751373626373626e-05, + "loss": 0.2214, + "step": 21293 + }, + { + "epoch": 58.5, + "grad_norm": 19.36416244506836, + "learning_rate": 2.075e-05, + "loss": 0.7126, + "step": 21294 + }, + { + "epoch": 58.502747252747255, + "grad_norm": 24.430503845214844, + "learning_rate": 2.0748626373626376e-05, + "loss": 0.7652, + "step": 21295 + }, + { + "epoch": 58.505494505494504, + "grad_norm": 13.936336517333984, + "learning_rate": 2.074725274725275e-05, + "loss": 0.3674, + "step": 21296 + }, + { + "epoch": 58.50824175824176, + "grad_norm": 16.8851261138916, + "learning_rate": 2.0745879120879123e-05, + "loss": 0.2745, + "step": 21297 + }, + { + "epoch": 58.51098901098901, + "grad_norm": 12.070123672485352, + "learning_rate": 2.0744505494505497e-05, + "loss": 0.221, + "step": 21298 + }, + { + "epoch": 58.51373626373626, + "grad_norm": 13.753732681274414, + "learning_rate": 2.0743131868131867e-05, + "loss": 0.1495, + "step": 21299 + }, + { + "epoch": 58.51648351648352, + "grad_norm": 10.505646705627441, + "learning_rate": 2.0741758241758243e-05, + "loss": 0.1875, + "step": 21300 + }, + { + "epoch": 58.51923076923077, + "grad_norm": 8.460084915161133, + "learning_rate": 2.0740384615384617e-05, + "loss": 0.1568, + "step": 21301 + }, + { + "epoch": 58.52197802197802, + "grad_norm": 15.624340057373047, + "learning_rate": 2.073901098901099e-05, + "loss": 0.2706, + "step": 21302 + }, + { + "epoch": 58.52472527472528, + "grad_norm": 13.90075969696045, + "learning_rate": 2.0737637362637364e-05, + "loss": 0.2209, + "step": 21303 + }, + { + "epoch": 58.527472527472526, + "grad_norm": 4.135885238647461, + "learning_rate": 2.0736263736263737e-05, + "loss": 0.0801, + "step": 21304 + }, + { + "epoch": 58.53021978021978, + "grad_norm": 16.558897018432617, + "learning_rate": 2.0734890109890114e-05, + "loss": 0.3082, + "step": 21305 + }, + { + "epoch": 58.532967032967036, + "grad_norm": 7.818389892578125, + "learning_rate": 2.0733516483516484e-05, + "loss": 0.1855, + "step": 21306 + }, + { + "epoch": 58.535714285714285, + "grad_norm": 6.262366771697998, + "learning_rate": 2.0732142857142857e-05, + "loss": 0.1492, + "step": 21307 + }, + { + "epoch": 58.53846153846154, + "grad_norm": 18.947505950927734, + "learning_rate": 2.073076923076923e-05, + "loss": 0.3326, + "step": 21308 + }, + { + "epoch": 58.54120879120879, + "grad_norm": 17.174678802490234, + "learning_rate": 2.0729395604395604e-05, + "loss": 0.3694, + "step": 21309 + }, + { + "epoch": 58.543956043956044, + "grad_norm": 7.064168930053711, + "learning_rate": 2.072802197802198e-05, + "loss": 0.0703, + "step": 21310 + }, + { + "epoch": 58.5467032967033, + "grad_norm": 12.392010688781738, + "learning_rate": 2.0726648351648354e-05, + "loss": 0.1465, + "step": 21311 + }, + { + "epoch": 58.54945054945055, + "grad_norm": 26.253787994384766, + "learning_rate": 2.0725274725274728e-05, + "loss": 0.9201, + "step": 21312 + }, + { + "epoch": 58.5521978021978, + "grad_norm": 9.020451545715332, + "learning_rate": 2.07239010989011e-05, + "loss": 0.1567, + "step": 21313 + }, + { + "epoch": 58.55494505494506, + "grad_norm": 20.77330780029297, + "learning_rate": 2.072252747252747e-05, + "loss": 0.3835, + "step": 21314 + }, + { + "epoch": 58.55769230769231, + "grad_norm": 7.6492390632629395, + "learning_rate": 2.0721153846153848e-05, + "loss": 0.179, + "step": 21315 + }, + { + "epoch": 58.56043956043956, + "grad_norm": 14.922467231750488, + "learning_rate": 2.071978021978022e-05, + "loss": 0.4049, + "step": 21316 + }, + { + "epoch": 58.56318681318681, + "grad_norm": 16.47518539428711, + "learning_rate": 2.0718406593406595e-05, + "loss": 0.1441, + "step": 21317 + }, + { + "epoch": 58.565934065934066, + "grad_norm": 6.488823890686035, + "learning_rate": 2.0717032967032968e-05, + "loss": 0.0732, + "step": 21318 + }, + { + "epoch": 58.56868131868132, + "grad_norm": 4.884166240692139, + "learning_rate": 2.071565934065934e-05, + "loss": 0.0576, + "step": 21319 + }, + { + "epoch": 58.57142857142857, + "grad_norm": 4.752414703369141, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.0707, + "step": 21320 + }, + { + "epoch": 58.574175824175825, + "grad_norm": 12.216300010681152, + "learning_rate": 2.0712912087912088e-05, + "loss": 0.1918, + "step": 21321 + }, + { + "epoch": 58.57692307692308, + "grad_norm": 3.7110159397125244, + "learning_rate": 2.0711538461538462e-05, + "loss": 0.0415, + "step": 21322 + }, + { + "epoch": 58.57967032967033, + "grad_norm": 20.68392562866211, + "learning_rate": 2.0710164835164835e-05, + "loss": 0.3944, + "step": 21323 + }, + { + "epoch": 58.582417582417584, + "grad_norm": 24.327205657958984, + "learning_rate": 2.070879120879121e-05, + "loss": 0.7135, + "step": 21324 + }, + { + "epoch": 58.58516483516483, + "grad_norm": 14.300121307373047, + "learning_rate": 2.0707417582417585e-05, + "loss": 0.55, + "step": 21325 + }, + { + "epoch": 58.58791208791209, + "grad_norm": 20.50954818725586, + "learning_rate": 2.070604395604396e-05, + "loss": 0.7213, + "step": 21326 + }, + { + "epoch": 58.59065934065934, + "grad_norm": 11.67029857635498, + "learning_rate": 2.0704670329670332e-05, + "loss": 0.2125, + "step": 21327 + }, + { + "epoch": 58.59340659340659, + "grad_norm": 8.693845748901367, + "learning_rate": 2.0703296703296706e-05, + "loss": 0.2089, + "step": 21328 + }, + { + "epoch": 58.59615384615385, + "grad_norm": 18.202348709106445, + "learning_rate": 2.0701923076923076e-05, + "loss": 0.2951, + "step": 21329 + }, + { + "epoch": 58.5989010989011, + "grad_norm": 2.515263795852661, + "learning_rate": 2.070054945054945e-05, + "loss": 0.0502, + "step": 21330 + }, + { + "epoch": 58.60164835164835, + "grad_norm": 13.002845764160156, + "learning_rate": 2.0699175824175826e-05, + "loss": 0.3067, + "step": 21331 + }, + { + "epoch": 58.604395604395606, + "grad_norm": 14.625061988830566, + "learning_rate": 2.06978021978022e-05, + "loss": 0.3306, + "step": 21332 + }, + { + "epoch": 58.607142857142854, + "grad_norm": 9.903794288635254, + "learning_rate": 2.0696428571428573e-05, + "loss": 0.1442, + "step": 21333 + }, + { + "epoch": 58.60989010989011, + "grad_norm": 4.990581035614014, + "learning_rate": 2.0695054945054946e-05, + "loss": 0.155, + "step": 21334 + }, + { + "epoch": 58.612637362637365, + "grad_norm": 16.288721084594727, + "learning_rate": 2.069368131868132e-05, + "loss": 0.3656, + "step": 21335 + }, + { + "epoch": 58.61538461538461, + "grad_norm": 8.357826232910156, + "learning_rate": 2.0692307692307693e-05, + "loss": 0.1634, + "step": 21336 + }, + { + "epoch": 58.61813186813187, + "grad_norm": 11.639738082885742, + "learning_rate": 2.0690934065934066e-05, + "loss": 0.3737, + "step": 21337 + }, + { + "epoch": 58.620879120879124, + "grad_norm": 5.823518753051758, + "learning_rate": 2.068956043956044e-05, + "loss": 0.1134, + "step": 21338 + }, + { + "epoch": 58.62362637362637, + "grad_norm": 2.9416561126708984, + "learning_rate": 2.0688186813186813e-05, + "loss": 0.0412, + "step": 21339 + }, + { + "epoch": 58.62637362637363, + "grad_norm": 10.160039901733398, + "learning_rate": 2.0686813186813186e-05, + "loss": 0.3008, + "step": 21340 + }, + { + "epoch": 58.629120879120876, + "grad_norm": 8.487733840942383, + "learning_rate": 2.0685439560439563e-05, + "loss": 0.1258, + "step": 21341 + }, + { + "epoch": 58.63186813186813, + "grad_norm": 6.54334831237793, + "learning_rate": 2.0684065934065937e-05, + "loss": 0.0756, + "step": 21342 + }, + { + "epoch": 58.63461538461539, + "grad_norm": 11.091134071350098, + "learning_rate": 2.068269230769231e-05, + "loss": 0.1739, + "step": 21343 + }, + { + "epoch": 58.637362637362635, + "grad_norm": 4.271245002746582, + "learning_rate": 2.068131868131868e-05, + "loss": 0.0606, + "step": 21344 + }, + { + "epoch": 58.64010989010989, + "grad_norm": 9.619219779968262, + "learning_rate": 2.0679945054945053e-05, + "loss": 0.2522, + "step": 21345 + }, + { + "epoch": 58.642857142857146, + "grad_norm": 17.210817337036133, + "learning_rate": 2.067857142857143e-05, + "loss": 0.4334, + "step": 21346 + }, + { + "epoch": 58.645604395604394, + "grad_norm": 17.186845779418945, + "learning_rate": 2.0677197802197804e-05, + "loss": 0.4338, + "step": 21347 + }, + { + "epoch": 58.64835164835165, + "grad_norm": 9.203731536865234, + "learning_rate": 2.0675824175824177e-05, + "loss": 0.2486, + "step": 21348 + }, + { + "epoch": 58.6510989010989, + "grad_norm": 10.339755058288574, + "learning_rate": 2.067445054945055e-05, + "loss": 0.3192, + "step": 21349 + }, + { + "epoch": 58.65384615384615, + "grad_norm": 22.51429557800293, + "learning_rate": 2.0673076923076924e-05, + "loss": 0.6952, + "step": 21350 + }, + { + "epoch": 58.65659340659341, + "grad_norm": 14.027823448181152, + "learning_rate": 2.0671703296703297e-05, + "loss": 0.3546, + "step": 21351 + }, + { + "epoch": 58.65934065934066, + "grad_norm": 12.113295555114746, + "learning_rate": 2.067032967032967e-05, + "loss": 0.2264, + "step": 21352 + }, + { + "epoch": 58.66208791208791, + "grad_norm": 23.715269088745117, + "learning_rate": 2.0668956043956044e-05, + "loss": 0.6272, + "step": 21353 + }, + { + "epoch": 58.66483516483517, + "grad_norm": 16.45387840270996, + "learning_rate": 2.0667582417582417e-05, + "loss": 0.3451, + "step": 21354 + }, + { + "epoch": 58.667582417582416, + "grad_norm": 3.989764928817749, + "learning_rate": 2.066620879120879e-05, + "loss": 0.067, + "step": 21355 + }, + { + "epoch": 58.67032967032967, + "grad_norm": 6.405345916748047, + "learning_rate": 2.0664835164835168e-05, + "loss": 0.092, + "step": 21356 + }, + { + "epoch": 58.67307692307692, + "grad_norm": 15.125359535217285, + "learning_rate": 2.066346153846154e-05, + "loss": 0.33, + "step": 21357 + }, + { + "epoch": 58.675824175824175, + "grad_norm": 25.852479934692383, + "learning_rate": 2.0662087912087915e-05, + "loss": 0.5553, + "step": 21358 + }, + { + "epoch": 58.67857142857143, + "grad_norm": 11.975090026855469, + "learning_rate": 2.0660714285714285e-05, + "loss": 0.2663, + "step": 21359 + }, + { + "epoch": 58.68131868131868, + "grad_norm": 12.780070304870605, + "learning_rate": 2.0659340659340658e-05, + "loss": 0.2411, + "step": 21360 + }, + { + "epoch": 58.684065934065934, + "grad_norm": 16.4339599609375, + "learning_rate": 2.0657967032967035e-05, + "loss": 0.3724, + "step": 21361 + }, + { + "epoch": 58.68681318681319, + "grad_norm": 11.751644134521484, + "learning_rate": 2.0656593406593408e-05, + "loss": 0.1867, + "step": 21362 + }, + { + "epoch": 58.68956043956044, + "grad_norm": 11.512070655822754, + "learning_rate": 2.065521978021978e-05, + "loss": 0.3086, + "step": 21363 + }, + { + "epoch": 58.69230769230769, + "grad_norm": 13.843844413757324, + "learning_rate": 2.0653846153846155e-05, + "loss": 0.2657, + "step": 21364 + }, + { + "epoch": 58.69505494505494, + "grad_norm": 13.973695755004883, + "learning_rate": 2.065247252747253e-05, + "loss": 0.2176, + "step": 21365 + }, + { + "epoch": 58.6978021978022, + "grad_norm": 12.677656173706055, + "learning_rate": 2.0651098901098902e-05, + "loss": 0.3372, + "step": 21366 + }, + { + "epoch": 58.70054945054945, + "grad_norm": 11.150157928466797, + "learning_rate": 2.0649725274725275e-05, + "loss": 0.2304, + "step": 21367 + }, + { + "epoch": 58.7032967032967, + "grad_norm": 11.965600967407227, + "learning_rate": 2.064835164835165e-05, + "loss": 0.2126, + "step": 21368 + }, + { + "epoch": 58.706043956043956, + "grad_norm": 14.724220275878906, + "learning_rate": 2.0646978021978022e-05, + "loss": 0.2764, + "step": 21369 + }, + { + "epoch": 58.70879120879121, + "grad_norm": 3.330876350402832, + "learning_rate": 2.0645604395604395e-05, + "loss": 0.0551, + "step": 21370 + }, + { + "epoch": 58.71153846153846, + "grad_norm": 22.286151885986328, + "learning_rate": 2.0644230769230772e-05, + "loss": 0.4404, + "step": 21371 + }, + { + "epoch": 58.714285714285715, + "grad_norm": 15.205683708190918, + "learning_rate": 2.0642857142857146e-05, + "loss": 0.3622, + "step": 21372 + }, + { + "epoch": 58.717032967032964, + "grad_norm": 16.389585494995117, + "learning_rate": 2.064148351648352e-05, + "loss": 0.3915, + "step": 21373 + }, + { + "epoch": 58.71978021978022, + "grad_norm": 18.85603904724121, + "learning_rate": 2.064010989010989e-05, + "loss": 0.2807, + "step": 21374 + }, + { + "epoch": 58.722527472527474, + "grad_norm": 17.0189208984375, + "learning_rate": 2.0638736263736262e-05, + "loss": 0.5212, + "step": 21375 + }, + { + "epoch": 58.72527472527472, + "grad_norm": 3.425583600997925, + "learning_rate": 2.063736263736264e-05, + "loss": 0.0584, + "step": 21376 + }, + { + "epoch": 58.72802197802198, + "grad_norm": 11.772557258605957, + "learning_rate": 2.0635989010989013e-05, + "loss": 0.162, + "step": 21377 + }, + { + "epoch": 58.73076923076923, + "grad_norm": 5.9260382652282715, + "learning_rate": 2.0634615384615386e-05, + "loss": 0.0804, + "step": 21378 + }, + { + "epoch": 58.73351648351648, + "grad_norm": 11.490827560424805, + "learning_rate": 2.063324175824176e-05, + "loss": 0.2589, + "step": 21379 + }, + { + "epoch": 58.73626373626374, + "grad_norm": 17.574420928955078, + "learning_rate": 2.0631868131868133e-05, + "loss": 0.2812, + "step": 21380 + }, + { + "epoch": 58.73901098901099, + "grad_norm": 21.170263290405273, + "learning_rate": 2.0630494505494506e-05, + "loss": 0.5655, + "step": 21381 + }, + { + "epoch": 58.74175824175824, + "grad_norm": 11.406708717346191, + "learning_rate": 2.062912087912088e-05, + "loss": 0.2897, + "step": 21382 + }, + { + "epoch": 58.744505494505496, + "grad_norm": 8.975950241088867, + "learning_rate": 2.0627747252747253e-05, + "loss": 0.3136, + "step": 21383 + }, + { + "epoch": 58.747252747252745, + "grad_norm": 9.30772590637207, + "learning_rate": 2.0626373626373626e-05, + "loss": 0.0732, + "step": 21384 + }, + { + "epoch": 58.75, + "grad_norm": 3.8411946296691895, + "learning_rate": 2.0625e-05, + "loss": 0.0528, + "step": 21385 + }, + { + "epoch": 58.752747252747255, + "grad_norm": 12.225976943969727, + "learning_rate": 2.0623626373626377e-05, + "loss": 0.2158, + "step": 21386 + }, + { + "epoch": 58.755494505494504, + "grad_norm": 15.510562896728516, + "learning_rate": 2.062225274725275e-05, + "loss": 0.325, + "step": 21387 + }, + { + "epoch": 58.75824175824176, + "grad_norm": 8.09565258026123, + "learning_rate": 2.0620879120879123e-05, + "loss": 0.1392, + "step": 21388 + }, + { + "epoch": 58.76098901098901, + "grad_norm": 3.2773287296295166, + "learning_rate": 2.0619505494505494e-05, + "loss": 0.0405, + "step": 21389 + }, + { + "epoch": 58.76373626373626, + "grad_norm": 5.822323799133301, + "learning_rate": 2.0618131868131867e-05, + "loss": 0.1285, + "step": 21390 + }, + { + "epoch": 58.76648351648352, + "grad_norm": 2.6389882564544678, + "learning_rate": 2.0616758241758244e-05, + "loss": 0.0213, + "step": 21391 + }, + { + "epoch": 58.76923076923077, + "grad_norm": 9.653905868530273, + "learning_rate": 2.0615384615384617e-05, + "loss": 0.2522, + "step": 21392 + }, + { + "epoch": 58.77197802197802, + "grad_norm": 20.277772903442383, + "learning_rate": 2.061401098901099e-05, + "loss": 0.5045, + "step": 21393 + }, + { + "epoch": 58.77472527472528, + "grad_norm": 8.745963096618652, + "learning_rate": 2.0612637362637364e-05, + "loss": 0.1354, + "step": 21394 + }, + { + "epoch": 58.777472527472526, + "grad_norm": 7.445740699768066, + "learning_rate": 2.0611263736263737e-05, + "loss": 0.0771, + "step": 21395 + }, + { + "epoch": 58.78021978021978, + "grad_norm": 3.547624349594116, + "learning_rate": 2.060989010989011e-05, + "loss": 0.0574, + "step": 21396 + }, + { + "epoch": 58.782967032967036, + "grad_norm": 11.963608741760254, + "learning_rate": 2.0608516483516484e-05, + "loss": 0.2796, + "step": 21397 + }, + { + "epoch": 58.785714285714285, + "grad_norm": 9.36641788482666, + "learning_rate": 2.0607142857142858e-05, + "loss": 0.1809, + "step": 21398 + }, + { + "epoch": 58.78846153846154, + "grad_norm": 15.705987930297852, + "learning_rate": 2.060576923076923e-05, + "loss": 0.2981, + "step": 21399 + }, + { + "epoch": 58.79120879120879, + "grad_norm": 13.212320327758789, + "learning_rate": 2.0604395604395604e-05, + "loss": 0.3634, + "step": 21400 + }, + { + "epoch": 58.793956043956044, + "grad_norm": 10.515153884887695, + "learning_rate": 2.060302197802198e-05, + "loss": 0.1819, + "step": 21401 + }, + { + "epoch": 58.7967032967033, + "grad_norm": 6.716102600097656, + "learning_rate": 2.0601648351648355e-05, + "loss": 0.0767, + "step": 21402 + }, + { + "epoch": 58.79945054945055, + "grad_norm": 9.472278594970703, + "learning_rate": 2.0600274725274728e-05, + "loss": 0.1177, + "step": 21403 + }, + { + "epoch": 58.8021978021978, + "grad_norm": 12.139416694641113, + "learning_rate": 2.0598901098901098e-05, + "loss": 0.178, + "step": 21404 + }, + { + "epoch": 58.80494505494506, + "grad_norm": 12.50543212890625, + "learning_rate": 2.059752747252747e-05, + "loss": 0.3468, + "step": 21405 + }, + { + "epoch": 58.80769230769231, + "grad_norm": 2.9645445346832275, + "learning_rate": 2.0596153846153848e-05, + "loss": 0.0824, + "step": 21406 + }, + { + "epoch": 58.81043956043956, + "grad_norm": 4.308942794799805, + "learning_rate": 2.059478021978022e-05, + "loss": 0.0397, + "step": 21407 + }, + { + "epoch": 58.81318681318681, + "grad_norm": 6.24534273147583, + "learning_rate": 2.0593406593406595e-05, + "loss": 0.0928, + "step": 21408 + }, + { + "epoch": 58.815934065934066, + "grad_norm": 10.124835014343262, + "learning_rate": 2.059203296703297e-05, + "loss": 0.137, + "step": 21409 + }, + { + "epoch": 58.81868131868132, + "grad_norm": 17.724699020385742, + "learning_rate": 2.0590659340659342e-05, + "loss": 0.2972, + "step": 21410 + }, + { + "epoch": 58.82142857142857, + "grad_norm": 19.159128189086914, + "learning_rate": 2.0589285714285715e-05, + "loss": 0.3444, + "step": 21411 + }, + { + "epoch": 58.824175824175825, + "grad_norm": 17.897594451904297, + "learning_rate": 2.058791208791209e-05, + "loss": 0.4259, + "step": 21412 + }, + { + "epoch": 58.82692307692308, + "grad_norm": 6.617218971252441, + "learning_rate": 2.0586538461538462e-05, + "loss": 0.0758, + "step": 21413 + }, + { + "epoch": 58.82967032967033, + "grad_norm": 9.279190063476562, + "learning_rate": 2.0585164835164835e-05, + "loss": 0.1964, + "step": 21414 + }, + { + "epoch": 58.832417582417584, + "grad_norm": 19.021446228027344, + "learning_rate": 2.058379120879121e-05, + "loss": 0.3373, + "step": 21415 + }, + { + "epoch": 58.83516483516483, + "grad_norm": 4.117094993591309, + "learning_rate": 2.0582417582417586e-05, + "loss": 0.0908, + "step": 21416 + }, + { + "epoch": 58.83791208791209, + "grad_norm": 19.854089736938477, + "learning_rate": 2.058104395604396e-05, + "loss": 0.7353, + "step": 21417 + }, + { + "epoch": 58.84065934065934, + "grad_norm": 3.8929312229156494, + "learning_rate": 2.0579670329670332e-05, + "loss": 0.0572, + "step": 21418 + }, + { + "epoch": 58.84340659340659, + "grad_norm": 12.108183860778809, + "learning_rate": 2.0578296703296702e-05, + "loss": 0.2351, + "step": 21419 + }, + { + "epoch": 58.84615384615385, + "grad_norm": 12.212427139282227, + "learning_rate": 2.0576923076923076e-05, + "loss": 0.2752, + "step": 21420 + }, + { + "epoch": 58.8489010989011, + "grad_norm": 20.871803283691406, + "learning_rate": 2.0575549450549453e-05, + "loss": 0.4529, + "step": 21421 + }, + { + "epoch": 58.85164835164835, + "grad_norm": 20.14139175415039, + "learning_rate": 2.0574175824175826e-05, + "loss": 0.4933, + "step": 21422 + }, + { + "epoch": 58.854395604395606, + "grad_norm": 4.434492588043213, + "learning_rate": 2.05728021978022e-05, + "loss": 0.086, + "step": 21423 + }, + { + "epoch": 58.857142857142854, + "grad_norm": 5.521429061889648, + "learning_rate": 2.0571428571428573e-05, + "loss": 0.0697, + "step": 21424 + }, + { + "epoch": 58.85989010989011, + "grad_norm": 6.109313488006592, + "learning_rate": 2.0570054945054946e-05, + "loss": 0.1253, + "step": 21425 + }, + { + "epoch": 58.862637362637365, + "grad_norm": 19.33819580078125, + "learning_rate": 2.056868131868132e-05, + "loss": 0.2508, + "step": 21426 + }, + { + "epoch": 58.86538461538461, + "grad_norm": 7.940450668334961, + "learning_rate": 2.0567307692307693e-05, + "loss": 0.272, + "step": 21427 + }, + { + "epoch": 58.86813186813187, + "grad_norm": 4.339803695678711, + "learning_rate": 2.0565934065934067e-05, + "loss": 0.0507, + "step": 21428 + }, + { + "epoch": 58.870879120879124, + "grad_norm": 9.287092208862305, + "learning_rate": 2.056456043956044e-05, + "loss": 0.109, + "step": 21429 + }, + { + "epoch": 58.87362637362637, + "grad_norm": 8.882369041442871, + "learning_rate": 2.0563186813186813e-05, + "loss": 0.1788, + "step": 21430 + }, + { + "epoch": 58.87637362637363, + "grad_norm": 13.219277381896973, + "learning_rate": 2.056181318681319e-05, + "loss": 0.4261, + "step": 21431 + }, + { + "epoch": 58.879120879120876, + "grad_norm": 4.071151256561279, + "learning_rate": 2.0560439560439564e-05, + "loss": 0.1175, + "step": 21432 + }, + { + "epoch": 58.88186813186813, + "grad_norm": 16.771167755126953, + "learning_rate": 2.0559065934065937e-05, + "loss": 0.2314, + "step": 21433 + }, + { + "epoch": 58.88461538461539, + "grad_norm": 14.976417541503906, + "learning_rate": 2.0557692307692307e-05, + "loss": 0.4751, + "step": 21434 + }, + { + "epoch": 58.887362637362635, + "grad_norm": 6.406018257141113, + "learning_rate": 2.055631868131868e-05, + "loss": 0.0919, + "step": 21435 + }, + { + "epoch": 58.89010989010989, + "grad_norm": 3.3759775161743164, + "learning_rate": 2.0554945054945057e-05, + "loss": 0.0515, + "step": 21436 + }, + { + "epoch": 58.892857142857146, + "grad_norm": 12.174962043762207, + "learning_rate": 2.055357142857143e-05, + "loss": 0.2329, + "step": 21437 + }, + { + "epoch": 58.895604395604394, + "grad_norm": 21.1652889251709, + "learning_rate": 2.0552197802197804e-05, + "loss": 0.3731, + "step": 21438 + }, + { + "epoch": 58.89835164835165, + "grad_norm": 14.108150482177734, + "learning_rate": 2.0550824175824177e-05, + "loss": 0.2076, + "step": 21439 + }, + { + "epoch": 58.9010989010989, + "grad_norm": 5.165742874145508, + "learning_rate": 2.054945054945055e-05, + "loss": 0.0838, + "step": 21440 + }, + { + "epoch": 58.90384615384615, + "grad_norm": 3.968127965927124, + "learning_rate": 2.0548076923076924e-05, + "loss": 0.057, + "step": 21441 + }, + { + "epoch": 58.90659340659341, + "grad_norm": 14.104372024536133, + "learning_rate": 2.0546703296703298e-05, + "loss": 0.306, + "step": 21442 + }, + { + "epoch": 58.90934065934066, + "grad_norm": 6.7492995262146, + "learning_rate": 2.054532967032967e-05, + "loss": 0.209, + "step": 21443 + }, + { + "epoch": 58.91208791208791, + "grad_norm": 4.463000774383545, + "learning_rate": 2.0543956043956044e-05, + "loss": 0.0536, + "step": 21444 + }, + { + "epoch": 58.91483516483517, + "grad_norm": 22.73777961730957, + "learning_rate": 2.0542582417582418e-05, + "loss": 0.6483, + "step": 21445 + }, + { + "epoch": 58.917582417582416, + "grad_norm": 18.06932258605957, + "learning_rate": 2.054120879120879e-05, + "loss": 0.515, + "step": 21446 + }, + { + "epoch": 58.92032967032967, + "grad_norm": 12.271615982055664, + "learning_rate": 2.0539835164835168e-05, + "loss": 0.2398, + "step": 21447 + }, + { + "epoch": 58.92307692307692, + "grad_norm": 8.15874195098877, + "learning_rate": 2.0538461538461538e-05, + "loss": 0.1579, + "step": 21448 + }, + { + "epoch": 58.925824175824175, + "grad_norm": 10.041383743286133, + "learning_rate": 2.053708791208791e-05, + "loss": 0.139, + "step": 21449 + }, + { + "epoch": 58.92857142857143, + "grad_norm": 9.53443717956543, + "learning_rate": 2.0535714285714285e-05, + "loss": 0.1724, + "step": 21450 + }, + { + "epoch": 58.93131868131868, + "grad_norm": 4.457927703857422, + "learning_rate": 2.0534340659340658e-05, + "loss": 0.0875, + "step": 21451 + }, + { + "epoch": 58.934065934065934, + "grad_norm": 14.669306755065918, + "learning_rate": 2.0532967032967035e-05, + "loss": 0.2685, + "step": 21452 + }, + { + "epoch": 58.93681318681319, + "grad_norm": 10.367839813232422, + "learning_rate": 2.053159340659341e-05, + "loss": 0.165, + "step": 21453 + }, + { + "epoch": 58.93956043956044, + "grad_norm": 4.542897701263428, + "learning_rate": 2.0530219780219782e-05, + "loss": 0.0454, + "step": 21454 + }, + { + "epoch": 58.94230769230769, + "grad_norm": 13.102754592895508, + "learning_rate": 2.0528846153846155e-05, + "loss": 0.1767, + "step": 21455 + }, + { + "epoch": 58.94505494505494, + "grad_norm": 11.779674530029297, + "learning_rate": 2.052747252747253e-05, + "loss": 0.2211, + "step": 21456 + }, + { + "epoch": 58.9478021978022, + "grad_norm": 2.475322723388672, + "learning_rate": 2.0526098901098902e-05, + "loss": 0.042, + "step": 21457 + }, + { + "epoch": 58.95054945054945, + "grad_norm": 4.574143886566162, + "learning_rate": 2.0524725274725276e-05, + "loss": 0.0938, + "step": 21458 + }, + { + "epoch": 58.9532967032967, + "grad_norm": 9.92853832244873, + "learning_rate": 2.052335164835165e-05, + "loss": 0.1557, + "step": 21459 + }, + { + "epoch": 58.956043956043956, + "grad_norm": 10.039204597473145, + "learning_rate": 2.0521978021978022e-05, + "loss": 0.3155, + "step": 21460 + }, + { + "epoch": 58.95879120879121, + "grad_norm": 3.7265353202819824, + "learning_rate": 2.0520604395604396e-05, + "loss": 0.0586, + "step": 21461 + }, + { + "epoch": 58.96153846153846, + "grad_norm": 6.141598224639893, + "learning_rate": 2.0519230769230773e-05, + "loss": 0.0552, + "step": 21462 + }, + { + "epoch": 58.964285714285715, + "grad_norm": 2.3199751377105713, + "learning_rate": 2.0517857142857143e-05, + "loss": 0.046, + "step": 21463 + }, + { + "epoch": 58.967032967032964, + "grad_norm": 3.609276294708252, + "learning_rate": 2.0516483516483516e-05, + "loss": 0.0413, + "step": 21464 + }, + { + "epoch": 58.96978021978022, + "grad_norm": 7.015844345092773, + "learning_rate": 2.051510989010989e-05, + "loss": 0.2158, + "step": 21465 + }, + { + "epoch": 58.972527472527474, + "grad_norm": 13.560925483703613, + "learning_rate": 2.0513736263736263e-05, + "loss": 0.1492, + "step": 21466 + }, + { + "epoch": 58.97527472527472, + "grad_norm": 12.761678695678711, + "learning_rate": 2.051236263736264e-05, + "loss": 0.2295, + "step": 21467 + }, + { + "epoch": 58.97802197802198, + "grad_norm": 19.071664810180664, + "learning_rate": 2.0510989010989013e-05, + "loss": 0.4077, + "step": 21468 + }, + { + "epoch": 58.98076923076923, + "grad_norm": 5.501649379730225, + "learning_rate": 2.0509615384615386e-05, + "loss": 0.1088, + "step": 21469 + }, + { + "epoch": 58.98351648351648, + "grad_norm": 14.077009201049805, + "learning_rate": 2.050824175824176e-05, + "loss": 0.2386, + "step": 21470 + }, + { + "epoch": 58.98626373626374, + "grad_norm": 12.689218521118164, + "learning_rate": 2.0506868131868133e-05, + "loss": 0.3111, + "step": 21471 + }, + { + "epoch": 58.98901098901099, + "grad_norm": 14.820146560668945, + "learning_rate": 2.0505494505494507e-05, + "loss": 0.4522, + "step": 21472 + }, + { + "epoch": 58.99175824175824, + "grad_norm": 10.249616622924805, + "learning_rate": 2.050412087912088e-05, + "loss": 0.1571, + "step": 21473 + }, + { + "epoch": 58.994505494505496, + "grad_norm": 12.74707317352295, + "learning_rate": 2.0502747252747253e-05, + "loss": 0.2393, + "step": 21474 + }, + { + "epoch": 58.997252747252745, + "grad_norm": 5.300707817077637, + "learning_rate": 2.0501373626373627e-05, + "loss": 0.0974, + "step": 21475 + }, + { + "epoch": 59.0, + "grad_norm": 7.094137191772461, + "learning_rate": 2.05e-05, + "loss": 0.0679, + "step": 21476 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.7851239669421488, + "eval_f1": 0.7816241158625562, + "eval_f1_DuraRiadoRio_64x64": 0.7171314741035857, + "eval_f1_Mole_64x64": 0.7586206896551724, + "eval_f1_Quebrado_64x64": 0.8770764119601329, + "eval_f1_RiadoRio_64x64": 0.7610062893081762, + "eval_f1_RioFechado_64x64": 0.7942857142857143, + "eval_loss": 1.081311583518982, + "eval_precision": 0.8158141439914255, + "eval_precision_DuraRiadoRio_64x64": 0.8411214953271028, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.8407643312101911, + "eval_precision_RiadoRio_64x64": 0.7289156626506024, + "eval_precision_RioFechado_64x64": 0.6682692307692307, + "eval_recall": 0.785540729758669, + "eval_recall_DuraRiadoRio_64x64": 0.625, + "eval_recall_Mole_64x64": 0.6111111111111112, + "eval_recall_Quebrado_64x64": 0.9166666666666666, + "eval_recall_RiadoRio_64x64": 0.7960526315789473, + "eval_recall_RioFechado_64x64": 0.9788732394366197, + "eval_runtime": 1.7299, + "eval_samples_per_second": 419.675, + "eval_steps_per_second": 26.591, + "step": 21476 + }, + { + "epoch": 59.002747252747255, + "grad_norm": 15.738375663757324, + "learning_rate": 2.0498626373626377e-05, + "loss": 0.324, + "step": 21477 + }, + { + "epoch": 59.005494505494504, + "grad_norm": 10.996216773986816, + "learning_rate": 2.0497252747252747e-05, + "loss": 0.1761, + "step": 21478 + }, + { + "epoch": 59.00824175824176, + "grad_norm": 18.852949142456055, + "learning_rate": 2.049587912087912e-05, + "loss": 0.3998, + "step": 21479 + }, + { + "epoch": 59.010989010989015, + "grad_norm": 22.38568878173828, + "learning_rate": 2.0494505494505494e-05, + "loss": 0.7293, + "step": 21480 + }, + { + "epoch": 59.01373626373626, + "grad_norm": 10.208854675292969, + "learning_rate": 2.0493131868131867e-05, + "loss": 0.2711, + "step": 21481 + }, + { + "epoch": 59.01648351648352, + "grad_norm": 11.707367897033691, + "learning_rate": 2.0491758241758244e-05, + "loss": 0.2904, + "step": 21482 + }, + { + "epoch": 59.01923076923077, + "grad_norm": 10.168910026550293, + "learning_rate": 2.0490384615384617e-05, + "loss": 0.1166, + "step": 21483 + }, + { + "epoch": 59.02197802197802, + "grad_norm": 4.651200771331787, + "learning_rate": 2.048901098901099e-05, + "loss": 0.106, + "step": 21484 + }, + { + "epoch": 59.02472527472528, + "grad_norm": 10.649874687194824, + "learning_rate": 2.0487637362637364e-05, + "loss": 0.1579, + "step": 21485 + }, + { + "epoch": 59.027472527472526, + "grad_norm": 11.594301223754883, + "learning_rate": 2.0486263736263738e-05, + "loss": 0.21, + "step": 21486 + }, + { + "epoch": 59.03021978021978, + "grad_norm": 23.40061378479004, + "learning_rate": 2.048489010989011e-05, + "loss": 0.6286, + "step": 21487 + }, + { + "epoch": 59.032967032967036, + "grad_norm": 12.339024543762207, + "learning_rate": 2.0483516483516485e-05, + "loss": 0.3416, + "step": 21488 + }, + { + "epoch": 59.035714285714285, + "grad_norm": 12.648755073547363, + "learning_rate": 2.0482142857142858e-05, + "loss": 0.3776, + "step": 21489 + }, + { + "epoch": 59.03846153846154, + "grad_norm": 11.80351448059082, + "learning_rate": 2.048076923076923e-05, + "loss": 0.2758, + "step": 21490 + }, + { + "epoch": 59.04120879120879, + "grad_norm": 11.741539001464844, + "learning_rate": 2.0479395604395605e-05, + "loss": 0.2319, + "step": 21491 + }, + { + "epoch": 59.043956043956044, + "grad_norm": 10.040200233459473, + "learning_rate": 2.047802197802198e-05, + "loss": 0.1441, + "step": 21492 + }, + { + "epoch": 59.0467032967033, + "grad_norm": 18.0456485748291, + "learning_rate": 2.047664835164835e-05, + "loss": 0.4182, + "step": 21493 + }, + { + "epoch": 59.04945054945055, + "grad_norm": 4.170539379119873, + "learning_rate": 2.0475274725274725e-05, + "loss": 0.0506, + "step": 21494 + }, + { + "epoch": 59.0521978021978, + "grad_norm": 15.768462181091309, + "learning_rate": 2.04739010989011e-05, + "loss": 0.2761, + "step": 21495 + }, + { + "epoch": 59.05494505494506, + "grad_norm": 7.602990627288818, + "learning_rate": 2.0472527472527472e-05, + "loss": 0.1311, + "step": 21496 + }, + { + "epoch": 59.05769230769231, + "grad_norm": 0.5505585670471191, + "learning_rate": 2.047115384615385e-05, + "loss": 0.0077, + "step": 21497 + }, + { + "epoch": 59.06043956043956, + "grad_norm": 8.413654327392578, + "learning_rate": 2.0469780219780222e-05, + "loss": 0.1901, + "step": 21498 + }, + { + "epoch": 59.06318681318681, + "grad_norm": 17.578384399414062, + "learning_rate": 2.0468406593406595e-05, + "loss": 0.2903, + "step": 21499 + }, + { + "epoch": 59.065934065934066, + "grad_norm": 7.898906230926514, + "learning_rate": 2.046703296703297e-05, + "loss": 0.1227, + "step": 21500 + }, + { + "epoch": 59.06868131868132, + "grad_norm": 12.441621780395508, + "learning_rate": 2.0465659340659342e-05, + "loss": 0.1915, + "step": 21501 + }, + { + "epoch": 59.07142857142857, + "grad_norm": 5.36986780166626, + "learning_rate": 2.0464285714285716e-05, + "loss": 0.1279, + "step": 21502 + }, + { + "epoch": 59.074175824175825, + "grad_norm": 15.339407920837402, + "learning_rate": 2.046291208791209e-05, + "loss": 0.2443, + "step": 21503 + }, + { + "epoch": 59.07692307692308, + "grad_norm": 9.587250709533691, + "learning_rate": 2.0461538461538462e-05, + "loss": 0.2089, + "step": 21504 + }, + { + "epoch": 59.07967032967033, + "grad_norm": 10.518709182739258, + "learning_rate": 2.0460164835164836e-05, + "loss": 0.2072, + "step": 21505 + }, + { + "epoch": 59.082417582417584, + "grad_norm": 1.1826468706130981, + "learning_rate": 2.045879120879121e-05, + "loss": 0.0177, + "step": 21506 + }, + { + "epoch": 59.08516483516483, + "grad_norm": 11.27830696105957, + "learning_rate": 2.0457417582417586e-05, + "loss": 0.2736, + "step": 21507 + }, + { + "epoch": 59.08791208791209, + "grad_norm": 13.14531135559082, + "learning_rate": 2.0456043956043956e-05, + "loss": 0.2592, + "step": 21508 + }, + { + "epoch": 59.09065934065934, + "grad_norm": 13.130794525146484, + "learning_rate": 2.045467032967033e-05, + "loss": 0.2135, + "step": 21509 + }, + { + "epoch": 59.09340659340659, + "grad_norm": 14.151323318481445, + "learning_rate": 2.0453296703296703e-05, + "loss": 0.2247, + "step": 21510 + }, + { + "epoch": 59.09615384615385, + "grad_norm": 2.2415778636932373, + "learning_rate": 2.0451923076923076e-05, + "loss": 0.0292, + "step": 21511 + }, + { + "epoch": 59.0989010989011, + "grad_norm": 8.005145072937012, + "learning_rate": 2.0450549450549453e-05, + "loss": 0.2539, + "step": 21512 + }, + { + "epoch": 59.10164835164835, + "grad_norm": 10.778271675109863, + "learning_rate": 2.0449175824175826e-05, + "loss": 0.1888, + "step": 21513 + }, + { + "epoch": 59.104395604395606, + "grad_norm": 17.810657501220703, + "learning_rate": 2.04478021978022e-05, + "loss": 0.3235, + "step": 21514 + }, + { + "epoch": 59.107142857142854, + "grad_norm": 10.546182632446289, + "learning_rate": 2.0446428571428573e-05, + "loss": 0.2068, + "step": 21515 + }, + { + "epoch": 59.10989010989011, + "grad_norm": 11.747955322265625, + "learning_rate": 2.0445054945054947e-05, + "loss": 0.1469, + "step": 21516 + }, + { + "epoch": 59.112637362637365, + "grad_norm": 18.305410385131836, + "learning_rate": 2.044368131868132e-05, + "loss": 0.4021, + "step": 21517 + }, + { + "epoch": 59.11538461538461, + "grad_norm": 20.417896270751953, + "learning_rate": 2.0442307692307693e-05, + "loss": 0.5948, + "step": 21518 + }, + { + "epoch": 59.11813186813187, + "grad_norm": 6.262170791625977, + "learning_rate": 2.0440934065934067e-05, + "loss": 0.0607, + "step": 21519 + }, + { + "epoch": 59.120879120879124, + "grad_norm": 9.779120445251465, + "learning_rate": 2.043956043956044e-05, + "loss": 0.1678, + "step": 21520 + }, + { + "epoch": 59.12362637362637, + "grad_norm": 4.405633926391602, + "learning_rate": 2.0438186813186814e-05, + "loss": 0.0414, + "step": 21521 + }, + { + "epoch": 59.12637362637363, + "grad_norm": 10.886536598205566, + "learning_rate": 2.043681318681319e-05, + "loss": 0.1194, + "step": 21522 + }, + { + "epoch": 59.129120879120876, + "grad_norm": 6.6032609939575195, + "learning_rate": 2.043543956043956e-05, + "loss": 0.0817, + "step": 21523 + }, + { + "epoch": 59.13186813186813, + "grad_norm": 8.028709411621094, + "learning_rate": 2.0434065934065934e-05, + "loss": 0.1335, + "step": 21524 + }, + { + "epoch": 59.13461538461539, + "grad_norm": 7.5217766761779785, + "learning_rate": 2.0432692307692307e-05, + "loss": 0.1308, + "step": 21525 + }, + { + "epoch": 59.137362637362635, + "grad_norm": 7.67032527923584, + "learning_rate": 2.043131868131868e-05, + "loss": 0.1616, + "step": 21526 + }, + { + "epoch": 59.14010989010989, + "grad_norm": 8.013815879821777, + "learning_rate": 2.0429945054945058e-05, + "loss": 0.1633, + "step": 21527 + }, + { + "epoch": 59.142857142857146, + "grad_norm": 17.1997127532959, + "learning_rate": 2.042857142857143e-05, + "loss": 0.5644, + "step": 21528 + }, + { + "epoch": 59.145604395604394, + "grad_norm": 15.394261360168457, + "learning_rate": 2.0427197802197804e-05, + "loss": 0.229, + "step": 21529 + }, + { + "epoch": 59.14835164835165, + "grad_norm": 5.852084159851074, + "learning_rate": 2.0425824175824178e-05, + "loss": 0.0742, + "step": 21530 + }, + { + "epoch": 59.1510989010989, + "grad_norm": 19.0524845123291, + "learning_rate": 2.042445054945055e-05, + "loss": 0.6811, + "step": 21531 + }, + { + "epoch": 59.15384615384615, + "grad_norm": 28.651561737060547, + "learning_rate": 2.0423076923076925e-05, + "loss": 0.7337, + "step": 21532 + }, + { + "epoch": 59.15659340659341, + "grad_norm": 12.305831909179688, + "learning_rate": 2.0421703296703298e-05, + "loss": 0.3401, + "step": 21533 + }, + { + "epoch": 59.15934065934066, + "grad_norm": 2.686462640762329, + "learning_rate": 2.042032967032967e-05, + "loss": 0.0478, + "step": 21534 + }, + { + "epoch": 59.16208791208791, + "grad_norm": 5.688736438751221, + "learning_rate": 2.0418956043956045e-05, + "loss": 0.0878, + "step": 21535 + }, + { + "epoch": 59.16483516483517, + "grad_norm": 11.037099838256836, + "learning_rate": 2.0417582417582418e-05, + "loss": 0.1881, + "step": 21536 + }, + { + "epoch": 59.167582417582416, + "grad_norm": 6.379478931427002, + "learning_rate": 2.0416208791208795e-05, + "loss": 0.1187, + "step": 21537 + }, + { + "epoch": 59.17032967032967, + "grad_norm": 19.371028900146484, + "learning_rate": 2.0414835164835165e-05, + "loss": 0.2512, + "step": 21538 + }, + { + "epoch": 59.17307692307692, + "grad_norm": 2.1355295181274414, + "learning_rate": 2.041346153846154e-05, + "loss": 0.0301, + "step": 21539 + }, + { + "epoch": 59.175824175824175, + "grad_norm": 11.430048942565918, + "learning_rate": 2.0412087912087912e-05, + "loss": 0.1423, + "step": 21540 + }, + { + "epoch": 59.17857142857143, + "grad_norm": 12.9293212890625, + "learning_rate": 2.0410714285714285e-05, + "loss": 0.3315, + "step": 21541 + }, + { + "epoch": 59.18131868131868, + "grad_norm": 17.790781021118164, + "learning_rate": 2.0409340659340662e-05, + "loss": 0.4147, + "step": 21542 + }, + { + "epoch": 59.184065934065934, + "grad_norm": 6.62874174118042, + "learning_rate": 2.0407967032967035e-05, + "loss": 0.0866, + "step": 21543 + }, + { + "epoch": 59.18681318681319, + "grad_norm": 11.404012680053711, + "learning_rate": 2.040659340659341e-05, + "loss": 0.2656, + "step": 21544 + }, + { + "epoch": 59.18956043956044, + "grad_norm": 22.352449417114258, + "learning_rate": 2.0405219780219782e-05, + "loss": 0.3917, + "step": 21545 + }, + { + "epoch": 59.19230769230769, + "grad_norm": 17.94247817993164, + "learning_rate": 2.0403846153846152e-05, + "loss": 0.5085, + "step": 21546 + }, + { + "epoch": 59.19505494505494, + "grad_norm": 18.256811141967773, + "learning_rate": 2.040247252747253e-05, + "loss": 0.4941, + "step": 21547 + }, + { + "epoch": 59.1978021978022, + "grad_norm": 13.072890281677246, + "learning_rate": 2.0401098901098902e-05, + "loss": 0.2373, + "step": 21548 + }, + { + "epoch": 59.20054945054945, + "grad_norm": 9.031661987304688, + "learning_rate": 2.0399725274725276e-05, + "loss": 0.101, + "step": 21549 + }, + { + "epoch": 59.2032967032967, + "grad_norm": 8.544188499450684, + "learning_rate": 2.039835164835165e-05, + "loss": 0.1128, + "step": 21550 + }, + { + "epoch": 59.206043956043956, + "grad_norm": 9.53030014038086, + "learning_rate": 2.0396978021978023e-05, + "loss": 0.2003, + "step": 21551 + }, + { + "epoch": 59.20879120879121, + "grad_norm": 10.533980369567871, + "learning_rate": 2.03956043956044e-05, + "loss": 0.3193, + "step": 21552 + }, + { + "epoch": 59.21153846153846, + "grad_norm": 7.769029140472412, + "learning_rate": 2.039423076923077e-05, + "loss": 0.1085, + "step": 21553 + }, + { + "epoch": 59.214285714285715, + "grad_norm": 3.9623231887817383, + "learning_rate": 2.0392857142857143e-05, + "loss": 0.0513, + "step": 21554 + }, + { + "epoch": 59.217032967032964, + "grad_norm": 15.06103801727295, + "learning_rate": 2.0391483516483516e-05, + "loss": 0.3715, + "step": 21555 + }, + { + "epoch": 59.21978021978022, + "grad_norm": 18.609952926635742, + "learning_rate": 2.039010989010989e-05, + "loss": 0.2407, + "step": 21556 + }, + { + "epoch": 59.222527472527474, + "grad_norm": 13.857340812683105, + "learning_rate": 2.0388736263736263e-05, + "loss": 0.1792, + "step": 21557 + }, + { + "epoch": 59.22527472527472, + "grad_norm": 14.106467247009277, + "learning_rate": 2.038736263736264e-05, + "loss": 0.3777, + "step": 21558 + }, + { + "epoch": 59.22802197802198, + "grad_norm": 16.07704734802246, + "learning_rate": 2.0385989010989013e-05, + "loss": 0.3817, + "step": 21559 + }, + { + "epoch": 59.23076923076923, + "grad_norm": 20.92858123779297, + "learning_rate": 2.0384615384615387e-05, + "loss": 0.3205, + "step": 21560 + }, + { + "epoch": 59.23351648351648, + "grad_norm": 12.999093055725098, + "learning_rate": 2.0383241758241757e-05, + "loss": 0.2484, + "step": 21561 + }, + { + "epoch": 59.23626373626374, + "grad_norm": 14.232329368591309, + "learning_rate": 2.038186813186813e-05, + "loss": 0.312, + "step": 21562 + }, + { + "epoch": 59.239010989010985, + "grad_norm": 6.4913105964660645, + "learning_rate": 2.0380494505494507e-05, + "loss": 0.0635, + "step": 21563 + }, + { + "epoch": 59.24175824175824, + "grad_norm": 19.36560821533203, + "learning_rate": 2.037912087912088e-05, + "loss": 0.4342, + "step": 21564 + }, + { + "epoch": 59.244505494505496, + "grad_norm": 4.7765703201293945, + "learning_rate": 2.0377747252747254e-05, + "loss": 0.0951, + "step": 21565 + }, + { + "epoch": 59.247252747252745, + "grad_norm": 16.29290771484375, + "learning_rate": 2.0376373626373627e-05, + "loss": 0.417, + "step": 21566 + }, + { + "epoch": 59.25, + "grad_norm": 20.01163101196289, + "learning_rate": 2.0375e-05, + "loss": 0.5038, + "step": 21567 + }, + { + "epoch": 59.252747252747255, + "grad_norm": 10.375545501708984, + "learning_rate": 2.0373626373626374e-05, + "loss": 0.3116, + "step": 21568 + }, + { + "epoch": 59.255494505494504, + "grad_norm": 3.032630205154419, + "learning_rate": 2.0372252747252747e-05, + "loss": 0.0318, + "step": 21569 + }, + { + "epoch": 59.25824175824176, + "grad_norm": 11.941313743591309, + "learning_rate": 2.037087912087912e-05, + "loss": 0.1753, + "step": 21570 + }, + { + "epoch": 59.260989010989015, + "grad_norm": 3.7075724601745605, + "learning_rate": 2.0369505494505494e-05, + "loss": 0.0386, + "step": 21571 + }, + { + "epoch": 59.26373626373626, + "grad_norm": 4.6552653312683105, + "learning_rate": 2.0368131868131868e-05, + "loss": 0.1166, + "step": 21572 + }, + { + "epoch": 59.26648351648352, + "grad_norm": 8.97111701965332, + "learning_rate": 2.0366758241758244e-05, + "loss": 0.1438, + "step": 21573 + }, + { + "epoch": 59.26923076923077, + "grad_norm": 10.453875541687012, + "learning_rate": 2.0365384615384618e-05, + "loss": 0.2273, + "step": 21574 + }, + { + "epoch": 59.27197802197802, + "grad_norm": 2.6061012744903564, + "learning_rate": 2.036401098901099e-05, + "loss": 0.0395, + "step": 21575 + }, + { + "epoch": 59.27472527472528, + "grad_norm": 4.001844882965088, + "learning_rate": 2.036263736263736e-05, + "loss": 0.0461, + "step": 21576 + }, + { + "epoch": 59.277472527472526, + "grad_norm": 6.249282360076904, + "learning_rate": 2.0361263736263735e-05, + "loss": 0.092, + "step": 21577 + }, + { + "epoch": 59.28021978021978, + "grad_norm": 10.513294219970703, + "learning_rate": 2.035989010989011e-05, + "loss": 0.2454, + "step": 21578 + }, + { + "epoch": 59.282967032967036, + "grad_norm": 17.336841583251953, + "learning_rate": 2.0358516483516485e-05, + "loss": 0.2519, + "step": 21579 + }, + { + "epoch": 59.285714285714285, + "grad_norm": 13.18377685546875, + "learning_rate": 2.0357142857142858e-05, + "loss": 0.1101, + "step": 21580 + }, + { + "epoch": 59.28846153846154, + "grad_norm": 7.188912868499756, + "learning_rate": 2.035576923076923e-05, + "loss": 0.2445, + "step": 21581 + }, + { + "epoch": 59.29120879120879, + "grad_norm": 13.303630828857422, + "learning_rate": 2.0354395604395605e-05, + "loss": 0.4457, + "step": 21582 + }, + { + "epoch": 59.293956043956044, + "grad_norm": 4.846879482269287, + "learning_rate": 2.035302197802198e-05, + "loss": 0.0439, + "step": 21583 + }, + { + "epoch": 59.2967032967033, + "grad_norm": 13.25375747680664, + "learning_rate": 2.0351648351648352e-05, + "loss": 0.206, + "step": 21584 + }, + { + "epoch": 59.29945054945055, + "grad_norm": 19.571697235107422, + "learning_rate": 2.0350274725274725e-05, + "loss": 0.5196, + "step": 21585 + }, + { + "epoch": 59.3021978021978, + "grad_norm": 10.7979736328125, + "learning_rate": 2.03489010989011e-05, + "loss": 0.2627, + "step": 21586 + }, + { + "epoch": 59.30494505494506, + "grad_norm": 15.86847972869873, + "learning_rate": 2.0347527472527472e-05, + "loss": 0.262, + "step": 21587 + }, + { + "epoch": 59.30769230769231, + "grad_norm": 24.238771438598633, + "learning_rate": 2.034615384615385e-05, + "loss": 0.5031, + "step": 21588 + }, + { + "epoch": 59.31043956043956, + "grad_norm": 6.5815749168396, + "learning_rate": 2.0344780219780222e-05, + "loss": 0.0858, + "step": 21589 + }, + { + "epoch": 59.31318681318681, + "grad_norm": 18.881729125976562, + "learning_rate": 2.0343406593406596e-05, + "loss": 0.3211, + "step": 21590 + }, + { + "epoch": 59.315934065934066, + "grad_norm": 6.502415657043457, + "learning_rate": 2.0342032967032966e-05, + "loss": 0.1063, + "step": 21591 + }, + { + "epoch": 59.31868131868132, + "grad_norm": 2.687856674194336, + "learning_rate": 2.034065934065934e-05, + "loss": 0.0283, + "step": 21592 + }, + { + "epoch": 59.32142857142857, + "grad_norm": 15.549924850463867, + "learning_rate": 2.0339285714285716e-05, + "loss": 0.186, + "step": 21593 + }, + { + "epoch": 59.324175824175825, + "grad_norm": 6.5137739181518555, + "learning_rate": 2.033791208791209e-05, + "loss": 0.1203, + "step": 21594 + }, + { + "epoch": 59.32692307692308, + "grad_norm": 22.634563446044922, + "learning_rate": 2.0336538461538463e-05, + "loss": 0.7914, + "step": 21595 + }, + { + "epoch": 59.32967032967033, + "grad_norm": 18.41825294494629, + "learning_rate": 2.0335164835164836e-05, + "loss": 0.4704, + "step": 21596 + }, + { + "epoch": 59.332417582417584, + "grad_norm": 6.221706390380859, + "learning_rate": 2.033379120879121e-05, + "loss": 0.046, + "step": 21597 + }, + { + "epoch": 59.33516483516483, + "grad_norm": 11.855399131774902, + "learning_rate": 2.0332417582417583e-05, + "loss": 0.3185, + "step": 21598 + }, + { + "epoch": 59.33791208791209, + "grad_norm": 16.045215606689453, + "learning_rate": 2.0331043956043956e-05, + "loss": 0.3155, + "step": 21599 + }, + { + "epoch": 59.34065934065934, + "grad_norm": 9.531379699707031, + "learning_rate": 2.032967032967033e-05, + "loss": 0.2282, + "step": 21600 + }, + { + "epoch": 59.34340659340659, + "grad_norm": 10.214629173278809, + "learning_rate": 2.0328296703296703e-05, + "loss": 0.1391, + "step": 21601 + }, + { + "epoch": 59.34615384615385, + "grad_norm": 8.61712646484375, + "learning_rate": 2.0326923076923077e-05, + "loss": 0.0985, + "step": 21602 + }, + { + "epoch": 59.3489010989011, + "grad_norm": 4.6360764503479, + "learning_rate": 2.0325549450549453e-05, + "loss": 0.0489, + "step": 21603 + }, + { + "epoch": 59.35164835164835, + "grad_norm": 9.744495391845703, + "learning_rate": 2.0324175824175827e-05, + "loss": 0.1542, + "step": 21604 + }, + { + "epoch": 59.354395604395606, + "grad_norm": 8.361111640930176, + "learning_rate": 2.03228021978022e-05, + "loss": 0.1015, + "step": 21605 + }, + { + "epoch": 59.357142857142854, + "grad_norm": 11.953744888305664, + "learning_rate": 2.032142857142857e-05, + "loss": 0.1414, + "step": 21606 + }, + { + "epoch": 59.35989010989011, + "grad_norm": 13.617921829223633, + "learning_rate": 2.0320054945054944e-05, + "loss": 0.4967, + "step": 21607 + }, + { + "epoch": 59.362637362637365, + "grad_norm": 5.825453281402588, + "learning_rate": 2.031868131868132e-05, + "loss": 0.0469, + "step": 21608 + }, + { + "epoch": 59.36538461538461, + "grad_norm": 7.062349796295166, + "learning_rate": 2.0317307692307694e-05, + "loss": 0.1012, + "step": 21609 + }, + { + "epoch": 59.36813186813187, + "grad_norm": 18.189083099365234, + "learning_rate": 2.0315934065934067e-05, + "loss": 0.2087, + "step": 21610 + }, + { + "epoch": 59.370879120879124, + "grad_norm": 15.682208061218262, + "learning_rate": 2.031456043956044e-05, + "loss": 0.3639, + "step": 21611 + }, + { + "epoch": 59.37362637362637, + "grad_norm": 9.510835647583008, + "learning_rate": 2.0313186813186814e-05, + "loss": 0.2156, + "step": 21612 + }, + { + "epoch": 59.37637362637363, + "grad_norm": 0.2810552716255188, + "learning_rate": 2.0311813186813187e-05, + "loss": 0.0039, + "step": 21613 + }, + { + "epoch": 59.379120879120876, + "grad_norm": 3.773369073867798, + "learning_rate": 2.031043956043956e-05, + "loss": 0.0704, + "step": 21614 + }, + { + "epoch": 59.38186813186813, + "grad_norm": 12.833639144897461, + "learning_rate": 2.0309065934065934e-05, + "loss": 0.2721, + "step": 21615 + }, + { + "epoch": 59.38461538461539, + "grad_norm": 19.08150863647461, + "learning_rate": 2.0307692307692308e-05, + "loss": 0.2537, + "step": 21616 + }, + { + "epoch": 59.387362637362635, + "grad_norm": 17.19712257385254, + "learning_rate": 2.030631868131868e-05, + "loss": 0.4383, + "step": 21617 + }, + { + "epoch": 59.39010989010989, + "grad_norm": 12.367850303649902, + "learning_rate": 2.0304945054945058e-05, + "loss": 0.4258, + "step": 21618 + }, + { + "epoch": 59.392857142857146, + "grad_norm": 10.152281761169434, + "learning_rate": 2.030357142857143e-05, + "loss": 0.1904, + "step": 21619 + }, + { + "epoch": 59.395604395604394, + "grad_norm": 20.716629028320312, + "learning_rate": 2.0302197802197805e-05, + "loss": 0.5535, + "step": 21620 + }, + { + "epoch": 59.39835164835165, + "grad_norm": 19.220365524291992, + "learning_rate": 2.0300824175824175e-05, + "loss": 0.4788, + "step": 21621 + }, + { + "epoch": 59.4010989010989, + "grad_norm": 15.207443237304688, + "learning_rate": 2.0299450549450548e-05, + "loss": 0.4348, + "step": 21622 + }, + { + "epoch": 59.40384615384615, + "grad_norm": 21.797142028808594, + "learning_rate": 2.0298076923076925e-05, + "loss": 0.464, + "step": 21623 + }, + { + "epoch": 59.40659340659341, + "grad_norm": 11.156858444213867, + "learning_rate": 2.02967032967033e-05, + "loss": 0.1609, + "step": 21624 + }, + { + "epoch": 59.40934065934066, + "grad_norm": 8.77011775970459, + "learning_rate": 2.0295329670329672e-05, + "loss": 0.1248, + "step": 21625 + }, + { + "epoch": 59.41208791208791, + "grad_norm": 18.82411003112793, + "learning_rate": 2.0293956043956045e-05, + "loss": 0.5124, + "step": 21626 + }, + { + "epoch": 59.41483516483517, + "grad_norm": 9.744023323059082, + "learning_rate": 2.029258241758242e-05, + "loss": 0.2599, + "step": 21627 + }, + { + "epoch": 59.417582417582416, + "grad_norm": 9.543394088745117, + "learning_rate": 2.0291208791208792e-05, + "loss": 0.1251, + "step": 21628 + }, + { + "epoch": 59.42032967032967, + "grad_norm": 6.602817058563232, + "learning_rate": 2.0289835164835165e-05, + "loss": 0.0877, + "step": 21629 + }, + { + "epoch": 59.42307692307692, + "grad_norm": 14.296331405639648, + "learning_rate": 2.028846153846154e-05, + "loss": 0.4207, + "step": 21630 + }, + { + "epoch": 59.425824175824175, + "grad_norm": 14.313568115234375, + "learning_rate": 2.0287087912087912e-05, + "loss": 0.4851, + "step": 21631 + }, + { + "epoch": 59.42857142857143, + "grad_norm": 20.421592712402344, + "learning_rate": 2.0285714285714286e-05, + "loss": 0.6569, + "step": 21632 + }, + { + "epoch": 59.43131868131868, + "grad_norm": 14.963813781738281, + "learning_rate": 2.0284340659340662e-05, + "loss": 0.114, + "step": 21633 + }, + { + "epoch": 59.434065934065934, + "grad_norm": 13.944025993347168, + "learning_rate": 2.0282967032967036e-05, + "loss": 0.2469, + "step": 21634 + }, + { + "epoch": 59.43681318681319, + "grad_norm": 15.823151588439941, + "learning_rate": 2.028159340659341e-05, + "loss": 0.293, + "step": 21635 + }, + { + "epoch": 59.43956043956044, + "grad_norm": 3.2841336727142334, + "learning_rate": 2.028021978021978e-05, + "loss": 0.0325, + "step": 21636 + }, + { + "epoch": 59.44230769230769, + "grad_norm": 18.70171356201172, + "learning_rate": 2.0278846153846153e-05, + "loss": 0.7503, + "step": 21637 + }, + { + "epoch": 59.44505494505494, + "grad_norm": 9.011922836303711, + "learning_rate": 2.027747252747253e-05, + "loss": 0.1422, + "step": 21638 + }, + { + "epoch": 59.4478021978022, + "grad_norm": 15.04917049407959, + "learning_rate": 2.0276098901098903e-05, + "loss": 0.334, + "step": 21639 + }, + { + "epoch": 59.45054945054945, + "grad_norm": 15.151291847229004, + "learning_rate": 2.0274725274725276e-05, + "loss": 0.548, + "step": 21640 + }, + { + "epoch": 59.4532967032967, + "grad_norm": 18.035913467407227, + "learning_rate": 2.027335164835165e-05, + "loss": 0.2846, + "step": 21641 + }, + { + "epoch": 59.456043956043956, + "grad_norm": 15.78354263305664, + "learning_rate": 2.0271978021978023e-05, + "loss": 0.3083, + "step": 21642 + }, + { + "epoch": 59.45879120879121, + "grad_norm": 18.689138412475586, + "learning_rate": 2.0270604395604396e-05, + "loss": 0.4361, + "step": 21643 + }, + { + "epoch": 59.46153846153846, + "grad_norm": 8.963162422180176, + "learning_rate": 2.026923076923077e-05, + "loss": 0.1647, + "step": 21644 + }, + { + "epoch": 59.464285714285715, + "grad_norm": 13.882670402526855, + "learning_rate": 2.0267857142857143e-05, + "loss": 0.3397, + "step": 21645 + }, + { + "epoch": 59.467032967032964, + "grad_norm": 4.138991832733154, + "learning_rate": 2.0266483516483517e-05, + "loss": 0.0723, + "step": 21646 + }, + { + "epoch": 59.46978021978022, + "grad_norm": 7.823426246643066, + "learning_rate": 2.026510989010989e-05, + "loss": 0.2326, + "step": 21647 + }, + { + "epoch": 59.472527472527474, + "grad_norm": 10.64224910736084, + "learning_rate": 2.0263736263736267e-05, + "loss": 0.121, + "step": 21648 + }, + { + "epoch": 59.47527472527472, + "grad_norm": 6.280958652496338, + "learning_rate": 2.026236263736264e-05, + "loss": 0.1498, + "step": 21649 + }, + { + "epoch": 59.47802197802198, + "grad_norm": 17.473024368286133, + "learning_rate": 2.0260989010989014e-05, + "loss": 0.3595, + "step": 21650 + }, + { + "epoch": 59.48076923076923, + "grad_norm": 8.971490859985352, + "learning_rate": 2.0259615384615384e-05, + "loss": 0.2127, + "step": 21651 + }, + { + "epoch": 59.48351648351648, + "grad_norm": 15.88676643371582, + "learning_rate": 2.0258241758241757e-05, + "loss": 0.2184, + "step": 21652 + }, + { + "epoch": 59.48626373626374, + "grad_norm": 13.619094848632812, + "learning_rate": 2.0256868131868134e-05, + "loss": 0.2915, + "step": 21653 + }, + { + "epoch": 59.489010989010985, + "grad_norm": 8.5482759475708, + "learning_rate": 2.0255494505494507e-05, + "loss": 0.3165, + "step": 21654 + }, + { + "epoch": 59.49175824175824, + "grad_norm": 15.382317543029785, + "learning_rate": 2.025412087912088e-05, + "loss": 0.3358, + "step": 21655 + }, + { + "epoch": 59.494505494505496, + "grad_norm": 24.48931312561035, + "learning_rate": 2.0252747252747254e-05, + "loss": 0.6812, + "step": 21656 + }, + { + "epoch": 59.497252747252745, + "grad_norm": 11.927571296691895, + "learning_rate": 2.0251373626373628e-05, + "loss": 0.1961, + "step": 21657 + }, + { + "epoch": 59.5, + "grad_norm": 16.10375213623047, + "learning_rate": 2.025e-05, + "loss": 0.2568, + "step": 21658 + }, + { + "epoch": 59.502747252747255, + "grad_norm": 18.081756591796875, + "learning_rate": 2.0248626373626374e-05, + "loss": 0.5674, + "step": 21659 + }, + { + "epoch": 59.505494505494504, + "grad_norm": 13.137929916381836, + "learning_rate": 2.0247252747252748e-05, + "loss": 0.2472, + "step": 21660 + }, + { + "epoch": 59.50824175824176, + "grad_norm": 12.289767265319824, + "learning_rate": 2.024587912087912e-05, + "loss": 0.1708, + "step": 21661 + }, + { + "epoch": 59.51098901098901, + "grad_norm": 6.221070766448975, + "learning_rate": 2.0244505494505495e-05, + "loss": 0.0769, + "step": 21662 + }, + { + "epoch": 59.51373626373626, + "grad_norm": 16.840587615966797, + "learning_rate": 2.024313186813187e-05, + "loss": 0.3049, + "step": 21663 + }, + { + "epoch": 59.51648351648352, + "grad_norm": 7.544789791107178, + "learning_rate": 2.0241758241758245e-05, + "loss": 0.1016, + "step": 21664 + }, + { + "epoch": 59.51923076923077, + "grad_norm": 15.555231094360352, + "learning_rate": 2.0240384615384618e-05, + "loss": 0.1782, + "step": 21665 + }, + { + "epoch": 59.52197802197802, + "grad_norm": 12.189732551574707, + "learning_rate": 2.0239010989010988e-05, + "loss": 0.2674, + "step": 21666 + }, + { + "epoch": 59.52472527472528, + "grad_norm": 13.032845497131348, + "learning_rate": 2.023763736263736e-05, + "loss": 0.4721, + "step": 21667 + }, + { + "epoch": 59.527472527472526, + "grad_norm": 14.488029479980469, + "learning_rate": 2.023626373626374e-05, + "loss": 0.3097, + "step": 21668 + }, + { + "epoch": 59.53021978021978, + "grad_norm": 2.2545523643493652, + "learning_rate": 2.0234890109890112e-05, + "loss": 0.0221, + "step": 21669 + }, + { + "epoch": 59.532967032967036, + "grad_norm": 12.018309593200684, + "learning_rate": 2.0233516483516485e-05, + "loss": 0.3697, + "step": 21670 + }, + { + "epoch": 59.535714285714285, + "grad_norm": 10.95734691619873, + "learning_rate": 2.023214285714286e-05, + "loss": 0.1988, + "step": 21671 + }, + { + "epoch": 59.53846153846154, + "grad_norm": 14.107986450195312, + "learning_rate": 2.0230769230769232e-05, + "loss": 0.2495, + "step": 21672 + }, + { + "epoch": 59.54120879120879, + "grad_norm": 5.412396430969238, + "learning_rate": 2.0229395604395605e-05, + "loss": 0.0623, + "step": 21673 + }, + { + "epoch": 59.543956043956044, + "grad_norm": 2.4700441360473633, + "learning_rate": 2.022802197802198e-05, + "loss": 0.0495, + "step": 21674 + }, + { + "epoch": 59.5467032967033, + "grad_norm": 17.967966079711914, + "learning_rate": 2.0226648351648352e-05, + "loss": 0.3082, + "step": 21675 + }, + { + "epoch": 59.54945054945055, + "grad_norm": 5.494881629943848, + "learning_rate": 2.0225274725274726e-05, + "loss": 0.0642, + "step": 21676 + }, + { + "epoch": 59.5521978021978, + "grad_norm": 9.791885375976562, + "learning_rate": 2.02239010989011e-05, + "loss": 0.0721, + "step": 21677 + }, + { + "epoch": 59.55494505494506, + "grad_norm": 19.26564598083496, + "learning_rate": 2.0222527472527472e-05, + "loss": 0.3336, + "step": 21678 + }, + { + "epoch": 59.55769230769231, + "grad_norm": 20.760845184326172, + "learning_rate": 2.022115384615385e-05, + "loss": 0.544, + "step": 21679 + }, + { + "epoch": 59.56043956043956, + "grad_norm": 13.694604873657227, + "learning_rate": 2.0219780219780223e-05, + "loss": 0.3188, + "step": 21680 + }, + { + "epoch": 59.56318681318681, + "grad_norm": 20.138044357299805, + "learning_rate": 2.0218406593406593e-05, + "loss": 0.4285, + "step": 21681 + }, + { + "epoch": 59.565934065934066, + "grad_norm": 10.144362449645996, + "learning_rate": 2.0217032967032966e-05, + "loss": 0.1338, + "step": 21682 + }, + { + "epoch": 59.56868131868132, + "grad_norm": 14.036561012268066, + "learning_rate": 2.021565934065934e-05, + "loss": 0.3019, + "step": 21683 + }, + { + "epoch": 59.57142857142857, + "grad_norm": 13.849453926086426, + "learning_rate": 2.0214285714285716e-05, + "loss": 0.2027, + "step": 21684 + }, + { + "epoch": 59.574175824175825, + "grad_norm": 9.419318199157715, + "learning_rate": 2.021291208791209e-05, + "loss": 0.3596, + "step": 21685 + }, + { + "epoch": 59.57692307692308, + "grad_norm": 5.269662857055664, + "learning_rate": 2.0211538461538463e-05, + "loss": 0.0734, + "step": 21686 + }, + { + "epoch": 59.57967032967033, + "grad_norm": 29.267229080200195, + "learning_rate": 2.0210164835164837e-05, + "loss": 0.6091, + "step": 21687 + }, + { + "epoch": 59.582417582417584, + "grad_norm": 7.704533576965332, + "learning_rate": 2.020879120879121e-05, + "loss": 0.0984, + "step": 21688 + }, + { + "epoch": 59.58516483516483, + "grad_norm": 8.821009635925293, + "learning_rate": 2.0207417582417583e-05, + "loss": 0.1839, + "step": 21689 + }, + { + "epoch": 59.58791208791209, + "grad_norm": 15.048928260803223, + "learning_rate": 2.0206043956043957e-05, + "loss": 0.4612, + "step": 21690 + }, + { + "epoch": 59.59065934065934, + "grad_norm": 12.604253768920898, + "learning_rate": 2.020467032967033e-05, + "loss": 0.1831, + "step": 21691 + }, + { + "epoch": 59.59340659340659, + "grad_norm": 15.956014633178711, + "learning_rate": 2.0203296703296704e-05, + "loss": 0.1955, + "step": 21692 + }, + { + "epoch": 59.59615384615385, + "grad_norm": 6.791049480438232, + "learning_rate": 2.0201923076923077e-05, + "loss": 0.0816, + "step": 21693 + }, + { + "epoch": 59.5989010989011, + "grad_norm": 12.926817893981934, + "learning_rate": 2.0200549450549454e-05, + "loss": 0.1449, + "step": 21694 + }, + { + "epoch": 59.60164835164835, + "grad_norm": 8.814229011535645, + "learning_rate": 2.0199175824175827e-05, + "loss": 0.2056, + "step": 21695 + }, + { + "epoch": 59.604395604395606, + "grad_norm": 12.816207885742188, + "learning_rate": 2.0197802197802197e-05, + "loss": 0.2242, + "step": 21696 + }, + { + "epoch": 59.607142857142854, + "grad_norm": 2.374598979949951, + "learning_rate": 2.019642857142857e-05, + "loss": 0.0378, + "step": 21697 + }, + { + "epoch": 59.60989010989011, + "grad_norm": 16.664226531982422, + "learning_rate": 2.0195054945054944e-05, + "loss": 0.599, + "step": 21698 + }, + { + "epoch": 59.612637362637365, + "grad_norm": 15.901466369628906, + "learning_rate": 2.019368131868132e-05, + "loss": 0.4448, + "step": 21699 + }, + { + "epoch": 59.61538461538461, + "grad_norm": 7.93505334854126, + "learning_rate": 2.0192307692307694e-05, + "loss": 0.1646, + "step": 21700 + }, + { + "epoch": 59.61813186813187, + "grad_norm": 17.080158233642578, + "learning_rate": 2.0190934065934068e-05, + "loss": 0.4004, + "step": 21701 + }, + { + "epoch": 59.620879120879124, + "grad_norm": 8.072760581970215, + "learning_rate": 2.018956043956044e-05, + "loss": 0.1113, + "step": 21702 + }, + { + "epoch": 59.62362637362637, + "grad_norm": 7.676027297973633, + "learning_rate": 2.0188186813186814e-05, + "loss": 0.1382, + "step": 21703 + }, + { + "epoch": 59.62637362637363, + "grad_norm": 11.06760311126709, + "learning_rate": 2.0186813186813188e-05, + "loss": 0.2157, + "step": 21704 + }, + { + "epoch": 59.629120879120876, + "grad_norm": 13.407228469848633, + "learning_rate": 2.018543956043956e-05, + "loss": 0.2395, + "step": 21705 + }, + { + "epoch": 59.63186813186813, + "grad_norm": 9.3093900680542, + "learning_rate": 2.0184065934065935e-05, + "loss": 0.2966, + "step": 21706 + }, + { + "epoch": 59.63461538461539, + "grad_norm": 5.774056911468506, + "learning_rate": 2.0182692307692308e-05, + "loss": 0.1209, + "step": 21707 + }, + { + "epoch": 59.637362637362635, + "grad_norm": 22.7467041015625, + "learning_rate": 2.018131868131868e-05, + "loss": 0.5981, + "step": 21708 + }, + { + "epoch": 59.64010989010989, + "grad_norm": 13.479350090026855, + "learning_rate": 2.0179945054945058e-05, + "loss": 0.2736, + "step": 21709 + }, + { + "epoch": 59.642857142857146, + "grad_norm": 7.27517557144165, + "learning_rate": 2.017857142857143e-05, + "loss": 0.1848, + "step": 21710 + }, + { + "epoch": 59.645604395604394, + "grad_norm": 3.4495794773101807, + "learning_rate": 2.01771978021978e-05, + "loss": 0.061, + "step": 21711 + }, + { + "epoch": 59.64835164835165, + "grad_norm": 16.13583755493164, + "learning_rate": 2.0175824175824175e-05, + "loss": 0.2124, + "step": 21712 + }, + { + "epoch": 59.6510989010989, + "grad_norm": 17.75443458557129, + "learning_rate": 2.017445054945055e-05, + "loss": 0.1218, + "step": 21713 + }, + { + "epoch": 59.65384615384615, + "grad_norm": 10.80151081085205, + "learning_rate": 2.0173076923076925e-05, + "loss": 0.152, + "step": 21714 + }, + { + "epoch": 59.65659340659341, + "grad_norm": 7.258664131164551, + "learning_rate": 2.01717032967033e-05, + "loss": 0.1437, + "step": 21715 + }, + { + "epoch": 59.65934065934066, + "grad_norm": 9.562827110290527, + "learning_rate": 2.0170329670329672e-05, + "loss": 0.1736, + "step": 21716 + }, + { + "epoch": 59.66208791208791, + "grad_norm": 15.548690795898438, + "learning_rate": 2.0168956043956045e-05, + "loss": 0.2813, + "step": 21717 + }, + { + "epoch": 59.66483516483517, + "grad_norm": 12.76147747039795, + "learning_rate": 2.016758241758242e-05, + "loss": 0.2371, + "step": 21718 + }, + { + "epoch": 59.667582417582416, + "grad_norm": 14.854040145874023, + "learning_rate": 2.0166208791208792e-05, + "loss": 0.5423, + "step": 21719 + }, + { + "epoch": 59.67032967032967, + "grad_norm": 17.15450668334961, + "learning_rate": 2.0164835164835166e-05, + "loss": 0.3233, + "step": 21720 + }, + { + "epoch": 59.67307692307692, + "grad_norm": 22.02532958984375, + "learning_rate": 2.016346153846154e-05, + "loss": 0.137, + "step": 21721 + }, + { + "epoch": 59.675824175824175, + "grad_norm": 14.553024291992188, + "learning_rate": 2.0162087912087913e-05, + "loss": 0.2817, + "step": 21722 + }, + { + "epoch": 59.67857142857143, + "grad_norm": 15.771546363830566, + "learning_rate": 2.0160714285714286e-05, + "loss": 0.2458, + "step": 21723 + }, + { + "epoch": 59.68131868131868, + "grad_norm": 11.900233268737793, + "learning_rate": 2.0159340659340663e-05, + "loss": 0.2965, + "step": 21724 + }, + { + "epoch": 59.684065934065934, + "grad_norm": 4.011966705322266, + "learning_rate": 2.0157967032967036e-05, + "loss": 0.0476, + "step": 21725 + }, + { + "epoch": 59.68681318681319, + "grad_norm": 16.183259963989258, + "learning_rate": 2.0156593406593406e-05, + "loss": 0.2193, + "step": 21726 + }, + { + "epoch": 59.68956043956044, + "grad_norm": 10.804247856140137, + "learning_rate": 2.015521978021978e-05, + "loss": 0.1324, + "step": 21727 + }, + { + "epoch": 59.69230769230769, + "grad_norm": 18.535070419311523, + "learning_rate": 2.0153846153846153e-05, + "loss": 0.6195, + "step": 21728 + }, + { + "epoch": 59.69505494505494, + "grad_norm": 9.112041473388672, + "learning_rate": 2.015247252747253e-05, + "loss": 0.1295, + "step": 21729 + }, + { + "epoch": 59.6978021978022, + "grad_norm": 32.00632095336914, + "learning_rate": 2.0151098901098903e-05, + "loss": 1.4826, + "step": 21730 + }, + { + "epoch": 59.70054945054945, + "grad_norm": 4.135479927062988, + "learning_rate": 2.0149725274725277e-05, + "loss": 0.1137, + "step": 21731 + }, + { + "epoch": 59.7032967032967, + "grad_norm": 12.258999824523926, + "learning_rate": 2.014835164835165e-05, + "loss": 0.1917, + "step": 21732 + }, + { + "epoch": 59.706043956043956, + "grad_norm": 17.460708618164062, + "learning_rate": 2.0146978021978023e-05, + "loss": 0.3984, + "step": 21733 + }, + { + "epoch": 59.70879120879121, + "grad_norm": 17.617942810058594, + "learning_rate": 2.0145604395604397e-05, + "loss": 0.3087, + "step": 21734 + }, + { + "epoch": 59.71153846153846, + "grad_norm": 22.148889541625977, + "learning_rate": 2.014423076923077e-05, + "loss": 0.2783, + "step": 21735 + }, + { + "epoch": 59.714285714285715, + "grad_norm": 13.151070594787598, + "learning_rate": 2.0142857142857144e-05, + "loss": 0.2818, + "step": 21736 + }, + { + "epoch": 59.717032967032964, + "grad_norm": 9.126148223876953, + "learning_rate": 2.0141483516483517e-05, + "loss": 0.1987, + "step": 21737 + }, + { + "epoch": 59.71978021978022, + "grad_norm": 4.4784955978393555, + "learning_rate": 2.014010989010989e-05, + "loss": 0.0617, + "step": 21738 + }, + { + "epoch": 59.722527472527474, + "grad_norm": 17.622478485107422, + "learning_rate": 2.0138736263736267e-05, + "loss": 0.4513, + "step": 21739 + }, + { + "epoch": 59.72527472527472, + "grad_norm": 6.267744541168213, + "learning_rate": 2.013736263736264e-05, + "loss": 0.2154, + "step": 21740 + }, + { + "epoch": 59.72802197802198, + "grad_norm": 5.554006576538086, + "learning_rate": 2.013598901098901e-05, + "loss": 0.0808, + "step": 21741 + }, + { + "epoch": 59.73076923076923, + "grad_norm": 13.11107063293457, + "learning_rate": 2.0134615384615384e-05, + "loss": 0.2376, + "step": 21742 + }, + { + "epoch": 59.73351648351648, + "grad_norm": 32.760128021240234, + "learning_rate": 2.0133241758241757e-05, + "loss": 1.2815, + "step": 21743 + }, + { + "epoch": 59.73626373626374, + "grad_norm": 13.228442192077637, + "learning_rate": 2.0131868131868134e-05, + "loss": 0.3047, + "step": 21744 + }, + { + "epoch": 59.73901098901099, + "grad_norm": 7.238826274871826, + "learning_rate": 2.0130494505494508e-05, + "loss": 0.0928, + "step": 21745 + }, + { + "epoch": 59.74175824175824, + "grad_norm": 19.430381774902344, + "learning_rate": 2.012912087912088e-05, + "loss": 0.4001, + "step": 21746 + }, + { + "epoch": 59.744505494505496, + "grad_norm": 6.438769340515137, + "learning_rate": 2.0127747252747254e-05, + "loss": 0.1121, + "step": 21747 + }, + { + "epoch": 59.747252747252745, + "grad_norm": 7.716424942016602, + "learning_rate": 2.0126373626373628e-05, + "loss": 0.1833, + "step": 21748 + }, + { + "epoch": 59.75, + "grad_norm": 6.249057769775391, + "learning_rate": 2.0125e-05, + "loss": 0.127, + "step": 21749 + }, + { + "epoch": 59.752747252747255, + "grad_norm": 13.260830879211426, + "learning_rate": 2.0123626373626375e-05, + "loss": 0.2176, + "step": 21750 + }, + { + "epoch": 59.755494505494504, + "grad_norm": 1.8805564641952515, + "learning_rate": 2.0122252747252748e-05, + "loss": 0.025, + "step": 21751 + }, + { + "epoch": 59.75824175824176, + "grad_norm": 15.331954002380371, + "learning_rate": 2.012087912087912e-05, + "loss": 0.2545, + "step": 21752 + }, + { + "epoch": 59.76098901098901, + "grad_norm": 13.156906127929688, + "learning_rate": 2.0119505494505495e-05, + "loss": 0.1775, + "step": 21753 + }, + { + "epoch": 59.76373626373626, + "grad_norm": 13.510153770446777, + "learning_rate": 2.0118131868131872e-05, + "loss": 0.2872, + "step": 21754 + }, + { + "epoch": 59.76648351648352, + "grad_norm": 21.374670028686523, + "learning_rate": 2.0116758241758242e-05, + "loss": 0.5339, + "step": 21755 + }, + { + "epoch": 59.76923076923077, + "grad_norm": 19.84139060974121, + "learning_rate": 2.0115384615384615e-05, + "loss": 0.4686, + "step": 21756 + }, + { + "epoch": 59.77197802197802, + "grad_norm": 13.506378173828125, + "learning_rate": 2.011401098901099e-05, + "loss": 0.2735, + "step": 21757 + }, + { + "epoch": 59.77472527472528, + "grad_norm": 18.125267028808594, + "learning_rate": 2.0112637362637362e-05, + "loss": 0.3832, + "step": 21758 + }, + { + "epoch": 59.777472527472526, + "grad_norm": 31.625885009765625, + "learning_rate": 2.011126373626374e-05, + "loss": 0.8988, + "step": 21759 + }, + { + "epoch": 59.78021978021978, + "grad_norm": 14.769328117370605, + "learning_rate": 2.0109890109890112e-05, + "loss": 0.6368, + "step": 21760 + }, + { + "epoch": 59.782967032967036, + "grad_norm": 15.398086547851562, + "learning_rate": 2.0108516483516486e-05, + "loss": 0.2938, + "step": 21761 + }, + { + "epoch": 59.785714285714285, + "grad_norm": 10.357332229614258, + "learning_rate": 2.010714285714286e-05, + "loss": 0.3386, + "step": 21762 + }, + { + "epoch": 59.78846153846154, + "grad_norm": 10.95586109161377, + "learning_rate": 2.0105769230769232e-05, + "loss": 0.1106, + "step": 21763 + }, + { + "epoch": 59.79120879120879, + "grad_norm": 16.315675735473633, + "learning_rate": 2.0104395604395606e-05, + "loss": 0.3511, + "step": 21764 + }, + { + "epoch": 59.793956043956044, + "grad_norm": 3.3325765132904053, + "learning_rate": 2.010302197802198e-05, + "loss": 0.0414, + "step": 21765 + }, + { + "epoch": 59.7967032967033, + "grad_norm": 14.226224899291992, + "learning_rate": 2.0101648351648353e-05, + "loss": 0.1755, + "step": 21766 + }, + { + "epoch": 59.79945054945055, + "grad_norm": 21.302921295166016, + "learning_rate": 2.0100274725274726e-05, + "loss": 0.3598, + "step": 21767 + }, + { + "epoch": 59.8021978021978, + "grad_norm": 21.655242919921875, + "learning_rate": 2.00989010989011e-05, + "loss": 0.5794, + "step": 21768 + }, + { + "epoch": 59.80494505494506, + "grad_norm": 28.923675537109375, + "learning_rate": 2.0097527472527476e-05, + "loss": 1.1619, + "step": 21769 + }, + { + "epoch": 59.80769230769231, + "grad_norm": 14.705945014953613, + "learning_rate": 2.0096153846153846e-05, + "loss": 0.1694, + "step": 21770 + }, + { + "epoch": 59.81043956043956, + "grad_norm": 12.096613883972168, + "learning_rate": 2.009478021978022e-05, + "loss": 0.2227, + "step": 21771 + }, + { + "epoch": 59.81318681318681, + "grad_norm": 14.044828414916992, + "learning_rate": 2.0093406593406593e-05, + "loss": 0.1407, + "step": 21772 + }, + { + "epoch": 59.815934065934066, + "grad_norm": 24.061288833618164, + "learning_rate": 2.0092032967032966e-05, + "loss": 0.8693, + "step": 21773 + }, + { + "epoch": 59.81868131868132, + "grad_norm": 18.397079467773438, + "learning_rate": 2.0090659340659343e-05, + "loss": 0.3046, + "step": 21774 + }, + { + "epoch": 59.82142857142857, + "grad_norm": 12.995329856872559, + "learning_rate": 2.0089285714285717e-05, + "loss": 0.3244, + "step": 21775 + }, + { + "epoch": 59.824175824175825, + "grad_norm": 4.447211265563965, + "learning_rate": 2.008791208791209e-05, + "loss": 0.0678, + "step": 21776 + }, + { + "epoch": 59.82692307692308, + "grad_norm": 15.751133918762207, + "learning_rate": 2.0086538461538463e-05, + "loss": 0.2059, + "step": 21777 + }, + { + "epoch": 59.82967032967033, + "grad_norm": 14.119443893432617, + "learning_rate": 2.0085164835164837e-05, + "loss": 0.2097, + "step": 21778 + }, + { + "epoch": 59.832417582417584, + "grad_norm": 5.592469692230225, + "learning_rate": 2.008379120879121e-05, + "loss": 0.1091, + "step": 21779 + }, + { + "epoch": 59.83516483516483, + "grad_norm": 11.39118766784668, + "learning_rate": 2.0082417582417584e-05, + "loss": 0.0638, + "step": 21780 + }, + { + "epoch": 59.83791208791209, + "grad_norm": 4.621216773986816, + "learning_rate": 2.0081043956043957e-05, + "loss": 0.0838, + "step": 21781 + }, + { + "epoch": 59.84065934065934, + "grad_norm": 12.718903541564941, + "learning_rate": 2.007967032967033e-05, + "loss": 0.3246, + "step": 21782 + }, + { + "epoch": 59.84340659340659, + "grad_norm": 18.26742172241211, + "learning_rate": 2.0078296703296704e-05, + "loss": 0.3552, + "step": 21783 + }, + { + "epoch": 59.84615384615385, + "grad_norm": 9.753961563110352, + "learning_rate": 2.0076923076923077e-05, + "loss": 0.1177, + "step": 21784 + }, + { + "epoch": 59.8489010989011, + "grad_norm": 13.295743942260742, + "learning_rate": 2.007554945054945e-05, + "loss": 0.2026, + "step": 21785 + }, + { + "epoch": 59.85164835164835, + "grad_norm": 17.333749771118164, + "learning_rate": 2.0074175824175824e-05, + "loss": 0.1683, + "step": 21786 + }, + { + "epoch": 59.854395604395606, + "grad_norm": 22.31633758544922, + "learning_rate": 2.0072802197802198e-05, + "loss": 0.7971, + "step": 21787 + }, + { + "epoch": 59.857142857142854, + "grad_norm": 9.532875061035156, + "learning_rate": 2.007142857142857e-05, + "loss": 0.1406, + "step": 21788 + }, + { + "epoch": 59.85989010989011, + "grad_norm": 23.010953903198242, + "learning_rate": 2.0070054945054944e-05, + "loss": 0.3769, + "step": 21789 + }, + { + "epoch": 59.862637362637365, + "grad_norm": 26.158187866210938, + "learning_rate": 2.006868131868132e-05, + "loss": 0.5802, + "step": 21790 + }, + { + "epoch": 59.86538461538461, + "grad_norm": 10.057615280151367, + "learning_rate": 2.0067307692307695e-05, + "loss": 0.2903, + "step": 21791 + }, + { + "epoch": 59.86813186813187, + "grad_norm": 14.461583137512207, + "learning_rate": 2.0065934065934068e-05, + "loss": 0.2559, + "step": 21792 + }, + { + "epoch": 59.870879120879124, + "grad_norm": 24.026227951049805, + "learning_rate": 2.006456043956044e-05, + "loss": 0.4926, + "step": 21793 + }, + { + "epoch": 59.87362637362637, + "grad_norm": 8.790390014648438, + "learning_rate": 2.006318681318681e-05, + "loss": 0.2177, + "step": 21794 + }, + { + "epoch": 59.87637362637363, + "grad_norm": 9.64874267578125, + "learning_rate": 2.0061813186813188e-05, + "loss": 0.1501, + "step": 21795 + }, + { + "epoch": 59.879120879120876, + "grad_norm": 13.282488822937012, + "learning_rate": 2.006043956043956e-05, + "loss": 0.2737, + "step": 21796 + }, + { + "epoch": 59.88186813186813, + "grad_norm": 12.821049690246582, + "learning_rate": 2.0059065934065935e-05, + "loss": 0.4726, + "step": 21797 + }, + { + "epoch": 59.88461538461539, + "grad_norm": 8.584378242492676, + "learning_rate": 2.005769230769231e-05, + "loss": 0.1424, + "step": 21798 + }, + { + "epoch": 59.887362637362635, + "grad_norm": 1.754661202430725, + "learning_rate": 2.0056318681318682e-05, + "loss": 0.0363, + "step": 21799 + }, + { + "epoch": 59.89010989010989, + "grad_norm": 11.138476371765137, + "learning_rate": 2.0054945054945055e-05, + "loss": 0.2891, + "step": 21800 + }, + { + "epoch": 59.892857142857146, + "grad_norm": 10.688258171081543, + "learning_rate": 2.005357142857143e-05, + "loss": 0.2235, + "step": 21801 + }, + { + "epoch": 59.895604395604394, + "grad_norm": 2.4818294048309326, + "learning_rate": 2.0052197802197802e-05, + "loss": 0.0299, + "step": 21802 + }, + { + "epoch": 59.89835164835165, + "grad_norm": 20.52383804321289, + "learning_rate": 2.0050824175824175e-05, + "loss": 0.5703, + "step": 21803 + }, + { + "epoch": 59.9010989010989, + "grad_norm": 1.8943850994110107, + "learning_rate": 2.004945054945055e-05, + "loss": 0.0338, + "step": 21804 + }, + { + "epoch": 59.90384615384615, + "grad_norm": 19.704347610473633, + "learning_rate": 2.0048076923076926e-05, + "loss": 0.5323, + "step": 21805 + }, + { + "epoch": 59.90659340659341, + "grad_norm": 14.114662170410156, + "learning_rate": 2.00467032967033e-05, + "loss": 0.3996, + "step": 21806 + }, + { + "epoch": 59.90934065934066, + "grad_norm": 11.403802871704102, + "learning_rate": 2.0045329670329672e-05, + "loss": 0.1741, + "step": 21807 + }, + { + "epoch": 59.91208791208791, + "grad_norm": 5.912238597869873, + "learning_rate": 2.0043956043956046e-05, + "loss": 0.0904, + "step": 21808 + }, + { + "epoch": 59.91483516483517, + "grad_norm": 16.757877349853516, + "learning_rate": 2.0042582417582416e-05, + "loss": 0.5139, + "step": 21809 + }, + { + "epoch": 59.917582417582416, + "grad_norm": 11.359589576721191, + "learning_rate": 2.0041208791208793e-05, + "loss": 0.2072, + "step": 21810 + }, + { + "epoch": 59.92032967032967, + "grad_norm": 15.740439414978027, + "learning_rate": 2.0039835164835166e-05, + "loss": 0.2537, + "step": 21811 + }, + { + "epoch": 59.92307692307692, + "grad_norm": 11.386009216308594, + "learning_rate": 2.003846153846154e-05, + "loss": 0.3154, + "step": 21812 + }, + { + "epoch": 59.925824175824175, + "grad_norm": 9.57036304473877, + "learning_rate": 2.0037087912087913e-05, + "loss": 0.286, + "step": 21813 + }, + { + "epoch": 59.92857142857143, + "grad_norm": 20.37274169921875, + "learning_rate": 2.0035714285714286e-05, + "loss": 0.4341, + "step": 21814 + }, + { + "epoch": 59.93131868131868, + "grad_norm": 14.280996322631836, + "learning_rate": 2.003434065934066e-05, + "loss": 0.3704, + "step": 21815 + }, + { + "epoch": 59.934065934065934, + "grad_norm": 12.156667709350586, + "learning_rate": 2.0032967032967033e-05, + "loss": 0.2643, + "step": 21816 + }, + { + "epoch": 59.93681318681319, + "grad_norm": 7.974757194519043, + "learning_rate": 2.0031593406593407e-05, + "loss": 0.1802, + "step": 21817 + }, + { + "epoch": 59.93956043956044, + "grad_norm": 12.493242263793945, + "learning_rate": 2.003021978021978e-05, + "loss": 0.4146, + "step": 21818 + }, + { + "epoch": 59.94230769230769, + "grad_norm": 9.957784652709961, + "learning_rate": 2.0028846153846153e-05, + "loss": 0.0868, + "step": 21819 + }, + { + "epoch": 59.94505494505494, + "grad_norm": 16.985572814941406, + "learning_rate": 2.002747252747253e-05, + "loss": 0.2642, + "step": 21820 + }, + { + "epoch": 59.9478021978022, + "grad_norm": 12.866437911987305, + "learning_rate": 2.0026098901098904e-05, + "loss": 0.1852, + "step": 21821 + }, + { + "epoch": 59.95054945054945, + "grad_norm": 16.00041961669922, + "learning_rate": 2.0024725274725277e-05, + "loss": 0.6763, + "step": 21822 + }, + { + "epoch": 59.9532967032967, + "grad_norm": 4.33563232421875, + "learning_rate": 2.002335164835165e-05, + "loss": 0.08, + "step": 21823 + }, + { + "epoch": 59.956043956043956, + "grad_norm": 5.970132827758789, + "learning_rate": 2.002197802197802e-05, + "loss": 0.0675, + "step": 21824 + }, + { + "epoch": 59.95879120879121, + "grad_norm": 15.324410438537598, + "learning_rate": 2.0020604395604397e-05, + "loss": 0.2174, + "step": 21825 + }, + { + "epoch": 59.96153846153846, + "grad_norm": 11.334992408752441, + "learning_rate": 2.001923076923077e-05, + "loss": 0.3186, + "step": 21826 + }, + { + "epoch": 59.964285714285715, + "grad_norm": 9.164765357971191, + "learning_rate": 2.0017857142857144e-05, + "loss": 0.1067, + "step": 21827 + }, + { + "epoch": 59.967032967032964, + "grad_norm": 9.038000106811523, + "learning_rate": 2.0016483516483517e-05, + "loss": 0.136, + "step": 21828 + }, + { + "epoch": 59.96978021978022, + "grad_norm": 20.08153533935547, + "learning_rate": 2.001510989010989e-05, + "loss": 0.4813, + "step": 21829 + }, + { + "epoch": 59.972527472527474, + "grad_norm": 20.214845657348633, + "learning_rate": 2.0013736263736264e-05, + "loss": 0.3745, + "step": 21830 + }, + { + "epoch": 59.97527472527472, + "grad_norm": 14.89795970916748, + "learning_rate": 2.0012362637362638e-05, + "loss": 0.4301, + "step": 21831 + }, + { + "epoch": 59.97802197802198, + "grad_norm": 19.951330184936523, + "learning_rate": 2.001098901098901e-05, + "loss": 0.4404, + "step": 21832 + }, + { + "epoch": 59.98076923076923, + "grad_norm": 15.587076187133789, + "learning_rate": 2.0009615384615384e-05, + "loss": 0.2214, + "step": 21833 + }, + { + "epoch": 59.98351648351648, + "grad_norm": 10.275033950805664, + "learning_rate": 2.0008241758241758e-05, + "loss": 0.1285, + "step": 21834 + }, + { + "epoch": 59.98626373626374, + "grad_norm": 5.692975997924805, + "learning_rate": 2.0006868131868135e-05, + "loss": 0.0623, + "step": 21835 + }, + { + "epoch": 59.98901098901099, + "grad_norm": 23.363401412963867, + "learning_rate": 2.0005494505494508e-05, + "loss": 0.7363, + "step": 21836 + }, + { + "epoch": 59.99175824175824, + "grad_norm": 19.598094940185547, + "learning_rate": 2.000412087912088e-05, + "loss": 0.5519, + "step": 21837 + }, + { + "epoch": 59.994505494505496, + "grad_norm": 14.375810623168945, + "learning_rate": 2.0002747252747255e-05, + "loss": 0.4598, + "step": 21838 + }, + { + "epoch": 59.997252747252745, + "grad_norm": 18.26340103149414, + "learning_rate": 2.0001373626373625e-05, + "loss": 0.5603, + "step": 21839 + }, + { + "epoch": 60.0, + "grad_norm": 49.83018493652344, + "learning_rate": 2e-05, + "loss": 2.4259, + "step": 21840 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.5909090909090909, + "eval_f1": 0.5688269884799559, + "eval_f1_DuraRiadoRio_64x64": 0.6763005780346821, + "eval_f1_Mole_64x64": 0.7888446215139442, + "eval_f1_Quebrado_64x64": 0.5352112676056338, + "eval_f1_RiadoRio_64x64": 0.5546218487394958, + "eval_f1_RioFechado_64x64": 0.2891566265060241, + "eval_loss": 2.694662094116211, + "eval_precision": 0.7475871859162078, + "eval_precision_DuraRiadoRio_64x64": 0.5792079207920792, + "eval_precision_Mole_64x64": 0.9252336448598131, + "eval_precision_Quebrado_64x64": 0.8260869565217391, + "eval_precision_RiadoRio_64x64": 0.4074074074074074, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.5866536940943909, + "eval_recall_DuraRiadoRio_64x64": 0.8125, + "eval_recall_Mole_64x64": 0.6875, + "eval_recall_Quebrado_64x64": 0.3958333333333333, + "eval_recall_RiadoRio_64x64": 0.868421052631579, + "eval_recall_RioFechado_64x64": 0.16901408450704225, + "eval_runtime": 1.7822, + "eval_samples_per_second": 407.354, + "eval_steps_per_second": 25.81, + "step": 21840 + }, + { + "epoch": 60.002747252747255, + "grad_norm": 19.746553421020508, + "learning_rate": 1.9998626373626375e-05, + "loss": 0.6748, + "step": 21841 + }, + { + "epoch": 60.005494505494504, + "grad_norm": 10.2206392288208, + "learning_rate": 1.999725274725275e-05, + "loss": 0.1905, + "step": 21842 + }, + { + "epoch": 60.00824175824176, + "grad_norm": 18.081918716430664, + "learning_rate": 1.9995879120879122e-05, + "loss": 0.15, + "step": 21843 + }, + { + "epoch": 60.010989010989015, + "grad_norm": 18.154273986816406, + "learning_rate": 1.9994505494505495e-05, + "loss": 0.3583, + "step": 21844 + }, + { + "epoch": 60.01373626373626, + "grad_norm": 11.637118339538574, + "learning_rate": 1.999313186813187e-05, + "loss": 0.3378, + "step": 21845 + }, + { + "epoch": 60.01648351648352, + "grad_norm": 8.205549240112305, + "learning_rate": 1.9991758241758242e-05, + "loss": 0.1375, + "step": 21846 + }, + { + "epoch": 60.01923076923077, + "grad_norm": 14.288604736328125, + "learning_rate": 1.9990384615384615e-05, + "loss": 0.5239, + "step": 21847 + }, + { + "epoch": 60.02197802197802, + "grad_norm": 11.194257736206055, + "learning_rate": 1.998901098901099e-05, + "loss": 0.2061, + "step": 21848 + }, + { + "epoch": 60.02472527472528, + "grad_norm": 14.272889137268066, + "learning_rate": 1.9987637362637362e-05, + "loss": 0.2242, + "step": 21849 + }, + { + "epoch": 60.027472527472526, + "grad_norm": 8.924832344055176, + "learning_rate": 1.998626373626374e-05, + "loss": 0.1315, + "step": 21850 + }, + { + "epoch": 60.03021978021978, + "grad_norm": 16.892375946044922, + "learning_rate": 1.9984890109890112e-05, + "loss": 0.2332, + "step": 21851 + }, + { + "epoch": 60.032967032967036, + "grad_norm": 7.592212677001953, + "learning_rate": 1.9983516483516486e-05, + "loss": 0.1418, + "step": 21852 + }, + { + "epoch": 60.035714285714285, + "grad_norm": 19.420059204101562, + "learning_rate": 1.9982142857142856e-05, + "loss": 0.4722, + "step": 21853 + }, + { + "epoch": 60.03846153846154, + "grad_norm": 16.384702682495117, + "learning_rate": 1.998076923076923e-05, + "loss": 0.2874, + "step": 21854 + }, + { + "epoch": 60.04120879120879, + "grad_norm": 8.442194938659668, + "learning_rate": 1.9979395604395606e-05, + "loss": 0.0723, + "step": 21855 + }, + { + "epoch": 60.043956043956044, + "grad_norm": 1.1320934295654297, + "learning_rate": 1.997802197802198e-05, + "loss": 0.0123, + "step": 21856 + }, + { + "epoch": 60.0467032967033, + "grad_norm": 19.258230209350586, + "learning_rate": 1.9976648351648353e-05, + "loss": 0.3942, + "step": 21857 + }, + { + "epoch": 60.04945054945055, + "grad_norm": 5.252962112426758, + "learning_rate": 1.9975274725274726e-05, + "loss": 0.1066, + "step": 21858 + }, + { + "epoch": 60.0521978021978, + "grad_norm": 6.22085428237915, + "learning_rate": 1.99739010989011e-05, + "loss": 0.105, + "step": 21859 + }, + { + "epoch": 60.05494505494506, + "grad_norm": 4.618988990783691, + "learning_rate": 1.9972527472527473e-05, + "loss": 0.0611, + "step": 21860 + }, + { + "epoch": 60.05769230769231, + "grad_norm": 4.957571506500244, + "learning_rate": 1.9971153846153847e-05, + "loss": 0.0519, + "step": 21861 + }, + { + "epoch": 60.06043956043956, + "grad_norm": 15.391865730285645, + "learning_rate": 1.996978021978022e-05, + "loss": 0.2097, + "step": 21862 + }, + { + "epoch": 60.06318681318681, + "grad_norm": 8.686738967895508, + "learning_rate": 1.9968406593406593e-05, + "loss": 0.2872, + "step": 21863 + }, + { + "epoch": 60.065934065934066, + "grad_norm": 12.548203468322754, + "learning_rate": 1.9967032967032967e-05, + "loss": 0.364, + "step": 21864 + }, + { + "epoch": 60.06868131868132, + "grad_norm": 7.171314716339111, + "learning_rate": 1.9965659340659344e-05, + "loss": 0.17, + "step": 21865 + }, + { + "epoch": 60.07142857142857, + "grad_norm": 7.657748699188232, + "learning_rate": 1.9964285714285717e-05, + "loss": 0.1378, + "step": 21866 + }, + { + "epoch": 60.074175824175825, + "grad_norm": 19.112016677856445, + "learning_rate": 1.996291208791209e-05, + "loss": 0.4908, + "step": 21867 + }, + { + "epoch": 60.07692307692308, + "grad_norm": 9.890776634216309, + "learning_rate": 1.996153846153846e-05, + "loss": 0.2922, + "step": 21868 + }, + { + "epoch": 60.07967032967033, + "grad_norm": 7.214828014373779, + "learning_rate": 1.9960164835164834e-05, + "loss": 0.1594, + "step": 21869 + }, + { + "epoch": 60.082417582417584, + "grad_norm": 29.528650283813477, + "learning_rate": 1.995879120879121e-05, + "loss": 0.3906, + "step": 21870 + }, + { + "epoch": 60.08516483516483, + "grad_norm": 21.503585815429688, + "learning_rate": 1.9957417582417584e-05, + "loss": 0.814, + "step": 21871 + }, + { + "epoch": 60.08791208791209, + "grad_norm": 0.3610627055168152, + "learning_rate": 1.9956043956043957e-05, + "loss": 0.0062, + "step": 21872 + }, + { + "epoch": 60.09065934065934, + "grad_norm": 3.98641300201416, + "learning_rate": 1.995467032967033e-05, + "loss": 0.0755, + "step": 21873 + }, + { + "epoch": 60.09340659340659, + "grad_norm": 14.987716674804688, + "learning_rate": 1.9953296703296704e-05, + "loss": 0.3284, + "step": 21874 + }, + { + "epoch": 60.09615384615385, + "grad_norm": 16.667203903198242, + "learning_rate": 1.9951923076923078e-05, + "loss": 0.6693, + "step": 21875 + }, + { + "epoch": 60.0989010989011, + "grad_norm": 12.141571044921875, + "learning_rate": 1.995054945054945e-05, + "loss": 0.315, + "step": 21876 + }, + { + "epoch": 60.10164835164835, + "grad_norm": 13.96474838256836, + "learning_rate": 1.9949175824175824e-05, + "loss": 0.3817, + "step": 21877 + }, + { + "epoch": 60.104395604395606, + "grad_norm": 6.740602016448975, + "learning_rate": 1.9947802197802198e-05, + "loss": 0.1026, + "step": 21878 + }, + { + "epoch": 60.107142857142854, + "grad_norm": 12.314827919006348, + "learning_rate": 1.994642857142857e-05, + "loss": 0.1386, + "step": 21879 + }, + { + "epoch": 60.10989010989011, + "grad_norm": 16.658039093017578, + "learning_rate": 1.9945054945054948e-05, + "loss": 0.2987, + "step": 21880 + }, + { + "epoch": 60.112637362637365, + "grad_norm": 4.042672634124756, + "learning_rate": 1.994368131868132e-05, + "loss": 0.0719, + "step": 21881 + }, + { + "epoch": 60.11538461538461, + "grad_norm": 16.71011734008789, + "learning_rate": 1.9942307692307695e-05, + "loss": 0.3479, + "step": 21882 + }, + { + "epoch": 60.11813186813187, + "grad_norm": 9.936200141906738, + "learning_rate": 1.9940934065934065e-05, + "loss": 0.2072, + "step": 21883 + }, + { + "epoch": 60.120879120879124, + "grad_norm": 5.359809875488281, + "learning_rate": 1.993956043956044e-05, + "loss": 0.0816, + "step": 21884 + }, + { + "epoch": 60.12362637362637, + "grad_norm": 12.06900405883789, + "learning_rate": 1.9938186813186815e-05, + "loss": 0.3052, + "step": 21885 + }, + { + "epoch": 60.12637362637363, + "grad_norm": 4.252014636993408, + "learning_rate": 1.993681318681319e-05, + "loss": 0.0515, + "step": 21886 + }, + { + "epoch": 60.129120879120876, + "grad_norm": 8.140823364257812, + "learning_rate": 1.9935439560439562e-05, + "loss": 0.1935, + "step": 21887 + }, + { + "epoch": 60.13186813186813, + "grad_norm": 7.392173767089844, + "learning_rate": 1.9934065934065935e-05, + "loss": 0.1802, + "step": 21888 + }, + { + "epoch": 60.13461538461539, + "grad_norm": 3.246976375579834, + "learning_rate": 1.993269230769231e-05, + "loss": 0.0418, + "step": 21889 + }, + { + "epoch": 60.137362637362635, + "grad_norm": 11.994317054748535, + "learning_rate": 1.9931318681318682e-05, + "loss": 0.192, + "step": 21890 + }, + { + "epoch": 60.14010989010989, + "grad_norm": 15.82447624206543, + "learning_rate": 1.9929945054945056e-05, + "loss": 0.3799, + "step": 21891 + }, + { + "epoch": 60.142857142857146, + "grad_norm": 11.274170875549316, + "learning_rate": 1.992857142857143e-05, + "loss": 0.2525, + "step": 21892 + }, + { + "epoch": 60.145604395604394, + "grad_norm": 11.611043930053711, + "learning_rate": 1.9927197802197802e-05, + "loss": 0.2233, + "step": 21893 + }, + { + "epoch": 60.14835164835165, + "grad_norm": 27.433698654174805, + "learning_rate": 1.9925824175824176e-05, + "loss": 0.5611, + "step": 21894 + }, + { + "epoch": 60.1510989010989, + "grad_norm": 15.82377815246582, + "learning_rate": 1.9924450549450553e-05, + "loss": 0.4766, + "step": 21895 + }, + { + "epoch": 60.15384615384615, + "grad_norm": 11.71097183227539, + "learning_rate": 1.9923076923076926e-05, + "loss": 0.3245, + "step": 21896 + }, + { + "epoch": 60.15659340659341, + "grad_norm": 17.246219635009766, + "learning_rate": 1.99217032967033e-05, + "loss": 0.3353, + "step": 21897 + }, + { + "epoch": 60.15934065934066, + "grad_norm": 3.7734110355377197, + "learning_rate": 1.992032967032967e-05, + "loss": 0.0829, + "step": 21898 + }, + { + "epoch": 60.16208791208791, + "grad_norm": 5.178074359893799, + "learning_rate": 1.9918956043956043e-05, + "loss": 0.0985, + "step": 21899 + }, + { + "epoch": 60.16483516483517, + "grad_norm": 14.01072883605957, + "learning_rate": 1.9917582417582416e-05, + "loss": 0.1381, + "step": 21900 + }, + { + "epoch": 60.167582417582416, + "grad_norm": 2.4216208457946777, + "learning_rate": 1.9916208791208793e-05, + "loss": 0.0538, + "step": 21901 + }, + { + "epoch": 60.17032967032967, + "grad_norm": 13.273320198059082, + "learning_rate": 1.9914835164835166e-05, + "loss": 0.1114, + "step": 21902 + }, + { + "epoch": 60.17307692307692, + "grad_norm": 11.54942512512207, + "learning_rate": 1.991346153846154e-05, + "loss": 0.3275, + "step": 21903 + }, + { + "epoch": 60.175824175824175, + "grad_norm": 11.333735466003418, + "learning_rate": 1.9912087912087913e-05, + "loss": 0.1347, + "step": 21904 + }, + { + "epoch": 60.17857142857143, + "grad_norm": 9.506349563598633, + "learning_rate": 1.9910714285714287e-05, + "loss": 0.2222, + "step": 21905 + }, + { + "epoch": 60.18131868131868, + "grad_norm": 15.978431701660156, + "learning_rate": 1.990934065934066e-05, + "loss": 0.5154, + "step": 21906 + }, + { + "epoch": 60.184065934065934, + "grad_norm": 15.234979629516602, + "learning_rate": 1.9907967032967033e-05, + "loss": 0.3087, + "step": 21907 + }, + { + "epoch": 60.18681318681319, + "grad_norm": 5.664842128753662, + "learning_rate": 1.9906593406593407e-05, + "loss": 0.0825, + "step": 21908 + }, + { + "epoch": 60.18956043956044, + "grad_norm": 13.370207786560059, + "learning_rate": 1.990521978021978e-05, + "loss": 0.2813, + "step": 21909 + }, + { + "epoch": 60.19230769230769, + "grad_norm": 10.514751434326172, + "learning_rate": 1.9903846153846154e-05, + "loss": 0.2902, + "step": 21910 + }, + { + "epoch": 60.19505494505494, + "grad_norm": 16.710233688354492, + "learning_rate": 1.990247252747253e-05, + "loss": 0.1887, + "step": 21911 + }, + { + "epoch": 60.1978021978022, + "grad_norm": 23.65813636779785, + "learning_rate": 1.9901098901098904e-05, + "loss": 0.9076, + "step": 21912 + }, + { + "epoch": 60.20054945054945, + "grad_norm": 3.1525118350982666, + "learning_rate": 1.9899725274725274e-05, + "loss": 0.0485, + "step": 21913 + }, + { + "epoch": 60.2032967032967, + "grad_norm": 3.142760992050171, + "learning_rate": 1.9898351648351647e-05, + "loss": 0.0502, + "step": 21914 + }, + { + "epoch": 60.206043956043956, + "grad_norm": 18.596487045288086, + "learning_rate": 1.989697802197802e-05, + "loss": 0.3531, + "step": 21915 + }, + { + "epoch": 60.20879120879121, + "grad_norm": 18.379392623901367, + "learning_rate": 1.9895604395604397e-05, + "loss": 0.3053, + "step": 21916 + }, + { + "epoch": 60.21153846153846, + "grad_norm": 7.766711235046387, + "learning_rate": 1.989423076923077e-05, + "loss": 0.1054, + "step": 21917 + }, + { + "epoch": 60.214285714285715, + "grad_norm": 17.935312271118164, + "learning_rate": 1.9892857142857144e-05, + "loss": 0.4055, + "step": 21918 + }, + { + "epoch": 60.217032967032964, + "grad_norm": 4.355184078216553, + "learning_rate": 1.9891483516483518e-05, + "loss": 0.0629, + "step": 21919 + }, + { + "epoch": 60.21978021978022, + "grad_norm": 5.990107536315918, + "learning_rate": 1.989010989010989e-05, + "loss": 0.1001, + "step": 21920 + }, + { + "epoch": 60.222527472527474, + "grad_norm": 10.996933937072754, + "learning_rate": 1.9888736263736265e-05, + "loss": 0.2547, + "step": 21921 + }, + { + "epoch": 60.22527472527472, + "grad_norm": 6.4660844802856445, + "learning_rate": 1.9887362637362638e-05, + "loss": 0.0896, + "step": 21922 + }, + { + "epoch": 60.22802197802198, + "grad_norm": 13.152609825134277, + "learning_rate": 1.988598901098901e-05, + "loss": 0.5376, + "step": 21923 + }, + { + "epoch": 60.23076923076923, + "grad_norm": 20.960060119628906, + "learning_rate": 1.9884615384615385e-05, + "loss": 0.7557, + "step": 21924 + }, + { + "epoch": 60.23351648351648, + "grad_norm": 5.3626275062561035, + "learning_rate": 1.9883241758241758e-05, + "loss": 0.1263, + "step": 21925 + }, + { + "epoch": 60.23626373626374, + "grad_norm": 14.592059135437012, + "learning_rate": 1.9881868131868135e-05, + "loss": 0.3998, + "step": 21926 + }, + { + "epoch": 60.239010989010985, + "grad_norm": 16.933202743530273, + "learning_rate": 1.988049450549451e-05, + "loss": 0.2255, + "step": 21927 + }, + { + "epoch": 60.24175824175824, + "grad_norm": 6.661529541015625, + "learning_rate": 1.987912087912088e-05, + "loss": 0.0882, + "step": 21928 + }, + { + "epoch": 60.244505494505496, + "grad_norm": 18.54444694519043, + "learning_rate": 1.9877747252747252e-05, + "loss": 0.3764, + "step": 21929 + }, + { + "epoch": 60.247252747252745, + "grad_norm": 27.875986099243164, + "learning_rate": 1.9876373626373625e-05, + "loss": 0.4571, + "step": 21930 + }, + { + "epoch": 60.25, + "grad_norm": 15.851698875427246, + "learning_rate": 1.9875000000000002e-05, + "loss": 0.2169, + "step": 21931 + }, + { + "epoch": 60.252747252747255, + "grad_norm": 20.893484115600586, + "learning_rate": 1.9873626373626375e-05, + "loss": 0.3453, + "step": 21932 + }, + { + "epoch": 60.255494505494504, + "grad_norm": 15.374388694763184, + "learning_rate": 1.987225274725275e-05, + "loss": 0.2888, + "step": 21933 + }, + { + "epoch": 60.25824175824176, + "grad_norm": 13.476400375366211, + "learning_rate": 1.9870879120879122e-05, + "loss": 0.318, + "step": 21934 + }, + { + "epoch": 60.260989010989015, + "grad_norm": 14.961465835571289, + "learning_rate": 1.9869505494505496e-05, + "loss": 0.2023, + "step": 21935 + }, + { + "epoch": 60.26373626373626, + "grad_norm": 11.039193153381348, + "learning_rate": 1.986813186813187e-05, + "loss": 0.2144, + "step": 21936 + }, + { + "epoch": 60.26648351648352, + "grad_norm": 21.814287185668945, + "learning_rate": 1.9866758241758242e-05, + "loss": 0.6158, + "step": 21937 + }, + { + "epoch": 60.26923076923077, + "grad_norm": 6.892226219177246, + "learning_rate": 1.9865384615384616e-05, + "loss": 0.109, + "step": 21938 + }, + { + "epoch": 60.27197802197802, + "grad_norm": 9.497516632080078, + "learning_rate": 1.986401098901099e-05, + "loss": 0.1112, + "step": 21939 + }, + { + "epoch": 60.27472527472528, + "grad_norm": 15.103463172912598, + "learning_rate": 1.9862637362637363e-05, + "loss": 0.1963, + "step": 21940 + }, + { + "epoch": 60.277472527472526, + "grad_norm": 14.201021194458008, + "learning_rate": 1.986126373626374e-05, + "loss": 0.3716, + "step": 21941 + }, + { + "epoch": 60.28021978021978, + "grad_norm": 18.59617042541504, + "learning_rate": 1.9859890109890113e-05, + "loss": 0.3156, + "step": 21942 + }, + { + "epoch": 60.282967032967036, + "grad_norm": 18.802799224853516, + "learning_rate": 1.9858516483516483e-05, + "loss": 0.1609, + "step": 21943 + }, + { + "epoch": 60.285714285714285, + "grad_norm": 10.813424110412598, + "learning_rate": 1.9857142857142856e-05, + "loss": 0.1563, + "step": 21944 + }, + { + "epoch": 60.28846153846154, + "grad_norm": 13.287833213806152, + "learning_rate": 1.985576923076923e-05, + "loss": 0.3242, + "step": 21945 + }, + { + "epoch": 60.29120879120879, + "grad_norm": 20.672164916992188, + "learning_rate": 1.9854395604395606e-05, + "loss": 0.4736, + "step": 21946 + }, + { + "epoch": 60.293956043956044, + "grad_norm": 18.12476348876953, + "learning_rate": 1.985302197802198e-05, + "loss": 0.5328, + "step": 21947 + }, + { + "epoch": 60.2967032967033, + "grad_norm": 3.8942627906799316, + "learning_rate": 1.9851648351648353e-05, + "loss": 0.0622, + "step": 21948 + }, + { + "epoch": 60.29945054945055, + "grad_norm": 15.704694747924805, + "learning_rate": 1.9850274725274727e-05, + "loss": 0.149, + "step": 21949 + }, + { + "epoch": 60.3021978021978, + "grad_norm": 14.623231887817383, + "learning_rate": 1.98489010989011e-05, + "loss": 0.2376, + "step": 21950 + }, + { + "epoch": 60.30494505494506, + "grad_norm": 18.01946258544922, + "learning_rate": 1.9847527472527474e-05, + "loss": 0.3346, + "step": 21951 + }, + { + "epoch": 60.30769230769231, + "grad_norm": 8.09599781036377, + "learning_rate": 1.9846153846153847e-05, + "loss": 0.1883, + "step": 21952 + }, + { + "epoch": 60.31043956043956, + "grad_norm": 12.688797950744629, + "learning_rate": 1.984478021978022e-05, + "loss": 0.4286, + "step": 21953 + }, + { + "epoch": 60.31318681318681, + "grad_norm": 22.581214904785156, + "learning_rate": 1.9843406593406594e-05, + "loss": 0.4013, + "step": 21954 + }, + { + "epoch": 60.315934065934066, + "grad_norm": 14.492110252380371, + "learning_rate": 1.9842032967032967e-05, + "loss": 0.2658, + "step": 21955 + }, + { + "epoch": 60.31868131868132, + "grad_norm": 10.503034591674805, + "learning_rate": 1.9840659340659344e-05, + "loss": 0.2229, + "step": 21956 + }, + { + "epoch": 60.32142857142857, + "grad_norm": 16.20859146118164, + "learning_rate": 1.9839285714285717e-05, + "loss": 0.3665, + "step": 21957 + }, + { + "epoch": 60.324175824175825, + "grad_norm": 21.21054458618164, + "learning_rate": 1.9837912087912087e-05, + "loss": 0.5981, + "step": 21958 + }, + { + "epoch": 60.32692307692308, + "grad_norm": 8.896452903747559, + "learning_rate": 1.983653846153846e-05, + "loss": 0.1375, + "step": 21959 + }, + { + "epoch": 60.32967032967033, + "grad_norm": 13.968603134155273, + "learning_rate": 1.9835164835164834e-05, + "loss": 0.3278, + "step": 21960 + }, + { + "epoch": 60.332417582417584, + "grad_norm": 8.831925392150879, + "learning_rate": 1.983379120879121e-05, + "loss": 0.1295, + "step": 21961 + }, + { + "epoch": 60.33516483516483, + "grad_norm": 26.00827407836914, + "learning_rate": 1.9832417582417584e-05, + "loss": 0.7481, + "step": 21962 + }, + { + "epoch": 60.33791208791209, + "grad_norm": 16.287521362304688, + "learning_rate": 1.9831043956043958e-05, + "loss": 0.6472, + "step": 21963 + }, + { + "epoch": 60.34065934065934, + "grad_norm": 13.896254539489746, + "learning_rate": 1.982967032967033e-05, + "loss": 0.4398, + "step": 21964 + }, + { + "epoch": 60.34340659340659, + "grad_norm": 12.941696166992188, + "learning_rate": 1.9828296703296705e-05, + "loss": 0.3527, + "step": 21965 + }, + { + "epoch": 60.34615384615385, + "grad_norm": 5.872088432312012, + "learning_rate": 1.9826923076923078e-05, + "loss": 0.0635, + "step": 21966 + }, + { + "epoch": 60.3489010989011, + "grad_norm": 6.580446720123291, + "learning_rate": 1.982554945054945e-05, + "loss": 0.0562, + "step": 21967 + }, + { + "epoch": 60.35164835164835, + "grad_norm": 11.587444305419922, + "learning_rate": 1.9824175824175825e-05, + "loss": 0.2635, + "step": 21968 + }, + { + "epoch": 60.354395604395606, + "grad_norm": 14.16968059539795, + "learning_rate": 1.9822802197802198e-05, + "loss": 0.4853, + "step": 21969 + }, + { + "epoch": 60.357142857142854, + "grad_norm": 9.261123657226562, + "learning_rate": 1.982142857142857e-05, + "loss": 0.1478, + "step": 21970 + }, + { + "epoch": 60.35989010989011, + "grad_norm": 11.423508644104004, + "learning_rate": 1.982005494505495e-05, + "loss": 0.1425, + "step": 21971 + }, + { + "epoch": 60.362637362637365, + "grad_norm": 5.833245754241943, + "learning_rate": 1.9818681318681322e-05, + "loss": 0.1175, + "step": 21972 + }, + { + "epoch": 60.36538461538461, + "grad_norm": 16.59853744506836, + "learning_rate": 1.9817307692307692e-05, + "loss": 0.2574, + "step": 21973 + }, + { + "epoch": 60.36813186813187, + "grad_norm": 8.61697006225586, + "learning_rate": 1.9815934065934065e-05, + "loss": 0.1171, + "step": 21974 + }, + { + "epoch": 60.370879120879124, + "grad_norm": 5.771988391876221, + "learning_rate": 1.981456043956044e-05, + "loss": 0.082, + "step": 21975 + }, + { + "epoch": 60.37362637362637, + "grad_norm": 19.012996673583984, + "learning_rate": 1.9813186813186815e-05, + "loss": 0.2672, + "step": 21976 + }, + { + "epoch": 60.37637362637363, + "grad_norm": 8.77011489868164, + "learning_rate": 1.981181318681319e-05, + "loss": 0.1557, + "step": 21977 + }, + { + "epoch": 60.379120879120876, + "grad_norm": 8.195743560791016, + "learning_rate": 1.9810439560439562e-05, + "loss": 0.2606, + "step": 21978 + }, + { + "epoch": 60.38186813186813, + "grad_norm": 11.096799850463867, + "learning_rate": 1.9809065934065936e-05, + "loss": 0.124, + "step": 21979 + }, + { + "epoch": 60.38461538461539, + "grad_norm": 14.318842887878418, + "learning_rate": 1.980769230769231e-05, + "loss": 0.2888, + "step": 21980 + }, + { + "epoch": 60.387362637362635, + "grad_norm": 8.455869674682617, + "learning_rate": 1.9806318681318682e-05, + "loss": 0.1468, + "step": 21981 + }, + { + "epoch": 60.39010989010989, + "grad_norm": 9.204190254211426, + "learning_rate": 1.9804945054945056e-05, + "loss": 0.1108, + "step": 21982 + }, + { + "epoch": 60.392857142857146, + "grad_norm": 4.507515907287598, + "learning_rate": 1.980357142857143e-05, + "loss": 0.0738, + "step": 21983 + }, + { + "epoch": 60.395604395604394, + "grad_norm": 15.773921012878418, + "learning_rate": 1.9802197802197803e-05, + "loss": 0.3576, + "step": 21984 + }, + { + "epoch": 60.39835164835165, + "grad_norm": 16.21460723876953, + "learning_rate": 1.9800824175824176e-05, + "loss": 0.4272, + "step": 21985 + }, + { + "epoch": 60.4010989010989, + "grad_norm": 8.160402297973633, + "learning_rate": 1.9799450549450553e-05, + "loss": 0.1032, + "step": 21986 + }, + { + "epoch": 60.40384615384615, + "grad_norm": 2.0118823051452637, + "learning_rate": 1.9798076923076926e-05, + "loss": 0.0296, + "step": 21987 + }, + { + "epoch": 60.40659340659341, + "grad_norm": 9.997312545776367, + "learning_rate": 1.9796703296703296e-05, + "loss": 0.1856, + "step": 21988 + }, + { + "epoch": 60.40934065934066, + "grad_norm": 5.222809791564941, + "learning_rate": 1.979532967032967e-05, + "loss": 0.077, + "step": 21989 + }, + { + "epoch": 60.41208791208791, + "grad_norm": 20.487207412719727, + "learning_rate": 1.9793956043956043e-05, + "loss": 0.1718, + "step": 21990 + }, + { + "epoch": 60.41483516483517, + "grad_norm": 12.106205940246582, + "learning_rate": 1.979258241758242e-05, + "loss": 0.2793, + "step": 21991 + }, + { + "epoch": 60.417582417582416, + "grad_norm": 14.948420524597168, + "learning_rate": 1.9791208791208793e-05, + "loss": 0.4157, + "step": 21992 + }, + { + "epoch": 60.42032967032967, + "grad_norm": 6.297083377838135, + "learning_rate": 1.9789835164835167e-05, + "loss": 0.159, + "step": 21993 + }, + { + "epoch": 60.42307692307692, + "grad_norm": 2.923046588897705, + "learning_rate": 1.978846153846154e-05, + "loss": 0.0267, + "step": 21994 + }, + { + "epoch": 60.425824175824175, + "grad_norm": 19.00644302368164, + "learning_rate": 1.9787087912087914e-05, + "loss": 0.4439, + "step": 21995 + }, + { + "epoch": 60.42857142857143, + "grad_norm": 3.386110305786133, + "learning_rate": 1.9785714285714287e-05, + "loss": 0.0457, + "step": 21996 + }, + { + "epoch": 60.43131868131868, + "grad_norm": 15.893824577331543, + "learning_rate": 1.978434065934066e-05, + "loss": 0.4178, + "step": 21997 + }, + { + "epoch": 60.434065934065934, + "grad_norm": 11.128156661987305, + "learning_rate": 1.9782967032967034e-05, + "loss": 0.1187, + "step": 21998 + }, + { + "epoch": 60.43681318681319, + "grad_norm": 14.468639373779297, + "learning_rate": 1.9781593406593407e-05, + "loss": 0.2766, + "step": 21999 + }, + { + "epoch": 60.43956043956044, + "grad_norm": 6.40433931350708, + "learning_rate": 1.978021978021978e-05, + "loss": 0.1379, + "step": 22000 + }, + { + "epoch": 60.44230769230769, + "grad_norm": 14.756457328796387, + "learning_rate": 1.9778846153846157e-05, + "loss": 0.3462, + "step": 22001 + }, + { + "epoch": 60.44505494505494, + "grad_norm": 8.905076026916504, + "learning_rate": 1.977747252747253e-05, + "loss": 0.2094, + "step": 22002 + }, + { + "epoch": 60.4478021978022, + "grad_norm": 15.213191032409668, + "learning_rate": 1.97760989010989e-05, + "loss": 0.296, + "step": 22003 + }, + { + "epoch": 60.45054945054945, + "grad_norm": 2.430267572402954, + "learning_rate": 1.9774725274725274e-05, + "loss": 0.0374, + "step": 22004 + }, + { + "epoch": 60.4532967032967, + "grad_norm": 12.002724647521973, + "learning_rate": 1.9773351648351648e-05, + "loss": 0.4767, + "step": 22005 + }, + { + "epoch": 60.456043956043956, + "grad_norm": 16.73257827758789, + "learning_rate": 1.9771978021978024e-05, + "loss": 0.358, + "step": 22006 + }, + { + "epoch": 60.45879120879121, + "grad_norm": 5.8977556228637695, + "learning_rate": 1.9770604395604398e-05, + "loss": 0.0873, + "step": 22007 + }, + { + "epoch": 60.46153846153846, + "grad_norm": 11.86581802368164, + "learning_rate": 1.976923076923077e-05, + "loss": 0.2875, + "step": 22008 + }, + { + "epoch": 60.464285714285715, + "grad_norm": 2.9666621685028076, + "learning_rate": 1.9767857142857145e-05, + "loss": 0.0378, + "step": 22009 + }, + { + "epoch": 60.467032967032964, + "grad_norm": 15.876127243041992, + "learning_rate": 1.9766483516483518e-05, + "loss": 0.2826, + "step": 22010 + }, + { + "epoch": 60.46978021978022, + "grad_norm": 11.408720970153809, + "learning_rate": 1.9765109890109888e-05, + "loss": 0.2059, + "step": 22011 + }, + { + "epoch": 60.472527472527474, + "grad_norm": 9.435941696166992, + "learning_rate": 1.9763736263736265e-05, + "loss": 0.1243, + "step": 22012 + }, + { + "epoch": 60.47527472527472, + "grad_norm": 21.339345932006836, + "learning_rate": 1.9762362637362638e-05, + "loss": 0.5437, + "step": 22013 + }, + { + "epoch": 60.47802197802198, + "grad_norm": 10.009440422058105, + "learning_rate": 1.976098901098901e-05, + "loss": 0.0962, + "step": 22014 + }, + { + "epoch": 60.48076923076923, + "grad_norm": 7.8760223388671875, + "learning_rate": 1.9759615384615385e-05, + "loss": 0.1677, + "step": 22015 + }, + { + "epoch": 60.48351648351648, + "grad_norm": 10.169212341308594, + "learning_rate": 1.975824175824176e-05, + "loss": 0.2769, + "step": 22016 + }, + { + "epoch": 60.48626373626374, + "grad_norm": 9.737849235534668, + "learning_rate": 1.9756868131868135e-05, + "loss": 0.1655, + "step": 22017 + }, + { + "epoch": 60.489010989010985, + "grad_norm": 11.15747356414795, + "learning_rate": 1.9755494505494505e-05, + "loss": 0.198, + "step": 22018 + }, + { + "epoch": 60.49175824175824, + "grad_norm": 5.615816116333008, + "learning_rate": 1.975412087912088e-05, + "loss": 0.11, + "step": 22019 + }, + { + "epoch": 60.494505494505496, + "grad_norm": 16.97863006591797, + "learning_rate": 1.9752747252747252e-05, + "loss": 0.4044, + "step": 22020 + }, + { + "epoch": 60.497252747252745, + "grad_norm": 20.067378997802734, + "learning_rate": 1.9751373626373626e-05, + "loss": 0.5239, + "step": 22021 + }, + { + "epoch": 60.5, + "grad_norm": 8.450124740600586, + "learning_rate": 1.9750000000000002e-05, + "loss": 0.118, + "step": 22022 + }, + { + "epoch": 60.502747252747255, + "grad_norm": 15.033352851867676, + "learning_rate": 1.9748626373626376e-05, + "loss": 0.358, + "step": 22023 + }, + { + "epoch": 60.505494505494504, + "grad_norm": 10.105077743530273, + "learning_rate": 1.974725274725275e-05, + "loss": 0.2366, + "step": 22024 + }, + { + "epoch": 60.50824175824176, + "grad_norm": 5.7186360359191895, + "learning_rate": 1.9745879120879123e-05, + "loss": 0.1199, + "step": 22025 + }, + { + "epoch": 60.51098901098901, + "grad_norm": 2.7336959838867188, + "learning_rate": 1.9744505494505493e-05, + "loss": 0.0415, + "step": 22026 + }, + { + "epoch": 60.51373626373626, + "grad_norm": 11.128426551818848, + "learning_rate": 1.974313186813187e-05, + "loss": 0.2403, + "step": 22027 + }, + { + "epoch": 60.51648351648352, + "grad_norm": 9.680133819580078, + "learning_rate": 1.9741758241758243e-05, + "loss": 0.1889, + "step": 22028 + }, + { + "epoch": 60.51923076923077, + "grad_norm": 6.9785237312316895, + "learning_rate": 1.9740384615384616e-05, + "loss": 0.1564, + "step": 22029 + }, + { + "epoch": 60.52197802197802, + "grad_norm": 9.556107521057129, + "learning_rate": 1.973901098901099e-05, + "loss": 0.1554, + "step": 22030 + }, + { + "epoch": 60.52472527472528, + "grad_norm": 18.34986114501953, + "learning_rate": 1.9737637362637363e-05, + "loss": 0.3697, + "step": 22031 + }, + { + "epoch": 60.527472527472526, + "grad_norm": 10.083991050720215, + "learning_rate": 1.973626373626374e-05, + "loss": 0.1698, + "step": 22032 + }, + { + "epoch": 60.53021978021978, + "grad_norm": 7.067823886871338, + "learning_rate": 1.973489010989011e-05, + "loss": 0.0844, + "step": 22033 + }, + { + "epoch": 60.532967032967036, + "grad_norm": 16.55512809753418, + "learning_rate": 1.9733516483516483e-05, + "loss": 0.3542, + "step": 22034 + }, + { + "epoch": 60.535714285714285, + "grad_norm": 10.297621726989746, + "learning_rate": 1.9732142857142857e-05, + "loss": 0.2643, + "step": 22035 + }, + { + "epoch": 60.53846153846154, + "grad_norm": 13.564202308654785, + "learning_rate": 1.973076923076923e-05, + "loss": 0.3535, + "step": 22036 + }, + { + "epoch": 60.54120879120879, + "grad_norm": 10.131711959838867, + "learning_rate": 1.9729395604395607e-05, + "loss": 0.1916, + "step": 22037 + }, + { + "epoch": 60.543956043956044, + "grad_norm": 10.790584564208984, + "learning_rate": 1.972802197802198e-05, + "loss": 0.3593, + "step": 22038 + }, + { + "epoch": 60.5467032967033, + "grad_norm": 7.9767231941223145, + "learning_rate": 1.9726648351648354e-05, + "loss": 0.1849, + "step": 22039 + }, + { + "epoch": 60.54945054945055, + "grad_norm": 16.239683151245117, + "learning_rate": 1.9725274725274727e-05, + "loss": 0.3565, + "step": 22040 + }, + { + "epoch": 60.5521978021978, + "grad_norm": 18.991003036499023, + "learning_rate": 1.9723901098901097e-05, + "loss": 0.4097, + "step": 22041 + }, + { + "epoch": 60.55494505494506, + "grad_norm": 14.881807327270508, + "learning_rate": 1.9722527472527474e-05, + "loss": 0.3346, + "step": 22042 + }, + { + "epoch": 60.55769230769231, + "grad_norm": 15.259018898010254, + "learning_rate": 1.9721153846153847e-05, + "loss": 0.3854, + "step": 22043 + }, + { + "epoch": 60.56043956043956, + "grad_norm": 8.650004386901855, + "learning_rate": 1.971978021978022e-05, + "loss": 0.1579, + "step": 22044 + }, + { + "epoch": 60.56318681318681, + "grad_norm": 10.134641647338867, + "learning_rate": 1.9718406593406594e-05, + "loss": 0.2091, + "step": 22045 + }, + { + "epoch": 60.565934065934066, + "grad_norm": 11.143792152404785, + "learning_rate": 1.9717032967032967e-05, + "loss": 0.1933, + "step": 22046 + }, + { + "epoch": 60.56868131868132, + "grad_norm": 7.383054256439209, + "learning_rate": 1.9715659340659344e-05, + "loss": 0.1693, + "step": 22047 + }, + { + "epoch": 60.57142857142857, + "grad_norm": 12.023818016052246, + "learning_rate": 1.9714285714285714e-05, + "loss": 0.2999, + "step": 22048 + }, + { + "epoch": 60.574175824175825, + "grad_norm": 16.967876434326172, + "learning_rate": 1.9712912087912088e-05, + "loss": 0.532, + "step": 22049 + }, + { + "epoch": 60.57692307692308, + "grad_norm": 19.54706573486328, + "learning_rate": 1.971153846153846e-05, + "loss": 0.2536, + "step": 22050 + }, + { + "epoch": 60.57967032967033, + "grad_norm": 9.004866600036621, + "learning_rate": 1.9710164835164835e-05, + "loss": 0.1381, + "step": 22051 + }, + { + "epoch": 60.582417582417584, + "grad_norm": 7.839241027832031, + "learning_rate": 1.970879120879121e-05, + "loss": 0.1205, + "step": 22052 + }, + { + "epoch": 60.58516483516483, + "grad_norm": 7.176292419433594, + "learning_rate": 1.9707417582417585e-05, + "loss": 0.1911, + "step": 22053 + }, + { + "epoch": 60.58791208791209, + "grad_norm": 16.96463966369629, + "learning_rate": 1.9706043956043958e-05, + "loss": 0.3792, + "step": 22054 + }, + { + "epoch": 60.59065934065934, + "grad_norm": 6.627385139465332, + "learning_rate": 1.970467032967033e-05, + "loss": 0.1186, + "step": 22055 + }, + { + "epoch": 60.59340659340659, + "grad_norm": 16.952730178833008, + "learning_rate": 1.97032967032967e-05, + "loss": 0.3172, + "step": 22056 + }, + { + "epoch": 60.59615384615385, + "grad_norm": 11.557802200317383, + "learning_rate": 1.970192307692308e-05, + "loss": 0.2334, + "step": 22057 + }, + { + "epoch": 60.5989010989011, + "grad_norm": 7.671875476837158, + "learning_rate": 1.9700549450549452e-05, + "loss": 0.1445, + "step": 22058 + }, + { + "epoch": 60.60164835164835, + "grad_norm": 6.667153358459473, + "learning_rate": 1.9699175824175825e-05, + "loss": 0.1058, + "step": 22059 + }, + { + "epoch": 60.604395604395606, + "grad_norm": 7.111238479614258, + "learning_rate": 1.96978021978022e-05, + "loss": 0.1358, + "step": 22060 + }, + { + "epoch": 60.607142857142854, + "grad_norm": 9.339221000671387, + "learning_rate": 1.9696428571428572e-05, + "loss": 0.1769, + "step": 22061 + }, + { + "epoch": 60.60989010989011, + "grad_norm": 17.838287353515625, + "learning_rate": 1.969505494505495e-05, + "loss": 0.3035, + "step": 22062 + }, + { + "epoch": 60.612637362637365, + "grad_norm": 10.758848190307617, + "learning_rate": 1.969368131868132e-05, + "loss": 0.2155, + "step": 22063 + }, + { + "epoch": 60.61538461538461, + "grad_norm": 13.020501136779785, + "learning_rate": 1.9692307692307692e-05, + "loss": 0.3358, + "step": 22064 + }, + { + "epoch": 60.61813186813187, + "grad_norm": 14.900360107421875, + "learning_rate": 1.9690934065934066e-05, + "loss": 0.2247, + "step": 22065 + }, + { + "epoch": 60.620879120879124, + "grad_norm": 8.923418045043945, + "learning_rate": 1.968956043956044e-05, + "loss": 0.126, + "step": 22066 + }, + { + "epoch": 60.62362637362637, + "grad_norm": 4.65610408782959, + "learning_rate": 1.9688186813186816e-05, + "loss": 0.0579, + "step": 22067 + }, + { + "epoch": 60.62637362637363, + "grad_norm": 12.448904991149902, + "learning_rate": 1.968681318681319e-05, + "loss": 0.2215, + "step": 22068 + }, + { + "epoch": 60.629120879120876, + "grad_norm": 13.634522438049316, + "learning_rate": 1.9685439560439563e-05, + "loss": 0.4771, + "step": 22069 + }, + { + "epoch": 60.63186813186813, + "grad_norm": 9.3833589553833, + "learning_rate": 1.9684065934065936e-05, + "loss": 0.1473, + "step": 22070 + }, + { + "epoch": 60.63461538461539, + "grad_norm": 1.91120445728302, + "learning_rate": 1.9682692307692306e-05, + "loss": 0.0685, + "step": 22071 + }, + { + "epoch": 60.637362637362635, + "grad_norm": 27.29531478881836, + "learning_rate": 1.9681318681318683e-05, + "loss": 0.7188, + "step": 22072 + }, + { + "epoch": 60.64010989010989, + "grad_norm": 21.261451721191406, + "learning_rate": 1.9679945054945056e-05, + "loss": 0.5867, + "step": 22073 + }, + { + "epoch": 60.642857142857146, + "grad_norm": 16.342432022094727, + "learning_rate": 1.967857142857143e-05, + "loss": 0.2835, + "step": 22074 + }, + { + "epoch": 60.645604395604394, + "grad_norm": 3.786176919937134, + "learning_rate": 1.9677197802197803e-05, + "loss": 0.054, + "step": 22075 + }, + { + "epoch": 60.64835164835165, + "grad_norm": 13.497274398803711, + "learning_rate": 1.9675824175824176e-05, + "loss": 0.3371, + "step": 22076 + }, + { + "epoch": 60.6510989010989, + "grad_norm": 21.031543731689453, + "learning_rate": 1.967445054945055e-05, + "loss": 0.3456, + "step": 22077 + }, + { + "epoch": 60.65384615384615, + "grad_norm": 16.38836669921875, + "learning_rate": 1.9673076923076923e-05, + "loss": 0.3137, + "step": 22078 + }, + { + "epoch": 60.65659340659341, + "grad_norm": 8.465558052062988, + "learning_rate": 1.9671703296703297e-05, + "loss": 0.124, + "step": 22079 + }, + { + "epoch": 60.65934065934066, + "grad_norm": 11.776041030883789, + "learning_rate": 1.967032967032967e-05, + "loss": 0.187, + "step": 22080 + }, + { + "epoch": 60.66208791208791, + "grad_norm": 23.861465454101562, + "learning_rate": 1.9668956043956044e-05, + "loss": 0.6383, + "step": 22081 + }, + { + "epoch": 60.66483516483517, + "grad_norm": 10.917237281799316, + "learning_rate": 1.966758241758242e-05, + "loss": 0.178, + "step": 22082 + }, + { + "epoch": 60.667582417582416, + "grad_norm": 6.0917439460754395, + "learning_rate": 1.9666208791208794e-05, + "loss": 0.191, + "step": 22083 + }, + { + "epoch": 60.67032967032967, + "grad_norm": 7.536017894744873, + "learning_rate": 1.9664835164835167e-05, + "loss": 0.1591, + "step": 22084 + }, + { + "epoch": 60.67307692307692, + "grad_norm": 6.027034759521484, + "learning_rate": 1.966346153846154e-05, + "loss": 0.0984, + "step": 22085 + }, + { + "epoch": 60.675824175824175, + "grad_norm": 15.357020378112793, + "learning_rate": 1.966208791208791e-05, + "loss": 0.337, + "step": 22086 + }, + { + "epoch": 60.67857142857143, + "grad_norm": 9.012530326843262, + "learning_rate": 1.9660714285714287e-05, + "loss": 0.1104, + "step": 22087 + }, + { + "epoch": 60.68131868131868, + "grad_norm": 12.939773559570312, + "learning_rate": 1.965934065934066e-05, + "loss": 0.2752, + "step": 22088 + }, + { + "epoch": 60.684065934065934, + "grad_norm": 14.299043655395508, + "learning_rate": 1.9657967032967034e-05, + "loss": 0.1586, + "step": 22089 + }, + { + "epoch": 60.68681318681319, + "grad_norm": 4.155673503875732, + "learning_rate": 1.9656593406593408e-05, + "loss": 0.0425, + "step": 22090 + }, + { + "epoch": 60.68956043956044, + "grad_norm": 21.42605209350586, + "learning_rate": 1.965521978021978e-05, + "loss": 0.4589, + "step": 22091 + }, + { + "epoch": 60.69230769230769, + "grad_norm": 7.9449076652526855, + "learning_rate": 1.9653846153846154e-05, + "loss": 0.1437, + "step": 22092 + }, + { + "epoch": 60.69505494505494, + "grad_norm": 3.348790407180786, + "learning_rate": 1.9652472527472528e-05, + "loss": 0.0597, + "step": 22093 + }, + { + "epoch": 60.6978021978022, + "grad_norm": 15.959309577941895, + "learning_rate": 1.96510989010989e-05, + "loss": 0.379, + "step": 22094 + }, + { + "epoch": 60.70054945054945, + "grad_norm": 6.92678689956665, + "learning_rate": 1.9649725274725275e-05, + "loss": 0.0968, + "step": 22095 + }, + { + "epoch": 60.7032967032967, + "grad_norm": 9.598220825195312, + "learning_rate": 1.9648351648351648e-05, + "loss": 0.1619, + "step": 22096 + }, + { + "epoch": 60.706043956043956, + "grad_norm": 9.237415313720703, + "learning_rate": 1.9646978021978025e-05, + "loss": 0.189, + "step": 22097 + }, + { + "epoch": 60.70879120879121, + "grad_norm": 12.782803535461426, + "learning_rate": 1.9645604395604398e-05, + "loss": 0.2093, + "step": 22098 + }, + { + "epoch": 60.71153846153846, + "grad_norm": 2.1559605598449707, + "learning_rate": 1.964423076923077e-05, + "loss": 0.0264, + "step": 22099 + }, + { + "epoch": 60.714285714285715, + "grad_norm": 7.850510120391846, + "learning_rate": 1.9642857142857145e-05, + "loss": 0.1017, + "step": 22100 + }, + { + "epoch": 60.717032967032964, + "grad_norm": 26.285438537597656, + "learning_rate": 1.9641483516483515e-05, + "loss": 0.9658, + "step": 22101 + }, + { + "epoch": 60.71978021978022, + "grad_norm": 12.416911125183105, + "learning_rate": 1.9640109890109892e-05, + "loss": 0.4299, + "step": 22102 + }, + { + "epoch": 60.722527472527474, + "grad_norm": 13.007173538208008, + "learning_rate": 1.9638736263736265e-05, + "loss": 0.3379, + "step": 22103 + }, + { + "epoch": 60.72527472527472, + "grad_norm": 22.85420036315918, + "learning_rate": 1.963736263736264e-05, + "loss": 0.5439, + "step": 22104 + }, + { + "epoch": 60.72802197802198, + "grad_norm": 16.939105987548828, + "learning_rate": 1.9635989010989012e-05, + "loss": 0.4489, + "step": 22105 + }, + { + "epoch": 60.73076923076923, + "grad_norm": 8.83610725402832, + "learning_rate": 1.9634615384615385e-05, + "loss": 0.0671, + "step": 22106 + }, + { + "epoch": 60.73351648351648, + "grad_norm": 20.719430923461914, + "learning_rate": 1.963324175824176e-05, + "loss": 0.6866, + "step": 22107 + }, + { + "epoch": 60.73626373626374, + "grad_norm": 11.153125762939453, + "learning_rate": 1.9631868131868132e-05, + "loss": 0.2001, + "step": 22108 + }, + { + "epoch": 60.73901098901099, + "grad_norm": 7.509145259857178, + "learning_rate": 1.9630494505494506e-05, + "loss": 0.1036, + "step": 22109 + }, + { + "epoch": 60.74175824175824, + "grad_norm": 5.560103416442871, + "learning_rate": 1.962912087912088e-05, + "loss": 0.1203, + "step": 22110 + }, + { + "epoch": 60.744505494505496, + "grad_norm": 9.718255043029785, + "learning_rate": 1.9627747252747252e-05, + "loss": 0.096, + "step": 22111 + }, + { + "epoch": 60.747252747252745, + "grad_norm": 7.04813814163208, + "learning_rate": 1.962637362637363e-05, + "loss": 0.0619, + "step": 22112 + }, + { + "epoch": 60.75, + "grad_norm": 20.45157814025879, + "learning_rate": 1.9625000000000003e-05, + "loss": 0.5419, + "step": 22113 + }, + { + "epoch": 60.752747252747255, + "grad_norm": 9.135978698730469, + "learning_rate": 1.9623626373626376e-05, + "loss": 0.3427, + "step": 22114 + }, + { + "epoch": 60.755494505494504, + "grad_norm": 11.02854061126709, + "learning_rate": 1.962225274725275e-05, + "loss": 0.137, + "step": 22115 + }, + { + "epoch": 60.75824175824176, + "grad_norm": 8.43559741973877, + "learning_rate": 1.962087912087912e-05, + "loss": 0.1308, + "step": 22116 + }, + { + "epoch": 60.76098901098901, + "grad_norm": 9.896890640258789, + "learning_rate": 1.9619505494505496e-05, + "loss": 0.3085, + "step": 22117 + }, + { + "epoch": 60.76373626373626, + "grad_norm": 13.498271942138672, + "learning_rate": 1.961813186813187e-05, + "loss": 0.2542, + "step": 22118 + }, + { + "epoch": 60.76648351648352, + "grad_norm": 7.841922760009766, + "learning_rate": 1.9616758241758243e-05, + "loss": 0.1648, + "step": 22119 + }, + { + "epoch": 60.76923076923077, + "grad_norm": 18.60801124572754, + "learning_rate": 1.9615384615384617e-05, + "loss": 0.346, + "step": 22120 + }, + { + "epoch": 60.77197802197802, + "grad_norm": 10.581347465515137, + "learning_rate": 1.961401098901099e-05, + "loss": 0.3061, + "step": 22121 + }, + { + "epoch": 60.77472527472528, + "grad_norm": 5.281129837036133, + "learning_rate": 1.9612637362637363e-05, + "loss": 0.059, + "step": 22122 + }, + { + "epoch": 60.777472527472526, + "grad_norm": 15.633675575256348, + "learning_rate": 1.9611263736263737e-05, + "loss": 0.2439, + "step": 22123 + }, + { + "epoch": 60.78021978021978, + "grad_norm": 6.410268306732178, + "learning_rate": 1.960989010989011e-05, + "loss": 0.1269, + "step": 22124 + }, + { + "epoch": 60.782967032967036, + "grad_norm": 8.280417442321777, + "learning_rate": 1.9608516483516484e-05, + "loss": 0.2621, + "step": 22125 + }, + { + "epoch": 60.785714285714285, + "grad_norm": 18.141496658325195, + "learning_rate": 1.9607142857142857e-05, + "loss": 0.4052, + "step": 22126 + }, + { + "epoch": 60.78846153846154, + "grad_norm": 16.68583869934082, + "learning_rate": 1.960576923076923e-05, + "loss": 0.4745, + "step": 22127 + }, + { + "epoch": 60.79120879120879, + "grad_norm": 20.311222076416016, + "learning_rate": 1.9604395604395607e-05, + "loss": 0.4901, + "step": 22128 + }, + { + "epoch": 60.793956043956044, + "grad_norm": 3.3207309246063232, + "learning_rate": 1.960302197802198e-05, + "loss": 0.0481, + "step": 22129 + }, + { + "epoch": 60.7967032967033, + "grad_norm": 12.498148918151855, + "learning_rate": 1.9601648351648354e-05, + "loss": 0.2327, + "step": 22130 + }, + { + "epoch": 60.79945054945055, + "grad_norm": 6.485653877258301, + "learning_rate": 1.9600274725274724e-05, + "loss": 0.1424, + "step": 22131 + }, + { + "epoch": 60.8021978021978, + "grad_norm": 7.049901485443115, + "learning_rate": 1.9598901098901097e-05, + "loss": 0.1355, + "step": 22132 + }, + { + "epoch": 60.80494505494506, + "grad_norm": 17.00888442993164, + "learning_rate": 1.9597527472527474e-05, + "loss": 0.2682, + "step": 22133 + }, + { + "epoch": 60.80769230769231, + "grad_norm": 7.141184329986572, + "learning_rate": 1.9596153846153848e-05, + "loss": 0.1552, + "step": 22134 + }, + { + "epoch": 60.81043956043956, + "grad_norm": 9.91087532043457, + "learning_rate": 1.959478021978022e-05, + "loss": 0.1097, + "step": 22135 + }, + { + "epoch": 60.81318681318681, + "grad_norm": 16.065689086914062, + "learning_rate": 1.9593406593406594e-05, + "loss": 0.4728, + "step": 22136 + }, + { + "epoch": 60.815934065934066, + "grad_norm": 7.99806547164917, + "learning_rate": 1.9592032967032968e-05, + "loss": 0.0814, + "step": 22137 + }, + { + "epoch": 60.81868131868132, + "grad_norm": 7.65854549407959, + "learning_rate": 1.959065934065934e-05, + "loss": 0.1699, + "step": 22138 + }, + { + "epoch": 60.82142857142857, + "grad_norm": 20.127595901489258, + "learning_rate": 1.9589285714285715e-05, + "loss": 0.5412, + "step": 22139 + }, + { + "epoch": 60.824175824175825, + "grad_norm": 9.972082138061523, + "learning_rate": 1.9587912087912088e-05, + "loss": 0.1998, + "step": 22140 + }, + { + "epoch": 60.82692307692308, + "grad_norm": 17.2966365814209, + "learning_rate": 1.958653846153846e-05, + "loss": 0.3167, + "step": 22141 + }, + { + "epoch": 60.82967032967033, + "grad_norm": 11.50804615020752, + "learning_rate": 1.9585164835164835e-05, + "loss": 0.1808, + "step": 22142 + }, + { + "epoch": 60.832417582417584, + "grad_norm": 18.610620498657227, + "learning_rate": 1.958379120879121e-05, + "loss": 0.6051, + "step": 22143 + }, + { + "epoch": 60.83516483516483, + "grad_norm": 11.30908489227295, + "learning_rate": 1.9582417582417585e-05, + "loss": 0.2168, + "step": 22144 + }, + { + "epoch": 60.83791208791209, + "grad_norm": 16.563798904418945, + "learning_rate": 1.958104395604396e-05, + "loss": 0.3076, + "step": 22145 + }, + { + "epoch": 60.84065934065934, + "grad_norm": 2.72361421585083, + "learning_rate": 1.957967032967033e-05, + "loss": 0.0509, + "step": 22146 + }, + { + "epoch": 60.84340659340659, + "grad_norm": 11.064207077026367, + "learning_rate": 1.9578296703296702e-05, + "loss": 0.1757, + "step": 22147 + }, + { + "epoch": 60.84615384615385, + "grad_norm": 8.922192573547363, + "learning_rate": 1.957692307692308e-05, + "loss": 0.1938, + "step": 22148 + }, + { + "epoch": 60.8489010989011, + "grad_norm": 16.930757522583008, + "learning_rate": 1.9575549450549452e-05, + "loss": 0.3167, + "step": 22149 + }, + { + "epoch": 60.85164835164835, + "grad_norm": 3.8694045543670654, + "learning_rate": 1.9574175824175826e-05, + "loss": 0.0389, + "step": 22150 + }, + { + "epoch": 60.854395604395606, + "grad_norm": 10.956225395202637, + "learning_rate": 1.95728021978022e-05, + "loss": 0.2559, + "step": 22151 + }, + { + "epoch": 60.857142857142854, + "grad_norm": 2.589968204498291, + "learning_rate": 1.9571428571428572e-05, + "loss": 0.0457, + "step": 22152 + }, + { + "epoch": 60.85989010989011, + "grad_norm": 9.085906028747559, + "learning_rate": 1.9570054945054946e-05, + "loss": 0.1021, + "step": 22153 + }, + { + "epoch": 60.862637362637365, + "grad_norm": 1.3515268564224243, + "learning_rate": 1.956868131868132e-05, + "loss": 0.0195, + "step": 22154 + }, + { + "epoch": 60.86538461538461, + "grad_norm": 15.652586936950684, + "learning_rate": 1.9567307692307693e-05, + "loss": 0.4568, + "step": 22155 + }, + { + "epoch": 60.86813186813187, + "grad_norm": 4.917202949523926, + "learning_rate": 1.9565934065934066e-05, + "loss": 0.0845, + "step": 22156 + }, + { + "epoch": 60.870879120879124, + "grad_norm": 19.117761611938477, + "learning_rate": 1.956456043956044e-05, + "loss": 0.6404, + "step": 22157 + }, + { + "epoch": 60.87362637362637, + "grad_norm": 16.435346603393555, + "learning_rate": 1.9563186813186816e-05, + "loss": 0.3316, + "step": 22158 + }, + { + "epoch": 60.87637362637363, + "grad_norm": 8.611238479614258, + "learning_rate": 1.956181318681319e-05, + "loss": 0.2036, + "step": 22159 + }, + { + "epoch": 60.879120879120876, + "grad_norm": 17.305219650268555, + "learning_rate": 1.956043956043956e-05, + "loss": 0.2389, + "step": 22160 + }, + { + "epoch": 60.88186813186813, + "grad_norm": 11.877609252929688, + "learning_rate": 1.9559065934065933e-05, + "loss": 0.1895, + "step": 22161 + }, + { + "epoch": 60.88461538461539, + "grad_norm": 24.84627342224121, + "learning_rate": 1.9557692307692306e-05, + "loss": 0.4979, + "step": 22162 + }, + { + "epoch": 60.887362637362635, + "grad_norm": 6.942782878875732, + "learning_rate": 1.9556318681318683e-05, + "loss": 0.1586, + "step": 22163 + }, + { + "epoch": 60.89010989010989, + "grad_norm": 8.80065631866455, + "learning_rate": 1.9554945054945057e-05, + "loss": 0.1469, + "step": 22164 + }, + { + "epoch": 60.892857142857146, + "grad_norm": 17.288808822631836, + "learning_rate": 1.955357142857143e-05, + "loss": 0.2892, + "step": 22165 + }, + { + "epoch": 60.895604395604394, + "grad_norm": 12.773855209350586, + "learning_rate": 1.9552197802197803e-05, + "loss": 0.193, + "step": 22166 + }, + { + "epoch": 60.89835164835165, + "grad_norm": 12.710423469543457, + "learning_rate": 1.9550824175824177e-05, + "loss": 0.334, + "step": 22167 + }, + { + "epoch": 60.9010989010989, + "grad_norm": 13.806167602539062, + "learning_rate": 1.954945054945055e-05, + "loss": 0.0963, + "step": 22168 + }, + { + "epoch": 60.90384615384615, + "grad_norm": 11.355447769165039, + "learning_rate": 1.9548076923076924e-05, + "loss": 0.1296, + "step": 22169 + }, + { + "epoch": 60.90659340659341, + "grad_norm": 9.255085945129395, + "learning_rate": 1.9546703296703297e-05, + "loss": 0.2686, + "step": 22170 + }, + { + "epoch": 60.90934065934066, + "grad_norm": 27.219539642333984, + "learning_rate": 1.954532967032967e-05, + "loss": 0.5587, + "step": 22171 + }, + { + "epoch": 60.91208791208791, + "grad_norm": 14.224715232849121, + "learning_rate": 1.9543956043956044e-05, + "loss": 0.1725, + "step": 22172 + }, + { + "epoch": 60.91483516483517, + "grad_norm": 14.49571418762207, + "learning_rate": 1.954258241758242e-05, + "loss": 0.2506, + "step": 22173 + }, + { + "epoch": 60.917582417582416, + "grad_norm": 6.399028778076172, + "learning_rate": 1.9541208791208794e-05, + "loss": 0.129, + "step": 22174 + }, + { + "epoch": 60.92032967032967, + "grad_norm": 20.314138412475586, + "learning_rate": 1.9539835164835164e-05, + "loss": 0.6994, + "step": 22175 + }, + { + "epoch": 60.92307692307692, + "grad_norm": 7.943477630615234, + "learning_rate": 1.9538461538461537e-05, + "loss": 0.1694, + "step": 22176 + }, + { + "epoch": 60.925824175824175, + "grad_norm": 12.828629493713379, + "learning_rate": 1.953708791208791e-05, + "loss": 0.2677, + "step": 22177 + }, + { + "epoch": 60.92857142857143, + "grad_norm": 6.132114887237549, + "learning_rate": 1.9535714285714288e-05, + "loss": 0.1145, + "step": 22178 + }, + { + "epoch": 60.93131868131868, + "grad_norm": 8.79562759399414, + "learning_rate": 1.953434065934066e-05, + "loss": 0.1315, + "step": 22179 + }, + { + "epoch": 60.934065934065934, + "grad_norm": 4.686675548553467, + "learning_rate": 1.9532967032967034e-05, + "loss": 0.1059, + "step": 22180 + }, + { + "epoch": 60.93681318681319, + "grad_norm": 7.575313091278076, + "learning_rate": 1.9531593406593408e-05, + "loss": 0.1774, + "step": 22181 + }, + { + "epoch": 60.93956043956044, + "grad_norm": 12.009021759033203, + "learning_rate": 1.953021978021978e-05, + "loss": 0.3062, + "step": 22182 + }, + { + "epoch": 60.94230769230769, + "grad_norm": 19.2606201171875, + "learning_rate": 1.9528846153846155e-05, + "loss": 0.3156, + "step": 22183 + }, + { + "epoch": 60.94505494505494, + "grad_norm": 11.417191505432129, + "learning_rate": 1.9527472527472528e-05, + "loss": 0.3071, + "step": 22184 + }, + { + "epoch": 60.9478021978022, + "grad_norm": 21.73988914489746, + "learning_rate": 1.95260989010989e-05, + "loss": 0.3587, + "step": 22185 + }, + { + "epoch": 60.95054945054945, + "grad_norm": 12.293867111206055, + "learning_rate": 1.9524725274725275e-05, + "loss": 0.2693, + "step": 22186 + }, + { + "epoch": 60.9532967032967, + "grad_norm": 8.982813835144043, + "learning_rate": 1.952335164835165e-05, + "loss": 0.1238, + "step": 22187 + }, + { + "epoch": 60.956043956043956, + "grad_norm": 13.17296314239502, + "learning_rate": 1.9521978021978025e-05, + "loss": 0.1464, + "step": 22188 + }, + { + "epoch": 60.95879120879121, + "grad_norm": 13.364962577819824, + "learning_rate": 1.95206043956044e-05, + "loss": 0.2212, + "step": 22189 + }, + { + "epoch": 60.96153846153846, + "grad_norm": 6.783044815063477, + "learning_rate": 1.951923076923077e-05, + "loss": 0.0873, + "step": 22190 + }, + { + "epoch": 60.964285714285715, + "grad_norm": 6.293022632598877, + "learning_rate": 1.9517857142857142e-05, + "loss": 0.1143, + "step": 22191 + }, + { + "epoch": 60.967032967032964, + "grad_norm": 5.88463020324707, + "learning_rate": 1.9516483516483515e-05, + "loss": 0.0645, + "step": 22192 + }, + { + "epoch": 60.96978021978022, + "grad_norm": 20.494855880737305, + "learning_rate": 1.9515109890109892e-05, + "loss": 0.4719, + "step": 22193 + }, + { + "epoch": 60.972527472527474, + "grad_norm": 10.383044242858887, + "learning_rate": 1.9513736263736266e-05, + "loss": 0.1947, + "step": 22194 + }, + { + "epoch": 60.97527472527472, + "grad_norm": 1.8425812721252441, + "learning_rate": 1.951236263736264e-05, + "loss": 0.0206, + "step": 22195 + }, + { + "epoch": 60.97802197802198, + "grad_norm": 12.898555755615234, + "learning_rate": 1.9510989010989012e-05, + "loss": 0.4677, + "step": 22196 + }, + { + "epoch": 60.98076923076923, + "grad_norm": 6.769998073577881, + "learning_rate": 1.9509615384615386e-05, + "loss": 0.13, + "step": 22197 + }, + { + "epoch": 60.98351648351648, + "grad_norm": 15.81142807006836, + "learning_rate": 1.950824175824176e-05, + "loss": 0.3817, + "step": 22198 + }, + { + "epoch": 60.98626373626374, + "grad_norm": 17.030406951904297, + "learning_rate": 1.9506868131868133e-05, + "loss": 0.5759, + "step": 22199 + }, + { + "epoch": 60.98901098901099, + "grad_norm": 14.26145076751709, + "learning_rate": 1.9505494505494506e-05, + "loss": 0.3335, + "step": 22200 + }, + { + "epoch": 60.99175824175824, + "grad_norm": 5.49934196472168, + "learning_rate": 1.950412087912088e-05, + "loss": 0.1668, + "step": 22201 + }, + { + "epoch": 60.994505494505496, + "grad_norm": 7.297369480133057, + "learning_rate": 1.9502747252747253e-05, + "loss": 0.1432, + "step": 22202 + }, + { + "epoch": 60.997252747252745, + "grad_norm": 13.32840347290039, + "learning_rate": 1.950137362637363e-05, + "loss": 0.3176, + "step": 22203 + }, + { + "epoch": 61.0, + "grad_norm": 1.6019185781478882, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.011, + "step": 22204 + }, + { + "epoch": 61.0, + "eval_accuracy": 0.7603305785123967, + "eval_f1": 0.7670004589688706, + "eval_f1_DuraRiadoRio_64x64": 0.7161572052401747, + "eval_f1_Mole_64x64": 0.8353413654618473, + "eval_f1_Quebrado_64x64": 0.7751479289940828, + "eval_f1_RiadoRio_64x64": 0.6630727762803235, + "eval_f1_RioFechado_64x64": 0.8452830188679246, + "eval_loss": 0.893841564655304, + "eval_precision": 0.820530549219078, + "eval_precision_DuraRiadoRio_64x64": 0.9647058823529412, + "eval_precision_Mole_64x64": 0.9904761904761905, + "eval_precision_Quebrado_64x64": 0.6752577319587629, + "eval_precision_RiadoRio_64x64": 0.5616438356164384, + "eval_precision_RioFechado_64x64": 0.9105691056910569, + "eval_recall": 0.759866361914175, + "eval_recall_DuraRiadoRio_64x64": 0.5694444444444444, + "eval_recall_Mole_64x64": 0.7222222222222222, + "eval_recall_Quebrado_64x64": 0.9097222222222222, + "eval_recall_RiadoRio_64x64": 0.8092105263157895, + "eval_recall_RioFechado_64x64": 0.7887323943661971, + "eval_runtime": 1.7313, + "eval_samples_per_second": 419.346, + "eval_steps_per_second": 26.57, + "step": 22204 + }, + { + "epoch": 61.002747252747255, + "grad_norm": 13.976158142089844, + "learning_rate": 1.9498626373626373e-05, + "loss": 0.2546, + "step": 22205 + }, + { + "epoch": 61.005494505494504, + "grad_norm": 12.937698364257812, + "learning_rate": 1.9497252747252746e-05, + "loss": 0.196, + "step": 22206 + }, + { + "epoch": 61.00824175824176, + "grad_norm": 8.808226585388184, + "learning_rate": 1.949587912087912e-05, + "loss": 0.1355, + "step": 22207 + }, + { + "epoch": 61.010989010989015, + "grad_norm": 14.747764587402344, + "learning_rate": 1.9494505494505497e-05, + "loss": 0.251, + "step": 22208 + }, + { + "epoch": 61.01373626373626, + "grad_norm": 14.8800630569458, + "learning_rate": 1.949313186813187e-05, + "loss": 0.1929, + "step": 22209 + }, + { + "epoch": 61.01648351648352, + "grad_norm": 10.02020263671875, + "learning_rate": 1.9491758241758243e-05, + "loss": 0.0929, + "step": 22210 + }, + { + "epoch": 61.01923076923077, + "grad_norm": 5.4283976554870605, + "learning_rate": 1.9490384615384617e-05, + "loss": 0.1118, + "step": 22211 + }, + { + "epoch": 61.02197802197802, + "grad_norm": 11.343599319458008, + "learning_rate": 1.948901098901099e-05, + "loss": 0.1931, + "step": 22212 + }, + { + "epoch": 61.02472527472528, + "grad_norm": 16.314390182495117, + "learning_rate": 1.9487637362637364e-05, + "loss": 0.5522, + "step": 22213 + }, + { + "epoch": 61.027472527472526, + "grad_norm": 9.560672760009766, + "learning_rate": 1.9486263736263737e-05, + "loss": 0.0973, + "step": 22214 + }, + { + "epoch": 61.03021978021978, + "grad_norm": 19.485891342163086, + "learning_rate": 1.948489010989011e-05, + "loss": 0.3104, + "step": 22215 + }, + { + "epoch": 61.032967032967036, + "grad_norm": 21.824481964111328, + "learning_rate": 1.9483516483516484e-05, + "loss": 0.5051, + "step": 22216 + }, + { + "epoch": 61.035714285714285, + "grad_norm": 9.2342529296875, + "learning_rate": 1.9482142857142857e-05, + "loss": 0.1548, + "step": 22217 + }, + { + "epoch": 61.03846153846154, + "grad_norm": 6.174681186676025, + "learning_rate": 1.9480769230769234e-05, + "loss": 0.0507, + "step": 22218 + }, + { + "epoch": 61.04120879120879, + "grad_norm": 14.574067115783691, + "learning_rate": 1.9479395604395608e-05, + "loss": 0.3716, + "step": 22219 + }, + { + "epoch": 61.043956043956044, + "grad_norm": 9.937966346740723, + "learning_rate": 1.9478021978021978e-05, + "loss": 0.1641, + "step": 22220 + }, + { + "epoch": 61.0467032967033, + "grad_norm": 4.755817890167236, + "learning_rate": 1.947664835164835e-05, + "loss": 0.0895, + "step": 22221 + }, + { + "epoch": 61.04945054945055, + "grad_norm": 12.86359691619873, + "learning_rate": 1.9475274725274724e-05, + "loss": 0.2358, + "step": 22222 + }, + { + "epoch": 61.0521978021978, + "grad_norm": 20.36589813232422, + "learning_rate": 1.94739010989011e-05, + "loss": 0.2859, + "step": 22223 + }, + { + "epoch": 61.05494505494506, + "grad_norm": 12.786121368408203, + "learning_rate": 1.9472527472527475e-05, + "loss": 0.1874, + "step": 22224 + }, + { + "epoch": 61.05769230769231, + "grad_norm": 7.321951389312744, + "learning_rate": 1.9471153846153848e-05, + "loss": 0.0802, + "step": 22225 + }, + { + "epoch": 61.06043956043956, + "grad_norm": 16.48282241821289, + "learning_rate": 1.946978021978022e-05, + "loss": 0.2501, + "step": 22226 + }, + { + "epoch": 61.06318681318681, + "grad_norm": 14.50033187866211, + "learning_rate": 1.9468406593406595e-05, + "loss": 0.2657, + "step": 22227 + }, + { + "epoch": 61.065934065934066, + "grad_norm": 16.145784378051758, + "learning_rate": 1.9467032967032968e-05, + "loss": 0.4068, + "step": 22228 + }, + { + "epoch": 61.06868131868132, + "grad_norm": 5.397296905517578, + "learning_rate": 1.946565934065934e-05, + "loss": 0.1139, + "step": 22229 + }, + { + "epoch": 61.07142857142857, + "grad_norm": 15.860361099243164, + "learning_rate": 1.9464285714285715e-05, + "loss": 0.2349, + "step": 22230 + }, + { + "epoch": 61.074175824175825, + "grad_norm": 14.397547721862793, + "learning_rate": 1.946291208791209e-05, + "loss": 0.1984, + "step": 22231 + }, + { + "epoch": 61.07692307692308, + "grad_norm": 4.132114410400391, + "learning_rate": 1.9461538461538462e-05, + "loss": 0.0472, + "step": 22232 + }, + { + "epoch": 61.07967032967033, + "grad_norm": 8.823017120361328, + "learning_rate": 1.946016483516484e-05, + "loss": 0.2161, + "step": 22233 + }, + { + "epoch": 61.082417582417584, + "grad_norm": 12.36948299407959, + "learning_rate": 1.9458791208791212e-05, + "loss": 0.507, + "step": 22234 + }, + { + "epoch": 61.08516483516483, + "grad_norm": 13.633625984191895, + "learning_rate": 1.9457417582417582e-05, + "loss": 0.1735, + "step": 22235 + }, + { + "epoch": 61.08791208791209, + "grad_norm": 16.995126724243164, + "learning_rate": 1.9456043956043955e-05, + "loss": 0.3469, + "step": 22236 + }, + { + "epoch": 61.09065934065934, + "grad_norm": 8.112517356872559, + "learning_rate": 1.945467032967033e-05, + "loss": 0.0828, + "step": 22237 + }, + { + "epoch": 61.09340659340659, + "grad_norm": 17.251754760742188, + "learning_rate": 1.9453296703296706e-05, + "loss": 0.412, + "step": 22238 + }, + { + "epoch": 61.09615384615385, + "grad_norm": 19.038164138793945, + "learning_rate": 1.945192307692308e-05, + "loss": 0.4795, + "step": 22239 + }, + { + "epoch": 61.0989010989011, + "grad_norm": 16.39190673828125, + "learning_rate": 1.9450549450549452e-05, + "loss": 0.3659, + "step": 22240 + }, + { + "epoch": 61.10164835164835, + "grad_norm": 1.372625708580017, + "learning_rate": 1.9449175824175826e-05, + "loss": 0.0135, + "step": 22241 + }, + { + "epoch": 61.104395604395606, + "grad_norm": 16.673547744750977, + "learning_rate": 1.94478021978022e-05, + "loss": 0.2834, + "step": 22242 + }, + { + "epoch": 61.107142857142854, + "grad_norm": 15.54072380065918, + "learning_rate": 1.9446428571428573e-05, + "loss": 0.2172, + "step": 22243 + }, + { + "epoch": 61.10989010989011, + "grad_norm": 6.665052890777588, + "learning_rate": 1.9445054945054946e-05, + "loss": 0.1157, + "step": 22244 + }, + { + "epoch": 61.112637362637365, + "grad_norm": 5.132101535797119, + "learning_rate": 1.944368131868132e-05, + "loss": 0.0711, + "step": 22245 + }, + { + "epoch": 61.11538461538461, + "grad_norm": 12.413735389709473, + "learning_rate": 1.9442307692307693e-05, + "loss": 0.1669, + "step": 22246 + }, + { + "epoch": 61.11813186813187, + "grad_norm": 9.656216621398926, + "learning_rate": 1.9440934065934066e-05, + "loss": 0.1632, + "step": 22247 + }, + { + "epoch": 61.120879120879124, + "grad_norm": 17.50084686279297, + "learning_rate": 1.943956043956044e-05, + "loss": 0.2919, + "step": 22248 + }, + { + "epoch": 61.12362637362637, + "grad_norm": 7.706409454345703, + "learning_rate": 1.9438186813186817e-05, + "loss": 0.1475, + "step": 22249 + }, + { + "epoch": 61.12637362637363, + "grad_norm": 10.2935791015625, + "learning_rate": 1.9436813186813187e-05, + "loss": 0.2321, + "step": 22250 + }, + { + "epoch": 61.129120879120876, + "grad_norm": 19.548450469970703, + "learning_rate": 1.943543956043956e-05, + "loss": 0.3038, + "step": 22251 + }, + { + "epoch": 61.13186813186813, + "grad_norm": 9.399590492248535, + "learning_rate": 1.9434065934065933e-05, + "loss": 0.2279, + "step": 22252 + }, + { + "epoch": 61.13461538461539, + "grad_norm": 15.475387573242188, + "learning_rate": 1.9432692307692307e-05, + "loss": 0.3462, + "step": 22253 + }, + { + "epoch": 61.137362637362635, + "grad_norm": 9.656466484069824, + "learning_rate": 1.9431318681318684e-05, + "loss": 0.2256, + "step": 22254 + }, + { + "epoch": 61.14010989010989, + "grad_norm": 7.500362873077393, + "learning_rate": 1.9429945054945057e-05, + "loss": 0.0922, + "step": 22255 + }, + { + "epoch": 61.142857142857146, + "grad_norm": 11.075809478759766, + "learning_rate": 1.942857142857143e-05, + "loss": 0.2354, + "step": 22256 + }, + { + "epoch": 61.145604395604394, + "grad_norm": 1.4022696018218994, + "learning_rate": 1.9427197802197804e-05, + "loss": 0.0126, + "step": 22257 + }, + { + "epoch": 61.14835164835165, + "grad_norm": 14.082318305969238, + "learning_rate": 1.9425824175824174e-05, + "loss": 0.277, + "step": 22258 + }, + { + "epoch": 61.1510989010989, + "grad_norm": 11.973487854003906, + "learning_rate": 1.942445054945055e-05, + "loss": 0.0939, + "step": 22259 + }, + { + "epoch": 61.15384615384615, + "grad_norm": 10.11275577545166, + "learning_rate": 1.9423076923076924e-05, + "loss": 0.1208, + "step": 22260 + }, + { + "epoch": 61.15659340659341, + "grad_norm": 23.872533798217773, + "learning_rate": 1.9421703296703297e-05, + "loss": 0.346, + "step": 22261 + }, + { + "epoch": 61.15934065934066, + "grad_norm": 12.821830749511719, + "learning_rate": 1.942032967032967e-05, + "loss": 0.2224, + "step": 22262 + }, + { + "epoch": 61.16208791208791, + "grad_norm": 17.55531120300293, + "learning_rate": 1.9418956043956044e-05, + "loss": 0.4702, + "step": 22263 + }, + { + "epoch": 61.16483516483517, + "grad_norm": 14.999478340148926, + "learning_rate": 1.941758241758242e-05, + "loss": 0.1722, + "step": 22264 + }, + { + "epoch": 61.167582417582416, + "grad_norm": 6.931419849395752, + "learning_rate": 1.941620879120879e-05, + "loss": 0.0701, + "step": 22265 + }, + { + "epoch": 61.17032967032967, + "grad_norm": 11.834547996520996, + "learning_rate": 1.9414835164835164e-05, + "loss": 0.2781, + "step": 22266 + }, + { + "epoch": 61.17307692307692, + "grad_norm": 9.982704162597656, + "learning_rate": 1.9413461538461538e-05, + "loss": 0.2442, + "step": 22267 + }, + { + "epoch": 61.175824175824175, + "grad_norm": 14.24361801147461, + "learning_rate": 1.941208791208791e-05, + "loss": 0.3209, + "step": 22268 + }, + { + "epoch": 61.17857142857143, + "grad_norm": 13.8853178024292, + "learning_rate": 1.9410714285714288e-05, + "loss": 0.3451, + "step": 22269 + }, + { + "epoch": 61.18131868131868, + "grad_norm": 12.50020980834961, + "learning_rate": 1.940934065934066e-05, + "loss": 0.1739, + "step": 22270 + }, + { + "epoch": 61.184065934065934, + "grad_norm": 14.813042640686035, + "learning_rate": 1.9407967032967035e-05, + "loss": 0.2922, + "step": 22271 + }, + { + "epoch": 61.18681318681319, + "grad_norm": 14.85567569732666, + "learning_rate": 1.9406593406593408e-05, + "loss": 0.1763, + "step": 22272 + }, + { + "epoch": 61.18956043956044, + "grad_norm": 4.934969425201416, + "learning_rate": 1.9405219780219778e-05, + "loss": 0.1486, + "step": 22273 + }, + { + "epoch": 61.19230769230769, + "grad_norm": 4.939184665679932, + "learning_rate": 1.9403846153846155e-05, + "loss": 0.1206, + "step": 22274 + }, + { + "epoch": 61.19505494505494, + "grad_norm": 6.817447185516357, + "learning_rate": 1.940247252747253e-05, + "loss": 0.1647, + "step": 22275 + }, + { + "epoch": 61.1978021978022, + "grad_norm": 14.373557090759277, + "learning_rate": 1.9401098901098902e-05, + "loss": 0.2302, + "step": 22276 + }, + { + "epoch": 61.20054945054945, + "grad_norm": 6.456489562988281, + "learning_rate": 1.9399725274725275e-05, + "loss": 0.1418, + "step": 22277 + }, + { + "epoch": 61.2032967032967, + "grad_norm": 12.945965766906738, + "learning_rate": 1.939835164835165e-05, + "loss": 0.1652, + "step": 22278 + }, + { + "epoch": 61.206043956043956, + "grad_norm": 6.821228981018066, + "learning_rate": 1.9396978021978025e-05, + "loss": 0.1039, + "step": 22279 + }, + { + "epoch": 61.20879120879121, + "grad_norm": 3.6951658725738525, + "learning_rate": 1.9395604395604396e-05, + "loss": 0.0574, + "step": 22280 + }, + { + "epoch": 61.21153846153846, + "grad_norm": 17.77461051940918, + "learning_rate": 1.939423076923077e-05, + "loss": 0.2544, + "step": 22281 + }, + { + "epoch": 61.214285714285715, + "grad_norm": 10.950080871582031, + "learning_rate": 1.9392857142857142e-05, + "loss": 0.3351, + "step": 22282 + }, + { + "epoch": 61.217032967032964, + "grad_norm": 15.623525619506836, + "learning_rate": 1.9391483516483516e-05, + "loss": 0.2316, + "step": 22283 + }, + { + "epoch": 61.21978021978022, + "grad_norm": 22.933008193969727, + "learning_rate": 1.9390109890109893e-05, + "loss": 0.3026, + "step": 22284 + }, + { + "epoch": 61.222527472527474, + "grad_norm": 5.185487270355225, + "learning_rate": 1.9388736263736266e-05, + "loss": 0.1089, + "step": 22285 + }, + { + "epoch": 61.22527472527472, + "grad_norm": 11.505779266357422, + "learning_rate": 1.938736263736264e-05, + "loss": 0.1585, + "step": 22286 + }, + { + "epoch": 61.22802197802198, + "grad_norm": 16.731523513793945, + "learning_rate": 1.9385989010989013e-05, + "loss": 0.1568, + "step": 22287 + }, + { + "epoch": 61.23076923076923, + "grad_norm": 16.100215911865234, + "learning_rate": 1.9384615384615383e-05, + "loss": 0.3894, + "step": 22288 + }, + { + "epoch": 61.23351648351648, + "grad_norm": 14.309104919433594, + "learning_rate": 1.938324175824176e-05, + "loss": 0.3712, + "step": 22289 + }, + { + "epoch": 61.23626373626374, + "grad_norm": 17.442140579223633, + "learning_rate": 1.9381868131868133e-05, + "loss": 0.2031, + "step": 22290 + }, + { + "epoch": 61.239010989010985, + "grad_norm": 17.126079559326172, + "learning_rate": 1.9380494505494506e-05, + "loss": 0.4137, + "step": 22291 + }, + { + "epoch": 61.24175824175824, + "grad_norm": 9.838051795959473, + "learning_rate": 1.937912087912088e-05, + "loss": 0.1078, + "step": 22292 + }, + { + "epoch": 61.244505494505496, + "grad_norm": 12.71229076385498, + "learning_rate": 1.9377747252747253e-05, + "loss": 0.4444, + "step": 22293 + }, + { + "epoch": 61.247252747252745, + "grad_norm": 11.955806732177734, + "learning_rate": 1.937637362637363e-05, + "loss": 0.2781, + "step": 22294 + }, + { + "epoch": 61.25, + "grad_norm": 11.529622077941895, + "learning_rate": 1.9375e-05, + "loss": 0.1796, + "step": 22295 + }, + { + "epoch": 61.252747252747255, + "grad_norm": 5.812412261962891, + "learning_rate": 1.9373626373626373e-05, + "loss": 0.0869, + "step": 22296 + }, + { + "epoch": 61.255494505494504, + "grad_norm": 13.047173500061035, + "learning_rate": 1.9372252747252747e-05, + "loss": 0.2465, + "step": 22297 + }, + { + "epoch": 61.25824175824176, + "grad_norm": 5.462814807891846, + "learning_rate": 1.937087912087912e-05, + "loss": 0.0858, + "step": 22298 + }, + { + "epoch": 61.260989010989015, + "grad_norm": 12.775259017944336, + "learning_rate": 1.9369505494505497e-05, + "loss": 0.3039, + "step": 22299 + }, + { + "epoch": 61.26373626373626, + "grad_norm": 6.266964912414551, + "learning_rate": 1.936813186813187e-05, + "loss": 0.0514, + "step": 22300 + }, + { + "epoch": 61.26648351648352, + "grad_norm": 5.931027412414551, + "learning_rate": 1.9366758241758244e-05, + "loss": 0.1145, + "step": 22301 + }, + { + "epoch": 61.26923076923077, + "grad_norm": 16.83022689819336, + "learning_rate": 1.9365384615384617e-05, + "loss": 0.1877, + "step": 22302 + }, + { + "epoch": 61.27197802197802, + "grad_norm": 10.631368637084961, + "learning_rate": 1.9364010989010987e-05, + "loss": 0.1272, + "step": 22303 + }, + { + "epoch": 61.27472527472528, + "grad_norm": 16.35822105407715, + "learning_rate": 1.9362637362637364e-05, + "loss": 0.1803, + "step": 22304 + }, + { + "epoch": 61.277472527472526, + "grad_norm": 18.036720275878906, + "learning_rate": 1.9361263736263737e-05, + "loss": 0.4866, + "step": 22305 + }, + { + "epoch": 61.28021978021978, + "grad_norm": 7.531360626220703, + "learning_rate": 1.935989010989011e-05, + "loss": 0.1402, + "step": 22306 + }, + { + "epoch": 61.282967032967036, + "grad_norm": 12.756645202636719, + "learning_rate": 1.9358516483516484e-05, + "loss": 0.1754, + "step": 22307 + }, + { + "epoch": 61.285714285714285, + "grad_norm": 7.608121871948242, + "learning_rate": 1.9357142857142858e-05, + "loss": 0.0709, + "step": 22308 + }, + { + "epoch": 61.28846153846154, + "grad_norm": 13.111160278320312, + "learning_rate": 1.9355769230769234e-05, + "loss": 0.407, + "step": 22309 + }, + { + "epoch": 61.29120879120879, + "grad_norm": 1.7177577018737793, + "learning_rate": 1.9354395604395604e-05, + "loss": 0.0226, + "step": 22310 + }, + { + "epoch": 61.293956043956044, + "grad_norm": 9.553421974182129, + "learning_rate": 1.9353021978021978e-05, + "loss": 0.1283, + "step": 22311 + }, + { + "epoch": 61.2967032967033, + "grad_norm": 14.460234642028809, + "learning_rate": 1.935164835164835e-05, + "loss": 0.3934, + "step": 22312 + }, + { + "epoch": 61.29945054945055, + "grad_norm": 2.9045486450195312, + "learning_rate": 1.9350274725274725e-05, + "loss": 0.0308, + "step": 22313 + }, + { + "epoch": 61.3021978021978, + "grad_norm": 11.619146347045898, + "learning_rate": 1.93489010989011e-05, + "loss": 0.1484, + "step": 22314 + }, + { + "epoch": 61.30494505494506, + "grad_norm": 16.024911880493164, + "learning_rate": 1.9347527472527475e-05, + "loss": 0.3589, + "step": 22315 + }, + { + "epoch": 61.30769230769231, + "grad_norm": 8.033838272094727, + "learning_rate": 1.934615384615385e-05, + "loss": 0.1172, + "step": 22316 + }, + { + "epoch": 61.31043956043956, + "grad_norm": 4.851569175720215, + "learning_rate": 1.9344780219780222e-05, + "loss": 0.1132, + "step": 22317 + }, + { + "epoch": 61.31318681318681, + "grad_norm": 15.581023216247559, + "learning_rate": 1.9343406593406592e-05, + "loss": 0.2759, + "step": 22318 + }, + { + "epoch": 61.315934065934066, + "grad_norm": 7.565861225128174, + "learning_rate": 1.934203296703297e-05, + "loss": 0.1268, + "step": 22319 + }, + { + "epoch": 61.31868131868132, + "grad_norm": 18.52520179748535, + "learning_rate": 1.9340659340659342e-05, + "loss": 0.4491, + "step": 22320 + }, + { + "epoch": 61.32142857142857, + "grad_norm": 20.311391830444336, + "learning_rate": 1.9339285714285715e-05, + "loss": 0.6044, + "step": 22321 + }, + { + "epoch": 61.324175824175825, + "grad_norm": 15.449966430664062, + "learning_rate": 1.933791208791209e-05, + "loss": 0.3194, + "step": 22322 + }, + { + "epoch": 61.32692307692308, + "grad_norm": 22.545198440551758, + "learning_rate": 1.9336538461538462e-05, + "loss": 0.4438, + "step": 22323 + }, + { + "epoch": 61.32967032967033, + "grad_norm": 17.40088653564453, + "learning_rate": 1.933516483516484e-05, + "loss": 0.2623, + "step": 22324 + }, + { + "epoch": 61.332417582417584, + "grad_norm": 16.873563766479492, + "learning_rate": 1.933379120879121e-05, + "loss": 0.3741, + "step": 22325 + }, + { + "epoch": 61.33516483516483, + "grad_norm": 30.334867477416992, + "learning_rate": 1.9332417582417582e-05, + "loss": 0.673, + "step": 22326 + }, + { + "epoch": 61.33791208791209, + "grad_norm": 5.700828552246094, + "learning_rate": 1.9331043956043956e-05, + "loss": 0.0783, + "step": 22327 + }, + { + "epoch": 61.34065934065934, + "grad_norm": 18.018917083740234, + "learning_rate": 1.932967032967033e-05, + "loss": 0.6631, + "step": 22328 + }, + { + "epoch": 61.34340659340659, + "grad_norm": 11.897069931030273, + "learning_rate": 1.9328296703296706e-05, + "loss": 0.2399, + "step": 22329 + }, + { + "epoch": 61.34615384615385, + "grad_norm": 7.118526935577393, + "learning_rate": 1.932692307692308e-05, + "loss": 0.1349, + "step": 22330 + }, + { + "epoch": 61.3489010989011, + "grad_norm": 2.545706272125244, + "learning_rate": 1.9325549450549453e-05, + "loss": 0.0342, + "step": 22331 + }, + { + "epoch": 61.35164835164835, + "grad_norm": 12.860158920288086, + "learning_rate": 1.9324175824175826e-05, + "loss": 0.3012, + "step": 22332 + }, + { + "epoch": 61.354395604395606, + "grad_norm": 15.76343059539795, + "learning_rate": 1.9322802197802196e-05, + "loss": 0.4095, + "step": 22333 + }, + { + "epoch": 61.357142857142854, + "grad_norm": 15.35130786895752, + "learning_rate": 1.9321428571428573e-05, + "loss": 0.267, + "step": 22334 + }, + { + "epoch": 61.35989010989011, + "grad_norm": 12.501094818115234, + "learning_rate": 1.9320054945054946e-05, + "loss": 0.2083, + "step": 22335 + }, + { + "epoch": 61.362637362637365, + "grad_norm": 6.291213512420654, + "learning_rate": 1.931868131868132e-05, + "loss": 0.1112, + "step": 22336 + }, + { + "epoch": 61.36538461538461, + "grad_norm": 12.07497787475586, + "learning_rate": 1.9317307692307693e-05, + "loss": 0.1492, + "step": 22337 + }, + { + "epoch": 61.36813186813187, + "grad_norm": 4.262842178344727, + "learning_rate": 1.9315934065934067e-05, + "loss": 0.0334, + "step": 22338 + }, + { + "epoch": 61.370879120879124, + "grad_norm": 4.225899696350098, + "learning_rate": 1.9314560439560443e-05, + "loss": 0.0427, + "step": 22339 + }, + { + "epoch": 61.37362637362637, + "grad_norm": 20.4346981048584, + "learning_rate": 1.9313186813186813e-05, + "loss": 0.3508, + "step": 22340 + }, + { + "epoch": 61.37637362637363, + "grad_norm": 10.97362995147705, + "learning_rate": 1.9311813186813187e-05, + "loss": 0.2228, + "step": 22341 + }, + { + "epoch": 61.379120879120876, + "grad_norm": 5.403800010681152, + "learning_rate": 1.931043956043956e-05, + "loss": 0.0941, + "step": 22342 + }, + { + "epoch": 61.38186813186813, + "grad_norm": 17.425935745239258, + "learning_rate": 1.9309065934065934e-05, + "loss": 0.4123, + "step": 22343 + }, + { + "epoch": 61.38461538461539, + "grad_norm": 12.303153991699219, + "learning_rate": 1.930769230769231e-05, + "loss": 0.2619, + "step": 22344 + }, + { + "epoch": 61.387362637362635, + "grad_norm": 5.906757831573486, + "learning_rate": 1.9306318681318684e-05, + "loss": 0.0824, + "step": 22345 + }, + { + "epoch": 61.39010989010989, + "grad_norm": 15.393518447875977, + "learning_rate": 1.9304945054945057e-05, + "loss": 0.2162, + "step": 22346 + }, + { + "epoch": 61.392857142857146, + "grad_norm": 7.766915798187256, + "learning_rate": 1.930357142857143e-05, + "loss": 0.1238, + "step": 22347 + }, + { + "epoch": 61.395604395604394, + "grad_norm": 33.16553497314453, + "learning_rate": 1.93021978021978e-05, + "loss": 0.9752, + "step": 22348 + }, + { + "epoch": 61.39835164835165, + "grad_norm": 6.749151229858398, + "learning_rate": 1.9300824175824178e-05, + "loss": 0.1313, + "step": 22349 + }, + { + "epoch": 61.4010989010989, + "grad_norm": 21.96059226989746, + "learning_rate": 1.929945054945055e-05, + "loss": 0.5649, + "step": 22350 + }, + { + "epoch": 61.40384615384615, + "grad_norm": 21.483348846435547, + "learning_rate": 1.9298076923076924e-05, + "loss": 0.4601, + "step": 22351 + }, + { + "epoch": 61.40659340659341, + "grad_norm": 17.29677963256836, + "learning_rate": 1.9296703296703298e-05, + "loss": 0.3556, + "step": 22352 + }, + { + "epoch": 61.40934065934066, + "grad_norm": 11.537214279174805, + "learning_rate": 1.929532967032967e-05, + "loss": 0.231, + "step": 22353 + }, + { + "epoch": 61.41208791208791, + "grad_norm": 18.201417922973633, + "learning_rate": 1.9293956043956045e-05, + "loss": 0.3263, + "step": 22354 + }, + { + "epoch": 61.41483516483517, + "grad_norm": 11.455011367797852, + "learning_rate": 1.9292582417582418e-05, + "loss": 0.152, + "step": 22355 + }, + { + "epoch": 61.417582417582416, + "grad_norm": 13.560003280639648, + "learning_rate": 1.929120879120879e-05, + "loss": 0.2327, + "step": 22356 + }, + { + "epoch": 61.42032967032967, + "grad_norm": 9.2883939743042, + "learning_rate": 1.9289835164835165e-05, + "loss": 0.2464, + "step": 22357 + }, + { + "epoch": 61.42307692307692, + "grad_norm": 25.900121688842773, + "learning_rate": 1.9288461538461538e-05, + "loss": 0.6671, + "step": 22358 + }, + { + "epoch": 61.425824175824175, + "grad_norm": 9.032227516174316, + "learning_rate": 1.928708791208791e-05, + "loss": 0.1719, + "step": 22359 + }, + { + "epoch": 61.42857142857143, + "grad_norm": 21.448923110961914, + "learning_rate": 1.928571428571429e-05, + "loss": 0.6788, + "step": 22360 + }, + { + "epoch": 61.43131868131868, + "grad_norm": 11.713554382324219, + "learning_rate": 1.9284340659340662e-05, + "loss": 0.3261, + "step": 22361 + }, + { + "epoch": 61.434065934065934, + "grad_norm": 13.01004695892334, + "learning_rate": 1.9282967032967035e-05, + "loss": 0.3329, + "step": 22362 + }, + { + "epoch": 61.43681318681319, + "grad_norm": 19.364974975585938, + "learning_rate": 1.9281593406593405e-05, + "loss": 0.3468, + "step": 22363 + }, + { + "epoch": 61.43956043956044, + "grad_norm": 3.931262493133545, + "learning_rate": 1.928021978021978e-05, + "loss": 0.0458, + "step": 22364 + }, + { + "epoch": 61.44230769230769, + "grad_norm": 9.125916481018066, + "learning_rate": 1.9278846153846155e-05, + "loss": 0.1514, + "step": 22365 + }, + { + "epoch": 61.44505494505494, + "grad_norm": 14.966418266296387, + "learning_rate": 1.927747252747253e-05, + "loss": 0.2617, + "step": 22366 + }, + { + "epoch": 61.4478021978022, + "grad_norm": 8.104159355163574, + "learning_rate": 1.9276098901098902e-05, + "loss": 0.093, + "step": 22367 + }, + { + "epoch": 61.45054945054945, + "grad_norm": 6.312868118286133, + "learning_rate": 1.9274725274725276e-05, + "loss": 0.1045, + "step": 22368 + }, + { + "epoch": 61.4532967032967, + "grad_norm": 13.88921070098877, + "learning_rate": 1.927335164835165e-05, + "loss": 0.1544, + "step": 22369 + }, + { + "epoch": 61.456043956043956, + "grad_norm": 9.859647750854492, + "learning_rate": 1.9271978021978022e-05, + "loss": 0.2129, + "step": 22370 + }, + { + "epoch": 61.45879120879121, + "grad_norm": 9.103498458862305, + "learning_rate": 1.9270604395604396e-05, + "loss": 0.1051, + "step": 22371 + }, + { + "epoch": 61.46153846153846, + "grad_norm": 8.930126190185547, + "learning_rate": 1.926923076923077e-05, + "loss": 0.1372, + "step": 22372 + }, + { + "epoch": 61.464285714285715, + "grad_norm": 30.20054817199707, + "learning_rate": 1.9267857142857143e-05, + "loss": 0.8316, + "step": 22373 + }, + { + "epoch": 61.467032967032964, + "grad_norm": 3.744689702987671, + "learning_rate": 1.9266483516483516e-05, + "loss": 0.0547, + "step": 22374 + }, + { + "epoch": 61.46978021978022, + "grad_norm": 15.009491920471191, + "learning_rate": 1.9265109890109893e-05, + "loss": 0.2398, + "step": 22375 + }, + { + "epoch": 61.472527472527474, + "grad_norm": 9.576645851135254, + "learning_rate": 1.9263736263736266e-05, + "loss": 0.1675, + "step": 22376 + }, + { + "epoch": 61.47527472527472, + "grad_norm": 4.390391826629639, + "learning_rate": 1.926236263736264e-05, + "loss": 0.0803, + "step": 22377 + }, + { + "epoch": 61.47802197802198, + "grad_norm": 6.707484722137451, + "learning_rate": 1.926098901098901e-05, + "loss": 0.1252, + "step": 22378 + }, + { + "epoch": 61.48076923076923, + "grad_norm": 24.154848098754883, + "learning_rate": 1.9259615384615383e-05, + "loss": 0.9696, + "step": 22379 + }, + { + "epoch": 61.48351648351648, + "grad_norm": 4.853340148925781, + "learning_rate": 1.925824175824176e-05, + "loss": 0.0442, + "step": 22380 + }, + { + "epoch": 61.48626373626374, + "grad_norm": 6.07185697555542, + "learning_rate": 1.9256868131868133e-05, + "loss": 0.1014, + "step": 22381 + }, + { + "epoch": 61.489010989010985, + "grad_norm": 13.139240264892578, + "learning_rate": 1.9255494505494507e-05, + "loss": 0.2983, + "step": 22382 + }, + { + "epoch": 61.49175824175824, + "grad_norm": 9.265974998474121, + "learning_rate": 1.925412087912088e-05, + "loss": 0.0965, + "step": 22383 + }, + { + "epoch": 61.494505494505496, + "grad_norm": 11.663151741027832, + "learning_rate": 1.9252747252747254e-05, + "loss": 0.1169, + "step": 22384 + }, + { + "epoch": 61.497252747252745, + "grad_norm": 14.083200454711914, + "learning_rate": 1.9251373626373627e-05, + "loss": 0.2899, + "step": 22385 + }, + { + "epoch": 61.5, + "grad_norm": 7.050133228302002, + "learning_rate": 1.925e-05, + "loss": 0.122, + "step": 22386 + }, + { + "epoch": 61.502747252747255, + "grad_norm": 12.6466064453125, + "learning_rate": 1.9248626373626374e-05, + "loss": 0.2792, + "step": 22387 + }, + { + "epoch": 61.505494505494504, + "grad_norm": 10.739006996154785, + "learning_rate": 1.9247252747252747e-05, + "loss": 0.2107, + "step": 22388 + }, + { + "epoch": 61.50824175824176, + "grad_norm": 4.149493217468262, + "learning_rate": 1.924587912087912e-05, + "loss": 0.1366, + "step": 22389 + }, + { + "epoch": 61.51098901098901, + "grad_norm": 18.819318771362305, + "learning_rate": 1.9244505494505497e-05, + "loss": 0.4566, + "step": 22390 + }, + { + "epoch": 61.51373626373626, + "grad_norm": 10.403327941894531, + "learning_rate": 1.924313186813187e-05, + "loss": 0.1397, + "step": 22391 + }, + { + "epoch": 61.51648351648352, + "grad_norm": 13.378058433532715, + "learning_rate": 1.9241758241758244e-05, + "loss": 0.273, + "step": 22392 + }, + { + "epoch": 61.51923076923077, + "grad_norm": 1.378777027130127, + "learning_rate": 1.9240384615384614e-05, + "loss": 0.0219, + "step": 22393 + }, + { + "epoch": 61.52197802197802, + "grad_norm": 11.483165740966797, + "learning_rate": 1.9239010989010988e-05, + "loss": 0.2534, + "step": 22394 + }, + { + "epoch": 61.52472527472528, + "grad_norm": 12.429248809814453, + "learning_rate": 1.9237637362637364e-05, + "loss": 0.5734, + "step": 22395 + }, + { + "epoch": 61.527472527472526, + "grad_norm": 19.939056396484375, + "learning_rate": 1.9236263736263738e-05, + "loss": 0.5007, + "step": 22396 + }, + { + "epoch": 61.53021978021978, + "grad_norm": 4.408247947692871, + "learning_rate": 1.923489010989011e-05, + "loss": 0.0842, + "step": 22397 + }, + { + "epoch": 61.532967032967036, + "grad_norm": 7.768686771392822, + "learning_rate": 1.9233516483516485e-05, + "loss": 0.1133, + "step": 22398 + }, + { + "epoch": 61.535714285714285, + "grad_norm": 13.651394844055176, + "learning_rate": 1.9232142857142858e-05, + "loss": 0.341, + "step": 22399 + }, + { + "epoch": 61.53846153846154, + "grad_norm": 9.620176315307617, + "learning_rate": 1.923076923076923e-05, + "loss": 0.1203, + "step": 22400 + }, + { + "epoch": 61.54120879120879, + "grad_norm": 14.69075870513916, + "learning_rate": 1.9229395604395605e-05, + "loss": 0.3169, + "step": 22401 + }, + { + "epoch": 61.543956043956044, + "grad_norm": 16.291561126708984, + "learning_rate": 1.9228021978021978e-05, + "loss": 0.3842, + "step": 22402 + }, + { + "epoch": 61.5467032967033, + "grad_norm": 6.109086990356445, + "learning_rate": 1.922664835164835e-05, + "loss": 0.095, + "step": 22403 + }, + { + "epoch": 61.54945054945055, + "grad_norm": 6.302194118499756, + "learning_rate": 1.9225274725274725e-05, + "loss": 0.1194, + "step": 22404 + }, + { + "epoch": 61.5521978021978, + "grad_norm": 2.409313678741455, + "learning_rate": 1.9223901098901102e-05, + "loss": 0.0329, + "step": 22405 + }, + { + "epoch": 61.55494505494506, + "grad_norm": 17.013351440429688, + "learning_rate": 1.9222527472527475e-05, + "loss": 0.4448, + "step": 22406 + }, + { + "epoch": 61.55769230769231, + "grad_norm": 7.159859657287598, + "learning_rate": 1.922115384615385e-05, + "loss": 0.1955, + "step": 22407 + }, + { + "epoch": 61.56043956043956, + "grad_norm": 8.525177001953125, + "learning_rate": 1.921978021978022e-05, + "loss": 0.1378, + "step": 22408 + }, + { + "epoch": 61.56318681318681, + "grad_norm": 16.98732566833496, + "learning_rate": 1.9218406593406592e-05, + "loss": 0.4753, + "step": 22409 + }, + { + "epoch": 61.565934065934066, + "grad_norm": 18.433652877807617, + "learning_rate": 1.921703296703297e-05, + "loss": 0.5526, + "step": 22410 + }, + { + "epoch": 61.56868131868132, + "grad_norm": 16.02456283569336, + "learning_rate": 1.9215659340659342e-05, + "loss": 0.2085, + "step": 22411 + }, + { + "epoch": 61.57142857142857, + "grad_norm": 8.953778266906738, + "learning_rate": 1.9214285714285716e-05, + "loss": 0.2066, + "step": 22412 + }, + { + "epoch": 61.574175824175825, + "grad_norm": 23.691076278686523, + "learning_rate": 1.921291208791209e-05, + "loss": 0.4195, + "step": 22413 + }, + { + "epoch": 61.57692307692308, + "grad_norm": 4.141317844390869, + "learning_rate": 1.9211538461538463e-05, + "loss": 0.0536, + "step": 22414 + }, + { + "epoch": 61.57967032967033, + "grad_norm": 7.979799747467041, + "learning_rate": 1.9210164835164836e-05, + "loss": 0.1279, + "step": 22415 + }, + { + "epoch": 61.582417582417584, + "grad_norm": 11.988219261169434, + "learning_rate": 1.920879120879121e-05, + "loss": 0.1707, + "step": 22416 + }, + { + "epoch": 61.58516483516483, + "grad_norm": 8.775355339050293, + "learning_rate": 1.9207417582417583e-05, + "loss": 0.1238, + "step": 22417 + }, + { + "epoch": 61.58791208791209, + "grad_norm": 15.202884674072266, + "learning_rate": 1.9206043956043956e-05, + "loss": 0.6939, + "step": 22418 + }, + { + "epoch": 61.59065934065934, + "grad_norm": 9.170612335205078, + "learning_rate": 1.920467032967033e-05, + "loss": 0.2167, + "step": 22419 + }, + { + "epoch": 61.59340659340659, + "grad_norm": 8.38724422454834, + "learning_rate": 1.9203296703296706e-05, + "loss": 0.112, + "step": 22420 + }, + { + "epoch": 61.59615384615385, + "grad_norm": 20.321142196655273, + "learning_rate": 1.920192307692308e-05, + "loss": 0.5056, + "step": 22421 + }, + { + "epoch": 61.5989010989011, + "grad_norm": 3.3666999340057373, + "learning_rate": 1.9200549450549453e-05, + "loss": 0.0356, + "step": 22422 + }, + { + "epoch": 61.60164835164835, + "grad_norm": 7.0670247077941895, + "learning_rate": 1.9199175824175823e-05, + "loss": 0.0938, + "step": 22423 + }, + { + "epoch": 61.604395604395606, + "grad_norm": 13.208772659301758, + "learning_rate": 1.9197802197802197e-05, + "loss": 0.3477, + "step": 22424 + }, + { + "epoch": 61.607142857142854, + "grad_norm": 15.491142272949219, + "learning_rate": 1.9196428571428573e-05, + "loss": 0.2245, + "step": 22425 + }, + { + "epoch": 61.60989010989011, + "grad_norm": 18.550764083862305, + "learning_rate": 1.9195054945054947e-05, + "loss": 0.2325, + "step": 22426 + }, + { + "epoch": 61.612637362637365, + "grad_norm": 6.092789649963379, + "learning_rate": 1.919368131868132e-05, + "loss": 0.086, + "step": 22427 + }, + { + "epoch": 61.61538461538461, + "grad_norm": 10.110404014587402, + "learning_rate": 1.9192307692307694e-05, + "loss": 0.1636, + "step": 22428 + }, + { + "epoch": 61.61813186813187, + "grad_norm": 19.527650833129883, + "learning_rate": 1.9190934065934067e-05, + "loss": 0.4505, + "step": 22429 + }, + { + "epoch": 61.620879120879124, + "grad_norm": 2.4768643379211426, + "learning_rate": 1.918956043956044e-05, + "loss": 0.0297, + "step": 22430 + }, + { + "epoch": 61.62362637362637, + "grad_norm": 26.281686782836914, + "learning_rate": 1.9188186813186814e-05, + "loss": 0.8738, + "step": 22431 + }, + { + "epoch": 61.62637362637363, + "grad_norm": 7.38408899307251, + "learning_rate": 1.9186813186813187e-05, + "loss": 0.164, + "step": 22432 + }, + { + "epoch": 61.629120879120876, + "grad_norm": 2.764085292816162, + "learning_rate": 1.918543956043956e-05, + "loss": 0.0343, + "step": 22433 + }, + { + "epoch": 61.63186813186813, + "grad_norm": 12.470242500305176, + "learning_rate": 1.9184065934065934e-05, + "loss": 0.148, + "step": 22434 + }, + { + "epoch": 61.63461538461539, + "grad_norm": 5.352348804473877, + "learning_rate": 1.918269230769231e-05, + "loss": 0.0889, + "step": 22435 + }, + { + "epoch": 61.637362637362635, + "grad_norm": 14.788352012634277, + "learning_rate": 1.9181318681318684e-05, + "loss": 0.5093, + "step": 22436 + }, + { + "epoch": 61.64010989010989, + "grad_norm": 8.351097106933594, + "learning_rate": 1.9179945054945058e-05, + "loss": 0.3409, + "step": 22437 + }, + { + "epoch": 61.642857142857146, + "grad_norm": 4.63418436050415, + "learning_rate": 1.9178571428571428e-05, + "loss": 0.0927, + "step": 22438 + }, + { + "epoch": 61.645604395604394, + "grad_norm": 7.045537948608398, + "learning_rate": 1.91771978021978e-05, + "loss": 0.1677, + "step": 22439 + }, + { + "epoch": 61.64835164835165, + "grad_norm": 18.225492477416992, + "learning_rate": 1.9175824175824178e-05, + "loss": 0.3753, + "step": 22440 + }, + { + "epoch": 61.6510989010989, + "grad_norm": 15.147113800048828, + "learning_rate": 1.917445054945055e-05, + "loss": 0.3198, + "step": 22441 + }, + { + "epoch": 61.65384615384615, + "grad_norm": 17.501819610595703, + "learning_rate": 1.9173076923076925e-05, + "loss": 0.3468, + "step": 22442 + }, + { + "epoch": 61.65659340659341, + "grad_norm": 7.269907474517822, + "learning_rate": 1.9171703296703298e-05, + "loss": 0.1796, + "step": 22443 + }, + { + "epoch": 61.65934065934066, + "grad_norm": 6.881575107574463, + "learning_rate": 1.917032967032967e-05, + "loss": 0.1054, + "step": 22444 + }, + { + "epoch": 61.66208791208791, + "grad_norm": 19.945758819580078, + "learning_rate": 1.9168956043956045e-05, + "loss": 0.5221, + "step": 22445 + }, + { + "epoch": 61.66483516483517, + "grad_norm": 21.52570343017578, + "learning_rate": 1.916758241758242e-05, + "loss": 0.6581, + "step": 22446 + }, + { + "epoch": 61.667582417582416, + "grad_norm": 2.8175384998321533, + "learning_rate": 1.9166208791208792e-05, + "loss": 0.0524, + "step": 22447 + }, + { + "epoch": 61.67032967032967, + "grad_norm": 12.305270195007324, + "learning_rate": 1.9164835164835165e-05, + "loss": 0.2496, + "step": 22448 + }, + { + "epoch": 61.67307692307692, + "grad_norm": 13.750285148620605, + "learning_rate": 1.916346153846154e-05, + "loss": 0.2557, + "step": 22449 + }, + { + "epoch": 61.675824175824175, + "grad_norm": 8.581321716308594, + "learning_rate": 1.9162087912087915e-05, + "loss": 0.2346, + "step": 22450 + }, + { + "epoch": 61.67857142857143, + "grad_norm": 7.741057395935059, + "learning_rate": 1.916071428571429e-05, + "loss": 0.0826, + "step": 22451 + }, + { + "epoch": 61.68131868131868, + "grad_norm": 8.699012756347656, + "learning_rate": 1.9159340659340662e-05, + "loss": 0.1795, + "step": 22452 + }, + { + "epoch": 61.684065934065934, + "grad_norm": 18.961511611938477, + "learning_rate": 1.9157967032967032e-05, + "loss": 0.4796, + "step": 22453 + }, + { + "epoch": 61.68681318681319, + "grad_norm": 7.989169120788574, + "learning_rate": 1.9156593406593406e-05, + "loss": 0.2026, + "step": 22454 + }, + { + "epoch": 61.68956043956044, + "grad_norm": 10.658992767333984, + "learning_rate": 1.9155219780219782e-05, + "loss": 0.3905, + "step": 22455 + }, + { + "epoch": 61.69230769230769, + "grad_norm": 23.253175735473633, + "learning_rate": 1.9153846153846156e-05, + "loss": 0.386, + "step": 22456 + }, + { + "epoch": 61.69505494505494, + "grad_norm": 20.962661743164062, + "learning_rate": 1.915247252747253e-05, + "loss": 0.5496, + "step": 22457 + }, + { + "epoch": 61.6978021978022, + "grad_norm": 13.45540714263916, + "learning_rate": 1.9151098901098903e-05, + "loss": 0.2806, + "step": 22458 + }, + { + "epoch": 61.70054945054945, + "grad_norm": 4.524938583374023, + "learning_rate": 1.9149725274725276e-05, + "loss": 0.0489, + "step": 22459 + }, + { + "epoch": 61.7032967032967, + "grad_norm": 8.004226684570312, + "learning_rate": 1.914835164835165e-05, + "loss": 0.1291, + "step": 22460 + }, + { + "epoch": 61.706043956043956, + "grad_norm": 12.78677749633789, + "learning_rate": 1.9146978021978023e-05, + "loss": 0.2908, + "step": 22461 + }, + { + "epoch": 61.70879120879121, + "grad_norm": 5.212320804595947, + "learning_rate": 1.9145604395604396e-05, + "loss": 0.0586, + "step": 22462 + }, + { + "epoch": 61.71153846153846, + "grad_norm": 16.18942642211914, + "learning_rate": 1.914423076923077e-05, + "loss": 0.3635, + "step": 22463 + }, + { + "epoch": 61.714285714285715, + "grad_norm": 22.635168075561523, + "learning_rate": 1.9142857142857143e-05, + "loss": 0.6655, + "step": 22464 + }, + { + "epoch": 61.717032967032964, + "grad_norm": 10.621557235717773, + "learning_rate": 1.914148351648352e-05, + "loss": 0.2123, + "step": 22465 + }, + { + "epoch": 61.71978021978022, + "grad_norm": 16.199018478393555, + "learning_rate": 1.9140109890109893e-05, + "loss": 0.2874, + "step": 22466 + }, + { + "epoch": 61.722527472527474, + "grad_norm": 17.39268684387207, + "learning_rate": 1.9138736263736267e-05, + "loss": 0.249, + "step": 22467 + }, + { + "epoch": 61.72527472527472, + "grad_norm": 4.335822105407715, + "learning_rate": 1.9137362637362637e-05, + "loss": 0.0817, + "step": 22468 + }, + { + "epoch": 61.72802197802198, + "grad_norm": 17.387495040893555, + "learning_rate": 1.913598901098901e-05, + "loss": 0.4728, + "step": 22469 + }, + { + "epoch": 61.73076923076923, + "grad_norm": 14.968169212341309, + "learning_rate": 1.9134615384615383e-05, + "loss": 0.324, + "step": 22470 + }, + { + "epoch": 61.73351648351648, + "grad_norm": 15.790116310119629, + "learning_rate": 1.913324175824176e-05, + "loss": 0.2866, + "step": 22471 + }, + { + "epoch": 61.73626373626374, + "grad_norm": 7.324741363525391, + "learning_rate": 1.9131868131868134e-05, + "loss": 0.2175, + "step": 22472 + }, + { + "epoch": 61.73901098901099, + "grad_norm": 12.39752197265625, + "learning_rate": 1.9130494505494507e-05, + "loss": 0.1351, + "step": 22473 + }, + { + "epoch": 61.74175824175824, + "grad_norm": 18.15013885498047, + "learning_rate": 1.912912087912088e-05, + "loss": 0.5999, + "step": 22474 + }, + { + "epoch": 61.744505494505496, + "grad_norm": 13.406347274780273, + "learning_rate": 1.9127747252747254e-05, + "loss": 0.258, + "step": 22475 + }, + { + "epoch": 61.747252747252745, + "grad_norm": 7.366724967956543, + "learning_rate": 1.9126373626373627e-05, + "loss": 0.1174, + "step": 22476 + }, + { + "epoch": 61.75, + "grad_norm": 12.772847175598145, + "learning_rate": 1.9125e-05, + "loss": 0.1649, + "step": 22477 + }, + { + "epoch": 61.752747252747255, + "grad_norm": 21.011945724487305, + "learning_rate": 1.9123626373626374e-05, + "loss": 0.6838, + "step": 22478 + }, + { + "epoch": 61.755494505494504, + "grad_norm": 2.1724765300750732, + "learning_rate": 1.9122252747252748e-05, + "loss": 0.0801, + "step": 22479 + }, + { + "epoch": 61.75824175824176, + "grad_norm": 24.178916931152344, + "learning_rate": 1.912087912087912e-05, + "loss": 0.6168, + "step": 22480 + }, + { + "epoch": 61.76098901098901, + "grad_norm": 11.988673210144043, + "learning_rate": 1.9119505494505498e-05, + "loss": 0.2025, + "step": 22481 + }, + { + "epoch": 61.76373626373626, + "grad_norm": 16.325021743774414, + "learning_rate": 1.9118131868131868e-05, + "loss": 0.4126, + "step": 22482 + }, + { + "epoch": 61.76648351648352, + "grad_norm": 15.978900909423828, + "learning_rate": 1.911675824175824e-05, + "loss": 0.5388, + "step": 22483 + }, + { + "epoch": 61.76923076923077, + "grad_norm": 3.8688671588897705, + "learning_rate": 1.9115384615384615e-05, + "loss": 0.0398, + "step": 22484 + }, + { + "epoch": 61.77197802197802, + "grad_norm": 10.316314697265625, + "learning_rate": 1.9114010989010988e-05, + "loss": 0.1132, + "step": 22485 + }, + { + "epoch": 61.77472527472528, + "grad_norm": 12.484724998474121, + "learning_rate": 1.9112637362637365e-05, + "loss": 0.239, + "step": 22486 + }, + { + "epoch": 61.777472527472526, + "grad_norm": 11.5975980758667, + "learning_rate": 1.9111263736263738e-05, + "loss": 0.2103, + "step": 22487 + }, + { + "epoch": 61.78021978021978, + "grad_norm": 19.2387752532959, + "learning_rate": 1.910989010989011e-05, + "loss": 0.4293, + "step": 22488 + }, + { + "epoch": 61.782967032967036, + "grad_norm": 2.8092923164367676, + "learning_rate": 1.9108516483516485e-05, + "loss": 0.026, + "step": 22489 + }, + { + "epoch": 61.785714285714285, + "grad_norm": 3.242187023162842, + "learning_rate": 1.910714285714286e-05, + "loss": 0.0671, + "step": 22490 + }, + { + "epoch": 61.78846153846154, + "grad_norm": 11.54549503326416, + "learning_rate": 1.9105769230769232e-05, + "loss": 0.2432, + "step": 22491 + }, + { + "epoch": 61.79120879120879, + "grad_norm": 18.9826602935791, + "learning_rate": 1.9104395604395605e-05, + "loss": 0.5529, + "step": 22492 + }, + { + "epoch": 61.793956043956044, + "grad_norm": 14.918950080871582, + "learning_rate": 1.910302197802198e-05, + "loss": 0.1838, + "step": 22493 + }, + { + "epoch": 61.7967032967033, + "grad_norm": 6.9210028648376465, + "learning_rate": 1.9101648351648352e-05, + "loss": 0.0879, + "step": 22494 + }, + { + "epoch": 61.79945054945055, + "grad_norm": 1.2909287214279175, + "learning_rate": 1.9100274725274725e-05, + "loss": 0.0185, + "step": 22495 + }, + { + "epoch": 61.8021978021978, + "grad_norm": 5.263190746307373, + "learning_rate": 1.9098901098901102e-05, + "loss": 0.1104, + "step": 22496 + }, + { + "epoch": 61.80494505494506, + "grad_norm": 22.29178237915039, + "learning_rate": 1.9097527472527472e-05, + "loss": 0.6306, + "step": 22497 + }, + { + "epoch": 61.80769230769231, + "grad_norm": 22.483137130737305, + "learning_rate": 1.9096153846153846e-05, + "loss": 0.4653, + "step": 22498 + }, + { + "epoch": 61.81043956043956, + "grad_norm": 24.206388473510742, + "learning_rate": 1.909478021978022e-05, + "loss": 0.6691, + "step": 22499 + }, + { + "epoch": 61.81318681318681, + "grad_norm": 12.690281867980957, + "learning_rate": 1.9093406593406592e-05, + "loss": 0.2391, + "step": 22500 + }, + { + "epoch": 61.815934065934066, + "grad_norm": 3.432710647583008, + "learning_rate": 1.909203296703297e-05, + "loss": 0.0544, + "step": 22501 + }, + { + "epoch": 61.81868131868132, + "grad_norm": 19.917659759521484, + "learning_rate": 1.9090659340659343e-05, + "loss": 0.7375, + "step": 22502 + }, + { + "epoch": 61.82142857142857, + "grad_norm": 16.748510360717773, + "learning_rate": 1.9089285714285716e-05, + "loss": 0.424, + "step": 22503 + }, + { + "epoch": 61.824175824175825, + "grad_norm": 8.727187156677246, + "learning_rate": 1.908791208791209e-05, + "loss": 0.0965, + "step": 22504 + }, + { + "epoch": 61.82692307692308, + "grad_norm": 13.270648002624512, + "learning_rate": 1.9086538461538463e-05, + "loss": 0.273, + "step": 22505 + }, + { + "epoch": 61.82967032967033, + "grad_norm": 4.8516459465026855, + "learning_rate": 1.9085164835164836e-05, + "loss": 0.0897, + "step": 22506 + }, + { + "epoch": 61.832417582417584, + "grad_norm": 11.135149955749512, + "learning_rate": 1.908379120879121e-05, + "loss": 0.1998, + "step": 22507 + }, + { + "epoch": 61.83516483516483, + "grad_norm": 16.96841812133789, + "learning_rate": 1.9082417582417583e-05, + "loss": 0.3555, + "step": 22508 + }, + { + "epoch": 61.83791208791209, + "grad_norm": 12.932181358337402, + "learning_rate": 1.9081043956043956e-05, + "loss": 0.3571, + "step": 22509 + }, + { + "epoch": 61.84065934065934, + "grad_norm": 17.23965835571289, + "learning_rate": 1.907967032967033e-05, + "loss": 0.3886, + "step": 22510 + }, + { + "epoch": 61.84340659340659, + "grad_norm": 21.739622116088867, + "learning_rate": 1.9078296703296707e-05, + "loss": 0.6849, + "step": 22511 + }, + { + "epoch": 61.84615384615385, + "grad_norm": 12.454110145568848, + "learning_rate": 1.9076923076923077e-05, + "loss": 0.371, + "step": 22512 + }, + { + "epoch": 61.8489010989011, + "grad_norm": 13.167333602905273, + "learning_rate": 1.907554945054945e-05, + "loss": 0.2485, + "step": 22513 + }, + { + "epoch": 61.85164835164835, + "grad_norm": 12.032097816467285, + "learning_rate": 1.9074175824175824e-05, + "loss": 0.1337, + "step": 22514 + }, + { + "epoch": 61.854395604395606, + "grad_norm": 9.639544486999512, + "learning_rate": 1.9072802197802197e-05, + "loss": 0.2046, + "step": 22515 + }, + { + "epoch": 61.857142857142854, + "grad_norm": 8.257948875427246, + "learning_rate": 1.9071428571428574e-05, + "loss": 0.2913, + "step": 22516 + }, + { + "epoch": 61.85989010989011, + "grad_norm": 5.4018988609313965, + "learning_rate": 1.9070054945054947e-05, + "loss": 0.0859, + "step": 22517 + }, + { + "epoch": 61.862637362637365, + "grad_norm": 16.61369514465332, + "learning_rate": 1.906868131868132e-05, + "loss": 0.1868, + "step": 22518 + }, + { + "epoch": 61.86538461538461, + "grad_norm": 12.675858497619629, + "learning_rate": 1.9067307692307694e-05, + "loss": 0.299, + "step": 22519 + }, + { + "epoch": 61.86813186813187, + "grad_norm": 17.370851516723633, + "learning_rate": 1.9065934065934067e-05, + "loss": 0.3406, + "step": 22520 + }, + { + "epoch": 61.870879120879124, + "grad_norm": 21.402957916259766, + "learning_rate": 1.906456043956044e-05, + "loss": 0.7658, + "step": 22521 + }, + { + "epoch": 61.87362637362637, + "grad_norm": 18.308507919311523, + "learning_rate": 1.9063186813186814e-05, + "loss": 0.3499, + "step": 22522 + }, + { + "epoch": 61.87637362637363, + "grad_norm": 11.133191108703613, + "learning_rate": 1.9061813186813188e-05, + "loss": 0.289, + "step": 22523 + }, + { + "epoch": 61.879120879120876, + "grad_norm": 10.820343017578125, + "learning_rate": 1.906043956043956e-05, + "loss": 0.2853, + "step": 22524 + }, + { + "epoch": 61.88186813186813, + "grad_norm": 14.803267478942871, + "learning_rate": 1.9059065934065934e-05, + "loss": 0.3604, + "step": 22525 + }, + { + "epoch": 61.88461538461539, + "grad_norm": 10.64501667022705, + "learning_rate": 1.905769230769231e-05, + "loss": 0.2286, + "step": 22526 + }, + { + "epoch": 61.887362637362635, + "grad_norm": 12.192333221435547, + "learning_rate": 1.905631868131868e-05, + "loss": 0.2166, + "step": 22527 + }, + { + "epoch": 61.89010989010989, + "grad_norm": 6.448202610015869, + "learning_rate": 1.9054945054945055e-05, + "loss": 0.0986, + "step": 22528 + }, + { + "epoch": 61.892857142857146, + "grad_norm": 7.327707290649414, + "learning_rate": 1.9053571428571428e-05, + "loss": 0.1154, + "step": 22529 + }, + { + "epoch": 61.895604395604394, + "grad_norm": 5.809962749481201, + "learning_rate": 1.90521978021978e-05, + "loss": 0.1105, + "step": 22530 + }, + { + "epoch": 61.89835164835165, + "grad_norm": 8.894742012023926, + "learning_rate": 1.9050824175824178e-05, + "loss": 0.1427, + "step": 22531 + }, + { + "epoch": 61.9010989010989, + "grad_norm": 11.769285202026367, + "learning_rate": 1.904945054945055e-05, + "loss": 0.1841, + "step": 22532 + }, + { + "epoch": 61.90384615384615, + "grad_norm": 8.224396705627441, + "learning_rate": 1.9048076923076925e-05, + "loss": 0.1675, + "step": 22533 + }, + { + "epoch": 61.90659340659341, + "grad_norm": 2.4208619594573975, + "learning_rate": 1.90467032967033e-05, + "loss": 0.0305, + "step": 22534 + }, + { + "epoch": 61.90934065934066, + "grad_norm": 7.258786201477051, + "learning_rate": 1.9045329670329672e-05, + "loss": 0.0812, + "step": 22535 + }, + { + "epoch": 61.91208791208791, + "grad_norm": 12.431879043579102, + "learning_rate": 1.9043956043956045e-05, + "loss": 0.2428, + "step": 22536 + }, + { + "epoch": 61.91483516483517, + "grad_norm": 16.47831916809082, + "learning_rate": 1.904258241758242e-05, + "loss": 0.2808, + "step": 22537 + }, + { + "epoch": 61.917582417582416, + "grad_norm": 20.754114151000977, + "learning_rate": 1.9041208791208792e-05, + "loss": 0.3663, + "step": 22538 + }, + { + "epoch": 61.92032967032967, + "grad_norm": 13.376843452453613, + "learning_rate": 1.9039835164835165e-05, + "loss": 0.4902, + "step": 22539 + }, + { + "epoch": 61.92307692307692, + "grad_norm": 8.79124927520752, + "learning_rate": 1.903846153846154e-05, + "loss": 0.1133, + "step": 22540 + }, + { + "epoch": 61.925824175824175, + "grad_norm": 8.52724552154541, + "learning_rate": 1.9037087912087916e-05, + "loss": 0.1996, + "step": 22541 + }, + { + "epoch": 61.92857142857143, + "grad_norm": 6.820933818817139, + "learning_rate": 1.9035714285714286e-05, + "loss": 0.1152, + "step": 22542 + }, + { + "epoch": 61.93131868131868, + "grad_norm": 22.621234893798828, + "learning_rate": 1.903434065934066e-05, + "loss": 0.5742, + "step": 22543 + }, + { + "epoch": 61.934065934065934, + "grad_norm": 13.201422691345215, + "learning_rate": 1.9032967032967033e-05, + "loss": 0.1475, + "step": 22544 + }, + { + "epoch": 61.93681318681319, + "grad_norm": 13.103632926940918, + "learning_rate": 1.9031593406593406e-05, + "loss": 0.1079, + "step": 22545 + }, + { + "epoch": 61.93956043956044, + "grad_norm": 15.1202974319458, + "learning_rate": 1.9030219780219783e-05, + "loss": 0.3372, + "step": 22546 + }, + { + "epoch": 61.94230769230769, + "grad_norm": 3.59157657623291, + "learning_rate": 1.9028846153846156e-05, + "loss": 0.0642, + "step": 22547 + }, + { + "epoch": 61.94505494505494, + "grad_norm": 5.824479103088379, + "learning_rate": 1.902747252747253e-05, + "loss": 0.0938, + "step": 22548 + }, + { + "epoch": 61.9478021978022, + "grad_norm": 17.4763126373291, + "learning_rate": 1.9026098901098903e-05, + "loss": 0.6608, + "step": 22549 + }, + { + "epoch": 61.95054945054945, + "grad_norm": 18.33354377746582, + "learning_rate": 1.9024725274725276e-05, + "loss": 0.6474, + "step": 22550 + }, + { + "epoch": 61.9532967032967, + "grad_norm": 12.563759803771973, + "learning_rate": 1.902335164835165e-05, + "loss": 0.1892, + "step": 22551 + }, + { + "epoch": 61.956043956043956, + "grad_norm": 14.455567359924316, + "learning_rate": 1.9021978021978023e-05, + "loss": 0.3635, + "step": 22552 + }, + { + "epoch": 61.95879120879121, + "grad_norm": 13.888114929199219, + "learning_rate": 1.9020604395604397e-05, + "loss": 0.3532, + "step": 22553 + }, + { + "epoch": 61.96153846153846, + "grad_norm": 3.5074691772460938, + "learning_rate": 1.901923076923077e-05, + "loss": 0.0585, + "step": 22554 + }, + { + "epoch": 61.964285714285715, + "grad_norm": 31.906021118164062, + "learning_rate": 1.9017857142857143e-05, + "loss": 1.0656, + "step": 22555 + }, + { + "epoch": 61.967032967032964, + "grad_norm": 7.373316287994385, + "learning_rate": 1.901648351648352e-05, + "loss": 0.1028, + "step": 22556 + }, + { + "epoch": 61.96978021978022, + "grad_norm": 3.7275660037994385, + "learning_rate": 1.901510989010989e-05, + "loss": 0.0476, + "step": 22557 + }, + { + "epoch": 61.972527472527474, + "grad_norm": 17.025583267211914, + "learning_rate": 1.9013736263736264e-05, + "loss": 0.4604, + "step": 22558 + }, + { + "epoch": 61.97527472527472, + "grad_norm": 12.682271957397461, + "learning_rate": 1.9012362637362637e-05, + "loss": 0.3571, + "step": 22559 + }, + { + "epoch": 61.97802197802198, + "grad_norm": 8.162332534790039, + "learning_rate": 1.901098901098901e-05, + "loss": 0.1868, + "step": 22560 + }, + { + "epoch": 61.98076923076923, + "grad_norm": 19.016136169433594, + "learning_rate": 1.9009615384615387e-05, + "loss": 0.525, + "step": 22561 + }, + { + "epoch": 61.98351648351648, + "grad_norm": 8.94290542602539, + "learning_rate": 1.900824175824176e-05, + "loss": 0.1173, + "step": 22562 + }, + { + "epoch": 61.98626373626374, + "grad_norm": 6.922672271728516, + "learning_rate": 1.9006868131868134e-05, + "loss": 0.109, + "step": 22563 + }, + { + "epoch": 61.98901098901099, + "grad_norm": 18.381664276123047, + "learning_rate": 1.9005494505494507e-05, + "loss": 0.3292, + "step": 22564 + }, + { + "epoch": 61.99175824175824, + "grad_norm": 16.854013442993164, + "learning_rate": 1.9004120879120877e-05, + "loss": 0.2696, + "step": 22565 + }, + { + "epoch": 61.994505494505496, + "grad_norm": 2.6419665813446045, + "learning_rate": 1.9002747252747254e-05, + "loss": 0.0302, + "step": 22566 + }, + { + "epoch": 61.997252747252745, + "grad_norm": 2.6048712730407715, + "learning_rate": 1.9001373626373628e-05, + "loss": 0.0411, + "step": 22567 + }, + { + "epoch": 62.0, + "grad_norm": 38.971012115478516, + "learning_rate": 1.9e-05, + "loss": 0.4961, + "step": 22568 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.6914600550964187, + "eval_f1": 0.7158974388155364, + "eval_f1_DuraRiadoRio_64x64": 0.7647058823529411, + "eval_f1_Mole_64x64": 0.782258064516129, + "eval_f1_Quebrado_64x64": 0.7607843137254902, + "eval_f1_RiadoRio_64x64": 0.5643153526970954, + "eval_f1_RioFechado_64x64": 0.7074235807860262, + "eval_loss": 1.7525376081466675, + "eval_precision": 0.8235613965657986, + "eval_precision_DuraRiadoRio_64x64": 0.9680851063829787, + "eval_precision_Mole_64x64": 0.9326923076923077, + "eval_precision_Quebrado_64x64": 0.8738738738738738, + "eval_precision_RiadoRio_64x64": 0.4121212121212121, + "eval_precision_RioFechado_64x64": 0.9310344827586207, + "eval_recall": 0.6888652087966395, + "eval_recall_DuraRiadoRio_64x64": 0.6319444444444444, + "eval_recall_Mole_64x64": 0.6736111111111112, + "eval_recall_Quebrado_64x64": 0.6736111111111112, + "eval_recall_RiadoRio_64x64": 0.8947368421052632, + "eval_recall_RioFechado_64x64": 0.5704225352112676, + "eval_runtime": 1.7793, + "eval_samples_per_second": 408.026, + "eval_steps_per_second": 25.853, + "step": 22568 + }, + { + "epoch": 62.002747252747255, + "grad_norm": 14.105464935302734, + "learning_rate": 1.8998626373626374e-05, + "loss": 0.265, + "step": 22569 + }, + { + "epoch": 62.005494505494504, + "grad_norm": 4.191265106201172, + "learning_rate": 1.8997252747252748e-05, + "loss": 0.0495, + "step": 22570 + }, + { + "epoch": 62.00824175824176, + "grad_norm": 8.55135440826416, + "learning_rate": 1.8995879120879125e-05, + "loss": 0.1355, + "step": 22571 + }, + { + "epoch": 62.010989010989015, + "grad_norm": 9.150339126586914, + "learning_rate": 1.8994505494505495e-05, + "loss": 0.1911, + "step": 22572 + }, + { + "epoch": 62.01373626373626, + "grad_norm": 8.085713386535645, + "learning_rate": 1.8993131868131868e-05, + "loss": 0.1342, + "step": 22573 + }, + { + "epoch": 62.01648351648352, + "grad_norm": 9.590974807739258, + "learning_rate": 1.899175824175824e-05, + "loss": 0.1726, + "step": 22574 + }, + { + "epoch": 62.01923076923077, + "grad_norm": 4.453831672668457, + "learning_rate": 1.8990384615384615e-05, + "loss": 0.1051, + "step": 22575 + }, + { + "epoch": 62.02197802197802, + "grad_norm": 10.00057315826416, + "learning_rate": 1.898901098901099e-05, + "loss": 0.096, + "step": 22576 + }, + { + "epoch": 62.02472527472528, + "grad_norm": 8.370871543884277, + "learning_rate": 1.8987637362637365e-05, + "loss": 0.1666, + "step": 22577 + }, + { + "epoch": 62.027472527472526, + "grad_norm": 3.5124051570892334, + "learning_rate": 1.898626373626374e-05, + "loss": 0.0413, + "step": 22578 + }, + { + "epoch": 62.03021978021978, + "grad_norm": 12.695966720581055, + "learning_rate": 1.8984890109890112e-05, + "loss": 0.1945, + "step": 22579 + }, + { + "epoch": 62.032967032967036, + "grad_norm": 13.764043807983398, + "learning_rate": 1.8983516483516482e-05, + "loss": 0.2267, + "step": 22580 + }, + { + "epoch": 62.035714285714285, + "grad_norm": 12.029040336608887, + "learning_rate": 1.8982142857142855e-05, + "loss": 0.1797, + "step": 22581 + }, + { + "epoch": 62.03846153846154, + "grad_norm": 10.460589408874512, + "learning_rate": 1.8980769230769232e-05, + "loss": 0.1812, + "step": 22582 + }, + { + "epoch": 62.04120879120879, + "grad_norm": 8.324370384216309, + "learning_rate": 1.8979395604395606e-05, + "loss": 0.1357, + "step": 22583 + }, + { + "epoch": 62.043956043956044, + "grad_norm": 14.441784858703613, + "learning_rate": 1.897802197802198e-05, + "loss": 0.446, + "step": 22584 + }, + { + "epoch": 62.0467032967033, + "grad_norm": 17.458663940429688, + "learning_rate": 1.8976648351648352e-05, + "loss": 0.3358, + "step": 22585 + }, + { + "epoch": 62.04945054945055, + "grad_norm": 13.375563621520996, + "learning_rate": 1.8975274725274726e-05, + "loss": 0.1381, + "step": 22586 + }, + { + "epoch": 62.0521978021978, + "grad_norm": 9.21418571472168, + "learning_rate": 1.89739010989011e-05, + "loss": 0.1749, + "step": 22587 + }, + { + "epoch": 62.05494505494506, + "grad_norm": 13.800848007202148, + "learning_rate": 1.8972527472527473e-05, + "loss": 0.1644, + "step": 22588 + }, + { + "epoch": 62.05769230769231, + "grad_norm": 8.6031494140625, + "learning_rate": 1.8971153846153846e-05, + "loss": 0.185, + "step": 22589 + }, + { + "epoch": 62.06043956043956, + "grad_norm": 6.105588436126709, + "learning_rate": 1.896978021978022e-05, + "loss": 0.1112, + "step": 22590 + }, + { + "epoch": 62.06318681318681, + "grad_norm": 13.871050834655762, + "learning_rate": 1.8968406593406593e-05, + "loss": 0.4548, + "step": 22591 + }, + { + "epoch": 62.065934065934066, + "grad_norm": 5.373159408569336, + "learning_rate": 1.896703296703297e-05, + "loss": 0.1071, + "step": 22592 + }, + { + "epoch": 62.06868131868132, + "grad_norm": 5.016867637634277, + "learning_rate": 1.8965659340659343e-05, + "loss": 0.1409, + "step": 22593 + }, + { + "epoch": 62.07142857142857, + "grad_norm": 2.408445358276367, + "learning_rate": 1.8964285714285716e-05, + "loss": 0.0318, + "step": 22594 + }, + { + "epoch": 62.074175824175825, + "grad_norm": 22.438322067260742, + "learning_rate": 1.8962912087912086e-05, + "loss": 0.5105, + "step": 22595 + }, + { + "epoch": 62.07692307692308, + "grad_norm": 6.156324863433838, + "learning_rate": 1.896153846153846e-05, + "loss": 0.0637, + "step": 22596 + }, + { + "epoch": 62.07967032967033, + "grad_norm": 11.190424919128418, + "learning_rate": 1.8960164835164837e-05, + "loss": 0.1673, + "step": 22597 + }, + { + "epoch": 62.082417582417584, + "grad_norm": 17.60850715637207, + "learning_rate": 1.895879120879121e-05, + "loss": 0.2024, + "step": 22598 + }, + { + "epoch": 62.08516483516483, + "grad_norm": 18.478639602661133, + "learning_rate": 1.8957417582417583e-05, + "loss": 0.6487, + "step": 22599 + }, + { + "epoch": 62.08791208791209, + "grad_norm": 15.231324195861816, + "learning_rate": 1.8956043956043957e-05, + "loss": 0.1995, + "step": 22600 + }, + { + "epoch": 62.09065934065934, + "grad_norm": 15.820850372314453, + "learning_rate": 1.895467032967033e-05, + "loss": 0.4538, + "step": 22601 + }, + { + "epoch": 62.09340659340659, + "grad_norm": 18.51181983947754, + "learning_rate": 1.8953296703296704e-05, + "loss": 0.3548, + "step": 22602 + }, + { + "epoch": 62.09615384615385, + "grad_norm": 8.916245460510254, + "learning_rate": 1.8951923076923077e-05, + "loss": 0.2145, + "step": 22603 + }, + { + "epoch": 62.0989010989011, + "grad_norm": 17.230854034423828, + "learning_rate": 1.895054945054945e-05, + "loss": 0.4019, + "step": 22604 + }, + { + "epoch": 62.10164835164835, + "grad_norm": 10.546805381774902, + "learning_rate": 1.8949175824175824e-05, + "loss": 0.2619, + "step": 22605 + }, + { + "epoch": 62.104395604395606, + "grad_norm": 17.514265060424805, + "learning_rate": 1.8947802197802197e-05, + "loss": 0.6376, + "step": 22606 + }, + { + "epoch": 62.107142857142854, + "grad_norm": 4.2947611808776855, + "learning_rate": 1.8946428571428574e-05, + "loss": 0.0546, + "step": 22607 + }, + { + "epoch": 62.10989010989011, + "grad_norm": 18.886791229248047, + "learning_rate": 1.8945054945054947e-05, + "loss": 0.3069, + "step": 22608 + }, + { + "epoch": 62.112637362637365, + "grad_norm": 16.296245574951172, + "learning_rate": 1.894368131868132e-05, + "loss": 0.4333, + "step": 22609 + }, + { + "epoch": 62.11538461538461, + "grad_norm": 10.702973365783691, + "learning_rate": 1.894230769230769e-05, + "loss": 0.1734, + "step": 22610 + }, + { + "epoch": 62.11813186813187, + "grad_norm": 10.75216293334961, + "learning_rate": 1.8940934065934064e-05, + "loss": 0.1642, + "step": 22611 + }, + { + "epoch": 62.120879120879124, + "grad_norm": 19.08033561706543, + "learning_rate": 1.893956043956044e-05, + "loss": 0.2094, + "step": 22612 + }, + { + "epoch": 62.12362637362637, + "grad_norm": 26.1224365234375, + "learning_rate": 1.8938186813186815e-05, + "loss": 0.488, + "step": 22613 + }, + { + "epoch": 62.12637362637363, + "grad_norm": 9.055672645568848, + "learning_rate": 1.8936813186813188e-05, + "loss": 0.0789, + "step": 22614 + }, + { + "epoch": 62.129120879120876, + "grad_norm": 2.8531672954559326, + "learning_rate": 1.893543956043956e-05, + "loss": 0.0248, + "step": 22615 + }, + { + "epoch": 62.13186813186813, + "grad_norm": 18.269258499145508, + "learning_rate": 1.8934065934065935e-05, + "loss": 0.4674, + "step": 22616 + }, + { + "epoch": 62.13461538461539, + "grad_norm": 15.125411987304688, + "learning_rate": 1.8932692307692308e-05, + "loss": 0.176, + "step": 22617 + }, + { + "epoch": 62.137362637362635, + "grad_norm": 14.399378776550293, + "learning_rate": 1.893131868131868e-05, + "loss": 0.2202, + "step": 22618 + }, + { + "epoch": 62.14010989010989, + "grad_norm": 4.521859169006348, + "learning_rate": 1.8929945054945055e-05, + "loss": 0.0949, + "step": 22619 + }, + { + "epoch": 62.142857142857146, + "grad_norm": 10.610091209411621, + "learning_rate": 1.892857142857143e-05, + "loss": 0.1945, + "step": 22620 + }, + { + "epoch": 62.145604395604394, + "grad_norm": 14.199089050292969, + "learning_rate": 1.8927197802197802e-05, + "loss": 0.3711, + "step": 22621 + }, + { + "epoch": 62.14835164835165, + "grad_norm": 17.102426528930664, + "learning_rate": 1.892582417582418e-05, + "loss": 0.459, + "step": 22622 + }, + { + "epoch": 62.1510989010989, + "grad_norm": 10.820621490478516, + "learning_rate": 1.8924450549450552e-05, + "loss": 0.146, + "step": 22623 + }, + { + "epoch": 62.15384615384615, + "grad_norm": 13.853240013122559, + "learning_rate": 1.8923076923076925e-05, + "loss": 0.4186, + "step": 22624 + }, + { + "epoch": 62.15659340659341, + "grad_norm": 13.671018600463867, + "learning_rate": 1.8921703296703295e-05, + "loss": 0.3969, + "step": 22625 + }, + { + "epoch": 62.15934065934066, + "grad_norm": 7.299479961395264, + "learning_rate": 1.892032967032967e-05, + "loss": 0.0613, + "step": 22626 + }, + { + "epoch": 62.16208791208791, + "grad_norm": 10.17551040649414, + "learning_rate": 1.8918956043956046e-05, + "loss": 0.1997, + "step": 22627 + }, + { + "epoch": 62.16483516483517, + "grad_norm": 9.023483276367188, + "learning_rate": 1.891758241758242e-05, + "loss": 0.0832, + "step": 22628 + }, + { + "epoch": 62.167582417582416, + "grad_norm": 11.107837677001953, + "learning_rate": 1.8916208791208792e-05, + "loss": 0.1493, + "step": 22629 + }, + { + "epoch": 62.17032967032967, + "grad_norm": 4.116137981414795, + "learning_rate": 1.8914835164835166e-05, + "loss": 0.0512, + "step": 22630 + }, + { + "epoch": 62.17307692307692, + "grad_norm": 15.900001525878906, + "learning_rate": 1.891346153846154e-05, + "loss": 0.205, + "step": 22631 + }, + { + "epoch": 62.175824175824175, + "grad_norm": 9.784446716308594, + "learning_rate": 1.8912087912087913e-05, + "loss": 0.2071, + "step": 22632 + }, + { + "epoch": 62.17857142857143, + "grad_norm": 18.6993465423584, + "learning_rate": 1.8910714285714286e-05, + "loss": 0.2755, + "step": 22633 + }, + { + "epoch": 62.18131868131868, + "grad_norm": 11.706568717956543, + "learning_rate": 1.890934065934066e-05, + "loss": 0.1793, + "step": 22634 + }, + { + "epoch": 62.184065934065934, + "grad_norm": 17.142179489135742, + "learning_rate": 1.8907967032967033e-05, + "loss": 0.4846, + "step": 22635 + }, + { + "epoch": 62.18681318681319, + "grad_norm": 4.488917827606201, + "learning_rate": 1.8906593406593406e-05, + "loss": 0.0626, + "step": 22636 + }, + { + "epoch": 62.18956043956044, + "grad_norm": 2.813739061355591, + "learning_rate": 1.8905219780219783e-05, + "loss": 0.0569, + "step": 22637 + }, + { + "epoch": 62.19230769230769, + "grad_norm": 3.0840206146240234, + "learning_rate": 1.8903846153846156e-05, + "loss": 0.039, + "step": 22638 + }, + { + "epoch": 62.19505494505494, + "grad_norm": 15.320544242858887, + "learning_rate": 1.890247252747253e-05, + "loss": 0.395, + "step": 22639 + }, + { + "epoch": 62.1978021978022, + "grad_norm": 5.786797046661377, + "learning_rate": 1.89010989010989e-05, + "loss": 0.1267, + "step": 22640 + }, + { + "epoch": 62.20054945054945, + "grad_norm": 3.088756799697876, + "learning_rate": 1.8899725274725273e-05, + "loss": 0.0516, + "step": 22641 + }, + { + "epoch": 62.2032967032967, + "grad_norm": 8.849098205566406, + "learning_rate": 1.889835164835165e-05, + "loss": 0.1598, + "step": 22642 + }, + { + "epoch": 62.206043956043956, + "grad_norm": 9.244332313537598, + "learning_rate": 1.8896978021978023e-05, + "loss": 0.119, + "step": 22643 + }, + { + "epoch": 62.20879120879121, + "grad_norm": 3.617398262023926, + "learning_rate": 1.8895604395604397e-05, + "loss": 0.0449, + "step": 22644 + }, + { + "epoch": 62.21153846153846, + "grad_norm": 20.164539337158203, + "learning_rate": 1.889423076923077e-05, + "loss": 0.5017, + "step": 22645 + }, + { + "epoch": 62.214285714285715, + "grad_norm": 13.720182418823242, + "learning_rate": 1.8892857142857144e-05, + "loss": 0.3243, + "step": 22646 + }, + { + "epoch": 62.217032967032964, + "grad_norm": 6.859162330627441, + "learning_rate": 1.8891483516483517e-05, + "loss": 0.0886, + "step": 22647 + }, + { + "epoch": 62.21978021978022, + "grad_norm": 8.3003511428833, + "learning_rate": 1.889010989010989e-05, + "loss": 0.1915, + "step": 22648 + }, + { + "epoch": 62.222527472527474, + "grad_norm": 14.452241897583008, + "learning_rate": 1.8888736263736264e-05, + "loss": 0.307, + "step": 22649 + }, + { + "epoch": 62.22527472527472, + "grad_norm": 8.366333961486816, + "learning_rate": 1.8887362637362637e-05, + "loss": 0.1263, + "step": 22650 + }, + { + "epoch": 62.22802197802198, + "grad_norm": 3.9736757278442383, + "learning_rate": 1.888598901098901e-05, + "loss": 0.061, + "step": 22651 + }, + { + "epoch": 62.23076923076923, + "grad_norm": 11.644207000732422, + "learning_rate": 1.8884615384615388e-05, + "loss": 0.2073, + "step": 22652 + }, + { + "epoch": 62.23351648351648, + "grad_norm": 15.72797966003418, + "learning_rate": 1.888324175824176e-05, + "loss": 0.3493, + "step": 22653 + }, + { + "epoch": 62.23626373626374, + "grad_norm": 5.3921074867248535, + "learning_rate": 1.8881868131868134e-05, + "loss": 0.1657, + "step": 22654 + }, + { + "epoch": 62.239010989010985, + "grad_norm": 18.612689971923828, + "learning_rate": 1.8880494505494504e-05, + "loss": 0.3047, + "step": 22655 + }, + { + "epoch": 62.24175824175824, + "grad_norm": 3.3234589099884033, + "learning_rate": 1.8879120879120878e-05, + "loss": 0.0466, + "step": 22656 + }, + { + "epoch": 62.244505494505496, + "grad_norm": 9.534472465515137, + "learning_rate": 1.8877747252747255e-05, + "loss": 0.1096, + "step": 22657 + }, + { + "epoch": 62.247252747252745, + "grad_norm": 4.906966209411621, + "learning_rate": 1.8876373626373628e-05, + "loss": 0.0591, + "step": 22658 + }, + { + "epoch": 62.25, + "grad_norm": 20.940824508666992, + "learning_rate": 1.8875e-05, + "loss": 0.8268, + "step": 22659 + }, + { + "epoch": 62.252747252747255, + "grad_norm": 21.15419578552246, + "learning_rate": 1.8873626373626375e-05, + "loss": 0.4617, + "step": 22660 + }, + { + "epoch": 62.255494505494504, + "grad_norm": 4.73917293548584, + "learning_rate": 1.8872252747252748e-05, + "loss": 0.0897, + "step": 22661 + }, + { + "epoch": 62.25824175824176, + "grad_norm": 14.407804489135742, + "learning_rate": 1.887087912087912e-05, + "loss": 0.2648, + "step": 22662 + }, + { + "epoch": 62.260989010989015, + "grad_norm": 12.22403335571289, + "learning_rate": 1.8869505494505495e-05, + "loss": 0.2949, + "step": 22663 + }, + { + "epoch": 62.26373626373626, + "grad_norm": 7.953524589538574, + "learning_rate": 1.886813186813187e-05, + "loss": 0.1593, + "step": 22664 + }, + { + "epoch": 62.26648351648352, + "grad_norm": 14.4696044921875, + "learning_rate": 1.8866758241758242e-05, + "loss": 0.3731, + "step": 22665 + }, + { + "epoch": 62.26923076923077, + "grad_norm": 3.180636167526245, + "learning_rate": 1.8865384615384615e-05, + "loss": 0.0512, + "step": 22666 + }, + { + "epoch": 62.27197802197802, + "grad_norm": 9.98089599609375, + "learning_rate": 1.8864010989010992e-05, + "loss": 0.1692, + "step": 22667 + }, + { + "epoch": 62.27472527472528, + "grad_norm": 18.435277938842773, + "learning_rate": 1.8862637362637365e-05, + "loss": 0.2963, + "step": 22668 + }, + { + "epoch": 62.277472527472526, + "grad_norm": 8.540277481079102, + "learning_rate": 1.886126373626374e-05, + "loss": 0.1079, + "step": 22669 + }, + { + "epoch": 62.28021978021978, + "grad_norm": 5.710090160369873, + "learning_rate": 1.885989010989011e-05, + "loss": 0.0706, + "step": 22670 + }, + { + "epoch": 62.282967032967036, + "grad_norm": 13.834964752197266, + "learning_rate": 1.8858516483516482e-05, + "loss": 0.2147, + "step": 22671 + }, + { + "epoch": 62.285714285714285, + "grad_norm": 17.713096618652344, + "learning_rate": 1.885714285714286e-05, + "loss": 0.4018, + "step": 22672 + }, + { + "epoch": 62.28846153846154, + "grad_norm": 26.985061645507812, + "learning_rate": 1.8855769230769232e-05, + "loss": 0.5954, + "step": 22673 + }, + { + "epoch": 62.29120879120879, + "grad_norm": 10.199352264404297, + "learning_rate": 1.8854395604395606e-05, + "loss": 0.2055, + "step": 22674 + }, + { + "epoch": 62.293956043956044, + "grad_norm": 14.190959930419922, + "learning_rate": 1.885302197802198e-05, + "loss": 0.2708, + "step": 22675 + }, + { + "epoch": 62.2967032967033, + "grad_norm": 5.828318119049072, + "learning_rate": 1.8851648351648353e-05, + "loss": 0.1429, + "step": 22676 + }, + { + "epoch": 62.29945054945055, + "grad_norm": 17.547456741333008, + "learning_rate": 1.8850274725274726e-05, + "loss": 0.3841, + "step": 22677 + }, + { + "epoch": 62.3021978021978, + "grad_norm": 9.352365493774414, + "learning_rate": 1.88489010989011e-05, + "loss": 0.1471, + "step": 22678 + }, + { + "epoch": 62.30494505494506, + "grad_norm": 18.79207992553711, + "learning_rate": 1.8847527472527473e-05, + "loss": 0.5494, + "step": 22679 + }, + { + "epoch": 62.30769230769231, + "grad_norm": 17.922626495361328, + "learning_rate": 1.8846153846153846e-05, + "loss": 0.2362, + "step": 22680 + }, + { + "epoch": 62.31043956043956, + "grad_norm": 9.103967666625977, + "learning_rate": 1.884478021978022e-05, + "loss": 0.2447, + "step": 22681 + }, + { + "epoch": 62.31318681318681, + "grad_norm": 6.463674545288086, + "learning_rate": 1.8843406593406597e-05, + "loss": 0.1254, + "step": 22682 + }, + { + "epoch": 62.315934065934066, + "grad_norm": 13.630852699279785, + "learning_rate": 1.884203296703297e-05, + "loss": 0.264, + "step": 22683 + }, + { + "epoch": 62.31868131868132, + "grad_norm": 8.894111633300781, + "learning_rate": 1.8840659340659343e-05, + "loss": 0.1253, + "step": 22684 + }, + { + "epoch": 62.32142857142857, + "grad_norm": 10.81760311126709, + "learning_rate": 1.8839285714285713e-05, + "loss": 0.2517, + "step": 22685 + }, + { + "epoch": 62.324175824175825, + "grad_norm": 8.620584487915039, + "learning_rate": 1.8837912087912087e-05, + "loss": 0.1495, + "step": 22686 + }, + { + "epoch": 62.32692307692308, + "grad_norm": 25.25568389892578, + "learning_rate": 1.8836538461538464e-05, + "loss": 0.7827, + "step": 22687 + }, + { + "epoch": 62.32967032967033, + "grad_norm": 2.2543413639068604, + "learning_rate": 1.8835164835164837e-05, + "loss": 0.0338, + "step": 22688 + }, + { + "epoch": 62.332417582417584, + "grad_norm": 6.000245571136475, + "learning_rate": 1.883379120879121e-05, + "loss": 0.0873, + "step": 22689 + }, + { + "epoch": 62.33516483516483, + "grad_norm": 13.561279296875, + "learning_rate": 1.8832417582417584e-05, + "loss": 0.2816, + "step": 22690 + }, + { + "epoch": 62.33791208791209, + "grad_norm": 3.69153094291687, + "learning_rate": 1.8831043956043957e-05, + "loss": 0.0554, + "step": 22691 + }, + { + "epoch": 62.34065934065934, + "grad_norm": 10.650479316711426, + "learning_rate": 1.882967032967033e-05, + "loss": 0.1789, + "step": 22692 + }, + { + "epoch": 62.34340659340659, + "grad_norm": 19.07103729248047, + "learning_rate": 1.8828296703296704e-05, + "loss": 0.568, + "step": 22693 + }, + { + "epoch": 62.34615384615385, + "grad_norm": 11.263609886169434, + "learning_rate": 1.8826923076923077e-05, + "loss": 0.1042, + "step": 22694 + }, + { + "epoch": 62.3489010989011, + "grad_norm": 19.954147338867188, + "learning_rate": 1.882554945054945e-05, + "loss": 0.5996, + "step": 22695 + }, + { + "epoch": 62.35164835164835, + "grad_norm": 5.0327372550964355, + "learning_rate": 1.8824175824175824e-05, + "loss": 0.0601, + "step": 22696 + }, + { + "epoch": 62.354395604395606, + "grad_norm": 20.363407135009766, + "learning_rate": 1.8822802197802198e-05, + "loss": 0.4225, + "step": 22697 + }, + { + "epoch": 62.357142857142854, + "grad_norm": 9.588390350341797, + "learning_rate": 1.8821428571428574e-05, + "loss": 0.108, + "step": 22698 + }, + { + "epoch": 62.35989010989011, + "grad_norm": 12.3983154296875, + "learning_rate": 1.8820054945054948e-05, + "loss": 0.1575, + "step": 22699 + }, + { + "epoch": 62.362637362637365, + "grad_norm": 18.900442123413086, + "learning_rate": 1.8818681318681318e-05, + "loss": 0.3706, + "step": 22700 + }, + { + "epoch": 62.36538461538461, + "grad_norm": 9.385079383850098, + "learning_rate": 1.881730769230769e-05, + "loss": 0.155, + "step": 22701 + }, + { + "epoch": 62.36813186813187, + "grad_norm": 4.953742027282715, + "learning_rate": 1.8815934065934065e-05, + "loss": 0.0664, + "step": 22702 + }, + { + "epoch": 62.370879120879124, + "grad_norm": 2.8676352500915527, + "learning_rate": 1.881456043956044e-05, + "loss": 0.0338, + "step": 22703 + }, + { + "epoch": 62.37362637362637, + "grad_norm": 9.279083251953125, + "learning_rate": 1.8813186813186815e-05, + "loss": 0.199, + "step": 22704 + }, + { + "epoch": 62.37637362637363, + "grad_norm": 14.271549224853516, + "learning_rate": 1.8811813186813188e-05, + "loss": 0.17, + "step": 22705 + }, + { + "epoch": 62.379120879120876, + "grad_norm": 11.20978832244873, + "learning_rate": 1.881043956043956e-05, + "loss": 0.1957, + "step": 22706 + }, + { + "epoch": 62.38186813186813, + "grad_norm": 14.563630104064941, + "learning_rate": 1.8809065934065935e-05, + "loss": 0.2686, + "step": 22707 + }, + { + "epoch": 62.38461538461539, + "grad_norm": 12.969033241271973, + "learning_rate": 1.880769230769231e-05, + "loss": 0.5052, + "step": 22708 + }, + { + "epoch": 62.387362637362635, + "grad_norm": 19.656579971313477, + "learning_rate": 1.8806318681318682e-05, + "loss": 0.3695, + "step": 22709 + }, + { + "epoch": 62.39010989010989, + "grad_norm": 15.606849670410156, + "learning_rate": 1.8804945054945055e-05, + "loss": 0.273, + "step": 22710 + }, + { + "epoch": 62.392857142857146, + "grad_norm": 5.0556769371032715, + "learning_rate": 1.880357142857143e-05, + "loss": 0.0427, + "step": 22711 + }, + { + "epoch": 62.395604395604394, + "grad_norm": 12.487024307250977, + "learning_rate": 1.8802197802197802e-05, + "loss": 0.3061, + "step": 22712 + }, + { + "epoch": 62.39835164835165, + "grad_norm": 26.740331649780273, + "learning_rate": 1.880082417582418e-05, + "loss": 0.7948, + "step": 22713 + }, + { + "epoch": 62.4010989010989, + "grad_norm": 15.324671745300293, + "learning_rate": 1.8799450549450552e-05, + "loss": 0.2279, + "step": 22714 + }, + { + "epoch": 62.40384615384615, + "grad_norm": 13.091997146606445, + "learning_rate": 1.8798076923076922e-05, + "loss": 0.2793, + "step": 22715 + }, + { + "epoch": 62.40659340659341, + "grad_norm": 10.599137306213379, + "learning_rate": 1.8796703296703296e-05, + "loss": 0.2547, + "step": 22716 + }, + { + "epoch": 62.40934065934066, + "grad_norm": 7.968836784362793, + "learning_rate": 1.879532967032967e-05, + "loss": 0.1064, + "step": 22717 + }, + { + "epoch": 62.41208791208791, + "grad_norm": 16.719058990478516, + "learning_rate": 1.8793956043956046e-05, + "loss": 0.2883, + "step": 22718 + }, + { + "epoch": 62.41483516483517, + "grad_norm": 10.851590156555176, + "learning_rate": 1.879258241758242e-05, + "loss": 0.3199, + "step": 22719 + }, + { + "epoch": 62.417582417582416, + "grad_norm": 6.655843734741211, + "learning_rate": 1.8791208791208793e-05, + "loss": 0.0627, + "step": 22720 + }, + { + "epoch": 62.42032967032967, + "grad_norm": 7.337701797485352, + "learning_rate": 1.8789835164835166e-05, + "loss": 0.088, + "step": 22721 + }, + { + "epoch": 62.42307692307692, + "grad_norm": 3.9028499126434326, + "learning_rate": 1.878846153846154e-05, + "loss": 0.0698, + "step": 22722 + }, + { + "epoch": 62.425824175824175, + "grad_norm": 17.47449493408203, + "learning_rate": 1.8787087912087913e-05, + "loss": 0.3064, + "step": 22723 + }, + { + "epoch": 62.42857142857143, + "grad_norm": 7.02687406539917, + "learning_rate": 1.8785714285714286e-05, + "loss": 0.1254, + "step": 22724 + }, + { + "epoch": 62.43131868131868, + "grad_norm": 12.040315628051758, + "learning_rate": 1.878434065934066e-05, + "loss": 0.3139, + "step": 22725 + }, + { + "epoch": 62.434065934065934, + "grad_norm": 12.272677421569824, + "learning_rate": 1.8782967032967033e-05, + "loss": 0.2821, + "step": 22726 + }, + { + "epoch": 62.43681318681319, + "grad_norm": 1.8843296766281128, + "learning_rate": 1.8781593406593407e-05, + "loss": 0.0246, + "step": 22727 + }, + { + "epoch": 62.43956043956044, + "grad_norm": 6.875926494598389, + "learning_rate": 1.8780219780219783e-05, + "loss": 0.1191, + "step": 22728 + }, + { + "epoch": 62.44230769230769, + "grad_norm": 4.084636688232422, + "learning_rate": 1.8778846153846157e-05, + "loss": 0.0547, + "step": 22729 + }, + { + "epoch": 62.44505494505494, + "grad_norm": 11.152634620666504, + "learning_rate": 1.8777472527472527e-05, + "loss": 0.1316, + "step": 22730 + }, + { + "epoch": 62.4478021978022, + "grad_norm": 19.07596206665039, + "learning_rate": 1.87760989010989e-05, + "loss": 0.2914, + "step": 22731 + }, + { + "epoch": 62.45054945054945, + "grad_norm": 8.704639434814453, + "learning_rate": 1.8774725274725274e-05, + "loss": 0.1164, + "step": 22732 + }, + { + "epoch": 62.4532967032967, + "grad_norm": 19.63243865966797, + "learning_rate": 1.877335164835165e-05, + "loss": 0.3068, + "step": 22733 + }, + { + "epoch": 62.456043956043956, + "grad_norm": 1.947227954864502, + "learning_rate": 1.8771978021978024e-05, + "loss": 0.0321, + "step": 22734 + }, + { + "epoch": 62.45879120879121, + "grad_norm": 4.703107833862305, + "learning_rate": 1.8770604395604397e-05, + "loss": 0.0382, + "step": 22735 + }, + { + "epoch": 62.46153846153846, + "grad_norm": 19.857885360717773, + "learning_rate": 1.876923076923077e-05, + "loss": 0.5273, + "step": 22736 + }, + { + "epoch": 62.464285714285715, + "grad_norm": 5.648676872253418, + "learning_rate": 1.8767857142857144e-05, + "loss": 0.1344, + "step": 22737 + }, + { + "epoch": 62.467032967032964, + "grad_norm": 15.041579246520996, + "learning_rate": 1.8766483516483517e-05, + "loss": 0.331, + "step": 22738 + }, + { + "epoch": 62.46978021978022, + "grad_norm": 5.034633636474609, + "learning_rate": 1.876510989010989e-05, + "loss": 0.071, + "step": 22739 + }, + { + "epoch": 62.472527472527474, + "grad_norm": 9.548603057861328, + "learning_rate": 1.8763736263736264e-05, + "loss": 0.0963, + "step": 22740 + }, + { + "epoch": 62.47527472527472, + "grad_norm": 3.802753448486328, + "learning_rate": 1.8762362637362638e-05, + "loss": 0.0445, + "step": 22741 + }, + { + "epoch": 62.47802197802198, + "grad_norm": 11.514336585998535, + "learning_rate": 1.876098901098901e-05, + "loss": 0.2105, + "step": 22742 + }, + { + "epoch": 62.48076923076923, + "grad_norm": 18.55054473876953, + "learning_rate": 1.8759615384615388e-05, + "loss": 0.319, + "step": 22743 + }, + { + "epoch": 62.48351648351648, + "grad_norm": 5.511348724365234, + "learning_rate": 1.875824175824176e-05, + "loss": 0.0988, + "step": 22744 + }, + { + "epoch": 62.48626373626374, + "grad_norm": 6.617349147796631, + "learning_rate": 1.875686813186813e-05, + "loss": 0.0827, + "step": 22745 + }, + { + "epoch": 62.489010989010985, + "grad_norm": 6.821989059448242, + "learning_rate": 1.8755494505494505e-05, + "loss": 0.1218, + "step": 22746 + }, + { + "epoch": 62.49175824175824, + "grad_norm": 18.646026611328125, + "learning_rate": 1.8754120879120878e-05, + "loss": 0.695, + "step": 22747 + }, + { + "epoch": 62.494505494505496, + "grad_norm": 4.563668251037598, + "learning_rate": 1.8752747252747255e-05, + "loss": 0.061, + "step": 22748 + }, + { + "epoch": 62.497252747252745, + "grad_norm": 7.2941718101501465, + "learning_rate": 1.875137362637363e-05, + "loss": 0.0994, + "step": 22749 + }, + { + "epoch": 62.5, + "grad_norm": 8.368324279785156, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.1933, + "step": 22750 + }, + { + "epoch": 62.502747252747255, + "grad_norm": 14.238757133483887, + "learning_rate": 1.8748626373626375e-05, + "loss": 0.2967, + "step": 22751 + }, + { + "epoch": 62.505494505494504, + "grad_norm": 18.731889724731445, + "learning_rate": 1.874725274725275e-05, + "loss": 0.4058, + "step": 22752 + }, + { + "epoch": 62.50824175824176, + "grad_norm": 17.975526809692383, + "learning_rate": 1.8745879120879122e-05, + "loss": 0.6128, + "step": 22753 + }, + { + "epoch": 62.51098901098901, + "grad_norm": 10.603706359863281, + "learning_rate": 1.8744505494505495e-05, + "loss": 0.0979, + "step": 22754 + }, + { + "epoch": 62.51373626373626, + "grad_norm": 2.8938565254211426, + "learning_rate": 1.874313186813187e-05, + "loss": 0.0358, + "step": 22755 + }, + { + "epoch": 62.51648351648352, + "grad_norm": 8.02958869934082, + "learning_rate": 1.8741758241758242e-05, + "loss": 0.1514, + "step": 22756 + }, + { + "epoch": 62.51923076923077, + "grad_norm": 5.276434898376465, + "learning_rate": 1.8740384615384616e-05, + "loss": 0.0751, + "step": 22757 + }, + { + "epoch": 62.52197802197802, + "grad_norm": 7.205111026763916, + "learning_rate": 1.8739010989010992e-05, + "loss": 0.0836, + "step": 22758 + }, + { + "epoch": 62.52472527472528, + "grad_norm": 10.18975830078125, + "learning_rate": 1.8737637362637366e-05, + "loss": 0.2999, + "step": 22759 + }, + { + "epoch": 62.527472527472526, + "grad_norm": 17.510713577270508, + "learning_rate": 1.8736263736263736e-05, + "loss": 0.3576, + "step": 22760 + }, + { + "epoch": 62.53021978021978, + "grad_norm": 9.445155143737793, + "learning_rate": 1.873489010989011e-05, + "loss": 0.1868, + "step": 22761 + }, + { + "epoch": 62.532967032967036, + "grad_norm": 14.637892723083496, + "learning_rate": 1.8733516483516483e-05, + "loss": 0.1949, + "step": 22762 + }, + { + "epoch": 62.535714285714285, + "grad_norm": 20.476947784423828, + "learning_rate": 1.873214285714286e-05, + "loss": 0.3788, + "step": 22763 + }, + { + "epoch": 62.53846153846154, + "grad_norm": 32.04714584350586, + "learning_rate": 1.8730769230769233e-05, + "loss": 1.1195, + "step": 22764 + }, + { + "epoch": 62.54120879120879, + "grad_norm": 15.453448295593262, + "learning_rate": 1.8729395604395606e-05, + "loss": 0.6431, + "step": 22765 + }, + { + "epoch": 62.543956043956044, + "grad_norm": 16.111644744873047, + "learning_rate": 1.872802197802198e-05, + "loss": 0.3397, + "step": 22766 + }, + { + "epoch": 62.5467032967033, + "grad_norm": 19.631000518798828, + "learning_rate": 1.8726648351648353e-05, + "loss": 0.3597, + "step": 22767 + }, + { + "epoch": 62.54945054945055, + "grad_norm": 20.786314010620117, + "learning_rate": 1.8725274725274726e-05, + "loss": 0.4396, + "step": 22768 + }, + { + "epoch": 62.5521978021978, + "grad_norm": 4.331080913543701, + "learning_rate": 1.87239010989011e-05, + "loss": 0.0701, + "step": 22769 + }, + { + "epoch": 62.55494505494506, + "grad_norm": 15.13902473449707, + "learning_rate": 1.8722527472527473e-05, + "loss": 0.2349, + "step": 22770 + }, + { + "epoch": 62.55769230769231, + "grad_norm": 6.193726062774658, + "learning_rate": 1.8721153846153847e-05, + "loss": 0.1133, + "step": 22771 + }, + { + "epoch": 62.56043956043956, + "grad_norm": 7.265041828155518, + "learning_rate": 1.871978021978022e-05, + "loss": 0.097, + "step": 22772 + }, + { + "epoch": 62.56318681318681, + "grad_norm": 21.240558624267578, + "learning_rate": 1.8718406593406597e-05, + "loss": 0.3269, + "step": 22773 + }, + { + "epoch": 62.565934065934066, + "grad_norm": 10.783928871154785, + "learning_rate": 1.871703296703297e-05, + "loss": 0.4031, + "step": 22774 + }, + { + "epoch": 62.56868131868132, + "grad_norm": 1.6705313920974731, + "learning_rate": 1.871565934065934e-05, + "loss": 0.0214, + "step": 22775 + }, + { + "epoch": 62.57142857142857, + "grad_norm": 13.729721069335938, + "learning_rate": 1.8714285714285714e-05, + "loss": 0.2746, + "step": 22776 + }, + { + "epoch": 62.574175824175825, + "grad_norm": 28.329050064086914, + "learning_rate": 1.8712912087912087e-05, + "loss": 0.338, + "step": 22777 + }, + { + "epoch": 62.57692307692308, + "grad_norm": 17.117889404296875, + "learning_rate": 1.8711538461538464e-05, + "loss": 0.3535, + "step": 22778 + }, + { + "epoch": 62.57967032967033, + "grad_norm": 11.805569648742676, + "learning_rate": 1.8710164835164837e-05, + "loss": 0.1403, + "step": 22779 + }, + { + "epoch": 62.582417582417584, + "grad_norm": 20.662717819213867, + "learning_rate": 1.870879120879121e-05, + "loss": 0.4581, + "step": 22780 + }, + { + "epoch": 62.58516483516483, + "grad_norm": 6.924108505249023, + "learning_rate": 1.8707417582417584e-05, + "loss": 0.0768, + "step": 22781 + }, + { + "epoch": 62.58791208791209, + "grad_norm": 9.482193946838379, + "learning_rate": 1.8706043956043958e-05, + "loss": 0.1935, + "step": 22782 + }, + { + "epoch": 62.59065934065934, + "grad_norm": 13.153976440429688, + "learning_rate": 1.870467032967033e-05, + "loss": 0.3661, + "step": 22783 + }, + { + "epoch": 62.59340659340659, + "grad_norm": 15.592009544372559, + "learning_rate": 1.8703296703296704e-05, + "loss": 0.3865, + "step": 22784 + }, + { + "epoch": 62.59615384615385, + "grad_norm": 16.092679977416992, + "learning_rate": 1.8701923076923078e-05, + "loss": 0.3392, + "step": 22785 + }, + { + "epoch": 62.5989010989011, + "grad_norm": 5.18287467956543, + "learning_rate": 1.870054945054945e-05, + "loss": 0.0502, + "step": 22786 + }, + { + "epoch": 62.60164835164835, + "grad_norm": 16.978919982910156, + "learning_rate": 1.8699175824175825e-05, + "loss": 0.4695, + "step": 22787 + }, + { + "epoch": 62.604395604395606, + "grad_norm": 17.56269645690918, + "learning_rate": 1.86978021978022e-05, + "loss": 0.3698, + "step": 22788 + }, + { + "epoch": 62.607142857142854, + "grad_norm": 2.290344715118408, + "learning_rate": 1.869642857142857e-05, + "loss": 0.0309, + "step": 22789 + }, + { + "epoch": 62.60989010989011, + "grad_norm": 19.739273071289062, + "learning_rate": 1.8695054945054945e-05, + "loss": 0.4736, + "step": 22790 + }, + { + "epoch": 62.612637362637365, + "grad_norm": 13.9700288772583, + "learning_rate": 1.8693681318681318e-05, + "loss": 0.4554, + "step": 22791 + }, + { + "epoch": 62.61538461538461, + "grad_norm": 12.300209999084473, + "learning_rate": 1.869230769230769e-05, + "loss": 0.2235, + "step": 22792 + }, + { + "epoch": 62.61813186813187, + "grad_norm": 18.172954559326172, + "learning_rate": 1.869093406593407e-05, + "loss": 0.3432, + "step": 22793 + }, + { + "epoch": 62.620879120879124, + "grad_norm": 12.758564949035645, + "learning_rate": 1.8689560439560442e-05, + "loss": 0.2781, + "step": 22794 + }, + { + "epoch": 62.62362637362637, + "grad_norm": 5.811945915222168, + "learning_rate": 1.8688186813186815e-05, + "loss": 0.0909, + "step": 22795 + }, + { + "epoch": 62.62637362637363, + "grad_norm": 14.312542915344238, + "learning_rate": 1.868681318681319e-05, + "loss": 0.4891, + "step": 22796 + }, + { + "epoch": 62.629120879120876, + "grad_norm": 11.124794006347656, + "learning_rate": 1.8685439560439562e-05, + "loss": 0.3537, + "step": 22797 + }, + { + "epoch": 62.63186813186813, + "grad_norm": 5.606863975524902, + "learning_rate": 1.8684065934065935e-05, + "loss": 0.0627, + "step": 22798 + }, + { + "epoch": 62.63461538461539, + "grad_norm": 7.086370944976807, + "learning_rate": 1.868269230769231e-05, + "loss": 0.1792, + "step": 22799 + }, + { + "epoch": 62.637362637362635, + "grad_norm": 11.60667896270752, + "learning_rate": 1.8681318681318682e-05, + "loss": 0.3212, + "step": 22800 + }, + { + "epoch": 62.64010989010989, + "grad_norm": 12.643099784851074, + "learning_rate": 1.8679945054945056e-05, + "loss": 0.2994, + "step": 22801 + }, + { + "epoch": 62.642857142857146, + "grad_norm": 18.275630950927734, + "learning_rate": 1.867857142857143e-05, + "loss": 0.6661, + "step": 22802 + }, + { + "epoch": 62.645604395604394, + "grad_norm": 5.1200127601623535, + "learning_rate": 1.8677197802197806e-05, + "loss": 0.0769, + "step": 22803 + }, + { + "epoch": 62.64835164835165, + "grad_norm": 11.54061222076416, + "learning_rate": 1.8675824175824176e-05, + "loss": 0.2005, + "step": 22804 + }, + { + "epoch": 62.6510989010989, + "grad_norm": 12.50219440460205, + "learning_rate": 1.867445054945055e-05, + "loss": 0.1897, + "step": 22805 + }, + { + "epoch": 62.65384615384615, + "grad_norm": 16.049161911010742, + "learning_rate": 1.8673076923076923e-05, + "loss": 0.3486, + "step": 22806 + }, + { + "epoch": 62.65659340659341, + "grad_norm": 1.9644393920898438, + "learning_rate": 1.8671703296703296e-05, + "loss": 0.0263, + "step": 22807 + }, + { + "epoch": 62.65934065934066, + "grad_norm": 15.873116493225098, + "learning_rate": 1.867032967032967e-05, + "loss": 0.6356, + "step": 22808 + }, + { + "epoch": 62.66208791208791, + "grad_norm": 22.264732360839844, + "learning_rate": 1.8668956043956046e-05, + "loss": 0.3658, + "step": 22809 + }, + { + "epoch": 62.66483516483517, + "grad_norm": 6.511414527893066, + "learning_rate": 1.866758241758242e-05, + "loss": 0.0803, + "step": 22810 + }, + { + "epoch": 62.667582417582416, + "grad_norm": 10.618232727050781, + "learning_rate": 1.8666208791208793e-05, + "loss": 0.2544, + "step": 22811 + }, + { + "epoch": 62.67032967032967, + "grad_norm": 10.800360679626465, + "learning_rate": 1.8664835164835167e-05, + "loss": 0.2544, + "step": 22812 + }, + { + "epoch": 62.67307692307692, + "grad_norm": 12.29266357421875, + "learning_rate": 1.8663461538461537e-05, + "loss": 0.4596, + "step": 22813 + }, + { + "epoch": 62.675824175824175, + "grad_norm": 32.11254119873047, + "learning_rate": 1.8662087912087913e-05, + "loss": 0.8087, + "step": 22814 + }, + { + "epoch": 62.67857142857143, + "grad_norm": 12.242915153503418, + "learning_rate": 1.8660714285714287e-05, + "loss": 0.1668, + "step": 22815 + }, + { + "epoch": 62.68131868131868, + "grad_norm": 11.95899772644043, + "learning_rate": 1.865934065934066e-05, + "loss": 0.2473, + "step": 22816 + }, + { + "epoch": 62.684065934065934, + "grad_norm": 15.905510902404785, + "learning_rate": 1.8657967032967034e-05, + "loss": 0.3976, + "step": 22817 + }, + { + "epoch": 62.68681318681319, + "grad_norm": 8.935895919799805, + "learning_rate": 1.8656593406593407e-05, + "loss": 0.1053, + "step": 22818 + }, + { + "epoch": 62.68956043956044, + "grad_norm": 14.678495407104492, + "learning_rate": 1.865521978021978e-05, + "loss": 0.1578, + "step": 22819 + }, + { + "epoch": 62.69230769230769, + "grad_norm": 5.639744758605957, + "learning_rate": 1.8653846153846154e-05, + "loss": 0.0604, + "step": 22820 + }, + { + "epoch": 62.69505494505494, + "grad_norm": 3.4163615703582764, + "learning_rate": 1.8652472527472527e-05, + "loss": 0.0537, + "step": 22821 + }, + { + "epoch": 62.6978021978022, + "grad_norm": 14.862122535705566, + "learning_rate": 1.86510989010989e-05, + "loss": 0.3023, + "step": 22822 + }, + { + "epoch": 62.70054945054945, + "grad_norm": 11.752130508422852, + "learning_rate": 1.8649725274725274e-05, + "loss": 0.2126, + "step": 22823 + }, + { + "epoch": 62.7032967032967, + "grad_norm": 5.9816436767578125, + "learning_rate": 1.864835164835165e-05, + "loss": 0.1429, + "step": 22824 + }, + { + "epoch": 62.706043956043956, + "grad_norm": 6.44183874130249, + "learning_rate": 1.8646978021978024e-05, + "loss": 0.1308, + "step": 22825 + }, + { + "epoch": 62.70879120879121, + "grad_norm": 11.240633010864258, + "learning_rate": 1.8645604395604398e-05, + "loss": 0.1745, + "step": 22826 + }, + { + "epoch": 62.71153846153846, + "grad_norm": 9.867380142211914, + "learning_rate": 1.864423076923077e-05, + "loss": 0.168, + "step": 22827 + }, + { + "epoch": 62.714285714285715, + "grad_norm": 21.75201416015625, + "learning_rate": 1.864285714285714e-05, + "loss": 0.5184, + "step": 22828 + }, + { + "epoch": 62.717032967032964, + "grad_norm": 8.44411849975586, + "learning_rate": 1.8641483516483518e-05, + "loss": 0.0908, + "step": 22829 + }, + { + "epoch": 62.71978021978022, + "grad_norm": 7.146555423736572, + "learning_rate": 1.864010989010989e-05, + "loss": 0.189, + "step": 22830 + }, + { + "epoch": 62.722527472527474, + "grad_norm": 15.056660652160645, + "learning_rate": 1.8638736263736265e-05, + "loss": 0.4008, + "step": 22831 + }, + { + "epoch": 62.72527472527472, + "grad_norm": 8.414050102233887, + "learning_rate": 1.8637362637362638e-05, + "loss": 0.1128, + "step": 22832 + }, + { + "epoch": 62.72802197802198, + "grad_norm": 12.269149780273438, + "learning_rate": 1.863598901098901e-05, + "loss": 0.1877, + "step": 22833 + }, + { + "epoch": 62.73076923076923, + "grad_norm": 8.095736503601074, + "learning_rate": 1.8634615384615385e-05, + "loss": 0.0741, + "step": 22834 + }, + { + "epoch": 62.73351648351648, + "grad_norm": 5.6441569328308105, + "learning_rate": 1.8633241758241758e-05, + "loss": 0.0687, + "step": 22835 + }, + { + "epoch": 62.73626373626374, + "grad_norm": 29.743316650390625, + "learning_rate": 1.863186813186813e-05, + "loss": 0.6494, + "step": 22836 + }, + { + "epoch": 62.73901098901099, + "grad_norm": 2.989476203918457, + "learning_rate": 1.8630494505494505e-05, + "loss": 0.0373, + "step": 22837 + }, + { + "epoch": 62.74175824175824, + "grad_norm": 10.510912895202637, + "learning_rate": 1.862912087912088e-05, + "loss": 0.1956, + "step": 22838 + }, + { + "epoch": 62.744505494505496, + "grad_norm": 9.518651008605957, + "learning_rate": 1.8627747252747255e-05, + "loss": 0.2198, + "step": 22839 + }, + { + "epoch": 62.747252747252745, + "grad_norm": 4.274623870849609, + "learning_rate": 1.862637362637363e-05, + "loss": 0.0659, + "step": 22840 + }, + { + "epoch": 62.75, + "grad_norm": 14.494104385375977, + "learning_rate": 1.8625000000000002e-05, + "loss": 0.2595, + "step": 22841 + }, + { + "epoch": 62.752747252747255, + "grad_norm": 6.781161785125732, + "learning_rate": 1.8623626373626376e-05, + "loss": 0.0971, + "step": 22842 + }, + { + "epoch": 62.755494505494504, + "grad_norm": 10.780017852783203, + "learning_rate": 1.8622252747252746e-05, + "loss": 0.2412, + "step": 22843 + }, + { + "epoch": 62.75824175824176, + "grad_norm": 9.520708084106445, + "learning_rate": 1.8620879120879122e-05, + "loss": 0.1445, + "step": 22844 + }, + { + "epoch": 62.76098901098901, + "grad_norm": 18.05059051513672, + "learning_rate": 1.8619505494505496e-05, + "loss": 0.2994, + "step": 22845 + }, + { + "epoch": 62.76373626373626, + "grad_norm": 5.649967670440674, + "learning_rate": 1.861813186813187e-05, + "loss": 0.0942, + "step": 22846 + }, + { + "epoch": 62.76648351648352, + "grad_norm": 6.955604553222656, + "learning_rate": 1.8616758241758243e-05, + "loss": 0.0544, + "step": 22847 + }, + { + "epoch": 62.76923076923077, + "grad_norm": 17.218250274658203, + "learning_rate": 1.8615384615384616e-05, + "loss": 0.5658, + "step": 22848 + }, + { + "epoch": 62.77197802197802, + "grad_norm": 10.585559844970703, + "learning_rate": 1.861401098901099e-05, + "loss": 0.1174, + "step": 22849 + }, + { + "epoch": 62.77472527472528, + "grad_norm": 3.431403875350952, + "learning_rate": 1.8612637362637363e-05, + "loss": 0.0678, + "step": 22850 + }, + { + "epoch": 62.777472527472526, + "grad_norm": 17.127357482910156, + "learning_rate": 1.8611263736263736e-05, + "loss": 0.3, + "step": 22851 + }, + { + "epoch": 62.78021978021978, + "grad_norm": 10.447072982788086, + "learning_rate": 1.860989010989011e-05, + "loss": 0.2359, + "step": 22852 + }, + { + "epoch": 62.782967032967036, + "grad_norm": 16.824928283691406, + "learning_rate": 1.8608516483516483e-05, + "loss": 0.6897, + "step": 22853 + }, + { + "epoch": 62.785714285714285, + "grad_norm": 17.887371063232422, + "learning_rate": 1.860714285714286e-05, + "loss": 0.3266, + "step": 22854 + }, + { + "epoch": 62.78846153846154, + "grad_norm": 14.48853588104248, + "learning_rate": 1.8605769230769233e-05, + "loss": 0.2123, + "step": 22855 + }, + { + "epoch": 62.79120879120879, + "grad_norm": 11.532893180847168, + "learning_rate": 1.8604395604395607e-05, + "loss": 0.2428, + "step": 22856 + }, + { + "epoch": 62.793956043956044, + "grad_norm": 5.819322109222412, + "learning_rate": 1.860302197802198e-05, + "loss": 0.0569, + "step": 22857 + }, + { + "epoch": 62.7967032967033, + "grad_norm": 9.7629976272583, + "learning_rate": 1.860164835164835e-05, + "loss": 0.2336, + "step": 22858 + }, + { + "epoch": 62.79945054945055, + "grad_norm": 5.319675922393799, + "learning_rate": 1.8600274725274727e-05, + "loss": 0.128, + "step": 22859 + }, + { + "epoch": 62.8021978021978, + "grad_norm": 18.03647232055664, + "learning_rate": 1.85989010989011e-05, + "loss": 0.2689, + "step": 22860 + }, + { + "epoch": 62.80494505494506, + "grad_norm": 8.386798858642578, + "learning_rate": 1.8597527472527474e-05, + "loss": 0.1751, + "step": 22861 + }, + { + "epoch": 62.80769230769231, + "grad_norm": 11.13785457611084, + "learning_rate": 1.8596153846153847e-05, + "loss": 0.1264, + "step": 22862 + }, + { + "epoch": 62.81043956043956, + "grad_norm": 8.999238014221191, + "learning_rate": 1.859478021978022e-05, + "loss": 0.3395, + "step": 22863 + }, + { + "epoch": 62.81318681318681, + "grad_norm": 14.712555885314941, + "learning_rate": 1.8593406593406594e-05, + "loss": 0.2048, + "step": 22864 + }, + { + "epoch": 62.815934065934066, + "grad_norm": 9.971298217773438, + "learning_rate": 1.8592032967032967e-05, + "loss": 0.2191, + "step": 22865 + }, + { + "epoch": 62.81868131868132, + "grad_norm": 11.40158462524414, + "learning_rate": 1.859065934065934e-05, + "loss": 0.2199, + "step": 22866 + }, + { + "epoch": 62.82142857142857, + "grad_norm": 17.23143196105957, + "learning_rate": 1.8589285714285714e-05, + "loss": 0.2618, + "step": 22867 + }, + { + "epoch": 62.824175824175825, + "grad_norm": 16.94760513305664, + "learning_rate": 1.8587912087912087e-05, + "loss": 0.2821, + "step": 22868 + }, + { + "epoch": 62.82692307692308, + "grad_norm": 9.372869491577148, + "learning_rate": 1.8586538461538464e-05, + "loss": 0.2138, + "step": 22869 + }, + { + "epoch": 62.82967032967033, + "grad_norm": 14.41503620147705, + "learning_rate": 1.8585164835164838e-05, + "loss": 0.2077, + "step": 22870 + }, + { + "epoch": 62.832417582417584, + "grad_norm": 12.103618621826172, + "learning_rate": 1.858379120879121e-05, + "loss": 0.2598, + "step": 22871 + }, + { + "epoch": 62.83516483516483, + "grad_norm": 1.7445228099822998, + "learning_rate": 1.8582417582417584e-05, + "loss": 0.0221, + "step": 22872 + }, + { + "epoch": 62.83791208791209, + "grad_norm": 15.843084335327148, + "learning_rate": 1.8581043956043955e-05, + "loss": 0.3485, + "step": 22873 + }, + { + "epoch": 62.84065934065934, + "grad_norm": 5.159858226776123, + "learning_rate": 1.857967032967033e-05, + "loss": 0.0883, + "step": 22874 + }, + { + "epoch": 62.84340659340659, + "grad_norm": 18.745704650878906, + "learning_rate": 1.8578296703296705e-05, + "loss": 0.4626, + "step": 22875 + }, + { + "epoch": 62.84615384615385, + "grad_norm": 3.0440850257873535, + "learning_rate": 1.8576923076923078e-05, + "loss": 0.0494, + "step": 22876 + }, + { + "epoch": 62.8489010989011, + "grad_norm": 14.616808891296387, + "learning_rate": 1.857554945054945e-05, + "loss": 0.271, + "step": 22877 + }, + { + "epoch": 62.85164835164835, + "grad_norm": 16.162036895751953, + "learning_rate": 1.8574175824175825e-05, + "loss": 0.3873, + "step": 22878 + }, + { + "epoch": 62.854395604395606, + "grad_norm": 7.620202541351318, + "learning_rate": 1.85728021978022e-05, + "loss": 0.127, + "step": 22879 + }, + { + "epoch": 62.857142857142854, + "grad_norm": 9.95407485961914, + "learning_rate": 1.8571428571428572e-05, + "loss": 0.1191, + "step": 22880 + }, + { + "epoch": 62.85989010989011, + "grad_norm": 17.538557052612305, + "learning_rate": 1.8570054945054945e-05, + "loss": 0.2621, + "step": 22881 + }, + { + "epoch": 62.862637362637365, + "grad_norm": 10.56408405303955, + "learning_rate": 1.856868131868132e-05, + "loss": 0.1682, + "step": 22882 + }, + { + "epoch": 62.86538461538461, + "grad_norm": 19.65500259399414, + "learning_rate": 1.8567307692307692e-05, + "loss": 0.7017, + "step": 22883 + }, + { + "epoch": 62.86813186813187, + "grad_norm": 14.047462463378906, + "learning_rate": 1.856593406593407e-05, + "loss": 0.5709, + "step": 22884 + }, + { + "epoch": 62.870879120879124, + "grad_norm": 10.930752754211426, + "learning_rate": 1.8564560439560442e-05, + "loss": 0.2213, + "step": 22885 + }, + { + "epoch": 62.87362637362637, + "grad_norm": 10.037508964538574, + "learning_rate": 1.8563186813186816e-05, + "loss": 0.1096, + "step": 22886 + }, + { + "epoch": 62.87637362637363, + "grad_norm": 2.4741716384887695, + "learning_rate": 1.8561813186813186e-05, + "loss": 0.0249, + "step": 22887 + }, + { + "epoch": 62.879120879120876, + "grad_norm": 5.060784339904785, + "learning_rate": 1.856043956043956e-05, + "loss": 0.1059, + "step": 22888 + }, + { + "epoch": 62.88186813186813, + "grad_norm": 28.64875602722168, + "learning_rate": 1.8559065934065936e-05, + "loss": 0.5541, + "step": 22889 + }, + { + "epoch": 62.88461538461539, + "grad_norm": 7.349822044372559, + "learning_rate": 1.855769230769231e-05, + "loss": 0.1073, + "step": 22890 + }, + { + "epoch": 62.887362637362635, + "grad_norm": 12.896378517150879, + "learning_rate": 1.8556318681318683e-05, + "loss": 0.4379, + "step": 22891 + }, + { + "epoch": 62.89010989010989, + "grad_norm": 9.668390274047852, + "learning_rate": 1.8554945054945056e-05, + "loss": 0.1431, + "step": 22892 + }, + { + "epoch": 62.892857142857146, + "grad_norm": 12.583760261535645, + "learning_rate": 1.855357142857143e-05, + "loss": 0.2856, + "step": 22893 + }, + { + "epoch": 62.895604395604394, + "grad_norm": 13.540132522583008, + "learning_rate": 1.8552197802197803e-05, + "loss": 0.2994, + "step": 22894 + }, + { + "epoch": 62.89835164835165, + "grad_norm": 19.759090423583984, + "learning_rate": 1.8550824175824176e-05, + "loss": 0.3745, + "step": 22895 + }, + { + "epoch": 62.9010989010989, + "grad_norm": 2.276623249053955, + "learning_rate": 1.854945054945055e-05, + "loss": 0.0299, + "step": 22896 + }, + { + "epoch": 62.90384615384615, + "grad_norm": 4.352982521057129, + "learning_rate": 1.8548076923076923e-05, + "loss": 0.0425, + "step": 22897 + }, + { + "epoch": 62.90659340659341, + "grad_norm": 5.623144626617432, + "learning_rate": 1.8546703296703296e-05, + "loss": 0.0598, + "step": 22898 + }, + { + "epoch": 62.90934065934066, + "grad_norm": 4.9404730796813965, + "learning_rate": 1.8545329670329673e-05, + "loss": 0.0357, + "step": 22899 + }, + { + "epoch": 62.91208791208791, + "grad_norm": 11.577954292297363, + "learning_rate": 1.8543956043956047e-05, + "loss": 0.1701, + "step": 22900 + }, + { + "epoch": 62.91483516483517, + "grad_norm": 8.397408485412598, + "learning_rate": 1.854258241758242e-05, + "loss": 0.1004, + "step": 22901 + }, + { + "epoch": 62.917582417582416, + "grad_norm": 30.935279846191406, + "learning_rate": 1.854120879120879e-05, + "loss": 0.3377, + "step": 22902 + }, + { + "epoch": 62.92032967032967, + "grad_norm": 16.110984802246094, + "learning_rate": 1.8539835164835163e-05, + "loss": 0.5352, + "step": 22903 + }, + { + "epoch": 62.92307692307692, + "grad_norm": 9.500223159790039, + "learning_rate": 1.853846153846154e-05, + "loss": 0.1688, + "step": 22904 + }, + { + "epoch": 62.925824175824175, + "grad_norm": 9.527986526489258, + "learning_rate": 1.8537087912087914e-05, + "loss": 0.1046, + "step": 22905 + }, + { + "epoch": 62.92857142857143, + "grad_norm": 4.834295749664307, + "learning_rate": 1.8535714285714287e-05, + "loss": 0.0697, + "step": 22906 + }, + { + "epoch": 62.93131868131868, + "grad_norm": 13.825906753540039, + "learning_rate": 1.853434065934066e-05, + "loss": 0.2384, + "step": 22907 + }, + { + "epoch": 62.934065934065934, + "grad_norm": 12.890979766845703, + "learning_rate": 1.8532967032967034e-05, + "loss": 0.3211, + "step": 22908 + }, + { + "epoch": 62.93681318681319, + "grad_norm": 2.7522220611572266, + "learning_rate": 1.8531593406593407e-05, + "loss": 0.0242, + "step": 22909 + }, + { + "epoch": 62.93956043956044, + "grad_norm": 16.082401275634766, + "learning_rate": 1.853021978021978e-05, + "loss": 0.2737, + "step": 22910 + }, + { + "epoch": 62.94230769230769, + "grad_norm": 4.026629447937012, + "learning_rate": 1.8528846153846154e-05, + "loss": 0.0689, + "step": 22911 + }, + { + "epoch": 62.94505494505494, + "grad_norm": 11.496526718139648, + "learning_rate": 1.8527472527472528e-05, + "loss": 0.4094, + "step": 22912 + }, + { + "epoch": 62.9478021978022, + "grad_norm": 11.307458877563477, + "learning_rate": 1.85260989010989e-05, + "loss": 0.1845, + "step": 22913 + }, + { + "epoch": 62.95054945054945, + "grad_norm": 15.047086715698242, + "learning_rate": 1.8524725274725278e-05, + "loss": 0.3618, + "step": 22914 + }, + { + "epoch": 62.9532967032967, + "grad_norm": 15.098014831542969, + "learning_rate": 1.852335164835165e-05, + "loss": 0.3309, + "step": 22915 + }, + { + "epoch": 62.956043956043956, + "grad_norm": 18.9080753326416, + "learning_rate": 1.8521978021978025e-05, + "loss": 0.4649, + "step": 22916 + }, + { + "epoch": 62.95879120879121, + "grad_norm": 5.611502170562744, + "learning_rate": 1.8520604395604395e-05, + "loss": 0.0662, + "step": 22917 + }, + { + "epoch": 62.96153846153846, + "grad_norm": 15.671971321105957, + "learning_rate": 1.8519230769230768e-05, + "loss": 0.2967, + "step": 22918 + }, + { + "epoch": 62.964285714285715, + "grad_norm": 7.856523036956787, + "learning_rate": 1.8517857142857145e-05, + "loss": 0.1537, + "step": 22919 + }, + { + "epoch": 62.967032967032964, + "grad_norm": 12.135002136230469, + "learning_rate": 1.8516483516483518e-05, + "loss": 0.3364, + "step": 22920 + }, + { + "epoch": 62.96978021978022, + "grad_norm": 6.251580715179443, + "learning_rate": 1.851510989010989e-05, + "loss": 0.1334, + "step": 22921 + }, + { + "epoch": 62.972527472527474, + "grad_norm": 11.582521438598633, + "learning_rate": 1.8513736263736265e-05, + "loss": 0.2523, + "step": 22922 + }, + { + "epoch": 62.97527472527472, + "grad_norm": 9.292316436767578, + "learning_rate": 1.851236263736264e-05, + "loss": 0.0761, + "step": 22923 + }, + { + "epoch": 62.97802197802198, + "grad_norm": 9.431536674499512, + "learning_rate": 1.8510989010989012e-05, + "loss": 0.3245, + "step": 22924 + }, + { + "epoch": 62.98076923076923, + "grad_norm": 15.95406436920166, + "learning_rate": 1.8509615384615385e-05, + "loss": 0.3868, + "step": 22925 + }, + { + "epoch": 62.98351648351648, + "grad_norm": 10.068368911743164, + "learning_rate": 1.850824175824176e-05, + "loss": 0.1803, + "step": 22926 + }, + { + "epoch": 62.98626373626374, + "grad_norm": 9.267749786376953, + "learning_rate": 1.8506868131868132e-05, + "loss": 0.103, + "step": 22927 + }, + { + "epoch": 62.98901098901099, + "grad_norm": 13.318338394165039, + "learning_rate": 1.8505494505494505e-05, + "loss": 0.3689, + "step": 22928 + }, + { + "epoch": 62.99175824175824, + "grad_norm": 23.573654174804688, + "learning_rate": 1.850412087912088e-05, + "loss": 0.4523, + "step": 22929 + }, + { + "epoch": 62.994505494505496, + "grad_norm": 10.025497436523438, + "learning_rate": 1.8502747252747256e-05, + "loss": 0.1629, + "step": 22930 + }, + { + "epoch": 62.997252747252745, + "grad_norm": 12.561638832092285, + "learning_rate": 1.850137362637363e-05, + "loss": 0.1809, + "step": 22931 + }, + { + "epoch": 63.0, + "grad_norm": 6.1819891929626465, + "learning_rate": 1.85e-05, + "loss": 0.0413, + "step": 22932 + }, + { + "epoch": 63.0, + "eval_accuracy": 0.8264462809917356, + "eval_f1": 0.8268328048495125, + "eval_f1_DuraRiadoRio_64x64": 0.8052805280528053, + "eval_f1_Mole_64x64": 0.8625954198473282, + "eval_f1_Quebrado_64x64": 0.8447653429602888, + "eval_f1_RiadoRio_64x64": 0.7389830508474576, + "eval_f1_RioFechado_64x64": 0.8825396825396825, + "eval_loss": 0.712731659412384, + "eval_precision": 0.8340655869157786, + "eval_precision_DuraRiadoRio_64x64": 0.7672955974842768, + "eval_precision_Mole_64x64": 0.9576271186440678, + "eval_precision_Quebrado_64x64": 0.8796992481203008, + "eval_precision_RiadoRio_64x64": 0.7622377622377622, + "eval_precision_RioFechado_64x64": 0.8034682080924855, + "eval_recall": 0.8280845894077918, + "eval_recall_DuraRiadoRio_64x64": 0.8472222222222222, + "eval_recall_Mole_64x64": 0.7847222222222222, + "eval_recall_Quebrado_64x64": 0.8125, + "eval_recall_RiadoRio_64x64": 0.7171052631578947, + "eval_recall_RioFechado_64x64": 0.9788732394366197, + "eval_runtime": 1.7035, + "eval_samples_per_second": 426.187, + "eval_steps_per_second": 27.004, + "step": 22932 + }, + { + "epoch": 63.002747252747255, + "grad_norm": 16.72037124633789, + "learning_rate": 1.8498626373626372e-05, + "loss": 0.5496, + "step": 22933 + }, + { + "epoch": 63.005494505494504, + "grad_norm": 7.841467380523682, + "learning_rate": 1.8497252747252746e-05, + "loss": 0.1821, + "step": 22934 + }, + { + "epoch": 63.00824175824176, + "grad_norm": 7.893950462341309, + "learning_rate": 1.8495879120879123e-05, + "loss": 0.1761, + "step": 22935 + }, + { + "epoch": 63.010989010989015, + "grad_norm": 18.84259033203125, + "learning_rate": 1.8494505494505496e-05, + "loss": 0.3276, + "step": 22936 + }, + { + "epoch": 63.01373626373626, + "grad_norm": 10.79946517944336, + "learning_rate": 1.849313186813187e-05, + "loss": 0.1981, + "step": 22937 + }, + { + "epoch": 63.01648351648352, + "grad_norm": 8.021435737609863, + "learning_rate": 1.8491758241758243e-05, + "loss": 0.1184, + "step": 22938 + }, + { + "epoch": 63.01923076923077, + "grad_norm": 7.4848551750183105, + "learning_rate": 1.8490384615384616e-05, + "loss": 0.1227, + "step": 22939 + }, + { + "epoch": 63.02197802197802, + "grad_norm": 7.955387592315674, + "learning_rate": 1.848901098901099e-05, + "loss": 0.1987, + "step": 22940 + }, + { + "epoch": 63.02472527472528, + "grad_norm": 8.412369728088379, + "learning_rate": 1.8487637362637363e-05, + "loss": 0.1488, + "step": 22941 + }, + { + "epoch": 63.027472527472526, + "grad_norm": 15.355948448181152, + "learning_rate": 1.8486263736263737e-05, + "loss": 0.3268, + "step": 22942 + }, + { + "epoch": 63.03021978021978, + "grad_norm": 7.736238479614258, + "learning_rate": 1.848489010989011e-05, + "loss": 0.1309, + "step": 22943 + }, + { + "epoch": 63.032967032967036, + "grad_norm": 7.603329658508301, + "learning_rate": 1.8483516483516483e-05, + "loss": 0.0522, + "step": 22944 + }, + { + "epoch": 63.035714285714285, + "grad_norm": 21.516324996948242, + "learning_rate": 1.848214285714286e-05, + "loss": 0.6674, + "step": 22945 + }, + { + "epoch": 63.03846153846154, + "grad_norm": 8.76607608795166, + "learning_rate": 1.8480769230769234e-05, + "loss": 0.2477, + "step": 22946 + }, + { + "epoch": 63.04120879120879, + "grad_norm": 10.538087844848633, + "learning_rate": 1.8479395604395604e-05, + "loss": 0.228, + "step": 22947 + }, + { + "epoch": 63.043956043956044, + "grad_norm": 18.369224548339844, + "learning_rate": 1.8478021978021977e-05, + "loss": 0.2799, + "step": 22948 + }, + { + "epoch": 63.0467032967033, + "grad_norm": 7.103433132171631, + "learning_rate": 1.847664835164835e-05, + "loss": 0.1837, + "step": 22949 + }, + { + "epoch": 63.04945054945055, + "grad_norm": 13.584806442260742, + "learning_rate": 1.8475274725274727e-05, + "loss": 0.2287, + "step": 22950 + }, + { + "epoch": 63.0521978021978, + "grad_norm": 1.635236144065857, + "learning_rate": 1.84739010989011e-05, + "loss": 0.0118, + "step": 22951 + }, + { + "epoch": 63.05494505494506, + "grad_norm": 5.294798851013184, + "learning_rate": 1.8472527472527474e-05, + "loss": 0.0944, + "step": 22952 + }, + { + "epoch": 63.05769230769231, + "grad_norm": 6.194151878356934, + "learning_rate": 1.8471153846153847e-05, + "loss": 0.0626, + "step": 22953 + }, + { + "epoch": 63.06043956043956, + "grad_norm": 4.460822105407715, + "learning_rate": 1.846978021978022e-05, + "loss": 0.0427, + "step": 22954 + }, + { + "epoch": 63.06318681318681, + "grad_norm": 5.31672477722168, + "learning_rate": 1.8468406593406594e-05, + "loss": 0.1439, + "step": 22955 + }, + { + "epoch": 63.065934065934066, + "grad_norm": 13.331286430358887, + "learning_rate": 1.8467032967032968e-05, + "loss": 0.226, + "step": 22956 + }, + { + "epoch": 63.06868131868132, + "grad_norm": 10.826438903808594, + "learning_rate": 1.846565934065934e-05, + "loss": 0.1955, + "step": 22957 + }, + { + "epoch": 63.07142857142857, + "grad_norm": 4.502261161804199, + "learning_rate": 1.8464285714285714e-05, + "loss": 0.0544, + "step": 22958 + }, + { + "epoch": 63.074175824175825, + "grad_norm": 7.172041416168213, + "learning_rate": 1.8462912087912088e-05, + "loss": 0.0712, + "step": 22959 + }, + { + "epoch": 63.07692307692308, + "grad_norm": 7.156990051269531, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.1437, + "step": 22960 + }, + { + "epoch": 63.07967032967033, + "grad_norm": 26.17157554626465, + "learning_rate": 1.8460164835164838e-05, + "loss": 0.8537, + "step": 22961 + }, + { + "epoch": 63.082417582417584, + "grad_norm": 12.346207618713379, + "learning_rate": 1.8458791208791208e-05, + "loss": 0.1815, + "step": 22962 + }, + { + "epoch": 63.08516483516483, + "grad_norm": 9.097393989562988, + "learning_rate": 1.845741758241758e-05, + "loss": 0.1647, + "step": 22963 + }, + { + "epoch": 63.08791208791209, + "grad_norm": 19.103986740112305, + "learning_rate": 1.8456043956043955e-05, + "loss": 0.4827, + "step": 22964 + }, + { + "epoch": 63.09065934065934, + "grad_norm": 14.739676475524902, + "learning_rate": 1.845467032967033e-05, + "loss": 0.3826, + "step": 22965 + }, + { + "epoch": 63.09340659340659, + "grad_norm": 12.996903419494629, + "learning_rate": 1.8453296703296705e-05, + "loss": 0.416, + "step": 22966 + }, + { + "epoch": 63.09615384615385, + "grad_norm": 3.0950517654418945, + "learning_rate": 1.845192307692308e-05, + "loss": 0.0517, + "step": 22967 + }, + { + "epoch": 63.0989010989011, + "grad_norm": 20.649572372436523, + "learning_rate": 1.8450549450549452e-05, + "loss": 0.2137, + "step": 22968 + }, + { + "epoch": 63.10164835164835, + "grad_norm": 14.493916511535645, + "learning_rate": 1.8449175824175825e-05, + "loss": 0.312, + "step": 22969 + }, + { + "epoch": 63.104395604395606, + "grad_norm": 7.760828971862793, + "learning_rate": 1.84478021978022e-05, + "loss": 0.0838, + "step": 22970 + }, + { + "epoch": 63.107142857142854, + "grad_norm": 23.132755279541016, + "learning_rate": 1.8446428571428572e-05, + "loss": 0.53, + "step": 22971 + }, + { + "epoch": 63.10989010989011, + "grad_norm": 4.677041053771973, + "learning_rate": 1.8445054945054945e-05, + "loss": 0.0818, + "step": 22972 + }, + { + "epoch": 63.112637362637365, + "grad_norm": 12.295943260192871, + "learning_rate": 1.844368131868132e-05, + "loss": 0.1148, + "step": 22973 + }, + { + "epoch": 63.11538461538461, + "grad_norm": 18.685916900634766, + "learning_rate": 1.8442307692307692e-05, + "loss": 0.686, + "step": 22974 + }, + { + "epoch": 63.11813186813187, + "grad_norm": 6.2545647621154785, + "learning_rate": 1.844093406593407e-05, + "loss": 0.1601, + "step": 22975 + }, + { + "epoch": 63.120879120879124, + "grad_norm": 19.212480545043945, + "learning_rate": 1.8439560439560443e-05, + "loss": 0.6331, + "step": 22976 + }, + { + "epoch": 63.12362637362637, + "grad_norm": 14.429140090942383, + "learning_rate": 1.8438186813186813e-05, + "loss": 0.2642, + "step": 22977 + }, + { + "epoch": 63.12637362637363, + "grad_norm": 6.606967926025391, + "learning_rate": 1.8436813186813186e-05, + "loss": 0.0752, + "step": 22978 + }, + { + "epoch": 63.129120879120876, + "grad_norm": 11.414929389953613, + "learning_rate": 1.843543956043956e-05, + "loss": 0.2085, + "step": 22979 + }, + { + "epoch": 63.13186813186813, + "grad_norm": 3.4236819744110107, + "learning_rate": 1.8434065934065936e-05, + "loss": 0.0272, + "step": 22980 + }, + { + "epoch": 63.13461538461539, + "grad_norm": 13.296173095703125, + "learning_rate": 1.843269230769231e-05, + "loss": 0.3078, + "step": 22981 + }, + { + "epoch": 63.137362637362635, + "grad_norm": 2.495173454284668, + "learning_rate": 1.8431318681318683e-05, + "loss": 0.0495, + "step": 22982 + }, + { + "epoch": 63.14010989010989, + "grad_norm": 18.202106475830078, + "learning_rate": 1.8429945054945056e-05, + "loss": 0.5914, + "step": 22983 + }, + { + "epoch": 63.142857142857146, + "grad_norm": 14.77318286895752, + "learning_rate": 1.842857142857143e-05, + "loss": 0.3732, + "step": 22984 + }, + { + "epoch": 63.145604395604394, + "grad_norm": 5.673403263092041, + "learning_rate": 1.8427197802197803e-05, + "loss": 0.063, + "step": 22985 + }, + { + "epoch": 63.14835164835165, + "grad_norm": 8.80098819732666, + "learning_rate": 1.8425824175824177e-05, + "loss": 0.1846, + "step": 22986 + }, + { + "epoch": 63.1510989010989, + "grad_norm": 5.914661407470703, + "learning_rate": 1.842445054945055e-05, + "loss": 0.1998, + "step": 22987 + }, + { + "epoch": 63.15384615384615, + "grad_norm": 20.77682113647461, + "learning_rate": 1.8423076923076923e-05, + "loss": 0.2295, + "step": 22988 + }, + { + "epoch": 63.15659340659341, + "grad_norm": 20.61565399169922, + "learning_rate": 1.8421703296703297e-05, + "loss": 0.6713, + "step": 22989 + }, + { + "epoch": 63.15934065934066, + "grad_norm": 4.98504638671875, + "learning_rate": 1.8420329670329674e-05, + "loss": 0.0569, + "step": 22990 + }, + { + "epoch": 63.16208791208791, + "grad_norm": 12.26187515258789, + "learning_rate": 1.8418956043956047e-05, + "loss": 0.2316, + "step": 22991 + }, + { + "epoch": 63.16483516483517, + "grad_norm": 4.815119743347168, + "learning_rate": 1.8417582417582417e-05, + "loss": 0.1234, + "step": 22992 + }, + { + "epoch": 63.167582417582416, + "grad_norm": 3.8508145809173584, + "learning_rate": 1.841620879120879e-05, + "loss": 0.0899, + "step": 22993 + }, + { + "epoch": 63.17032967032967, + "grad_norm": 25.047500610351562, + "learning_rate": 1.8414835164835164e-05, + "loss": 0.8111, + "step": 22994 + }, + { + "epoch": 63.17307692307692, + "grad_norm": 3.535109043121338, + "learning_rate": 1.841346153846154e-05, + "loss": 0.0234, + "step": 22995 + }, + { + "epoch": 63.175824175824175, + "grad_norm": 13.22065544128418, + "learning_rate": 1.8412087912087914e-05, + "loss": 0.3172, + "step": 22996 + }, + { + "epoch": 63.17857142857143, + "grad_norm": 14.927175521850586, + "learning_rate": 1.8410714285714287e-05, + "loss": 0.3473, + "step": 22997 + }, + { + "epoch": 63.18131868131868, + "grad_norm": 13.508931159973145, + "learning_rate": 1.840934065934066e-05, + "loss": 0.287, + "step": 22998 + }, + { + "epoch": 63.184065934065934, + "grad_norm": 8.899971008300781, + "learning_rate": 1.8407967032967034e-05, + "loss": 0.1178, + "step": 22999 + }, + { + "epoch": 63.18681318681319, + "grad_norm": 8.245407104492188, + "learning_rate": 1.8406593406593408e-05, + "loss": 0.0989, + "step": 23000 + }, + { + "epoch": 63.18956043956044, + "grad_norm": 12.245593070983887, + "learning_rate": 1.840521978021978e-05, + "loss": 0.1957, + "step": 23001 + }, + { + "epoch": 63.19230769230769, + "grad_norm": 1.662749171257019, + "learning_rate": 1.8403846153846154e-05, + "loss": 0.0165, + "step": 23002 + }, + { + "epoch": 63.19505494505494, + "grad_norm": 16.678701400756836, + "learning_rate": 1.8402472527472528e-05, + "loss": 0.5057, + "step": 23003 + }, + { + "epoch": 63.1978021978022, + "grad_norm": 10.445292472839355, + "learning_rate": 1.84010989010989e-05, + "loss": 0.2069, + "step": 23004 + }, + { + "epoch": 63.20054945054945, + "grad_norm": 3.9017391204833984, + "learning_rate": 1.8399725274725278e-05, + "loss": 0.0419, + "step": 23005 + }, + { + "epoch": 63.2032967032967, + "grad_norm": 17.253847122192383, + "learning_rate": 1.839835164835165e-05, + "loss": 0.2364, + "step": 23006 + }, + { + "epoch": 63.206043956043956, + "grad_norm": 10.466950416564941, + "learning_rate": 1.839697802197802e-05, + "loss": 0.3221, + "step": 23007 + }, + { + "epoch": 63.20879120879121, + "grad_norm": 10.074297904968262, + "learning_rate": 1.8395604395604395e-05, + "loss": 0.2255, + "step": 23008 + }, + { + "epoch": 63.21153846153846, + "grad_norm": 30.93499183654785, + "learning_rate": 1.839423076923077e-05, + "loss": 1.0076, + "step": 23009 + }, + { + "epoch": 63.214285714285715, + "grad_norm": 12.20091724395752, + "learning_rate": 1.8392857142857145e-05, + "loss": 0.2909, + "step": 23010 + }, + { + "epoch": 63.217032967032964, + "grad_norm": 10.585145950317383, + "learning_rate": 1.839148351648352e-05, + "loss": 0.151, + "step": 23011 + }, + { + "epoch": 63.21978021978022, + "grad_norm": 20.666711807250977, + "learning_rate": 1.8390109890109892e-05, + "loss": 0.3529, + "step": 23012 + }, + { + "epoch": 63.222527472527474, + "grad_norm": 6.639384746551514, + "learning_rate": 1.8388736263736265e-05, + "loss": 0.1684, + "step": 23013 + }, + { + "epoch": 63.22527472527472, + "grad_norm": 15.879700660705566, + "learning_rate": 1.838736263736264e-05, + "loss": 0.2311, + "step": 23014 + }, + { + "epoch": 63.22802197802198, + "grad_norm": 17.56591033935547, + "learning_rate": 1.8385989010989012e-05, + "loss": 0.2265, + "step": 23015 + }, + { + "epoch": 63.23076923076923, + "grad_norm": 42.16929626464844, + "learning_rate": 1.8384615384615386e-05, + "loss": 0.4373, + "step": 23016 + }, + { + "epoch": 63.23351648351648, + "grad_norm": 5.682682037353516, + "learning_rate": 1.838324175824176e-05, + "loss": 0.0613, + "step": 23017 + }, + { + "epoch": 63.23626373626374, + "grad_norm": 17.26270294189453, + "learning_rate": 1.8381868131868132e-05, + "loss": 0.4347, + "step": 23018 + }, + { + "epoch": 63.239010989010985, + "grad_norm": 3.011911392211914, + "learning_rate": 1.8380494505494506e-05, + "loss": 0.0248, + "step": 23019 + }, + { + "epoch": 63.24175824175824, + "grad_norm": 25.15980339050293, + "learning_rate": 1.8379120879120883e-05, + "loss": 1.0044, + "step": 23020 + }, + { + "epoch": 63.244505494505496, + "grad_norm": 15.061596870422363, + "learning_rate": 1.8377747252747256e-05, + "loss": 0.2701, + "step": 23021 + }, + { + "epoch": 63.247252747252745, + "grad_norm": 9.349418640136719, + "learning_rate": 1.8376373626373626e-05, + "loss": 0.1258, + "step": 23022 + }, + { + "epoch": 63.25, + "grad_norm": 9.127593040466309, + "learning_rate": 1.8375e-05, + "loss": 0.2338, + "step": 23023 + }, + { + "epoch": 63.252747252747255, + "grad_norm": 9.918193817138672, + "learning_rate": 1.8373626373626373e-05, + "loss": 0.3133, + "step": 23024 + }, + { + "epoch": 63.255494505494504, + "grad_norm": 10.095477104187012, + "learning_rate": 1.837225274725275e-05, + "loss": 0.1088, + "step": 23025 + }, + { + "epoch": 63.25824175824176, + "grad_norm": 3.2411227226257324, + "learning_rate": 1.8370879120879123e-05, + "loss": 0.0577, + "step": 23026 + }, + { + "epoch": 63.260989010989015, + "grad_norm": 15.431709289550781, + "learning_rate": 1.8369505494505496e-05, + "loss": 0.3469, + "step": 23027 + }, + { + "epoch": 63.26373626373626, + "grad_norm": 13.9339017868042, + "learning_rate": 1.836813186813187e-05, + "loss": 0.2009, + "step": 23028 + }, + { + "epoch": 63.26648351648352, + "grad_norm": 6.196386814117432, + "learning_rate": 1.8366758241758243e-05, + "loss": 0.1577, + "step": 23029 + }, + { + "epoch": 63.26923076923077, + "grad_norm": 9.112886428833008, + "learning_rate": 1.8365384615384617e-05, + "loss": 0.2977, + "step": 23030 + }, + { + "epoch": 63.27197802197802, + "grad_norm": 4.5528435707092285, + "learning_rate": 1.836401098901099e-05, + "loss": 0.0523, + "step": 23031 + }, + { + "epoch": 63.27472527472528, + "grad_norm": 13.731372833251953, + "learning_rate": 1.8362637362637363e-05, + "loss": 0.1678, + "step": 23032 + }, + { + "epoch": 63.277472527472526, + "grad_norm": 27.180721282958984, + "learning_rate": 1.8361263736263737e-05, + "loss": 1.0076, + "step": 23033 + }, + { + "epoch": 63.28021978021978, + "grad_norm": 15.688594818115234, + "learning_rate": 1.835989010989011e-05, + "loss": 0.314, + "step": 23034 + }, + { + "epoch": 63.282967032967036, + "grad_norm": 9.822113990783691, + "learning_rate": 1.8358516483516484e-05, + "loss": 0.1949, + "step": 23035 + }, + { + "epoch": 63.285714285714285, + "grad_norm": 5.166406631469727, + "learning_rate": 1.835714285714286e-05, + "loss": 0.1388, + "step": 23036 + }, + { + "epoch": 63.28846153846154, + "grad_norm": 9.183380126953125, + "learning_rate": 1.835576923076923e-05, + "loss": 0.1615, + "step": 23037 + }, + { + "epoch": 63.29120879120879, + "grad_norm": 9.28984260559082, + "learning_rate": 1.8354395604395604e-05, + "loss": 0.2744, + "step": 23038 + }, + { + "epoch": 63.293956043956044, + "grad_norm": 18.62458038330078, + "learning_rate": 1.8353021978021977e-05, + "loss": 0.4866, + "step": 23039 + }, + { + "epoch": 63.2967032967033, + "grad_norm": 2.3298165798187256, + "learning_rate": 1.835164835164835e-05, + "loss": 0.0269, + "step": 23040 + }, + { + "epoch": 63.29945054945055, + "grad_norm": 11.859528541564941, + "learning_rate": 1.8350274725274728e-05, + "loss": 0.1786, + "step": 23041 + }, + { + "epoch": 63.3021978021978, + "grad_norm": 3.2612738609313965, + "learning_rate": 1.83489010989011e-05, + "loss": 0.0628, + "step": 23042 + }, + { + "epoch": 63.30494505494506, + "grad_norm": 8.189202308654785, + "learning_rate": 1.8347527472527474e-05, + "loss": 0.0606, + "step": 23043 + }, + { + "epoch": 63.30769230769231, + "grad_norm": 1.5982433557510376, + "learning_rate": 1.8346153846153848e-05, + "loss": 0.0262, + "step": 23044 + }, + { + "epoch": 63.31043956043956, + "grad_norm": 14.877223014831543, + "learning_rate": 1.8344780219780218e-05, + "loss": 0.1815, + "step": 23045 + }, + { + "epoch": 63.31318681318681, + "grad_norm": 11.852904319763184, + "learning_rate": 1.8343406593406595e-05, + "loss": 0.2151, + "step": 23046 + }, + { + "epoch": 63.315934065934066, + "grad_norm": 10.63142204284668, + "learning_rate": 1.8342032967032968e-05, + "loss": 0.177, + "step": 23047 + }, + { + "epoch": 63.31868131868132, + "grad_norm": 20.68977165222168, + "learning_rate": 1.834065934065934e-05, + "loss": 0.7103, + "step": 23048 + }, + { + "epoch": 63.32142857142857, + "grad_norm": 3.298771858215332, + "learning_rate": 1.8339285714285715e-05, + "loss": 0.0688, + "step": 23049 + }, + { + "epoch": 63.324175824175825, + "grad_norm": 18.870206832885742, + "learning_rate": 1.8337912087912088e-05, + "loss": 0.3293, + "step": 23050 + }, + { + "epoch": 63.32692307692308, + "grad_norm": 5.122588157653809, + "learning_rate": 1.8336538461538465e-05, + "loss": 0.0762, + "step": 23051 + }, + { + "epoch": 63.32967032967033, + "grad_norm": 14.537763595581055, + "learning_rate": 1.8335164835164835e-05, + "loss": 0.171, + "step": 23052 + }, + { + "epoch": 63.332417582417584, + "grad_norm": 10.556371688842773, + "learning_rate": 1.833379120879121e-05, + "loss": 0.2095, + "step": 23053 + }, + { + "epoch": 63.33516483516483, + "grad_norm": 11.765780448913574, + "learning_rate": 1.8332417582417582e-05, + "loss": 0.2151, + "step": 23054 + }, + { + "epoch": 63.33791208791209, + "grad_norm": 12.180306434631348, + "learning_rate": 1.8331043956043955e-05, + "loss": 0.3688, + "step": 23055 + }, + { + "epoch": 63.34065934065934, + "grad_norm": 13.556970596313477, + "learning_rate": 1.8329670329670332e-05, + "loss": 0.2872, + "step": 23056 + }, + { + "epoch": 63.34340659340659, + "grad_norm": 5.019028186798096, + "learning_rate": 1.8328296703296705e-05, + "loss": 0.0805, + "step": 23057 + }, + { + "epoch": 63.34615384615385, + "grad_norm": 8.375590324401855, + "learning_rate": 1.832692307692308e-05, + "loss": 0.1643, + "step": 23058 + }, + { + "epoch": 63.3489010989011, + "grad_norm": 5.718531131744385, + "learning_rate": 1.8325549450549452e-05, + "loss": 0.1046, + "step": 23059 + }, + { + "epoch": 63.35164835164835, + "grad_norm": 3.429387092590332, + "learning_rate": 1.8324175824175822e-05, + "loss": 0.0861, + "step": 23060 + }, + { + "epoch": 63.354395604395606, + "grad_norm": 14.872159957885742, + "learning_rate": 1.83228021978022e-05, + "loss": 0.3947, + "step": 23061 + }, + { + "epoch": 63.357142857142854, + "grad_norm": 9.029465675354004, + "learning_rate": 1.8321428571428572e-05, + "loss": 0.1543, + "step": 23062 + }, + { + "epoch": 63.35989010989011, + "grad_norm": 6.720057487487793, + "learning_rate": 1.8320054945054946e-05, + "loss": 0.136, + "step": 23063 + }, + { + "epoch": 63.362637362637365, + "grad_norm": 19.847299575805664, + "learning_rate": 1.831868131868132e-05, + "loss": 0.3181, + "step": 23064 + }, + { + "epoch": 63.36538461538461, + "grad_norm": 7.757532596588135, + "learning_rate": 1.8317307692307693e-05, + "loss": 0.1216, + "step": 23065 + }, + { + "epoch": 63.36813186813187, + "grad_norm": 8.653818130493164, + "learning_rate": 1.831593406593407e-05, + "loss": 0.1334, + "step": 23066 + }, + { + "epoch": 63.370879120879124, + "grad_norm": 17.365327835083008, + "learning_rate": 1.831456043956044e-05, + "loss": 0.36, + "step": 23067 + }, + { + "epoch": 63.37362637362637, + "grad_norm": 7.962026596069336, + "learning_rate": 1.8313186813186813e-05, + "loss": 0.1284, + "step": 23068 + }, + { + "epoch": 63.37637362637363, + "grad_norm": 6.159664630889893, + "learning_rate": 1.8311813186813186e-05, + "loss": 0.0851, + "step": 23069 + }, + { + "epoch": 63.379120879120876, + "grad_norm": 9.566685676574707, + "learning_rate": 1.831043956043956e-05, + "loss": 0.109, + "step": 23070 + }, + { + "epoch": 63.38186813186813, + "grad_norm": 21.518903732299805, + "learning_rate": 1.8309065934065936e-05, + "loss": 0.3438, + "step": 23071 + }, + { + "epoch": 63.38461538461539, + "grad_norm": 18.15656852722168, + "learning_rate": 1.830769230769231e-05, + "loss": 0.6332, + "step": 23072 + }, + { + "epoch": 63.387362637362635, + "grad_norm": 10.966415405273438, + "learning_rate": 1.8306318681318683e-05, + "loss": 0.305, + "step": 23073 + }, + { + "epoch": 63.39010989010989, + "grad_norm": 5.386603355407715, + "learning_rate": 1.8304945054945057e-05, + "loss": 0.1139, + "step": 23074 + }, + { + "epoch": 63.392857142857146, + "grad_norm": 7.017963886260986, + "learning_rate": 1.8303571428571427e-05, + "loss": 0.1257, + "step": 23075 + }, + { + "epoch": 63.395604395604394, + "grad_norm": 11.469749450683594, + "learning_rate": 1.8302197802197804e-05, + "loss": 0.1537, + "step": 23076 + }, + { + "epoch": 63.39835164835165, + "grad_norm": 9.866226196289062, + "learning_rate": 1.8300824175824177e-05, + "loss": 0.1548, + "step": 23077 + }, + { + "epoch": 63.4010989010989, + "grad_norm": 15.869855880737305, + "learning_rate": 1.829945054945055e-05, + "loss": 0.3862, + "step": 23078 + }, + { + "epoch": 63.40384615384615, + "grad_norm": 5.677172660827637, + "learning_rate": 1.8298076923076924e-05, + "loss": 0.0617, + "step": 23079 + }, + { + "epoch": 63.40659340659341, + "grad_norm": 22.742149353027344, + "learning_rate": 1.8296703296703297e-05, + "loss": 0.692, + "step": 23080 + }, + { + "epoch": 63.40934065934066, + "grad_norm": 0.4822660982608795, + "learning_rate": 1.8295329670329674e-05, + "loss": 0.0066, + "step": 23081 + }, + { + "epoch": 63.41208791208791, + "grad_norm": 16.44049835205078, + "learning_rate": 1.8293956043956044e-05, + "loss": 0.4052, + "step": 23082 + }, + { + "epoch": 63.41483516483517, + "grad_norm": 13.539273262023926, + "learning_rate": 1.8292582417582417e-05, + "loss": 0.4072, + "step": 23083 + }, + { + "epoch": 63.417582417582416, + "grad_norm": 11.251572608947754, + "learning_rate": 1.829120879120879e-05, + "loss": 0.1772, + "step": 23084 + }, + { + "epoch": 63.42032967032967, + "grad_norm": 16.81399154663086, + "learning_rate": 1.8289835164835164e-05, + "loss": 0.2772, + "step": 23085 + }, + { + "epoch": 63.42307692307692, + "grad_norm": 11.952560424804688, + "learning_rate": 1.828846153846154e-05, + "loss": 0.2489, + "step": 23086 + }, + { + "epoch": 63.425824175824175, + "grad_norm": 20.969837188720703, + "learning_rate": 1.8287087912087914e-05, + "loss": 0.4604, + "step": 23087 + }, + { + "epoch": 63.42857142857143, + "grad_norm": 12.123503684997559, + "learning_rate": 1.8285714285714288e-05, + "loss": 0.1895, + "step": 23088 + }, + { + "epoch": 63.43131868131868, + "grad_norm": 4.365536689758301, + "learning_rate": 1.828434065934066e-05, + "loss": 0.0561, + "step": 23089 + }, + { + "epoch": 63.434065934065934, + "grad_norm": 23.81765365600586, + "learning_rate": 1.828296703296703e-05, + "loss": 0.3947, + "step": 23090 + }, + { + "epoch": 63.43681318681319, + "grad_norm": 18.384593963623047, + "learning_rate": 1.8281593406593408e-05, + "loss": 0.4586, + "step": 23091 + }, + { + "epoch": 63.43956043956044, + "grad_norm": 12.17330551147461, + "learning_rate": 1.828021978021978e-05, + "loss": 0.4107, + "step": 23092 + }, + { + "epoch": 63.44230769230769, + "grad_norm": 20.349119186401367, + "learning_rate": 1.8278846153846155e-05, + "loss": 0.4146, + "step": 23093 + }, + { + "epoch": 63.44505494505494, + "grad_norm": 17.283639907836914, + "learning_rate": 1.8277472527472528e-05, + "loss": 0.3651, + "step": 23094 + }, + { + "epoch": 63.4478021978022, + "grad_norm": 6.651456832885742, + "learning_rate": 1.82760989010989e-05, + "loss": 0.093, + "step": 23095 + }, + { + "epoch": 63.45054945054945, + "grad_norm": 13.174253463745117, + "learning_rate": 1.8274725274725275e-05, + "loss": 0.392, + "step": 23096 + }, + { + "epoch": 63.4532967032967, + "grad_norm": 7.783249855041504, + "learning_rate": 1.827335164835165e-05, + "loss": 0.1518, + "step": 23097 + }, + { + "epoch": 63.456043956043956, + "grad_norm": 10.28105640411377, + "learning_rate": 1.8271978021978022e-05, + "loss": 0.1144, + "step": 23098 + }, + { + "epoch": 63.45879120879121, + "grad_norm": 16.348203659057617, + "learning_rate": 1.8270604395604395e-05, + "loss": 0.2153, + "step": 23099 + }, + { + "epoch": 63.46153846153846, + "grad_norm": 14.673998832702637, + "learning_rate": 1.826923076923077e-05, + "loss": 0.1526, + "step": 23100 + }, + { + "epoch": 63.464285714285715, + "grad_norm": 10.953737258911133, + "learning_rate": 1.8267857142857145e-05, + "loss": 0.1276, + "step": 23101 + }, + { + "epoch": 63.467032967032964, + "grad_norm": 5.42728853225708, + "learning_rate": 1.826648351648352e-05, + "loss": 0.1019, + "step": 23102 + }, + { + "epoch": 63.46978021978022, + "grad_norm": 2.6033542156219482, + "learning_rate": 1.8265109890109892e-05, + "loss": 0.0285, + "step": 23103 + }, + { + "epoch": 63.472527472527474, + "grad_norm": 4.6227288246154785, + "learning_rate": 1.8263736263736266e-05, + "loss": 0.0593, + "step": 23104 + }, + { + "epoch": 63.47527472527472, + "grad_norm": 15.62209415435791, + "learning_rate": 1.8262362637362636e-05, + "loss": 0.3803, + "step": 23105 + }, + { + "epoch": 63.47802197802198, + "grad_norm": 11.187524795532227, + "learning_rate": 1.8260989010989013e-05, + "loss": 0.1873, + "step": 23106 + }, + { + "epoch": 63.48076923076923, + "grad_norm": 13.301772117614746, + "learning_rate": 1.8259615384615386e-05, + "loss": 0.2362, + "step": 23107 + }, + { + "epoch": 63.48351648351648, + "grad_norm": 10.183404922485352, + "learning_rate": 1.825824175824176e-05, + "loss": 0.1805, + "step": 23108 + }, + { + "epoch": 63.48626373626374, + "grad_norm": 6.835082054138184, + "learning_rate": 1.8256868131868133e-05, + "loss": 0.2171, + "step": 23109 + }, + { + "epoch": 63.489010989010985, + "grad_norm": 17.848285675048828, + "learning_rate": 1.8255494505494506e-05, + "loss": 0.3983, + "step": 23110 + }, + { + "epoch": 63.49175824175824, + "grad_norm": 19.186838150024414, + "learning_rate": 1.825412087912088e-05, + "loss": 0.3276, + "step": 23111 + }, + { + "epoch": 63.494505494505496, + "grad_norm": 16.300334930419922, + "learning_rate": 1.8252747252747253e-05, + "loss": 0.5538, + "step": 23112 + }, + { + "epoch": 63.497252747252745, + "grad_norm": 1.022170066833496, + "learning_rate": 1.8251373626373626e-05, + "loss": 0.0171, + "step": 23113 + }, + { + "epoch": 63.5, + "grad_norm": 24.18263053894043, + "learning_rate": 1.825e-05, + "loss": 0.6514, + "step": 23114 + }, + { + "epoch": 63.502747252747255, + "grad_norm": 13.562167167663574, + "learning_rate": 1.8248626373626373e-05, + "loss": 0.6562, + "step": 23115 + }, + { + "epoch": 63.505494505494504, + "grad_norm": 8.342848777770996, + "learning_rate": 1.824725274725275e-05, + "loss": 0.0775, + "step": 23116 + }, + { + "epoch": 63.50824175824176, + "grad_norm": 8.046690940856934, + "learning_rate": 1.8245879120879123e-05, + "loss": 0.222, + "step": 23117 + }, + { + "epoch": 63.51098901098901, + "grad_norm": 14.79636287689209, + "learning_rate": 1.8244505494505497e-05, + "loss": 0.3178, + "step": 23118 + }, + { + "epoch": 63.51373626373626, + "grad_norm": 16.540931701660156, + "learning_rate": 1.824313186813187e-05, + "loss": 0.3782, + "step": 23119 + }, + { + "epoch": 63.51648351648352, + "grad_norm": 4.135252475738525, + "learning_rate": 1.824175824175824e-05, + "loss": 0.0531, + "step": 23120 + }, + { + "epoch": 63.51923076923077, + "grad_norm": 9.213618278503418, + "learning_rate": 1.8240384615384617e-05, + "loss": 0.2252, + "step": 23121 + }, + { + "epoch": 63.52197802197802, + "grad_norm": 18.020723342895508, + "learning_rate": 1.823901098901099e-05, + "loss": 0.44, + "step": 23122 + }, + { + "epoch": 63.52472527472528, + "grad_norm": 15.466252326965332, + "learning_rate": 1.8237637362637364e-05, + "loss": 0.2648, + "step": 23123 + }, + { + "epoch": 63.527472527472526, + "grad_norm": 15.488204002380371, + "learning_rate": 1.8236263736263737e-05, + "loss": 0.1456, + "step": 23124 + }, + { + "epoch": 63.53021978021978, + "grad_norm": 13.251459121704102, + "learning_rate": 1.823489010989011e-05, + "loss": 0.2187, + "step": 23125 + }, + { + "epoch": 63.532967032967036, + "grad_norm": 10.807052612304688, + "learning_rate": 1.8233516483516484e-05, + "loss": 0.2121, + "step": 23126 + }, + { + "epoch": 63.535714285714285, + "grad_norm": 8.010251998901367, + "learning_rate": 1.8232142857142857e-05, + "loss": 0.1293, + "step": 23127 + }, + { + "epoch": 63.53846153846154, + "grad_norm": 15.106927871704102, + "learning_rate": 1.823076923076923e-05, + "loss": 0.2807, + "step": 23128 + }, + { + "epoch": 63.54120879120879, + "grad_norm": 14.614946365356445, + "learning_rate": 1.8229395604395604e-05, + "loss": 0.1953, + "step": 23129 + }, + { + "epoch": 63.543956043956044, + "grad_norm": 15.137928009033203, + "learning_rate": 1.8228021978021978e-05, + "loss": 0.296, + "step": 23130 + }, + { + "epoch": 63.5467032967033, + "grad_norm": 4.599599838256836, + "learning_rate": 1.8226648351648354e-05, + "loss": 0.0551, + "step": 23131 + }, + { + "epoch": 63.54945054945055, + "grad_norm": 10.891457557678223, + "learning_rate": 1.8225274725274728e-05, + "loss": 0.2248, + "step": 23132 + }, + { + "epoch": 63.5521978021978, + "grad_norm": 3.7979297637939453, + "learning_rate": 1.82239010989011e-05, + "loss": 0.0616, + "step": 23133 + }, + { + "epoch": 63.55494505494506, + "grad_norm": 4.7446184158325195, + "learning_rate": 1.8222527472527475e-05, + "loss": 0.0568, + "step": 23134 + }, + { + "epoch": 63.55769230769231, + "grad_norm": 14.201547622680664, + "learning_rate": 1.8221153846153845e-05, + "loss": 0.4509, + "step": 23135 + }, + { + "epoch": 63.56043956043956, + "grad_norm": 12.611266136169434, + "learning_rate": 1.821978021978022e-05, + "loss": 0.2587, + "step": 23136 + }, + { + "epoch": 63.56318681318681, + "grad_norm": 16.254945755004883, + "learning_rate": 1.8218406593406595e-05, + "loss": 0.2821, + "step": 23137 + }, + { + "epoch": 63.565934065934066, + "grad_norm": 10.163589477539062, + "learning_rate": 1.8217032967032968e-05, + "loss": 0.1948, + "step": 23138 + }, + { + "epoch": 63.56868131868132, + "grad_norm": 13.516570091247559, + "learning_rate": 1.8215659340659342e-05, + "loss": 0.3958, + "step": 23139 + }, + { + "epoch": 63.57142857142857, + "grad_norm": 1.655403971672058, + "learning_rate": 1.8214285714285715e-05, + "loss": 0.0183, + "step": 23140 + }, + { + "epoch": 63.574175824175825, + "grad_norm": 11.24920654296875, + "learning_rate": 1.821291208791209e-05, + "loss": 0.3375, + "step": 23141 + }, + { + "epoch": 63.57692307692308, + "grad_norm": 15.641838073730469, + "learning_rate": 1.8211538461538462e-05, + "loss": 0.2866, + "step": 23142 + }, + { + "epoch": 63.57967032967033, + "grad_norm": 13.808099746704102, + "learning_rate": 1.8210164835164835e-05, + "loss": 0.2654, + "step": 23143 + }, + { + "epoch": 63.582417582417584, + "grad_norm": 13.9111328125, + "learning_rate": 1.820879120879121e-05, + "loss": 0.3232, + "step": 23144 + }, + { + "epoch": 63.58516483516483, + "grad_norm": 4.243396759033203, + "learning_rate": 1.8207417582417582e-05, + "loss": 0.0523, + "step": 23145 + }, + { + "epoch": 63.58791208791209, + "grad_norm": 13.123051643371582, + "learning_rate": 1.820604395604396e-05, + "loss": 0.3182, + "step": 23146 + }, + { + "epoch": 63.59065934065934, + "grad_norm": 12.401962280273438, + "learning_rate": 1.8204670329670332e-05, + "loss": 0.196, + "step": 23147 + }, + { + "epoch": 63.59340659340659, + "grad_norm": 15.44470500946045, + "learning_rate": 1.8203296703296706e-05, + "loss": 0.1507, + "step": 23148 + }, + { + "epoch": 63.59615384615385, + "grad_norm": 8.864781379699707, + "learning_rate": 1.820192307692308e-05, + "loss": 0.1398, + "step": 23149 + }, + { + "epoch": 63.5989010989011, + "grad_norm": 7.397823333740234, + "learning_rate": 1.820054945054945e-05, + "loss": 0.0745, + "step": 23150 + }, + { + "epoch": 63.60164835164835, + "grad_norm": 7.06559944152832, + "learning_rate": 1.8199175824175823e-05, + "loss": 0.148, + "step": 23151 + }, + { + "epoch": 63.604395604395606, + "grad_norm": 4.032033920288086, + "learning_rate": 1.81978021978022e-05, + "loss": 0.0795, + "step": 23152 + }, + { + "epoch": 63.607142857142854, + "grad_norm": 8.16759967803955, + "learning_rate": 1.8196428571428573e-05, + "loss": 0.203, + "step": 23153 + }, + { + "epoch": 63.60989010989011, + "grad_norm": 15.410085678100586, + "learning_rate": 1.8195054945054946e-05, + "loss": 0.3519, + "step": 23154 + }, + { + "epoch": 63.612637362637365, + "grad_norm": 8.380608558654785, + "learning_rate": 1.819368131868132e-05, + "loss": 0.0997, + "step": 23155 + }, + { + "epoch": 63.61538461538461, + "grad_norm": 8.792801856994629, + "learning_rate": 1.8192307692307693e-05, + "loss": 0.1558, + "step": 23156 + }, + { + "epoch": 63.61813186813187, + "grad_norm": 16.375343322753906, + "learning_rate": 1.8190934065934066e-05, + "loss": 0.3204, + "step": 23157 + }, + { + "epoch": 63.620879120879124, + "grad_norm": 15.37935733795166, + "learning_rate": 1.818956043956044e-05, + "loss": 0.4559, + "step": 23158 + }, + { + "epoch": 63.62362637362637, + "grad_norm": 11.690055847167969, + "learning_rate": 1.8188186813186813e-05, + "loss": 0.1666, + "step": 23159 + }, + { + "epoch": 63.62637362637363, + "grad_norm": 12.12235164642334, + "learning_rate": 1.8186813186813187e-05, + "loss": 0.2014, + "step": 23160 + }, + { + "epoch": 63.629120879120876, + "grad_norm": 1.1917015314102173, + "learning_rate": 1.818543956043956e-05, + "loss": 0.0202, + "step": 23161 + }, + { + "epoch": 63.63186813186813, + "grad_norm": 21.32801055908203, + "learning_rate": 1.8184065934065937e-05, + "loss": 0.4222, + "step": 23162 + }, + { + "epoch": 63.63461538461539, + "grad_norm": 7.21918249130249, + "learning_rate": 1.818269230769231e-05, + "loss": 0.112, + "step": 23163 + }, + { + "epoch": 63.637362637362635, + "grad_norm": 9.195635795593262, + "learning_rate": 1.8181318681318684e-05, + "loss": 0.0918, + "step": 23164 + }, + { + "epoch": 63.64010989010989, + "grad_norm": 14.137932777404785, + "learning_rate": 1.8179945054945054e-05, + "loss": 0.3537, + "step": 23165 + }, + { + "epoch": 63.642857142857146, + "grad_norm": 26.867385864257812, + "learning_rate": 1.8178571428571427e-05, + "loss": 0.6566, + "step": 23166 + }, + { + "epoch": 63.645604395604394, + "grad_norm": 8.5541410446167, + "learning_rate": 1.8177197802197804e-05, + "loss": 0.2951, + "step": 23167 + }, + { + "epoch": 63.64835164835165, + "grad_norm": 10.233147621154785, + "learning_rate": 1.8175824175824177e-05, + "loss": 0.3009, + "step": 23168 + }, + { + "epoch": 63.6510989010989, + "grad_norm": 4.555187225341797, + "learning_rate": 1.817445054945055e-05, + "loss": 0.0715, + "step": 23169 + }, + { + "epoch": 63.65384615384615, + "grad_norm": 18.287973403930664, + "learning_rate": 1.8173076923076924e-05, + "loss": 0.5464, + "step": 23170 + }, + { + "epoch": 63.65659340659341, + "grad_norm": 5.005723476409912, + "learning_rate": 1.8171703296703297e-05, + "loss": 0.0867, + "step": 23171 + }, + { + "epoch": 63.65934065934066, + "grad_norm": 19.664600372314453, + "learning_rate": 1.817032967032967e-05, + "loss": 0.2399, + "step": 23172 + }, + { + "epoch": 63.66208791208791, + "grad_norm": 13.020992279052734, + "learning_rate": 1.8168956043956044e-05, + "loss": 0.3228, + "step": 23173 + }, + { + "epoch": 63.66483516483517, + "grad_norm": 3.1110317707061768, + "learning_rate": 1.8167582417582418e-05, + "loss": 0.0557, + "step": 23174 + }, + { + "epoch": 63.667582417582416, + "grad_norm": 9.698473930358887, + "learning_rate": 1.816620879120879e-05, + "loss": 0.1069, + "step": 23175 + }, + { + "epoch": 63.67032967032967, + "grad_norm": 15.447867393493652, + "learning_rate": 1.8164835164835165e-05, + "loss": 0.2408, + "step": 23176 + }, + { + "epoch": 63.67307692307692, + "grad_norm": 8.159444808959961, + "learning_rate": 1.816346153846154e-05, + "loss": 0.0618, + "step": 23177 + }, + { + "epoch": 63.675824175824175, + "grad_norm": 27.177885055541992, + "learning_rate": 1.8162087912087915e-05, + "loss": 0.4739, + "step": 23178 + }, + { + "epoch": 63.67857142857143, + "grad_norm": 11.22081184387207, + "learning_rate": 1.8160714285714288e-05, + "loss": 0.178, + "step": 23179 + }, + { + "epoch": 63.68131868131868, + "grad_norm": 12.865981101989746, + "learning_rate": 1.8159340659340658e-05, + "loss": 0.4215, + "step": 23180 + }, + { + "epoch": 63.684065934065934, + "grad_norm": 9.988991737365723, + "learning_rate": 1.815796703296703e-05, + "loss": 0.2148, + "step": 23181 + }, + { + "epoch": 63.68681318681319, + "grad_norm": 14.459775924682617, + "learning_rate": 1.815659340659341e-05, + "loss": 0.2564, + "step": 23182 + }, + { + "epoch": 63.68956043956044, + "grad_norm": 22.40185546875, + "learning_rate": 1.8155219780219782e-05, + "loss": 0.6657, + "step": 23183 + }, + { + "epoch": 63.69230769230769, + "grad_norm": 16.03660774230957, + "learning_rate": 1.8153846153846155e-05, + "loss": 0.3776, + "step": 23184 + }, + { + "epoch": 63.69505494505494, + "grad_norm": 14.358628273010254, + "learning_rate": 1.815247252747253e-05, + "loss": 0.2129, + "step": 23185 + }, + { + "epoch": 63.6978021978022, + "grad_norm": 14.350610733032227, + "learning_rate": 1.8151098901098902e-05, + "loss": 0.2456, + "step": 23186 + }, + { + "epoch": 63.70054945054945, + "grad_norm": 14.292306900024414, + "learning_rate": 1.8149725274725275e-05, + "loss": 0.2609, + "step": 23187 + }, + { + "epoch": 63.7032967032967, + "grad_norm": 12.565582275390625, + "learning_rate": 1.814835164835165e-05, + "loss": 0.4236, + "step": 23188 + }, + { + "epoch": 63.706043956043956, + "grad_norm": 8.32454776763916, + "learning_rate": 1.8146978021978022e-05, + "loss": 0.294, + "step": 23189 + }, + { + "epoch": 63.70879120879121, + "grad_norm": 5.624580383300781, + "learning_rate": 1.8145604395604396e-05, + "loss": 0.0843, + "step": 23190 + }, + { + "epoch": 63.71153846153846, + "grad_norm": 15.093806266784668, + "learning_rate": 1.814423076923077e-05, + "loss": 0.2808, + "step": 23191 + }, + { + "epoch": 63.714285714285715, + "grad_norm": 28.75529670715332, + "learning_rate": 1.8142857142857146e-05, + "loss": 0.165, + "step": 23192 + }, + { + "epoch": 63.717032967032964, + "grad_norm": 8.570514678955078, + "learning_rate": 1.814148351648352e-05, + "loss": 0.1576, + "step": 23193 + }, + { + "epoch": 63.71978021978022, + "grad_norm": 8.701932907104492, + "learning_rate": 1.814010989010989e-05, + "loss": 0.3021, + "step": 23194 + }, + { + "epoch": 63.722527472527474, + "grad_norm": 8.549254417419434, + "learning_rate": 1.8138736263736263e-05, + "loss": 0.1688, + "step": 23195 + }, + { + "epoch": 63.72527472527472, + "grad_norm": 9.07747745513916, + "learning_rate": 1.8137362637362636e-05, + "loss": 0.2735, + "step": 23196 + }, + { + "epoch": 63.72802197802198, + "grad_norm": 11.451676368713379, + "learning_rate": 1.8135989010989013e-05, + "loss": 0.1289, + "step": 23197 + }, + { + "epoch": 63.73076923076923, + "grad_norm": 12.590556144714355, + "learning_rate": 1.8134615384615386e-05, + "loss": 0.1673, + "step": 23198 + }, + { + "epoch": 63.73351648351648, + "grad_norm": 14.80595588684082, + "learning_rate": 1.813324175824176e-05, + "loss": 0.2852, + "step": 23199 + }, + { + "epoch": 63.73626373626374, + "grad_norm": 0.8727855086326599, + "learning_rate": 1.8131868131868133e-05, + "loss": 0.0142, + "step": 23200 + }, + { + "epoch": 63.73901098901099, + "grad_norm": 9.139320373535156, + "learning_rate": 1.8130494505494506e-05, + "loss": 0.1346, + "step": 23201 + }, + { + "epoch": 63.74175824175824, + "grad_norm": 14.465514183044434, + "learning_rate": 1.812912087912088e-05, + "loss": 0.2719, + "step": 23202 + }, + { + "epoch": 63.744505494505496, + "grad_norm": 20.22344207763672, + "learning_rate": 1.8127747252747253e-05, + "loss": 0.3419, + "step": 23203 + }, + { + "epoch": 63.747252747252745, + "grad_norm": 17.964548110961914, + "learning_rate": 1.8126373626373627e-05, + "loss": 0.4513, + "step": 23204 + }, + { + "epoch": 63.75, + "grad_norm": 9.270122528076172, + "learning_rate": 1.8125e-05, + "loss": 0.1896, + "step": 23205 + }, + { + "epoch": 63.752747252747255, + "grad_norm": 9.395946502685547, + "learning_rate": 1.8123626373626374e-05, + "loss": 0.1583, + "step": 23206 + }, + { + "epoch": 63.755494505494504, + "grad_norm": 12.886573791503906, + "learning_rate": 1.812225274725275e-05, + "loss": 0.27, + "step": 23207 + }, + { + "epoch": 63.75824175824176, + "grad_norm": 24.611982345581055, + "learning_rate": 1.8120879120879124e-05, + "loss": 0.9983, + "step": 23208 + }, + { + "epoch": 63.76098901098901, + "grad_norm": 11.978767395019531, + "learning_rate": 1.8119505494505494e-05, + "loss": 0.1791, + "step": 23209 + }, + { + "epoch": 63.76373626373626, + "grad_norm": 1.1754313707351685, + "learning_rate": 1.8118131868131867e-05, + "loss": 0.0161, + "step": 23210 + }, + { + "epoch": 63.76648351648352, + "grad_norm": 5.444667816162109, + "learning_rate": 1.811675824175824e-05, + "loss": 0.0993, + "step": 23211 + }, + { + "epoch": 63.76923076923077, + "grad_norm": 11.722921371459961, + "learning_rate": 1.8115384615384617e-05, + "loss": 0.3152, + "step": 23212 + }, + { + "epoch": 63.77197802197802, + "grad_norm": 7.210737228393555, + "learning_rate": 1.811401098901099e-05, + "loss": 0.0908, + "step": 23213 + }, + { + "epoch": 63.77472527472528, + "grad_norm": 9.521989822387695, + "learning_rate": 1.8112637362637364e-05, + "loss": 0.3238, + "step": 23214 + }, + { + "epoch": 63.777472527472526, + "grad_norm": 4.282077312469482, + "learning_rate": 1.8111263736263738e-05, + "loss": 0.0385, + "step": 23215 + }, + { + "epoch": 63.78021978021978, + "grad_norm": 13.607436180114746, + "learning_rate": 1.810989010989011e-05, + "loss": 0.2432, + "step": 23216 + }, + { + "epoch": 63.782967032967036, + "grad_norm": 19.76300621032715, + "learning_rate": 1.8108516483516484e-05, + "loss": 0.3155, + "step": 23217 + }, + { + "epoch": 63.785714285714285, + "grad_norm": 10.538976669311523, + "learning_rate": 1.8107142857142858e-05, + "loss": 0.1807, + "step": 23218 + }, + { + "epoch": 63.78846153846154, + "grad_norm": 6.188882827758789, + "learning_rate": 1.810576923076923e-05, + "loss": 0.0872, + "step": 23219 + }, + { + "epoch": 63.79120879120879, + "grad_norm": 15.686177253723145, + "learning_rate": 1.8104395604395605e-05, + "loss": 0.5076, + "step": 23220 + }, + { + "epoch": 63.793956043956044, + "grad_norm": 11.432846069335938, + "learning_rate": 1.8103021978021978e-05, + "loss": 0.1769, + "step": 23221 + }, + { + "epoch": 63.7967032967033, + "grad_norm": 13.157838821411133, + "learning_rate": 1.8101648351648355e-05, + "loss": 0.2808, + "step": 23222 + }, + { + "epoch": 63.79945054945055, + "grad_norm": 7.539647579193115, + "learning_rate": 1.8100274725274728e-05, + "loss": 0.0927, + "step": 23223 + }, + { + "epoch": 63.8021978021978, + "grad_norm": 6.8836870193481445, + "learning_rate": 1.8098901098901098e-05, + "loss": 0.1058, + "step": 23224 + }, + { + "epoch": 63.80494505494506, + "grad_norm": 29.21259307861328, + "learning_rate": 1.809752747252747e-05, + "loss": 0.4379, + "step": 23225 + }, + { + "epoch": 63.80769230769231, + "grad_norm": 21.10576820373535, + "learning_rate": 1.8096153846153845e-05, + "loss": 0.508, + "step": 23226 + }, + { + "epoch": 63.81043956043956, + "grad_norm": 7.172410488128662, + "learning_rate": 1.8094780219780222e-05, + "loss": 0.1299, + "step": 23227 + }, + { + "epoch": 63.81318681318681, + "grad_norm": 16.202688217163086, + "learning_rate": 1.8093406593406595e-05, + "loss": 0.2565, + "step": 23228 + }, + { + "epoch": 63.815934065934066, + "grad_norm": 7.574930191040039, + "learning_rate": 1.809203296703297e-05, + "loss": 0.1764, + "step": 23229 + }, + { + "epoch": 63.81868131868132, + "grad_norm": 1.4901196956634521, + "learning_rate": 1.8090659340659342e-05, + "loss": 0.02, + "step": 23230 + }, + { + "epoch": 63.82142857142857, + "grad_norm": 17.207921981811523, + "learning_rate": 1.8089285714285715e-05, + "loss": 0.1983, + "step": 23231 + }, + { + "epoch": 63.824175824175825, + "grad_norm": 14.52580451965332, + "learning_rate": 1.808791208791209e-05, + "loss": 0.2029, + "step": 23232 + }, + { + "epoch": 63.82692307692308, + "grad_norm": 5.194515228271484, + "learning_rate": 1.8086538461538462e-05, + "loss": 0.0745, + "step": 23233 + }, + { + "epoch": 63.82967032967033, + "grad_norm": 12.384866714477539, + "learning_rate": 1.8085164835164836e-05, + "loss": 0.1944, + "step": 23234 + }, + { + "epoch": 63.832417582417584, + "grad_norm": 14.891267776489258, + "learning_rate": 1.808379120879121e-05, + "loss": 0.336, + "step": 23235 + }, + { + "epoch": 63.83516483516483, + "grad_norm": 2.564084768295288, + "learning_rate": 1.8082417582417582e-05, + "loss": 0.0228, + "step": 23236 + }, + { + "epoch": 63.83791208791209, + "grad_norm": 17.867143630981445, + "learning_rate": 1.808104395604396e-05, + "loss": 0.2262, + "step": 23237 + }, + { + "epoch": 63.84065934065934, + "grad_norm": 11.432881355285645, + "learning_rate": 1.8079670329670333e-05, + "loss": 0.3584, + "step": 23238 + }, + { + "epoch": 63.84340659340659, + "grad_norm": 12.060464859008789, + "learning_rate": 1.8078296703296703e-05, + "loss": 0.361, + "step": 23239 + }, + { + "epoch": 63.84615384615385, + "grad_norm": 15.18740177154541, + "learning_rate": 1.8076923076923076e-05, + "loss": 0.2935, + "step": 23240 + }, + { + "epoch": 63.8489010989011, + "grad_norm": 21.82072639465332, + "learning_rate": 1.807554945054945e-05, + "loss": 0.3377, + "step": 23241 + }, + { + "epoch": 63.85164835164835, + "grad_norm": 21.341712951660156, + "learning_rate": 1.8074175824175826e-05, + "loss": 0.5846, + "step": 23242 + }, + { + "epoch": 63.854395604395606, + "grad_norm": 5.199185848236084, + "learning_rate": 1.80728021978022e-05, + "loss": 0.0387, + "step": 23243 + }, + { + "epoch": 63.857142857142854, + "grad_norm": 5.338019847869873, + "learning_rate": 1.8071428571428573e-05, + "loss": 0.1555, + "step": 23244 + }, + { + "epoch": 63.85989010989011, + "grad_norm": 8.361054420471191, + "learning_rate": 1.8070054945054947e-05, + "loss": 0.1946, + "step": 23245 + }, + { + "epoch": 63.862637362637365, + "grad_norm": 13.623472213745117, + "learning_rate": 1.806868131868132e-05, + "loss": 0.4163, + "step": 23246 + }, + { + "epoch": 63.86538461538461, + "grad_norm": 7.966927528381348, + "learning_rate": 1.8067307692307693e-05, + "loss": 0.1227, + "step": 23247 + }, + { + "epoch": 63.86813186813187, + "grad_norm": 13.268256187438965, + "learning_rate": 1.8065934065934067e-05, + "loss": 0.265, + "step": 23248 + }, + { + "epoch": 63.870879120879124, + "grad_norm": 7.435497760772705, + "learning_rate": 1.806456043956044e-05, + "loss": 0.1099, + "step": 23249 + }, + { + "epoch": 63.87362637362637, + "grad_norm": 5.9957098960876465, + "learning_rate": 1.8063186813186814e-05, + "loss": 0.0591, + "step": 23250 + }, + { + "epoch": 63.87637362637363, + "grad_norm": 7.034447193145752, + "learning_rate": 1.8061813186813187e-05, + "loss": 0.1915, + "step": 23251 + }, + { + "epoch": 63.879120879120876, + "grad_norm": 8.888070106506348, + "learning_rate": 1.8060439560439564e-05, + "loss": 0.1813, + "step": 23252 + }, + { + "epoch": 63.88186813186813, + "grad_norm": 4.700502395629883, + "learning_rate": 1.8059065934065937e-05, + "loss": 0.0369, + "step": 23253 + }, + { + "epoch": 63.88461538461539, + "grad_norm": 8.140954971313477, + "learning_rate": 1.8057692307692307e-05, + "loss": 0.1005, + "step": 23254 + }, + { + "epoch": 63.887362637362635, + "grad_norm": 9.529706001281738, + "learning_rate": 1.805631868131868e-05, + "loss": 0.1209, + "step": 23255 + }, + { + "epoch": 63.89010989010989, + "grad_norm": 4.3590803146362305, + "learning_rate": 1.8054945054945054e-05, + "loss": 0.0783, + "step": 23256 + }, + { + "epoch": 63.892857142857146, + "grad_norm": 13.044632911682129, + "learning_rate": 1.805357142857143e-05, + "loss": 0.251, + "step": 23257 + }, + { + "epoch": 63.895604395604394, + "grad_norm": 25.35123634338379, + "learning_rate": 1.8052197802197804e-05, + "loss": 0.5691, + "step": 23258 + }, + { + "epoch": 63.89835164835165, + "grad_norm": 20.477134704589844, + "learning_rate": 1.8050824175824178e-05, + "loss": 0.3701, + "step": 23259 + }, + { + "epoch": 63.9010989010989, + "grad_norm": 10.031868934631348, + "learning_rate": 1.804945054945055e-05, + "loss": 0.2201, + "step": 23260 + }, + { + "epoch": 63.90384615384615, + "grad_norm": 15.524984359741211, + "learning_rate": 1.8048076923076924e-05, + "loss": 0.3346, + "step": 23261 + }, + { + "epoch": 63.90659340659341, + "grad_norm": 9.500144004821777, + "learning_rate": 1.8046703296703298e-05, + "loss": 0.1466, + "step": 23262 + }, + { + "epoch": 63.90934065934066, + "grad_norm": 7.54421329498291, + "learning_rate": 1.804532967032967e-05, + "loss": 0.1513, + "step": 23263 + }, + { + "epoch": 63.91208791208791, + "grad_norm": 15.235830307006836, + "learning_rate": 1.8043956043956045e-05, + "loss": 0.2969, + "step": 23264 + }, + { + "epoch": 63.91483516483517, + "grad_norm": 1.3706380128860474, + "learning_rate": 1.8042582417582418e-05, + "loss": 0.0212, + "step": 23265 + }, + { + "epoch": 63.917582417582416, + "grad_norm": 2.9510068893432617, + "learning_rate": 1.804120879120879e-05, + "loss": 0.0355, + "step": 23266 + }, + { + "epoch": 63.92032967032967, + "grad_norm": 16.362821578979492, + "learning_rate": 1.8039835164835165e-05, + "loss": 0.2212, + "step": 23267 + }, + { + "epoch": 63.92307692307692, + "grad_norm": 7.830824375152588, + "learning_rate": 1.803846153846154e-05, + "loss": 0.1436, + "step": 23268 + }, + { + "epoch": 63.925824175824175, + "grad_norm": 3.2707083225250244, + "learning_rate": 1.8037087912087912e-05, + "loss": 0.0354, + "step": 23269 + }, + { + "epoch": 63.92857142857143, + "grad_norm": 4.848544597625732, + "learning_rate": 1.8035714285714285e-05, + "loss": 0.0645, + "step": 23270 + }, + { + "epoch": 63.93131868131868, + "grad_norm": 16.374584197998047, + "learning_rate": 1.803434065934066e-05, + "loss": 0.2047, + "step": 23271 + }, + { + "epoch": 63.934065934065934, + "grad_norm": 2.3988542556762695, + "learning_rate": 1.8032967032967032e-05, + "loss": 0.0404, + "step": 23272 + }, + { + "epoch": 63.93681318681319, + "grad_norm": 20.750768661499023, + "learning_rate": 1.803159340659341e-05, + "loss": 0.3659, + "step": 23273 + }, + { + "epoch": 63.93956043956044, + "grad_norm": 16.217519760131836, + "learning_rate": 1.8030219780219782e-05, + "loss": 0.1952, + "step": 23274 + }, + { + "epoch": 63.94230769230769, + "grad_norm": 6.1000142097473145, + "learning_rate": 1.8028846153846156e-05, + "loss": 0.123, + "step": 23275 + }, + { + "epoch": 63.94505494505494, + "grad_norm": 5.142087936401367, + "learning_rate": 1.802747252747253e-05, + "loss": 0.072, + "step": 23276 + }, + { + "epoch": 63.9478021978022, + "grad_norm": 12.271563529968262, + "learning_rate": 1.8026098901098902e-05, + "loss": 0.3952, + "step": 23277 + }, + { + "epoch": 63.95054945054945, + "grad_norm": 17.75429916381836, + "learning_rate": 1.8024725274725276e-05, + "loss": 0.5617, + "step": 23278 + }, + { + "epoch": 63.9532967032967, + "grad_norm": 14.956185340881348, + "learning_rate": 1.802335164835165e-05, + "loss": 0.4033, + "step": 23279 + }, + { + "epoch": 63.956043956043956, + "grad_norm": 11.30204963684082, + "learning_rate": 1.8021978021978023e-05, + "loss": 0.2796, + "step": 23280 + }, + { + "epoch": 63.95879120879121, + "grad_norm": 7.216456890106201, + "learning_rate": 1.8020604395604396e-05, + "loss": 0.081, + "step": 23281 + }, + { + "epoch": 63.96153846153846, + "grad_norm": 9.415472984313965, + "learning_rate": 1.801923076923077e-05, + "loss": 0.2003, + "step": 23282 + }, + { + "epoch": 63.964285714285715, + "grad_norm": 19.711563110351562, + "learning_rate": 1.8017857142857146e-05, + "loss": 0.3283, + "step": 23283 + }, + { + "epoch": 63.967032967032964, + "grad_norm": 13.522333145141602, + "learning_rate": 1.8016483516483516e-05, + "loss": 0.1812, + "step": 23284 + }, + { + "epoch": 63.96978021978022, + "grad_norm": 8.893193244934082, + "learning_rate": 1.801510989010989e-05, + "loss": 0.1222, + "step": 23285 + }, + { + "epoch": 63.972527472527474, + "grad_norm": 4.625339984893799, + "learning_rate": 1.8013736263736263e-05, + "loss": 0.0963, + "step": 23286 + }, + { + "epoch": 63.97527472527472, + "grad_norm": 9.969414710998535, + "learning_rate": 1.8012362637362636e-05, + "loss": 0.1591, + "step": 23287 + }, + { + "epoch": 63.97802197802198, + "grad_norm": 6.689894676208496, + "learning_rate": 1.8010989010989013e-05, + "loss": 0.1374, + "step": 23288 + }, + { + "epoch": 63.98076923076923, + "grad_norm": 11.87936782836914, + "learning_rate": 1.8009615384615387e-05, + "loss": 0.2378, + "step": 23289 + }, + { + "epoch": 63.98351648351648, + "grad_norm": 7.226129531860352, + "learning_rate": 1.800824175824176e-05, + "loss": 0.0834, + "step": 23290 + }, + { + "epoch": 63.98626373626374, + "grad_norm": 22.894031524658203, + "learning_rate": 1.8006868131868133e-05, + "loss": 0.262, + "step": 23291 + }, + { + "epoch": 63.98901098901099, + "grad_norm": 22.851119995117188, + "learning_rate": 1.8005494505494503e-05, + "loss": 0.7263, + "step": 23292 + }, + { + "epoch": 63.99175824175824, + "grad_norm": 21.253582000732422, + "learning_rate": 1.800412087912088e-05, + "loss": 0.4945, + "step": 23293 + }, + { + "epoch": 63.994505494505496, + "grad_norm": 14.49795913696289, + "learning_rate": 1.8002747252747254e-05, + "loss": 0.3207, + "step": 23294 + }, + { + "epoch": 63.997252747252745, + "grad_norm": 10.543478965759277, + "learning_rate": 1.8001373626373627e-05, + "loss": 0.203, + "step": 23295 + }, + { + "epoch": 64.0, + "grad_norm": 50.864681243896484, + "learning_rate": 1.8e-05, + "loss": 1.2568, + "step": 23296 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.5055096418732782, + "eval_f1": 0.5234122748307614, + "eval_f1_DuraRiadoRio_64x64": 0.5472636815920398, + "eval_f1_Mole_64x64": 0.5929203539823009, + "eval_f1_Quebrado_64x64": 0.5370370370370371, + "eval_f1_RiadoRio_64x64": 0.45499181669394434, + "eval_f1_RioFechado_64x64": 0.48484848484848486, + "eval_loss": 3.723972797393799, + "eval_precision": 0.7495032216281178, + "eval_precision_DuraRiadoRio_64x64": 0.9649122807017544, + "eval_precision_Mole_64x64": 0.8170731707317073, + "eval_precision_Quebrado_64x64": 0.8055555555555556, + "eval_precision_RiadoRio_64x64": 0.3028322440087146, + "eval_precision_RioFechado_64x64": 0.8571428571428571, + "eval_recall": 0.5005003706449223, + "eval_recall_DuraRiadoRio_64x64": 0.3819444444444444, + "eval_recall_Mole_64x64": 0.4652777777777778, + "eval_recall_Quebrado_64x64": 0.4027777777777778, + "eval_recall_RiadoRio_64x64": 0.9144736842105263, + "eval_recall_RioFechado_64x64": 0.3380281690140845, + "eval_runtime": 1.7495, + "eval_samples_per_second": 414.985, + "eval_steps_per_second": 26.294, + "step": 23296 + }, + { + "epoch": 64.00274725274726, + "grad_norm": 12.535876274108887, + "learning_rate": 1.7998626373626374e-05, + "loss": 0.2553, + "step": 23297 + }, + { + "epoch": 64.00549450549451, + "grad_norm": 14.553695678710938, + "learning_rate": 1.799725274725275e-05, + "loss": 0.4064, + "step": 23298 + }, + { + "epoch": 64.00824175824175, + "grad_norm": 13.234770774841309, + "learning_rate": 1.799587912087912e-05, + "loss": 0.2079, + "step": 23299 + }, + { + "epoch": 64.01098901098901, + "grad_norm": 17.2615909576416, + "learning_rate": 1.7994505494505494e-05, + "loss": 0.364, + "step": 23300 + }, + { + "epoch": 64.01373626373626, + "grad_norm": 12.726898193359375, + "learning_rate": 1.7993131868131867e-05, + "loss": 0.1536, + "step": 23301 + }, + { + "epoch": 64.01648351648352, + "grad_norm": 10.62975788116455, + "learning_rate": 1.799175824175824e-05, + "loss": 0.1763, + "step": 23302 + }, + { + "epoch": 64.01923076923077, + "grad_norm": 13.013880729675293, + "learning_rate": 1.7990384615384618e-05, + "loss": 0.2169, + "step": 23303 + }, + { + "epoch": 64.02197802197803, + "grad_norm": 9.664257049560547, + "learning_rate": 1.798901098901099e-05, + "loss": 0.247, + "step": 23304 + }, + { + "epoch": 64.02472527472527, + "grad_norm": 7.585111618041992, + "learning_rate": 1.7987637362637365e-05, + "loss": 0.0835, + "step": 23305 + }, + { + "epoch": 64.02747252747253, + "grad_norm": 7.646313667297363, + "learning_rate": 1.7986263736263738e-05, + "loss": 0.1453, + "step": 23306 + }, + { + "epoch": 64.03021978021978, + "grad_norm": 10.847524642944336, + "learning_rate": 1.7984890109890108e-05, + "loss": 0.2636, + "step": 23307 + }, + { + "epoch": 64.03296703296704, + "grad_norm": 20.882099151611328, + "learning_rate": 1.7983516483516485e-05, + "loss": 0.4454, + "step": 23308 + }, + { + "epoch": 64.03571428571429, + "grad_norm": 8.709371566772461, + "learning_rate": 1.7982142857142858e-05, + "loss": 0.3523, + "step": 23309 + }, + { + "epoch": 64.03846153846153, + "grad_norm": 19.116422653198242, + "learning_rate": 1.798076923076923e-05, + "loss": 0.2267, + "step": 23310 + }, + { + "epoch": 64.04120879120879, + "grad_norm": 4.813119888305664, + "learning_rate": 1.7979395604395605e-05, + "loss": 0.034, + "step": 23311 + }, + { + "epoch": 64.04395604395604, + "grad_norm": 54.10966110229492, + "learning_rate": 1.797802197802198e-05, + "loss": 0.311, + "step": 23312 + }, + { + "epoch": 64.0467032967033, + "grad_norm": 6.466495513916016, + "learning_rate": 1.7976648351648355e-05, + "loss": 0.1369, + "step": 23313 + }, + { + "epoch": 64.04945054945055, + "grad_norm": 18.541444778442383, + "learning_rate": 1.7975274725274725e-05, + "loss": 0.2365, + "step": 23314 + }, + { + "epoch": 64.0521978021978, + "grad_norm": 2.8688623905181885, + "learning_rate": 1.79739010989011e-05, + "loss": 0.0522, + "step": 23315 + }, + { + "epoch": 64.05494505494505, + "grad_norm": 8.643852233886719, + "learning_rate": 1.7972527472527472e-05, + "loss": 0.1745, + "step": 23316 + }, + { + "epoch": 64.0576923076923, + "grad_norm": 9.91787052154541, + "learning_rate": 1.7971153846153845e-05, + "loss": 0.2202, + "step": 23317 + }, + { + "epoch": 64.06043956043956, + "grad_norm": 10.846449851989746, + "learning_rate": 1.7969780219780222e-05, + "loss": 0.2734, + "step": 23318 + }, + { + "epoch": 64.06318681318682, + "grad_norm": 28.99056625366211, + "learning_rate": 1.7968406593406596e-05, + "loss": 0.8347, + "step": 23319 + }, + { + "epoch": 64.06593406593407, + "grad_norm": 6.984396457672119, + "learning_rate": 1.796703296703297e-05, + "loss": 0.0912, + "step": 23320 + }, + { + "epoch": 64.06868131868131, + "grad_norm": 7.694764137268066, + "learning_rate": 1.7965659340659342e-05, + "loss": 0.2291, + "step": 23321 + }, + { + "epoch": 64.07142857142857, + "grad_norm": 8.593817710876465, + "learning_rate": 1.7964285714285712e-05, + "loss": 0.1073, + "step": 23322 + }, + { + "epoch": 64.07417582417582, + "grad_norm": 20.342721939086914, + "learning_rate": 1.796291208791209e-05, + "loss": 0.4231, + "step": 23323 + }, + { + "epoch": 64.07692307692308, + "grad_norm": 12.56351375579834, + "learning_rate": 1.7961538461538463e-05, + "loss": 0.2309, + "step": 23324 + }, + { + "epoch": 64.07967032967034, + "grad_norm": 12.250295639038086, + "learning_rate": 1.7960164835164836e-05, + "loss": 0.1812, + "step": 23325 + }, + { + "epoch": 64.08241758241758, + "grad_norm": 8.93278694152832, + "learning_rate": 1.795879120879121e-05, + "loss": 0.2073, + "step": 23326 + }, + { + "epoch": 64.08516483516483, + "grad_norm": 16.231679916381836, + "learning_rate": 1.7957417582417583e-05, + "loss": 0.2932, + "step": 23327 + }, + { + "epoch": 64.08791208791209, + "grad_norm": 2.640706777572632, + "learning_rate": 1.795604395604396e-05, + "loss": 0.0306, + "step": 23328 + }, + { + "epoch": 64.09065934065934, + "grad_norm": 19.730504989624023, + "learning_rate": 1.795467032967033e-05, + "loss": 0.4112, + "step": 23329 + }, + { + "epoch": 64.0934065934066, + "grad_norm": 16.537567138671875, + "learning_rate": 1.7953296703296703e-05, + "loss": 0.3337, + "step": 23330 + }, + { + "epoch": 64.09615384615384, + "grad_norm": 3.7268459796905518, + "learning_rate": 1.7951923076923076e-05, + "loss": 0.0472, + "step": 23331 + }, + { + "epoch": 64.0989010989011, + "grad_norm": 38.2153205871582, + "learning_rate": 1.795054945054945e-05, + "loss": 0.7583, + "step": 23332 + }, + { + "epoch": 64.10164835164835, + "grad_norm": 8.060846328735352, + "learning_rate": 1.7949175824175827e-05, + "loss": 0.0907, + "step": 23333 + }, + { + "epoch": 64.1043956043956, + "grad_norm": 21.502094268798828, + "learning_rate": 1.79478021978022e-05, + "loss": 0.3867, + "step": 23334 + }, + { + "epoch": 64.10714285714286, + "grad_norm": 19.417587280273438, + "learning_rate": 1.7946428571428573e-05, + "loss": 0.4719, + "step": 23335 + }, + { + "epoch": 64.10989010989012, + "grad_norm": 13.094037055969238, + "learning_rate": 1.7945054945054947e-05, + "loss": 0.3569, + "step": 23336 + }, + { + "epoch": 64.11263736263736, + "grad_norm": 7.734708786010742, + "learning_rate": 1.7943681318681317e-05, + "loss": 0.1487, + "step": 23337 + }, + { + "epoch": 64.11538461538461, + "grad_norm": 7.611135482788086, + "learning_rate": 1.7942307692307694e-05, + "loss": 0.1277, + "step": 23338 + }, + { + "epoch": 64.11813186813187, + "grad_norm": 14.84539794921875, + "learning_rate": 1.7940934065934067e-05, + "loss": 0.3169, + "step": 23339 + }, + { + "epoch": 64.12087912087912, + "grad_norm": 17.25105857849121, + "learning_rate": 1.793956043956044e-05, + "loss": 0.2976, + "step": 23340 + }, + { + "epoch": 64.12362637362638, + "grad_norm": 11.010753631591797, + "learning_rate": 1.7938186813186814e-05, + "loss": 0.2177, + "step": 23341 + }, + { + "epoch": 64.12637362637362, + "grad_norm": 9.553995132446289, + "learning_rate": 1.7936813186813187e-05, + "loss": 0.3037, + "step": 23342 + }, + { + "epoch": 64.12912087912088, + "grad_norm": 3.262299060821533, + "learning_rate": 1.7935439560439564e-05, + "loss": 0.0657, + "step": 23343 + }, + { + "epoch": 64.13186813186813, + "grad_norm": 25.190580368041992, + "learning_rate": 1.7934065934065934e-05, + "loss": 0.5807, + "step": 23344 + }, + { + "epoch": 64.13461538461539, + "grad_norm": 7.621279239654541, + "learning_rate": 1.7932692307692308e-05, + "loss": 0.1676, + "step": 23345 + }, + { + "epoch": 64.13736263736264, + "grad_norm": 60.501949310302734, + "learning_rate": 1.793131868131868e-05, + "loss": 0.4417, + "step": 23346 + }, + { + "epoch": 64.14010989010988, + "grad_norm": 18.186931610107422, + "learning_rate": 1.7929945054945054e-05, + "loss": 0.3949, + "step": 23347 + }, + { + "epoch": 64.14285714285714, + "grad_norm": 22.621742248535156, + "learning_rate": 1.792857142857143e-05, + "loss": 0.077, + "step": 23348 + }, + { + "epoch": 64.1456043956044, + "grad_norm": 6.009578227996826, + "learning_rate": 1.7927197802197805e-05, + "loss": 0.1389, + "step": 23349 + }, + { + "epoch": 64.14835164835165, + "grad_norm": 9.63199234008789, + "learning_rate": 1.7925824175824178e-05, + "loss": 0.178, + "step": 23350 + }, + { + "epoch": 64.1510989010989, + "grad_norm": 24.185644149780273, + "learning_rate": 1.792445054945055e-05, + "loss": 0.7644, + "step": 23351 + }, + { + "epoch": 64.15384615384616, + "grad_norm": 3.572450637817383, + "learning_rate": 1.792307692307692e-05, + "loss": 0.0722, + "step": 23352 + }, + { + "epoch": 64.1565934065934, + "grad_norm": 2.1691339015960693, + "learning_rate": 1.7921703296703298e-05, + "loss": 0.0262, + "step": 23353 + }, + { + "epoch": 64.15934065934066, + "grad_norm": 10.605439186096191, + "learning_rate": 1.792032967032967e-05, + "loss": 0.2211, + "step": 23354 + }, + { + "epoch": 64.16208791208791, + "grad_norm": 23.007118225097656, + "learning_rate": 1.7918956043956045e-05, + "loss": 0.5936, + "step": 23355 + }, + { + "epoch": 64.16483516483517, + "grad_norm": 13.499189376831055, + "learning_rate": 1.791758241758242e-05, + "loss": 0.3188, + "step": 23356 + }, + { + "epoch": 64.16758241758242, + "grad_norm": 2.5434985160827637, + "learning_rate": 1.7916208791208792e-05, + "loss": 0.0313, + "step": 23357 + }, + { + "epoch": 64.17032967032966, + "grad_norm": 13.670241355895996, + "learning_rate": 1.791483516483517e-05, + "loss": 0.1517, + "step": 23358 + }, + { + "epoch": 64.17307692307692, + "grad_norm": 22.973079681396484, + "learning_rate": 1.791346153846154e-05, + "loss": 0.6103, + "step": 23359 + }, + { + "epoch": 64.17582417582418, + "grad_norm": 14.765026092529297, + "learning_rate": 1.7912087912087912e-05, + "loss": 0.4142, + "step": 23360 + }, + { + "epoch": 64.17857142857143, + "grad_norm": 10.178622245788574, + "learning_rate": 1.7910714285714285e-05, + "loss": 0.2127, + "step": 23361 + }, + { + "epoch": 64.18131868131869, + "grad_norm": 17.62300682067871, + "learning_rate": 1.790934065934066e-05, + "loss": 0.5026, + "step": 23362 + }, + { + "epoch": 64.18406593406593, + "grad_norm": 8.791454315185547, + "learning_rate": 1.7907967032967036e-05, + "loss": 0.1647, + "step": 23363 + }, + { + "epoch": 64.18681318681318, + "grad_norm": 13.18008804321289, + "learning_rate": 1.790659340659341e-05, + "loss": 0.2741, + "step": 23364 + }, + { + "epoch": 64.18956043956044, + "grad_norm": 19.92270851135254, + "learning_rate": 1.7905219780219782e-05, + "loss": 0.57, + "step": 23365 + }, + { + "epoch": 64.1923076923077, + "grad_norm": 4.20648193359375, + "learning_rate": 1.7903846153846156e-05, + "loss": 0.0699, + "step": 23366 + }, + { + "epoch": 64.19505494505495, + "grad_norm": 17.16298484802246, + "learning_rate": 1.7902472527472526e-05, + "loss": 0.4075, + "step": 23367 + }, + { + "epoch": 64.1978021978022, + "grad_norm": 14.69089412689209, + "learning_rate": 1.7901098901098903e-05, + "loss": 0.3278, + "step": 23368 + }, + { + "epoch": 64.20054945054945, + "grad_norm": 12.335888862609863, + "learning_rate": 1.7899725274725276e-05, + "loss": 0.1711, + "step": 23369 + }, + { + "epoch": 64.2032967032967, + "grad_norm": 12.923556327819824, + "learning_rate": 1.789835164835165e-05, + "loss": 0.3102, + "step": 23370 + }, + { + "epoch": 64.20604395604396, + "grad_norm": 9.412564277648926, + "learning_rate": 1.7896978021978023e-05, + "loss": 0.1816, + "step": 23371 + }, + { + "epoch": 64.20879120879121, + "grad_norm": 9.851888656616211, + "learning_rate": 1.7895604395604396e-05, + "loss": 0.1226, + "step": 23372 + }, + { + "epoch": 64.21153846153847, + "grad_norm": 13.076805114746094, + "learning_rate": 1.7894230769230773e-05, + "loss": 0.1966, + "step": 23373 + }, + { + "epoch": 64.21428571428571, + "grad_norm": 4.939399242401123, + "learning_rate": 1.7892857142857143e-05, + "loss": 0.0842, + "step": 23374 + }, + { + "epoch": 64.21703296703296, + "grad_norm": 20.052515029907227, + "learning_rate": 1.7891483516483517e-05, + "loss": 0.2942, + "step": 23375 + }, + { + "epoch": 64.21978021978022, + "grad_norm": 7.8147993087768555, + "learning_rate": 1.789010989010989e-05, + "loss": 0.105, + "step": 23376 + }, + { + "epoch": 64.22252747252747, + "grad_norm": 13.29527759552002, + "learning_rate": 1.7888736263736263e-05, + "loss": 0.2742, + "step": 23377 + }, + { + "epoch": 64.22527472527473, + "grad_norm": 4.732611179351807, + "learning_rate": 1.7887362637362637e-05, + "loss": 0.061, + "step": 23378 + }, + { + "epoch": 64.22802197802197, + "grad_norm": 4.944165229797363, + "learning_rate": 1.7885989010989014e-05, + "loss": 0.0847, + "step": 23379 + }, + { + "epoch": 64.23076923076923, + "grad_norm": 10.017754554748535, + "learning_rate": 1.7884615384615387e-05, + "loss": 0.115, + "step": 23380 + }, + { + "epoch": 64.23351648351648, + "grad_norm": 6.0072712898254395, + "learning_rate": 1.788324175824176e-05, + "loss": 0.099, + "step": 23381 + }, + { + "epoch": 64.23626373626374, + "grad_norm": 14.520885467529297, + "learning_rate": 1.788186813186813e-05, + "loss": 0.1994, + "step": 23382 + }, + { + "epoch": 64.23901098901099, + "grad_norm": 10.892129898071289, + "learning_rate": 1.7880494505494504e-05, + "loss": 0.2343, + "step": 23383 + }, + { + "epoch": 64.24175824175825, + "grad_norm": 7.890566349029541, + "learning_rate": 1.787912087912088e-05, + "loss": 0.117, + "step": 23384 + }, + { + "epoch": 64.24450549450549, + "grad_norm": 8.066164016723633, + "learning_rate": 1.7877747252747254e-05, + "loss": 0.1893, + "step": 23385 + }, + { + "epoch": 64.24725274725274, + "grad_norm": 3.5853464603424072, + "learning_rate": 1.7876373626373627e-05, + "loss": 0.0482, + "step": 23386 + }, + { + "epoch": 64.25, + "grad_norm": 11.097978591918945, + "learning_rate": 1.7875e-05, + "loss": 0.2946, + "step": 23387 + }, + { + "epoch": 64.25274725274726, + "grad_norm": 8.278438568115234, + "learning_rate": 1.7873626373626374e-05, + "loss": 0.103, + "step": 23388 + }, + { + "epoch": 64.25549450549451, + "grad_norm": 26.087841033935547, + "learning_rate": 1.7872252747252748e-05, + "loss": 0.6387, + "step": 23389 + }, + { + "epoch": 64.25824175824175, + "grad_norm": 7.262818336486816, + "learning_rate": 1.787087912087912e-05, + "loss": 0.1527, + "step": 23390 + }, + { + "epoch": 64.26098901098901, + "grad_norm": 15.305314064025879, + "learning_rate": 1.7869505494505494e-05, + "loss": 0.3009, + "step": 23391 + }, + { + "epoch": 64.26373626373626, + "grad_norm": 3.832613468170166, + "learning_rate": 1.7868131868131868e-05, + "loss": 0.0756, + "step": 23392 + }, + { + "epoch": 64.26648351648352, + "grad_norm": 5.974184036254883, + "learning_rate": 1.786675824175824e-05, + "loss": 0.061, + "step": 23393 + }, + { + "epoch": 64.26923076923077, + "grad_norm": 6.444697380065918, + "learning_rate": 1.7865384615384618e-05, + "loss": 0.0871, + "step": 23394 + }, + { + "epoch": 64.27197802197803, + "grad_norm": 11.790919303894043, + "learning_rate": 1.786401098901099e-05, + "loss": 0.1744, + "step": 23395 + }, + { + "epoch": 64.27472527472527, + "grad_norm": 4.905649662017822, + "learning_rate": 1.7862637362637365e-05, + "loss": 0.109, + "step": 23396 + }, + { + "epoch": 64.27747252747253, + "grad_norm": 13.23562240600586, + "learning_rate": 1.7861263736263735e-05, + "loss": 0.3499, + "step": 23397 + }, + { + "epoch": 64.28021978021978, + "grad_norm": 2.24822735786438, + "learning_rate": 1.7859890109890108e-05, + "loss": 0.0246, + "step": 23398 + }, + { + "epoch": 64.28296703296704, + "grad_norm": 8.601600646972656, + "learning_rate": 1.7858516483516485e-05, + "loss": 0.1303, + "step": 23399 + }, + { + "epoch": 64.28571428571429, + "grad_norm": 18.605987548828125, + "learning_rate": 1.785714285714286e-05, + "loss": 0.3744, + "step": 23400 + }, + { + "epoch": 64.28846153846153, + "grad_norm": 5.771212577819824, + "learning_rate": 1.7855769230769232e-05, + "loss": 0.1111, + "step": 23401 + }, + { + "epoch": 64.29120879120879, + "grad_norm": 18.62578582763672, + "learning_rate": 1.7854395604395605e-05, + "loss": 0.2605, + "step": 23402 + }, + { + "epoch": 64.29395604395604, + "grad_norm": 15.233307838439941, + "learning_rate": 1.785302197802198e-05, + "loss": 0.5408, + "step": 23403 + }, + { + "epoch": 64.2967032967033, + "grad_norm": 16.922096252441406, + "learning_rate": 1.7851648351648352e-05, + "loss": 0.6228, + "step": 23404 + }, + { + "epoch": 64.29945054945055, + "grad_norm": 6.378253936767578, + "learning_rate": 1.7850274725274726e-05, + "loss": 0.1328, + "step": 23405 + }, + { + "epoch": 64.3021978021978, + "grad_norm": 16.175050735473633, + "learning_rate": 1.78489010989011e-05, + "loss": 0.2429, + "step": 23406 + }, + { + "epoch": 64.30494505494505, + "grad_norm": 8.183453559875488, + "learning_rate": 1.7847527472527472e-05, + "loss": 0.1107, + "step": 23407 + }, + { + "epoch": 64.3076923076923, + "grad_norm": 11.153139114379883, + "learning_rate": 1.7846153846153846e-05, + "loss": 0.187, + "step": 23408 + }, + { + "epoch": 64.31043956043956, + "grad_norm": 10.174643516540527, + "learning_rate": 1.7844780219780223e-05, + "loss": 0.0738, + "step": 23409 + }, + { + "epoch": 64.31318681318682, + "grad_norm": 12.924893379211426, + "learning_rate": 1.7843406593406596e-05, + "loss": 0.1633, + "step": 23410 + }, + { + "epoch": 64.31593406593407, + "grad_norm": 8.830008506774902, + "learning_rate": 1.784203296703297e-05, + "loss": 0.1194, + "step": 23411 + }, + { + "epoch": 64.31868131868131, + "grad_norm": 6.555688858032227, + "learning_rate": 1.784065934065934e-05, + "loss": 0.1246, + "step": 23412 + }, + { + "epoch": 64.32142857142857, + "grad_norm": 19.35861587524414, + "learning_rate": 1.7839285714285713e-05, + "loss": 0.5327, + "step": 23413 + }, + { + "epoch": 64.32417582417582, + "grad_norm": 15.427850723266602, + "learning_rate": 1.783791208791209e-05, + "loss": 0.2392, + "step": 23414 + }, + { + "epoch": 64.32692307692308, + "grad_norm": 3.582919120788574, + "learning_rate": 1.7836538461538463e-05, + "loss": 0.0368, + "step": 23415 + }, + { + "epoch": 64.32967032967034, + "grad_norm": 10.942133903503418, + "learning_rate": 1.7835164835164836e-05, + "loss": 0.1155, + "step": 23416 + }, + { + "epoch": 64.33241758241758, + "grad_norm": 8.494149208068848, + "learning_rate": 1.783379120879121e-05, + "loss": 0.1638, + "step": 23417 + }, + { + "epoch": 64.33516483516483, + "grad_norm": 15.201897621154785, + "learning_rate": 1.7832417582417583e-05, + "loss": 0.3286, + "step": 23418 + }, + { + "epoch": 64.33791208791209, + "grad_norm": 14.770434379577637, + "learning_rate": 1.7831043956043957e-05, + "loss": 0.1594, + "step": 23419 + }, + { + "epoch": 64.34065934065934, + "grad_norm": 19.388153076171875, + "learning_rate": 1.782967032967033e-05, + "loss": 0.4323, + "step": 23420 + }, + { + "epoch": 64.3434065934066, + "grad_norm": 14.80733871459961, + "learning_rate": 1.7828296703296703e-05, + "loss": 0.2421, + "step": 23421 + }, + { + "epoch": 64.34615384615384, + "grad_norm": 12.707820892333984, + "learning_rate": 1.7826923076923077e-05, + "loss": 0.3689, + "step": 23422 + }, + { + "epoch": 64.3489010989011, + "grad_norm": 17.326793670654297, + "learning_rate": 1.782554945054945e-05, + "loss": 0.2523, + "step": 23423 + }, + { + "epoch": 64.35164835164835, + "grad_norm": 11.727323532104492, + "learning_rate": 1.7824175824175827e-05, + "loss": 0.0995, + "step": 23424 + }, + { + "epoch": 64.3543956043956, + "grad_norm": 2.100952625274658, + "learning_rate": 1.78228021978022e-05, + "loss": 0.0324, + "step": 23425 + }, + { + "epoch": 64.35714285714286, + "grad_norm": 8.070841789245605, + "learning_rate": 1.7821428571428574e-05, + "loss": 0.2471, + "step": 23426 + }, + { + "epoch": 64.35989010989012, + "grad_norm": 19.917028427124023, + "learning_rate": 1.7820054945054944e-05, + "loss": 0.5815, + "step": 23427 + }, + { + "epoch": 64.36263736263736, + "grad_norm": 2.9956846237182617, + "learning_rate": 1.7818681318681317e-05, + "loss": 0.0393, + "step": 23428 + }, + { + "epoch": 64.36538461538461, + "grad_norm": 18.050966262817383, + "learning_rate": 1.7817307692307694e-05, + "loss": 0.5735, + "step": 23429 + }, + { + "epoch": 64.36813186813187, + "grad_norm": 18.141117095947266, + "learning_rate": 1.7815934065934067e-05, + "loss": 0.1949, + "step": 23430 + }, + { + "epoch": 64.37087912087912, + "grad_norm": 15.710249900817871, + "learning_rate": 1.781456043956044e-05, + "loss": 0.4847, + "step": 23431 + }, + { + "epoch": 64.37362637362638, + "grad_norm": 9.697161674499512, + "learning_rate": 1.7813186813186814e-05, + "loss": 0.1088, + "step": 23432 + }, + { + "epoch": 64.37637362637362, + "grad_norm": 6.366300582885742, + "learning_rate": 1.7811813186813188e-05, + "loss": 0.1042, + "step": 23433 + }, + { + "epoch": 64.37912087912088, + "grad_norm": 25.247407913208008, + "learning_rate": 1.781043956043956e-05, + "loss": 0.7063, + "step": 23434 + }, + { + "epoch": 64.38186813186813, + "grad_norm": 2.3921234607696533, + "learning_rate": 1.7809065934065934e-05, + "loss": 0.0342, + "step": 23435 + }, + { + "epoch": 64.38461538461539, + "grad_norm": 15.008213996887207, + "learning_rate": 1.7807692307692308e-05, + "loss": 0.3171, + "step": 23436 + }, + { + "epoch": 64.38736263736264, + "grad_norm": 13.284135818481445, + "learning_rate": 1.780631868131868e-05, + "loss": 0.1887, + "step": 23437 + }, + { + "epoch": 64.39010989010988, + "grad_norm": 20.476625442504883, + "learning_rate": 1.7804945054945055e-05, + "loss": 0.4475, + "step": 23438 + }, + { + "epoch": 64.39285714285714, + "grad_norm": 13.527026176452637, + "learning_rate": 1.780357142857143e-05, + "loss": 0.2693, + "step": 23439 + }, + { + "epoch": 64.3956043956044, + "grad_norm": 10.081581115722656, + "learning_rate": 1.7802197802197805e-05, + "loss": 0.2899, + "step": 23440 + }, + { + "epoch": 64.39835164835165, + "grad_norm": 6.0485358238220215, + "learning_rate": 1.780082417582418e-05, + "loss": 0.1536, + "step": 23441 + }, + { + "epoch": 64.4010989010989, + "grad_norm": 7.830409526824951, + "learning_rate": 1.779945054945055e-05, + "loss": 0.071, + "step": 23442 + }, + { + "epoch": 64.40384615384616, + "grad_norm": 8.036050796508789, + "learning_rate": 1.7798076923076922e-05, + "loss": 0.131, + "step": 23443 + }, + { + "epoch": 64.4065934065934, + "grad_norm": 9.289682388305664, + "learning_rate": 1.77967032967033e-05, + "loss": 0.1551, + "step": 23444 + }, + { + "epoch": 64.40934065934066, + "grad_norm": 13.86426067352295, + "learning_rate": 1.7795329670329672e-05, + "loss": 0.1275, + "step": 23445 + }, + { + "epoch": 64.41208791208791, + "grad_norm": 19.23280906677246, + "learning_rate": 1.7793956043956045e-05, + "loss": 0.4203, + "step": 23446 + }, + { + "epoch": 64.41483516483517, + "grad_norm": 17.25568389892578, + "learning_rate": 1.779258241758242e-05, + "loss": 0.265, + "step": 23447 + }, + { + "epoch": 64.41758241758242, + "grad_norm": 13.468606948852539, + "learning_rate": 1.7791208791208792e-05, + "loss": 0.1648, + "step": 23448 + }, + { + "epoch": 64.42032967032966, + "grad_norm": 22.484363555908203, + "learning_rate": 1.7789835164835166e-05, + "loss": 0.5279, + "step": 23449 + }, + { + "epoch": 64.42307692307692, + "grad_norm": 9.017675399780273, + "learning_rate": 1.778846153846154e-05, + "loss": 0.0933, + "step": 23450 + }, + { + "epoch": 64.42582417582418, + "grad_norm": 10.849469184875488, + "learning_rate": 1.7787087912087912e-05, + "loss": 0.1377, + "step": 23451 + }, + { + "epoch": 64.42857142857143, + "grad_norm": 5.628569602966309, + "learning_rate": 1.7785714285714286e-05, + "loss": 0.0938, + "step": 23452 + }, + { + "epoch": 64.43131868131869, + "grad_norm": 18.36321258544922, + "learning_rate": 1.778434065934066e-05, + "loss": 0.3705, + "step": 23453 + }, + { + "epoch": 64.43406593406593, + "grad_norm": 21.411624908447266, + "learning_rate": 1.7782967032967036e-05, + "loss": 0.3821, + "step": 23454 + }, + { + "epoch": 64.43681318681318, + "grad_norm": 15.60371208190918, + "learning_rate": 1.778159340659341e-05, + "loss": 0.2169, + "step": 23455 + }, + { + "epoch": 64.43956043956044, + "grad_norm": 26.954771041870117, + "learning_rate": 1.7780219780219783e-05, + "loss": 0.4755, + "step": 23456 + }, + { + "epoch": 64.4423076923077, + "grad_norm": 14.316423416137695, + "learning_rate": 1.7778846153846153e-05, + "loss": 0.2169, + "step": 23457 + }, + { + "epoch": 64.44505494505495, + "grad_norm": 41.45683670043945, + "learning_rate": 1.7777472527472526e-05, + "loss": 0.7969, + "step": 23458 + }, + { + "epoch": 64.4478021978022, + "grad_norm": 11.2691011428833, + "learning_rate": 1.7776098901098903e-05, + "loss": 0.2622, + "step": 23459 + }, + { + "epoch": 64.45054945054945, + "grad_norm": 11.737895011901855, + "learning_rate": 1.7774725274725276e-05, + "loss": 0.2267, + "step": 23460 + }, + { + "epoch": 64.4532967032967, + "grad_norm": 21.711883544921875, + "learning_rate": 1.777335164835165e-05, + "loss": 0.2533, + "step": 23461 + }, + { + "epoch": 64.45604395604396, + "grad_norm": 18.52580451965332, + "learning_rate": 1.7771978021978023e-05, + "loss": 0.2458, + "step": 23462 + }, + { + "epoch": 64.45879120879121, + "grad_norm": 19.66469955444336, + "learning_rate": 1.7770604395604397e-05, + "loss": 0.3443, + "step": 23463 + }, + { + "epoch": 64.46153846153847, + "grad_norm": 7.614482402801514, + "learning_rate": 1.776923076923077e-05, + "loss": 0.1774, + "step": 23464 + }, + { + "epoch": 64.46428571428571, + "grad_norm": 4.893793106079102, + "learning_rate": 1.7767857142857143e-05, + "loss": 0.0573, + "step": 23465 + }, + { + "epoch": 64.46703296703296, + "grad_norm": 6.83840799331665, + "learning_rate": 1.7766483516483517e-05, + "loss": 0.0988, + "step": 23466 + }, + { + "epoch": 64.46978021978022, + "grad_norm": 0.8475670218467712, + "learning_rate": 1.776510989010989e-05, + "loss": 0.0129, + "step": 23467 + }, + { + "epoch": 64.47252747252747, + "grad_norm": 13.431802749633789, + "learning_rate": 1.7763736263736264e-05, + "loss": 0.2914, + "step": 23468 + }, + { + "epoch": 64.47527472527473, + "grad_norm": 28.88371467590332, + "learning_rate": 1.776236263736264e-05, + "loss": 0.8121, + "step": 23469 + }, + { + "epoch": 64.47802197802197, + "grad_norm": 14.503761291503906, + "learning_rate": 1.7760989010989014e-05, + "loss": 0.2623, + "step": 23470 + }, + { + "epoch": 64.48076923076923, + "grad_norm": 14.423046112060547, + "learning_rate": 1.7759615384615387e-05, + "loss": 0.5197, + "step": 23471 + }, + { + "epoch": 64.48351648351648, + "grad_norm": 15.034867286682129, + "learning_rate": 1.7758241758241757e-05, + "loss": 0.2093, + "step": 23472 + }, + { + "epoch": 64.48626373626374, + "grad_norm": 5.832544326782227, + "learning_rate": 1.775686813186813e-05, + "loss": 0.0575, + "step": 23473 + }, + { + "epoch": 64.48901098901099, + "grad_norm": 6.11806058883667, + "learning_rate": 1.7755494505494508e-05, + "loss": 0.135, + "step": 23474 + }, + { + "epoch": 64.49175824175825, + "grad_norm": 41.16438293457031, + "learning_rate": 1.775412087912088e-05, + "loss": 0.4354, + "step": 23475 + }, + { + "epoch": 64.49450549450549, + "grad_norm": 12.11240291595459, + "learning_rate": 1.7752747252747254e-05, + "loss": 0.3121, + "step": 23476 + }, + { + "epoch": 64.49725274725274, + "grad_norm": 3.712984323501587, + "learning_rate": 1.7751373626373628e-05, + "loss": 0.0541, + "step": 23477 + }, + { + "epoch": 64.5, + "grad_norm": 12.043506622314453, + "learning_rate": 1.775e-05, + "loss": 0.2007, + "step": 23478 + }, + { + "epoch": 64.50274725274726, + "grad_norm": 6.186254978179932, + "learning_rate": 1.7748626373626375e-05, + "loss": 0.0623, + "step": 23479 + }, + { + "epoch": 64.50549450549451, + "grad_norm": 12.102937698364258, + "learning_rate": 1.7747252747252748e-05, + "loss": 0.3131, + "step": 23480 + }, + { + "epoch": 64.50824175824175, + "grad_norm": 16.771657943725586, + "learning_rate": 1.774587912087912e-05, + "loss": 0.2609, + "step": 23481 + }, + { + "epoch": 64.51098901098901, + "grad_norm": 13.9378023147583, + "learning_rate": 1.7744505494505495e-05, + "loss": 0.5402, + "step": 23482 + }, + { + "epoch": 64.51373626373626, + "grad_norm": 5.353035926818848, + "learning_rate": 1.7743131868131868e-05, + "loss": 0.152, + "step": 23483 + }, + { + "epoch": 64.51648351648352, + "grad_norm": 18.14777374267578, + "learning_rate": 1.7741758241758245e-05, + "loss": 0.4115, + "step": 23484 + }, + { + "epoch": 64.51923076923077, + "grad_norm": 13.46679401397705, + "learning_rate": 1.774038461538462e-05, + "loss": 0.2654, + "step": 23485 + }, + { + "epoch": 64.52197802197803, + "grad_norm": 8.061574935913086, + "learning_rate": 1.7739010989010992e-05, + "loss": 0.1231, + "step": 23486 + }, + { + "epoch": 64.52472527472527, + "grad_norm": 8.274675369262695, + "learning_rate": 1.7737637362637362e-05, + "loss": 0.1736, + "step": 23487 + }, + { + "epoch": 64.52747252747253, + "grad_norm": 17.557708740234375, + "learning_rate": 1.7736263736263735e-05, + "loss": 0.2729, + "step": 23488 + }, + { + "epoch": 64.53021978021978, + "grad_norm": 30.159225463867188, + "learning_rate": 1.7734890109890112e-05, + "loss": 0.902, + "step": 23489 + }, + { + "epoch": 64.53296703296704, + "grad_norm": 6.840950012207031, + "learning_rate": 1.7733516483516485e-05, + "loss": 0.1056, + "step": 23490 + }, + { + "epoch": 64.53571428571429, + "grad_norm": 14.133988380432129, + "learning_rate": 1.773214285714286e-05, + "loss": 0.425, + "step": 23491 + }, + { + "epoch": 64.53846153846153, + "grad_norm": 8.792460441589355, + "learning_rate": 1.7730769230769232e-05, + "loss": 0.2604, + "step": 23492 + }, + { + "epoch": 64.54120879120879, + "grad_norm": 3.7172887325286865, + "learning_rate": 1.7729395604395606e-05, + "loss": 0.0394, + "step": 23493 + }, + { + "epoch": 64.54395604395604, + "grad_norm": 17.27635955810547, + "learning_rate": 1.772802197802198e-05, + "loss": 0.2588, + "step": 23494 + }, + { + "epoch": 64.5467032967033, + "grad_norm": 17.793277740478516, + "learning_rate": 1.7726648351648352e-05, + "loss": 0.3062, + "step": 23495 + }, + { + "epoch": 64.54945054945055, + "grad_norm": 7.023172378540039, + "learning_rate": 1.7725274725274726e-05, + "loss": 0.1509, + "step": 23496 + }, + { + "epoch": 64.5521978021978, + "grad_norm": 12.029247283935547, + "learning_rate": 1.77239010989011e-05, + "loss": 0.2292, + "step": 23497 + }, + { + "epoch": 64.55494505494505, + "grad_norm": 10.667143821716309, + "learning_rate": 1.7722527472527473e-05, + "loss": 0.1415, + "step": 23498 + }, + { + "epoch": 64.5576923076923, + "grad_norm": 23.63097381591797, + "learning_rate": 1.7721153846153846e-05, + "loss": 0.4444, + "step": 23499 + }, + { + "epoch": 64.56043956043956, + "grad_norm": 12.352802276611328, + "learning_rate": 1.7719780219780223e-05, + "loss": 0.2557, + "step": 23500 + }, + { + "epoch": 64.56318681318682, + "grad_norm": 21.496450424194336, + "learning_rate": 1.7718406593406593e-05, + "loss": 0.3295, + "step": 23501 + }, + { + "epoch": 64.56593406593407, + "grad_norm": 10.25273323059082, + "learning_rate": 1.7717032967032966e-05, + "loss": 0.4497, + "step": 23502 + }, + { + "epoch": 64.56868131868131, + "grad_norm": 19.009984970092773, + "learning_rate": 1.771565934065934e-05, + "loss": 0.4603, + "step": 23503 + }, + { + "epoch": 64.57142857142857, + "grad_norm": 3.0328938961029053, + "learning_rate": 1.7714285714285713e-05, + "loss": 0.0392, + "step": 23504 + }, + { + "epoch": 64.57417582417582, + "grad_norm": 15.45313835144043, + "learning_rate": 1.771291208791209e-05, + "loss": 0.4364, + "step": 23505 + }, + { + "epoch": 64.57692307692308, + "grad_norm": 1.7453677654266357, + "learning_rate": 1.7711538461538463e-05, + "loss": 0.0213, + "step": 23506 + }, + { + "epoch": 64.57967032967034, + "grad_norm": 13.914204597473145, + "learning_rate": 1.7710164835164837e-05, + "loss": 0.2156, + "step": 23507 + }, + { + "epoch": 64.58241758241758, + "grad_norm": 8.673354148864746, + "learning_rate": 1.770879120879121e-05, + "loss": 0.2498, + "step": 23508 + }, + { + "epoch": 64.58516483516483, + "grad_norm": 10.968255043029785, + "learning_rate": 1.7707417582417584e-05, + "loss": 0.152, + "step": 23509 + }, + { + "epoch": 64.58791208791209, + "grad_norm": 19.818199157714844, + "learning_rate": 1.7706043956043957e-05, + "loss": 0.3224, + "step": 23510 + }, + { + "epoch": 64.59065934065934, + "grad_norm": 16.898298263549805, + "learning_rate": 1.770467032967033e-05, + "loss": 0.288, + "step": 23511 + }, + { + "epoch": 64.5934065934066, + "grad_norm": 13.500039100646973, + "learning_rate": 1.7703296703296704e-05, + "loss": 0.2731, + "step": 23512 + }, + { + "epoch": 64.59615384615384, + "grad_norm": 10.205612182617188, + "learning_rate": 1.7701923076923077e-05, + "loss": 0.1643, + "step": 23513 + }, + { + "epoch": 64.5989010989011, + "grad_norm": 10.092964172363281, + "learning_rate": 1.770054945054945e-05, + "loss": 0.2173, + "step": 23514 + }, + { + "epoch": 64.60164835164835, + "grad_norm": 19.73973274230957, + "learning_rate": 1.7699175824175827e-05, + "loss": 0.562, + "step": 23515 + }, + { + "epoch": 64.6043956043956, + "grad_norm": 14.059968948364258, + "learning_rate": 1.7697802197802197e-05, + "loss": 0.4455, + "step": 23516 + }, + { + "epoch": 64.60714285714286, + "grad_norm": 7.158220291137695, + "learning_rate": 1.769642857142857e-05, + "loss": 0.1381, + "step": 23517 + }, + { + "epoch": 64.60989010989012, + "grad_norm": 2.8297958374023438, + "learning_rate": 1.7695054945054944e-05, + "loss": 0.0425, + "step": 23518 + }, + { + "epoch": 64.61263736263736, + "grad_norm": 12.984562873840332, + "learning_rate": 1.7693681318681318e-05, + "loss": 0.2262, + "step": 23519 + }, + { + "epoch": 64.61538461538461, + "grad_norm": 17.050704956054688, + "learning_rate": 1.7692307692307694e-05, + "loss": 0.4354, + "step": 23520 + }, + { + "epoch": 64.61813186813187, + "grad_norm": 19.99207305908203, + "learning_rate": 1.7690934065934068e-05, + "loss": 0.3992, + "step": 23521 + }, + { + "epoch": 64.62087912087912, + "grad_norm": 11.543672561645508, + "learning_rate": 1.768956043956044e-05, + "loss": 0.175, + "step": 23522 + }, + { + "epoch": 64.62362637362638, + "grad_norm": 11.92178726196289, + "learning_rate": 1.7688186813186815e-05, + "loss": 0.257, + "step": 23523 + }, + { + "epoch": 64.62637362637362, + "grad_norm": 24.651105880737305, + "learning_rate": 1.7686813186813188e-05, + "loss": 0.9202, + "step": 23524 + }, + { + "epoch": 64.62912087912088, + "grad_norm": 15.86009407043457, + "learning_rate": 1.768543956043956e-05, + "loss": 0.2554, + "step": 23525 + }, + { + "epoch": 64.63186813186813, + "grad_norm": 10.234734535217285, + "learning_rate": 1.7684065934065935e-05, + "loss": 0.2535, + "step": 23526 + }, + { + "epoch": 64.63461538461539, + "grad_norm": 16.239501953125, + "learning_rate": 1.7682692307692308e-05, + "loss": 0.2666, + "step": 23527 + }, + { + "epoch": 64.63736263736264, + "grad_norm": 7.9546799659729, + "learning_rate": 1.768131868131868e-05, + "loss": 0.1278, + "step": 23528 + }, + { + "epoch": 64.64010989010988, + "grad_norm": 11.36001205444336, + "learning_rate": 1.7679945054945055e-05, + "loss": 0.2352, + "step": 23529 + }, + { + "epoch": 64.64285714285714, + "grad_norm": 24.147951126098633, + "learning_rate": 1.7678571428571432e-05, + "loss": 0.6624, + "step": 23530 + }, + { + "epoch": 64.6456043956044, + "grad_norm": 15.054434776306152, + "learning_rate": 1.7677197802197802e-05, + "loss": 0.4827, + "step": 23531 + }, + { + "epoch": 64.64835164835165, + "grad_norm": 20.840478897094727, + "learning_rate": 1.7675824175824175e-05, + "loss": 0.3296, + "step": 23532 + }, + { + "epoch": 64.6510989010989, + "grad_norm": 3.4609720706939697, + "learning_rate": 1.767445054945055e-05, + "loss": 0.0444, + "step": 23533 + }, + { + "epoch": 64.65384615384616, + "grad_norm": 7.0169806480407715, + "learning_rate": 1.7673076923076922e-05, + "loss": 0.1815, + "step": 23534 + }, + { + "epoch": 64.6565934065934, + "grad_norm": 12.936722755432129, + "learning_rate": 1.76717032967033e-05, + "loss": 0.164, + "step": 23535 + }, + { + "epoch": 64.65934065934066, + "grad_norm": 25.997455596923828, + "learning_rate": 1.7670329670329672e-05, + "loss": 0.8588, + "step": 23536 + }, + { + "epoch": 64.66208791208791, + "grad_norm": 12.644231796264648, + "learning_rate": 1.7668956043956046e-05, + "loss": 0.1016, + "step": 23537 + }, + { + "epoch": 64.66483516483517, + "grad_norm": 9.746201515197754, + "learning_rate": 1.766758241758242e-05, + "loss": 0.13, + "step": 23538 + }, + { + "epoch": 64.66758241758242, + "grad_norm": 5.8594207763671875, + "learning_rate": 1.7666208791208793e-05, + "loss": 0.1214, + "step": 23539 + }, + { + "epoch": 64.67032967032966, + "grad_norm": 12.666788101196289, + "learning_rate": 1.7664835164835166e-05, + "loss": 0.2689, + "step": 23540 + }, + { + "epoch": 64.67307692307692, + "grad_norm": 3.471806287765503, + "learning_rate": 1.766346153846154e-05, + "loss": 0.0467, + "step": 23541 + }, + { + "epoch": 64.67582417582418, + "grad_norm": 9.867177963256836, + "learning_rate": 1.7662087912087913e-05, + "loss": 0.2633, + "step": 23542 + }, + { + "epoch": 64.67857142857143, + "grad_norm": 12.757665634155273, + "learning_rate": 1.7660714285714286e-05, + "loss": 0.1303, + "step": 23543 + }, + { + "epoch": 64.68131868131869, + "grad_norm": 7.464662075042725, + "learning_rate": 1.765934065934066e-05, + "loss": 0.1568, + "step": 23544 + }, + { + "epoch": 64.68406593406593, + "grad_norm": 2.7690727710723877, + "learning_rate": 1.7657967032967036e-05, + "loss": 0.0559, + "step": 23545 + }, + { + "epoch": 64.68681318681318, + "grad_norm": 6.568133354187012, + "learning_rate": 1.7656593406593406e-05, + "loss": 0.0795, + "step": 23546 + }, + { + "epoch": 64.68956043956044, + "grad_norm": 12.107850074768066, + "learning_rate": 1.765521978021978e-05, + "loss": 0.172, + "step": 23547 + }, + { + "epoch": 64.6923076923077, + "grad_norm": 18.107789993286133, + "learning_rate": 1.7653846153846153e-05, + "loss": 0.3593, + "step": 23548 + }, + { + "epoch": 64.69505494505495, + "grad_norm": 18.30695915222168, + "learning_rate": 1.7652472527472527e-05, + "loss": 0.6298, + "step": 23549 + }, + { + "epoch": 64.6978021978022, + "grad_norm": 14.106932640075684, + "learning_rate": 1.7651098901098903e-05, + "loss": 0.3296, + "step": 23550 + }, + { + "epoch": 64.70054945054945, + "grad_norm": 7.520101070404053, + "learning_rate": 1.7649725274725277e-05, + "loss": 0.1143, + "step": 23551 + }, + { + "epoch": 64.7032967032967, + "grad_norm": 20.455806732177734, + "learning_rate": 1.764835164835165e-05, + "loss": 0.4821, + "step": 23552 + }, + { + "epoch": 64.70604395604396, + "grad_norm": 7.869492530822754, + "learning_rate": 1.7646978021978024e-05, + "loss": 0.0758, + "step": 23553 + }, + { + "epoch": 64.70879120879121, + "grad_norm": 20.223369598388672, + "learning_rate": 1.7645604395604397e-05, + "loss": 0.3415, + "step": 23554 + }, + { + "epoch": 64.71153846153847, + "grad_norm": 11.895666122436523, + "learning_rate": 1.764423076923077e-05, + "loss": 0.2066, + "step": 23555 + }, + { + "epoch": 64.71428571428571, + "grad_norm": 2.326946258544922, + "learning_rate": 1.7642857142857144e-05, + "loss": 0.029, + "step": 23556 + }, + { + "epoch": 64.71703296703296, + "grad_norm": 11.393447875976562, + "learning_rate": 1.7641483516483517e-05, + "loss": 0.2777, + "step": 23557 + }, + { + "epoch": 64.71978021978022, + "grad_norm": 15.077470779418945, + "learning_rate": 1.764010989010989e-05, + "loss": 0.4227, + "step": 23558 + }, + { + "epoch": 64.72252747252747, + "grad_norm": 5.39408016204834, + "learning_rate": 1.7638736263736264e-05, + "loss": 0.0516, + "step": 23559 + }, + { + "epoch": 64.72527472527473, + "grad_norm": 7.7299675941467285, + "learning_rate": 1.763736263736264e-05, + "loss": 0.1125, + "step": 23560 + }, + { + "epoch": 64.72802197802197, + "grad_norm": 8.76329231262207, + "learning_rate": 1.763598901098901e-05, + "loss": 0.1428, + "step": 23561 + }, + { + "epoch": 64.73076923076923, + "grad_norm": 25.37152862548828, + "learning_rate": 1.7634615384615384e-05, + "loss": 0.8101, + "step": 23562 + }, + { + "epoch": 64.73351648351648, + "grad_norm": 22.10232925415039, + "learning_rate": 1.7633241758241758e-05, + "loss": 0.5449, + "step": 23563 + }, + { + "epoch": 64.73626373626374, + "grad_norm": 11.75334644317627, + "learning_rate": 1.763186813186813e-05, + "loss": 0.3405, + "step": 23564 + }, + { + "epoch": 64.73901098901099, + "grad_norm": 12.66230297088623, + "learning_rate": 1.7630494505494508e-05, + "loss": 0.3619, + "step": 23565 + }, + { + "epoch": 64.74175824175825, + "grad_norm": 6.388112545013428, + "learning_rate": 1.762912087912088e-05, + "loss": 0.1068, + "step": 23566 + }, + { + "epoch": 64.74450549450549, + "grad_norm": 16.032886505126953, + "learning_rate": 1.7627747252747255e-05, + "loss": 0.3241, + "step": 23567 + }, + { + "epoch": 64.74725274725274, + "grad_norm": 15.505097389221191, + "learning_rate": 1.7626373626373628e-05, + "loss": 0.1507, + "step": 23568 + }, + { + "epoch": 64.75, + "grad_norm": 5.644867420196533, + "learning_rate": 1.7625e-05, + "loss": 0.079, + "step": 23569 + }, + { + "epoch": 64.75274725274726, + "grad_norm": 13.11474895477295, + "learning_rate": 1.7623626373626375e-05, + "loss": 0.3094, + "step": 23570 + }, + { + "epoch": 64.75549450549451, + "grad_norm": 6.822292327880859, + "learning_rate": 1.762225274725275e-05, + "loss": 0.1258, + "step": 23571 + }, + { + "epoch": 64.75824175824175, + "grad_norm": 9.627447128295898, + "learning_rate": 1.7620879120879122e-05, + "loss": 0.2055, + "step": 23572 + }, + { + "epoch": 64.76098901098901, + "grad_norm": 3.652940034866333, + "learning_rate": 1.7619505494505495e-05, + "loss": 0.0537, + "step": 23573 + }, + { + "epoch": 64.76373626373626, + "grad_norm": 16.277812957763672, + "learning_rate": 1.761813186813187e-05, + "loss": 0.4288, + "step": 23574 + }, + { + "epoch": 64.76648351648352, + "grad_norm": 4.705752849578857, + "learning_rate": 1.7616758241758245e-05, + "loss": 0.0871, + "step": 23575 + }, + { + "epoch": 64.76923076923077, + "grad_norm": 9.133275032043457, + "learning_rate": 1.7615384615384615e-05, + "loss": 0.0792, + "step": 23576 + }, + { + "epoch": 64.77197802197803, + "grad_norm": 18.343914031982422, + "learning_rate": 1.761401098901099e-05, + "loss": 0.3516, + "step": 23577 + }, + { + "epoch": 64.77472527472527, + "grad_norm": 11.365285873413086, + "learning_rate": 1.7612637362637362e-05, + "loss": 0.2809, + "step": 23578 + }, + { + "epoch": 64.77747252747253, + "grad_norm": 18.832839965820312, + "learning_rate": 1.7611263736263736e-05, + "loss": 0.2324, + "step": 23579 + }, + { + "epoch": 64.78021978021978, + "grad_norm": 11.342218399047852, + "learning_rate": 1.7609890109890112e-05, + "loss": 0.2209, + "step": 23580 + }, + { + "epoch": 64.78296703296704, + "grad_norm": 16.277273178100586, + "learning_rate": 1.7608516483516486e-05, + "loss": 0.5463, + "step": 23581 + }, + { + "epoch": 64.78571428571429, + "grad_norm": 1.354347586631775, + "learning_rate": 1.760714285714286e-05, + "loss": 0.0152, + "step": 23582 + }, + { + "epoch": 64.78846153846153, + "grad_norm": 7.076754570007324, + "learning_rate": 1.7605769230769233e-05, + "loss": 0.1593, + "step": 23583 + }, + { + "epoch": 64.79120879120879, + "grad_norm": 14.747666358947754, + "learning_rate": 1.7604395604395606e-05, + "loss": 0.2654, + "step": 23584 + }, + { + "epoch": 64.79395604395604, + "grad_norm": 11.814533233642578, + "learning_rate": 1.760302197802198e-05, + "loss": 0.2772, + "step": 23585 + }, + { + "epoch": 64.7967032967033, + "grad_norm": 8.381767272949219, + "learning_rate": 1.7601648351648353e-05, + "loss": 0.1405, + "step": 23586 + }, + { + "epoch": 64.79945054945055, + "grad_norm": 12.87745189666748, + "learning_rate": 1.7600274725274726e-05, + "loss": 0.1974, + "step": 23587 + }, + { + "epoch": 64.8021978021978, + "grad_norm": 10.094128608703613, + "learning_rate": 1.75989010989011e-05, + "loss": 0.2594, + "step": 23588 + }, + { + "epoch": 64.80494505494505, + "grad_norm": 4.054675102233887, + "learning_rate": 1.7597527472527473e-05, + "loss": 0.043, + "step": 23589 + }, + { + "epoch": 64.8076923076923, + "grad_norm": 3.5653584003448486, + "learning_rate": 1.759615384615385e-05, + "loss": 0.0634, + "step": 23590 + }, + { + "epoch": 64.81043956043956, + "grad_norm": 9.827070236206055, + "learning_rate": 1.759478021978022e-05, + "loss": 0.1222, + "step": 23591 + }, + { + "epoch": 64.81318681318682, + "grad_norm": 19.421049118041992, + "learning_rate": 1.7593406593406593e-05, + "loss": 0.5341, + "step": 23592 + }, + { + "epoch": 64.81593406593407, + "grad_norm": 16.791301727294922, + "learning_rate": 1.7592032967032967e-05, + "loss": 0.3033, + "step": 23593 + }, + { + "epoch": 64.81868131868131, + "grad_norm": 5.701168060302734, + "learning_rate": 1.759065934065934e-05, + "loss": 0.0615, + "step": 23594 + }, + { + "epoch": 64.82142857142857, + "grad_norm": 8.279464721679688, + "learning_rate": 1.7589285714285717e-05, + "loss": 0.1314, + "step": 23595 + }, + { + "epoch": 64.82417582417582, + "grad_norm": 11.671632766723633, + "learning_rate": 1.758791208791209e-05, + "loss": 0.1932, + "step": 23596 + }, + { + "epoch": 64.82692307692308, + "grad_norm": 12.763392448425293, + "learning_rate": 1.7586538461538464e-05, + "loss": 0.2163, + "step": 23597 + }, + { + "epoch": 64.82967032967034, + "grad_norm": 2.70023250579834, + "learning_rate": 1.7585164835164837e-05, + "loss": 0.0191, + "step": 23598 + }, + { + "epoch": 64.83241758241758, + "grad_norm": 21.598594665527344, + "learning_rate": 1.7583791208791207e-05, + "loss": 0.4942, + "step": 23599 + }, + { + "epoch": 64.83516483516483, + "grad_norm": 16.42427635192871, + "learning_rate": 1.7582417582417584e-05, + "loss": 0.502, + "step": 23600 + }, + { + "epoch": 64.83791208791209, + "grad_norm": 17.640222549438477, + "learning_rate": 1.7581043956043957e-05, + "loss": 0.3286, + "step": 23601 + }, + { + "epoch": 64.84065934065934, + "grad_norm": 16.666168212890625, + "learning_rate": 1.757967032967033e-05, + "loss": 0.2805, + "step": 23602 + }, + { + "epoch": 64.8434065934066, + "grad_norm": 12.614570617675781, + "learning_rate": 1.7578296703296704e-05, + "loss": 0.2472, + "step": 23603 + }, + { + "epoch": 64.84615384615384, + "grad_norm": 16.523948669433594, + "learning_rate": 1.7576923076923078e-05, + "loss": 0.6364, + "step": 23604 + }, + { + "epoch": 64.8489010989011, + "grad_norm": 5.985018253326416, + "learning_rate": 1.757554945054945e-05, + "loss": 0.0506, + "step": 23605 + }, + { + "epoch": 64.85164835164835, + "grad_norm": 6.271529197692871, + "learning_rate": 1.7574175824175824e-05, + "loss": 0.1297, + "step": 23606 + }, + { + "epoch": 64.8543956043956, + "grad_norm": 10.265519142150879, + "learning_rate": 1.7572802197802198e-05, + "loss": 0.2249, + "step": 23607 + }, + { + "epoch": 64.85714285714286, + "grad_norm": 12.06341552734375, + "learning_rate": 1.757142857142857e-05, + "loss": 0.355, + "step": 23608 + }, + { + "epoch": 64.85989010989012, + "grad_norm": 8.731021881103516, + "learning_rate": 1.7570054945054945e-05, + "loss": 0.175, + "step": 23609 + }, + { + "epoch": 64.86263736263736, + "grad_norm": 8.291970252990723, + "learning_rate": 1.7568681318681318e-05, + "loss": 0.2218, + "step": 23610 + }, + { + "epoch": 64.86538461538461, + "grad_norm": 15.454596519470215, + "learning_rate": 1.7567307692307695e-05, + "loss": 0.3514, + "step": 23611 + }, + { + "epoch": 64.86813186813187, + "grad_norm": 18.969867706298828, + "learning_rate": 1.7565934065934068e-05, + "loss": 0.4214, + "step": 23612 + }, + { + "epoch": 64.87087912087912, + "grad_norm": 6.959582805633545, + "learning_rate": 1.756456043956044e-05, + "loss": 0.1599, + "step": 23613 + }, + { + "epoch": 64.87362637362638, + "grad_norm": 13.159769058227539, + "learning_rate": 1.756318681318681e-05, + "loss": 0.3046, + "step": 23614 + }, + { + "epoch": 64.87637362637362, + "grad_norm": 13.282438278198242, + "learning_rate": 1.7561813186813185e-05, + "loss": 0.1904, + "step": 23615 + }, + { + "epoch": 64.87912087912088, + "grad_norm": 4.248419761657715, + "learning_rate": 1.7560439560439562e-05, + "loss": 0.0402, + "step": 23616 + }, + { + "epoch": 64.88186813186813, + "grad_norm": 26.88921356201172, + "learning_rate": 1.7559065934065935e-05, + "loss": 0.6133, + "step": 23617 + }, + { + "epoch": 64.88461538461539, + "grad_norm": 11.739437103271484, + "learning_rate": 1.755769230769231e-05, + "loss": 0.3576, + "step": 23618 + }, + { + "epoch": 64.88736263736264, + "grad_norm": 16.10499382019043, + "learning_rate": 1.7556318681318682e-05, + "loss": 0.3296, + "step": 23619 + }, + { + "epoch": 64.89010989010988, + "grad_norm": 14.575020790100098, + "learning_rate": 1.7554945054945055e-05, + "loss": 0.2185, + "step": 23620 + }, + { + "epoch": 64.89285714285714, + "grad_norm": 14.951020240783691, + "learning_rate": 1.755357142857143e-05, + "loss": 0.3757, + "step": 23621 + }, + { + "epoch": 64.8956043956044, + "grad_norm": 21.437334060668945, + "learning_rate": 1.7552197802197802e-05, + "loss": 0.4761, + "step": 23622 + }, + { + "epoch": 64.89835164835165, + "grad_norm": 7.174850940704346, + "learning_rate": 1.7550824175824176e-05, + "loss": 0.1271, + "step": 23623 + }, + { + "epoch": 64.9010989010989, + "grad_norm": 19.41828727722168, + "learning_rate": 1.754945054945055e-05, + "loss": 0.3512, + "step": 23624 + }, + { + "epoch": 64.90384615384616, + "grad_norm": 16.917802810668945, + "learning_rate": 1.7548076923076922e-05, + "loss": 0.2752, + "step": 23625 + }, + { + "epoch": 64.9065934065934, + "grad_norm": 5.05758810043335, + "learning_rate": 1.75467032967033e-05, + "loss": 0.0768, + "step": 23626 + }, + { + "epoch": 64.90934065934066, + "grad_norm": 12.175165176391602, + "learning_rate": 1.7545329670329673e-05, + "loss": 0.2795, + "step": 23627 + }, + { + "epoch": 64.91208791208791, + "grad_norm": 3.3415091037750244, + "learning_rate": 1.7543956043956046e-05, + "loss": 0.0327, + "step": 23628 + }, + { + "epoch": 64.91483516483517, + "grad_norm": 25.525083541870117, + "learning_rate": 1.7542582417582416e-05, + "loss": 0.5056, + "step": 23629 + }, + { + "epoch": 64.91758241758242, + "grad_norm": 10.025918006896973, + "learning_rate": 1.754120879120879e-05, + "loss": 0.1972, + "step": 23630 + }, + { + "epoch": 64.92032967032966, + "grad_norm": 0.694859504699707, + "learning_rate": 1.7539835164835166e-05, + "loss": 0.0064, + "step": 23631 + }, + { + "epoch": 64.92307692307692, + "grad_norm": 22.14999771118164, + "learning_rate": 1.753846153846154e-05, + "loss": 0.3082, + "step": 23632 + }, + { + "epoch": 64.92582417582418, + "grad_norm": 8.062689781188965, + "learning_rate": 1.7537087912087913e-05, + "loss": 0.188, + "step": 23633 + }, + { + "epoch": 64.92857142857143, + "grad_norm": 15.41829776763916, + "learning_rate": 1.7535714285714287e-05, + "loss": 0.4858, + "step": 23634 + }, + { + "epoch": 64.93131868131869, + "grad_norm": 11.539565086364746, + "learning_rate": 1.753434065934066e-05, + "loss": 0.2581, + "step": 23635 + }, + { + "epoch": 64.93406593406593, + "grad_norm": 19.775409698486328, + "learning_rate": 1.7532967032967033e-05, + "loss": 0.5347, + "step": 23636 + }, + { + "epoch": 64.93681318681318, + "grad_norm": 1.4698351621627808, + "learning_rate": 1.7531593406593407e-05, + "loss": 0.0142, + "step": 23637 + }, + { + "epoch": 64.93956043956044, + "grad_norm": 4.966139793395996, + "learning_rate": 1.753021978021978e-05, + "loss": 0.0762, + "step": 23638 + }, + { + "epoch": 64.9423076923077, + "grad_norm": 12.069602012634277, + "learning_rate": 1.7528846153846154e-05, + "loss": 0.4131, + "step": 23639 + }, + { + "epoch": 64.94505494505495, + "grad_norm": 9.757140159606934, + "learning_rate": 1.7527472527472527e-05, + "loss": 0.0853, + "step": 23640 + }, + { + "epoch": 64.9478021978022, + "grad_norm": 21.53092384338379, + "learning_rate": 1.7526098901098904e-05, + "loss": 0.4457, + "step": 23641 + }, + { + "epoch": 64.95054945054945, + "grad_norm": 9.445009231567383, + "learning_rate": 1.7524725274725277e-05, + "loss": 0.1632, + "step": 23642 + }, + { + "epoch": 64.9532967032967, + "grad_norm": 9.442599296569824, + "learning_rate": 1.752335164835165e-05, + "loss": 0.2002, + "step": 23643 + }, + { + "epoch": 64.95604395604396, + "grad_norm": 10.32172679901123, + "learning_rate": 1.752197802197802e-05, + "loss": 0.2014, + "step": 23644 + }, + { + "epoch": 64.95879120879121, + "grad_norm": 22.00871467590332, + "learning_rate": 1.7520604395604394e-05, + "loss": 0.4344, + "step": 23645 + }, + { + "epoch": 64.96153846153847, + "grad_norm": 20.834674835205078, + "learning_rate": 1.751923076923077e-05, + "loss": 0.7282, + "step": 23646 + }, + { + "epoch": 64.96428571428571, + "grad_norm": 5.764524459838867, + "learning_rate": 1.7517857142857144e-05, + "loss": 0.0624, + "step": 23647 + }, + { + "epoch": 64.96703296703296, + "grad_norm": 16.180959701538086, + "learning_rate": 1.7516483516483518e-05, + "loss": 0.47, + "step": 23648 + }, + { + "epoch": 64.96978021978022, + "grad_norm": 2.2458746433258057, + "learning_rate": 1.751510989010989e-05, + "loss": 0.0317, + "step": 23649 + }, + { + "epoch": 64.97252747252747, + "grad_norm": 10.793403625488281, + "learning_rate": 1.7513736263736264e-05, + "loss": 0.1561, + "step": 23650 + }, + { + "epoch": 64.97527472527473, + "grad_norm": 8.8829927444458, + "learning_rate": 1.7512362637362638e-05, + "loss": 0.2379, + "step": 23651 + }, + { + "epoch": 64.97802197802197, + "grad_norm": 14.120848655700684, + "learning_rate": 1.751098901098901e-05, + "loss": 0.3576, + "step": 23652 + }, + { + "epoch": 64.98076923076923, + "grad_norm": 24.825122833251953, + "learning_rate": 1.7509615384615385e-05, + "loss": 0.7067, + "step": 23653 + }, + { + "epoch": 64.98351648351648, + "grad_norm": 9.327655792236328, + "learning_rate": 1.7508241758241758e-05, + "loss": 0.2214, + "step": 23654 + }, + { + "epoch": 64.98626373626374, + "grad_norm": 12.382682800292969, + "learning_rate": 1.750686813186813e-05, + "loss": 0.2385, + "step": 23655 + }, + { + "epoch": 64.98901098901099, + "grad_norm": 19.723520278930664, + "learning_rate": 1.7505494505494508e-05, + "loss": 0.4864, + "step": 23656 + }, + { + "epoch": 64.99175824175825, + "grad_norm": 19.208066940307617, + "learning_rate": 1.750412087912088e-05, + "loss": 0.4596, + "step": 23657 + }, + { + "epoch": 64.99450549450549, + "grad_norm": 6.758378028869629, + "learning_rate": 1.7502747252747255e-05, + "loss": 0.1266, + "step": 23658 + }, + { + "epoch": 64.99725274725274, + "grad_norm": 9.53446102142334, + "learning_rate": 1.7501373626373625e-05, + "loss": 0.1195, + "step": 23659 + }, + { + "epoch": 65.0, + "grad_norm": 33.094791412353516, + "learning_rate": 1.75e-05, + "loss": 0.3057, + "step": 23660 + }, + { + "epoch": 65.0, + "eval_accuracy": 0.7479338842975206, + "eval_f1": 0.7517332238920457, + "eval_f1_DuraRiadoRio_64x64": 0.7510204081632653, + "eval_f1_Mole_64x64": 0.7465181058495822, + "eval_f1_Quebrado_64x64": 0.8043478260869565, + "eval_f1_RiadoRio_64x64": 0.6862170087976539, + "eval_f1_RioFechado_64x64": 0.7705627705627706, + "eval_loss": 1.0150043964385986, + "eval_precision": 0.7988207226038219, + "eval_precision_DuraRiadoRio_64x64": 0.9108910891089109, + "eval_precision_Mole_64x64": 0.6232558139534884, + "eval_precision_Quebrado_64x64": 0.8409090909090909, + "eval_precision_RiadoRio_64x64": 0.6190476190476191, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.7473550366526645, + "eval_recall_DuraRiadoRio_64x64": 0.6388888888888888, + "eval_recall_Mole_64x64": 0.9305555555555556, + "eval_recall_Quebrado_64x64": 0.7708333333333334, + "eval_recall_RiadoRio_64x64": 0.7697368421052632, + "eval_recall_RioFechado_64x64": 0.6267605633802817, + "eval_runtime": 1.853, + "eval_samples_per_second": 391.796, + "eval_steps_per_second": 24.825, + "step": 23660 + }, + { + "epoch": 65.00274725274726, + "grad_norm": 16.17399787902832, + "learning_rate": 1.7498626373626375e-05, + "loss": 0.4454, + "step": 23661 + }, + { + "epoch": 65.00549450549451, + "grad_norm": 22.768869400024414, + "learning_rate": 1.749725274725275e-05, + "loss": 0.4994, + "step": 23662 + }, + { + "epoch": 65.00824175824175, + "grad_norm": 8.7938232421875, + "learning_rate": 1.7495879120879122e-05, + "loss": 0.1031, + "step": 23663 + }, + { + "epoch": 65.01098901098901, + "grad_norm": 13.43032169342041, + "learning_rate": 1.7494505494505495e-05, + "loss": 0.3888, + "step": 23664 + }, + { + "epoch": 65.01373626373626, + "grad_norm": 14.263070106506348, + "learning_rate": 1.749313186813187e-05, + "loss": 0.6006, + "step": 23665 + }, + { + "epoch": 65.01648351648352, + "grad_norm": 21.562183380126953, + "learning_rate": 1.7491758241758242e-05, + "loss": 0.4075, + "step": 23666 + }, + { + "epoch": 65.01923076923077, + "grad_norm": 18.946409225463867, + "learning_rate": 1.7490384615384616e-05, + "loss": 0.238, + "step": 23667 + }, + { + "epoch": 65.02197802197803, + "grad_norm": 11.759861946105957, + "learning_rate": 1.748901098901099e-05, + "loss": 0.226, + "step": 23668 + }, + { + "epoch": 65.02472527472527, + "grad_norm": 2.0695877075195312, + "learning_rate": 1.7487637362637363e-05, + "loss": 0.0195, + "step": 23669 + }, + { + "epoch": 65.02747252747253, + "grad_norm": 22.00707244873047, + "learning_rate": 1.7486263736263736e-05, + "loss": 0.8504, + "step": 23670 + }, + { + "epoch": 65.03021978021978, + "grad_norm": 2.6917309761047363, + "learning_rate": 1.7484890109890113e-05, + "loss": 0.0422, + "step": 23671 + }, + { + "epoch": 65.03296703296704, + "grad_norm": 21.145597457885742, + "learning_rate": 1.7483516483516486e-05, + "loss": 0.4299, + "step": 23672 + }, + { + "epoch": 65.03571428571429, + "grad_norm": 6.748446464538574, + "learning_rate": 1.748214285714286e-05, + "loss": 0.1524, + "step": 23673 + }, + { + "epoch": 65.03846153846153, + "grad_norm": 9.750492095947266, + "learning_rate": 1.748076923076923e-05, + "loss": 0.1962, + "step": 23674 + }, + { + "epoch": 65.04120879120879, + "grad_norm": 6.751366138458252, + "learning_rate": 1.7479395604395603e-05, + "loss": 0.1354, + "step": 23675 + }, + { + "epoch": 65.04395604395604, + "grad_norm": 11.284323692321777, + "learning_rate": 1.747802197802198e-05, + "loss": 0.2258, + "step": 23676 + }, + { + "epoch": 65.0467032967033, + "grad_norm": 15.138742446899414, + "learning_rate": 1.7476648351648353e-05, + "loss": 0.3497, + "step": 23677 + }, + { + "epoch": 65.04945054945055, + "grad_norm": 10.999629974365234, + "learning_rate": 1.7475274725274727e-05, + "loss": 0.2677, + "step": 23678 + }, + { + "epoch": 65.0521978021978, + "grad_norm": 6.210311412811279, + "learning_rate": 1.74739010989011e-05, + "loss": 0.1291, + "step": 23679 + }, + { + "epoch": 65.05494505494505, + "grad_norm": 14.58012866973877, + "learning_rate": 1.7472527472527473e-05, + "loss": 0.4028, + "step": 23680 + }, + { + "epoch": 65.0576923076923, + "grad_norm": 5.345721244812012, + "learning_rate": 1.7471153846153847e-05, + "loss": 0.0918, + "step": 23681 + }, + { + "epoch": 65.06043956043956, + "grad_norm": 13.637232780456543, + "learning_rate": 1.746978021978022e-05, + "loss": 0.2017, + "step": 23682 + }, + { + "epoch": 65.06318681318682, + "grad_norm": 13.368350982666016, + "learning_rate": 1.7468406593406594e-05, + "loss": 0.3905, + "step": 23683 + }, + { + "epoch": 65.06593406593407, + "grad_norm": 10.952990531921387, + "learning_rate": 1.7467032967032967e-05, + "loss": 0.1721, + "step": 23684 + }, + { + "epoch": 65.06868131868131, + "grad_norm": 12.411883354187012, + "learning_rate": 1.746565934065934e-05, + "loss": 0.3736, + "step": 23685 + }, + { + "epoch": 65.07142857142857, + "grad_norm": 21.057552337646484, + "learning_rate": 1.7464285714285717e-05, + "loss": 0.5056, + "step": 23686 + }, + { + "epoch": 65.07417582417582, + "grad_norm": 6.381502151489258, + "learning_rate": 1.746291208791209e-05, + "loss": 0.1593, + "step": 23687 + }, + { + "epoch": 65.07692307692308, + "grad_norm": 15.261890411376953, + "learning_rate": 1.7461538461538464e-05, + "loss": 0.513, + "step": 23688 + }, + { + "epoch": 65.07967032967034, + "grad_norm": 7.947957992553711, + "learning_rate": 1.7460164835164834e-05, + "loss": 0.0842, + "step": 23689 + }, + { + "epoch": 65.08241758241758, + "grad_norm": 14.771490097045898, + "learning_rate": 1.7458791208791207e-05, + "loss": 0.1852, + "step": 23690 + }, + { + "epoch": 65.08516483516483, + "grad_norm": 0.9520156979560852, + "learning_rate": 1.7457417582417584e-05, + "loss": 0.0118, + "step": 23691 + }, + { + "epoch": 65.08791208791209, + "grad_norm": 15.407235145568848, + "learning_rate": 1.7456043956043958e-05, + "loss": 0.1815, + "step": 23692 + }, + { + "epoch": 65.09065934065934, + "grad_norm": 23.506439208984375, + "learning_rate": 1.745467032967033e-05, + "loss": 0.6575, + "step": 23693 + }, + { + "epoch": 65.0934065934066, + "grad_norm": 21.27898597717285, + "learning_rate": 1.7453296703296704e-05, + "loss": 0.5832, + "step": 23694 + }, + { + "epoch": 65.09615384615384, + "grad_norm": 34.4932975769043, + "learning_rate": 1.7451923076923078e-05, + "loss": 0.5398, + "step": 23695 + }, + { + "epoch": 65.0989010989011, + "grad_norm": 7.986922264099121, + "learning_rate": 1.745054945054945e-05, + "loss": 0.1548, + "step": 23696 + }, + { + "epoch": 65.10164835164835, + "grad_norm": 16.24139976501465, + "learning_rate": 1.7449175824175825e-05, + "loss": 0.3627, + "step": 23697 + }, + { + "epoch": 65.1043956043956, + "grad_norm": 1.6925631761550903, + "learning_rate": 1.7447802197802198e-05, + "loss": 0.0184, + "step": 23698 + }, + { + "epoch": 65.10714285714286, + "grad_norm": 8.963611602783203, + "learning_rate": 1.744642857142857e-05, + "loss": 0.1986, + "step": 23699 + }, + { + "epoch": 65.10989010989012, + "grad_norm": 9.498960494995117, + "learning_rate": 1.7445054945054945e-05, + "loss": 0.1509, + "step": 23700 + }, + { + "epoch": 65.11263736263736, + "grad_norm": 16.952756881713867, + "learning_rate": 1.7443681318681322e-05, + "loss": 0.2514, + "step": 23701 + }, + { + "epoch": 65.11538461538461, + "grad_norm": 8.958663940429688, + "learning_rate": 1.7442307692307695e-05, + "loss": 0.1374, + "step": 23702 + }, + { + "epoch": 65.11813186813187, + "grad_norm": 2.034111738204956, + "learning_rate": 1.744093406593407e-05, + "loss": 0.0247, + "step": 23703 + }, + { + "epoch": 65.12087912087912, + "grad_norm": 11.127663612365723, + "learning_rate": 1.743956043956044e-05, + "loss": 0.1243, + "step": 23704 + }, + { + "epoch": 65.12362637362638, + "grad_norm": 11.956137657165527, + "learning_rate": 1.7438186813186812e-05, + "loss": 0.1498, + "step": 23705 + }, + { + "epoch": 65.12637362637362, + "grad_norm": 18.385665893554688, + "learning_rate": 1.743681318681319e-05, + "loss": 0.4933, + "step": 23706 + }, + { + "epoch": 65.12912087912088, + "grad_norm": 9.528636932373047, + "learning_rate": 1.7435439560439562e-05, + "loss": 0.1313, + "step": 23707 + }, + { + "epoch": 65.13186813186813, + "grad_norm": 16.339126586914062, + "learning_rate": 1.7434065934065936e-05, + "loss": 0.3054, + "step": 23708 + }, + { + "epoch": 65.13461538461539, + "grad_norm": 8.292621612548828, + "learning_rate": 1.743269230769231e-05, + "loss": 0.1576, + "step": 23709 + }, + { + "epoch": 65.13736263736264, + "grad_norm": 11.812777519226074, + "learning_rate": 1.7431318681318682e-05, + "loss": 0.2553, + "step": 23710 + }, + { + "epoch": 65.14010989010988, + "grad_norm": 1.823070764541626, + "learning_rate": 1.7429945054945056e-05, + "loss": 0.0237, + "step": 23711 + }, + { + "epoch": 65.14285714285714, + "grad_norm": 2.507964849472046, + "learning_rate": 1.742857142857143e-05, + "loss": 0.0478, + "step": 23712 + }, + { + "epoch": 65.1456043956044, + "grad_norm": 19.625839233398438, + "learning_rate": 1.7427197802197803e-05, + "loss": 0.2828, + "step": 23713 + }, + { + "epoch": 65.14835164835165, + "grad_norm": 26.557598114013672, + "learning_rate": 1.7425824175824176e-05, + "loss": 0.5844, + "step": 23714 + }, + { + "epoch": 65.1510989010989, + "grad_norm": 9.538089752197266, + "learning_rate": 1.742445054945055e-05, + "loss": 0.2585, + "step": 23715 + }, + { + "epoch": 65.15384615384616, + "grad_norm": 7.636487007141113, + "learning_rate": 1.7423076923076926e-05, + "loss": 0.1104, + "step": 23716 + }, + { + "epoch": 65.1565934065934, + "grad_norm": 3.497105121612549, + "learning_rate": 1.74217032967033e-05, + "loss": 0.0395, + "step": 23717 + }, + { + "epoch": 65.15934065934066, + "grad_norm": 9.007041931152344, + "learning_rate": 1.7420329670329673e-05, + "loss": 0.1661, + "step": 23718 + }, + { + "epoch": 65.16208791208791, + "grad_norm": 13.194191932678223, + "learning_rate": 1.7418956043956043e-05, + "loss": 0.1865, + "step": 23719 + }, + { + "epoch": 65.16483516483517, + "grad_norm": 5.3204545974731445, + "learning_rate": 1.7417582417582416e-05, + "loss": 0.1681, + "step": 23720 + }, + { + "epoch": 65.16758241758242, + "grad_norm": 8.352797508239746, + "learning_rate": 1.741620879120879e-05, + "loss": 0.0757, + "step": 23721 + }, + { + "epoch": 65.17032967032966, + "grad_norm": 8.900627136230469, + "learning_rate": 1.7414835164835167e-05, + "loss": 0.1246, + "step": 23722 + }, + { + "epoch": 65.17307692307692, + "grad_norm": 20.33156967163086, + "learning_rate": 1.741346153846154e-05, + "loss": 0.3828, + "step": 23723 + }, + { + "epoch": 65.17582417582418, + "grad_norm": 4.047744274139404, + "learning_rate": 1.7412087912087913e-05, + "loss": 0.0437, + "step": 23724 + }, + { + "epoch": 65.17857142857143, + "grad_norm": 6.908021450042725, + "learning_rate": 1.7410714285714287e-05, + "loss": 0.1301, + "step": 23725 + }, + { + "epoch": 65.18131868131869, + "grad_norm": 6.910796642303467, + "learning_rate": 1.740934065934066e-05, + "loss": 0.1009, + "step": 23726 + }, + { + "epoch": 65.18406593406593, + "grad_norm": 3.6624937057495117, + "learning_rate": 1.7407967032967034e-05, + "loss": 0.0524, + "step": 23727 + }, + { + "epoch": 65.18681318681318, + "grad_norm": 11.364792823791504, + "learning_rate": 1.7406593406593407e-05, + "loss": 0.1645, + "step": 23728 + }, + { + "epoch": 65.18956043956044, + "grad_norm": 7.596839904785156, + "learning_rate": 1.740521978021978e-05, + "loss": 0.1546, + "step": 23729 + }, + { + "epoch": 65.1923076923077, + "grad_norm": 8.236888885498047, + "learning_rate": 1.7403846153846154e-05, + "loss": 0.1979, + "step": 23730 + }, + { + "epoch": 65.19505494505495, + "grad_norm": 15.549551963806152, + "learning_rate": 1.7402472527472527e-05, + "loss": 0.2754, + "step": 23731 + }, + { + "epoch": 65.1978021978022, + "grad_norm": 7.266268730163574, + "learning_rate": 1.7401098901098904e-05, + "loss": 0.0923, + "step": 23732 + }, + { + "epoch": 65.20054945054945, + "grad_norm": 10.229293823242188, + "learning_rate": 1.7399725274725277e-05, + "loss": 0.2663, + "step": 23733 + }, + { + "epoch": 65.2032967032967, + "grad_norm": 18.539554595947266, + "learning_rate": 1.7398351648351648e-05, + "loss": 0.3238, + "step": 23734 + }, + { + "epoch": 65.20604395604396, + "grad_norm": 18.616960525512695, + "learning_rate": 1.739697802197802e-05, + "loss": 0.3095, + "step": 23735 + }, + { + "epoch": 65.20879120879121, + "grad_norm": 24.618215560913086, + "learning_rate": 1.7395604395604394e-05, + "loss": 0.5382, + "step": 23736 + }, + { + "epoch": 65.21153846153847, + "grad_norm": 7.448218822479248, + "learning_rate": 1.739423076923077e-05, + "loss": 0.1748, + "step": 23737 + }, + { + "epoch": 65.21428571428571, + "grad_norm": 9.195262908935547, + "learning_rate": 1.7392857142857145e-05, + "loss": 0.1615, + "step": 23738 + }, + { + "epoch": 65.21703296703296, + "grad_norm": 10.151025772094727, + "learning_rate": 1.7391483516483518e-05, + "loss": 0.1602, + "step": 23739 + }, + { + "epoch": 65.21978021978022, + "grad_norm": 8.386466979980469, + "learning_rate": 1.739010989010989e-05, + "loss": 0.2363, + "step": 23740 + }, + { + "epoch": 65.22252747252747, + "grad_norm": 4.952104091644287, + "learning_rate": 1.7388736263736265e-05, + "loss": 0.0825, + "step": 23741 + }, + { + "epoch": 65.22527472527473, + "grad_norm": 4.700046539306641, + "learning_rate": 1.7387362637362638e-05, + "loss": 0.0359, + "step": 23742 + }, + { + "epoch": 65.22802197802197, + "grad_norm": 6.108992099761963, + "learning_rate": 1.738598901098901e-05, + "loss": 0.0953, + "step": 23743 + }, + { + "epoch": 65.23076923076923, + "grad_norm": 2.303926467895508, + "learning_rate": 1.7384615384615385e-05, + "loss": 0.0317, + "step": 23744 + }, + { + "epoch": 65.23351648351648, + "grad_norm": 13.071233749389648, + "learning_rate": 1.738324175824176e-05, + "loss": 0.1594, + "step": 23745 + }, + { + "epoch": 65.23626373626374, + "grad_norm": 16.309341430664062, + "learning_rate": 1.7381868131868132e-05, + "loss": 0.341, + "step": 23746 + }, + { + "epoch": 65.23901098901099, + "grad_norm": 15.217294692993164, + "learning_rate": 1.738049450549451e-05, + "loss": 0.1966, + "step": 23747 + }, + { + "epoch": 65.24175824175825, + "grad_norm": 7.757230758666992, + "learning_rate": 1.7379120879120882e-05, + "loss": 0.1379, + "step": 23748 + }, + { + "epoch": 65.24450549450549, + "grad_norm": 3.1777703762054443, + "learning_rate": 1.7377747252747252e-05, + "loss": 0.0525, + "step": 23749 + }, + { + "epoch": 65.24725274725274, + "grad_norm": 24.776012420654297, + "learning_rate": 1.7376373626373625e-05, + "loss": 0.4029, + "step": 23750 + }, + { + "epoch": 65.25, + "grad_norm": 15.261096000671387, + "learning_rate": 1.7375e-05, + "loss": 0.3281, + "step": 23751 + }, + { + "epoch": 65.25274725274726, + "grad_norm": 18.484586715698242, + "learning_rate": 1.7373626373626376e-05, + "loss": 0.3925, + "step": 23752 + }, + { + "epoch": 65.25549450549451, + "grad_norm": 21.781002044677734, + "learning_rate": 1.737225274725275e-05, + "loss": 0.6558, + "step": 23753 + }, + { + "epoch": 65.25824175824175, + "grad_norm": 4.416936874389648, + "learning_rate": 1.7370879120879122e-05, + "loss": 0.0322, + "step": 23754 + }, + { + "epoch": 65.26098901098901, + "grad_norm": 14.327925682067871, + "learning_rate": 1.7369505494505496e-05, + "loss": 0.2625, + "step": 23755 + }, + { + "epoch": 65.26373626373626, + "grad_norm": 19.23553466796875, + "learning_rate": 1.736813186813187e-05, + "loss": 0.2925, + "step": 23756 + }, + { + "epoch": 65.26648351648352, + "grad_norm": 4.165347099304199, + "learning_rate": 1.7366758241758243e-05, + "loss": 0.0573, + "step": 23757 + }, + { + "epoch": 65.26923076923077, + "grad_norm": 10.601889610290527, + "learning_rate": 1.7365384615384616e-05, + "loss": 0.1981, + "step": 23758 + }, + { + "epoch": 65.27197802197803, + "grad_norm": 6.930940628051758, + "learning_rate": 1.736401098901099e-05, + "loss": 0.0742, + "step": 23759 + }, + { + "epoch": 65.27472527472527, + "grad_norm": 14.754523277282715, + "learning_rate": 1.7362637362637363e-05, + "loss": 0.5004, + "step": 23760 + }, + { + "epoch": 65.27747252747253, + "grad_norm": 7.890929222106934, + "learning_rate": 1.7361263736263736e-05, + "loss": 0.0808, + "step": 23761 + }, + { + "epoch": 65.28021978021978, + "grad_norm": 10.043848991394043, + "learning_rate": 1.7359890109890113e-05, + "loss": 0.1245, + "step": 23762 + }, + { + "epoch": 65.28296703296704, + "grad_norm": 5.502089500427246, + "learning_rate": 1.7358516483516486e-05, + "loss": 0.0503, + "step": 23763 + }, + { + "epoch": 65.28571428571429, + "grad_norm": 9.67906665802002, + "learning_rate": 1.7357142857142856e-05, + "loss": 0.1058, + "step": 23764 + }, + { + "epoch": 65.28846153846153, + "grad_norm": 12.40079402923584, + "learning_rate": 1.735576923076923e-05, + "loss": 0.2729, + "step": 23765 + }, + { + "epoch": 65.29120879120879, + "grad_norm": 9.031990051269531, + "learning_rate": 1.7354395604395603e-05, + "loss": 0.139, + "step": 23766 + }, + { + "epoch": 65.29395604395604, + "grad_norm": 10.012330055236816, + "learning_rate": 1.735302197802198e-05, + "loss": 0.1955, + "step": 23767 + }, + { + "epoch": 65.2967032967033, + "grad_norm": 7.331533432006836, + "learning_rate": 1.7351648351648354e-05, + "loss": 0.1525, + "step": 23768 + }, + { + "epoch": 65.29945054945055, + "grad_norm": 4.772521495819092, + "learning_rate": 1.7350274725274727e-05, + "loss": 0.078, + "step": 23769 + }, + { + "epoch": 65.3021978021978, + "grad_norm": 11.331830978393555, + "learning_rate": 1.73489010989011e-05, + "loss": 0.2855, + "step": 23770 + }, + { + "epoch": 65.30494505494505, + "grad_norm": 10.9261474609375, + "learning_rate": 1.7347527472527474e-05, + "loss": 0.1691, + "step": 23771 + }, + { + "epoch": 65.3076923076923, + "grad_norm": 7.746607780456543, + "learning_rate": 1.7346153846153847e-05, + "loss": 0.1689, + "step": 23772 + }, + { + "epoch": 65.31043956043956, + "grad_norm": 14.58570384979248, + "learning_rate": 1.734478021978022e-05, + "loss": 0.3314, + "step": 23773 + }, + { + "epoch": 65.31318681318682, + "grad_norm": 8.460394859313965, + "learning_rate": 1.7343406593406594e-05, + "loss": 0.1444, + "step": 23774 + }, + { + "epoch": 65.31593406593407, + "grad_norm": 16.951366424560547, + "learning_rate": 1.7342032967032967e-05, + "loss": 0.2676, + "step": 23775 + }, + { + "epoch": 65.31868131868131, + "grad_norm": 8.13805866241455, + "learning_rate": 1.734065934065934e-05, + "loss": 0.2393, + "step": 23776 + }, + { + "epoch": 65.32142857142857, + "grad_norm": 19.807857513427734, + "learning_rate": 1.7339285714285718e-05, + "loss": 0.5846, + "step": 23777 + }, + { + "epoch": 65.32417582417582, + "grad_norm": 12.248083114624023, + "learning_rate": 1.733791208791209e-05, + "loss": 0.2167, + "step": 23778 + }, + { + "epoch": 65.32692307692308, + "grad_norm": 6.5010223388671875, + "learning_rate": 1.733653846153846e-05, + "loss": 0.094, + "step": 23779 + }, + { + "epoch": 65.32967032967034, + "grad_norm": 20.408222198486328, + "learning_rate": 1.7335164835164834e-05, + "loss": 0.4563, + "step": 23780 + }, + { + "epoch": 65.33241758241758, + "grad_norm": 19.502239227294922, + "learning_rate": 1.7333791208791208e-05, + "loss": 0.2671, + "step": 23781 + }, + { + "epoch": 65.33516483516483, + "grad_norm": 9.510367393493652, + "learning_rate": 1.7332417582417585e-05, + "loss": 0.2694, + "step": 23782 + }, + { + "epoch": 65.33791208791209, + "grad_norm": 14.693981170654297, + "learning_rate": 1.7331043956043958e-05, + "loss": 0.3546, + "step": 23783 + }, + { + "epoch": 65.34065934065934, + "grad_norm": 17.606538772583008, + "learning_rate": 1.732967032967033e-05, + "loss": 0.5327, + "step": 23784 + }, + { + "epoch": 65.3434065934066, + "grad_norm": 1.425329566001892, + "learning_rate": 1.7328296703296705e-05, + "loss": 0.0117, + "step": 23785 + }, + { + "epoch": 65.34615384615384, + "grad_norm": 10.1631498336792, + "learning_rate": 1.7326923076923078e-05, + "loss": 0.1525, + "step": 23786 + }, + { + "epoch": 65.3489010989011, + "grad_norm": 14.21940803527832, + "learning_rate": 1.732554945054945e-05, + "loss": 0.2294, + "step": 23787 + }, + { + "epoch": 65.35164835164835, + "grad_norm": 14.505230903625488, + "learning_rate": 1.7324175824175825e-05, + "loss": 0.2627, + "step": 23788 + }, + { + "epoch": 65.3543956043956, + "grad_norm": 2.934229612350464, + "learning_rate": 1.73228021978022e-05, + "loss": 0.0566, + "step": 23789 + }, + { + "epoch": 65.35714285714286, + "grad_norm": 4.000529766082764, + "learning_rate": 1.7321428571428572e-05, + "loss": 0.0712, + "step": 23790 + }, + { + "epoch": 65.35989010989012, + "grad_norm": 6.679113388061523, + "learning_rate": 1.7320054945054945e-05, + "loss": 0.1338, + "step": 23791 + }, + { + "epoch": 65.36263736263736, + "grad_norm": 5.114108562469482, + "learning_rate": 1.7318681318681322e-05, + "loss": 0.088, + "step": 23792 + }, + { + "epoch": 65.36538461538461, + "grad_norm": 9.313465118408203, + "learning_rate": 1.7317307692307695e-05, + "loss": 0.1721, + "step": 23793 + }, + { + "epoch": 65.36813186813187, + "grad_norm": 6.818769931793213, + "learning_rate": 1.7315934065934065e-05, + "loss": 0.1374, + "step": 23794 + }, + { + "epoch": 65.37087912087912, + "grad_norm": 5.692094326019287, + "learning_rate": 1.731456043956044e-05, + "loss": 0.0731, + "step": 23795 + }, + { + "epoch": 65.37362637362638, + "grad_norm": 2.7321395874023438, + "learning_rate": 1.7313186813186812e-05, + "loss": 0.0326, + "step": 23796 + }, + { + "epoch": 65.37637362637362, + "grad_norm": 1.0144641399383545, + "learning_rate": 1.731181318681319e-05, + "loss": 0.0144, + "step": 23797 + }, + { + "epoch": 65.37912087912088, + "grad_norm": 10.295047760009766, + "learning_rate": 1.7310439560439562e-05, + "loss": 0.2443, + "step": 23798 + }, + { + "epoch": 65.38186813186813, + "grad_norm": 27.589345932006836, + "learning_rate": 1.7309065934065936e-05, + "loss": 0.4463, + "step": 23799 + }, + { + "epoch": 65.38461538461539, + "grad_norm": 9.598076820373535, + "learning_rate": 1.730769230769231e-05, + "loss": 0.3056, + "step": 23800 + }, + { + "epoch": 65.38736263736264, + "grad_norm": 14.324554443359375, + "learning_rate": 1.7306318681318683e-05, + "loss": 0.211, + "step": 23801 + }, + { + "epoch": 65.39010989010988, + "grad_norm": 10.114977836608887, + "learning_rate": 1.7304945054945056e-05, + "loss": 0.3524, + "step": 23802 + }, + { + "epoch": 65.39285714285714, + "grad_norm": 4.6775689125061035, + "learning_rate": 1.730357142857143e-05, + "loss": 0.0838, + "step": 23803 + }, + { + "epoch": 65.3956043956044, + "grad_norm": 6.011691570281982, + "learning_rate": 1.7302197802197803e-05, + "loss": 0.0762, + "step": 23804 + }, + { + "epoch": 65.39835164835165, + "grad_norm": 18.80976104736328, + "learning_rate": 1.7300824175824176e-05, + "loss": 0.5687, + "step": 23805 + }, + { + "epoch": 65.4010989010989, + "grad_norm": 5.549017906188965, + "learning_rate": 1.729945054945055e-05, + "loss": 0.0996, + "step": 23806 + }, + { + "epoch": 65.40384615384616, + "grad_norm": 2.6316254138946533, + "learning_rate": 1.7298076923076927e-05, + "loss": 0.0383, + "step": 23807 + }, + { + "epoch": 65.4065934065934, + "grad_norm": 3.7461509704589844, + "learning_rate": 1.72967032967033e-05, + "loss": 0.0406, + "step": 23808 + }, + { + "epoch": 65.40934065934066, + "grad_norm": 19.593006134033203, + "learning_rate": 1.729532967032967e-05, + "loss": 0.4838, + "step": 23809 + }, + { + "epoch": 65.41208791208791, + "grad_norm": 21.362531661987305, + "learning_rate": 1.7293956043956043e-05, + "loss": 0.5153, + "step": 23810 + }, + { + "epoch": 65.41483516483517, + "grad_norm": 7.975862979888916, + "learning_rate": 1.7292582417582417e-05, + "loss": 0.1624, + "step": 23811 + }, + { + "epoch": 65.41758241758242, + "grad_norm": 2.308652877807617, + "learning_rate": 1.7291208791208794e-05, + "loss": 0.0383, + "step": 23812 + }, + { + "epoch": 65.42032967032966, + "grad_norm": 11.007269859313965, + "learning_rate": 1.7289835164835167e-05, + "loss": 0.2912, + "step": 23813 + }, + { + "epoch": 65.42307692307692, + "grad_norm": 8.203330039978027, + "learning_rate": 1.728846153846154e-05, + "loss": 0.1341, + "step": 23814 + }, + { + "epoch": 65.42582417582418, + "grad_norm": 17.381851196289062, + "learning_rate": 1.7287087912087914e-05, + "loss": 0.3855, + "step": 23815 + }, + { + "epoch": 65.42857142857143, + "grad_norm": 11.713255882263184, + "learning_rate": 1.7285714285714287e-05, + "loss": 0.1604, + "step": 23816 + }, + { + "epoch": 65.43131868131869, + "grad_norm": 2.0855977535247803, + "learning_rate": 1.728434065934066e-05, + "loss": 0.0258, + "step": 23817 + }, + { + "epoch": 65.43406593406593, + "grad_norm": 15.90982723236084, + "learning_rate": 1.7282967032967034e-05, + "loss": 0.3212, + "step": 23818 + }, + { + "epoch": 65.43681318681318, + "grad_norm": 17.23021697998047, + "learning_rate": 1.7281593406593407e-05, + "loss": 0.1819, + "step": 23819 + }, + { + "epoch": 65.43956043956044, + "grad_norm": 18.11634063720703, + "learning_rate": 1.728021978021978e-05, + "loss": 0.3924, + "step": 23820 + }, + { + "epoch": 65.4423076923077, + "grad_norm": 5.474271774291992, + "learning_rate": 1.7278846153846154e-05, + "loss": 0.0662, + "step": 23821 + }, + { + "epoch": 65.44505494505495, + "grad_norm": 7.692967891693115, + "learning_rate": 1.727747252747253e-05, + "loss": 0.0825, + "step": 23822 + }, + { + "epoch": 65.4478021978022, + "grad_norm": 34.795413970947266, + "learning_rate": 1.72760989010989e-05, + "loss": 1.3173, + "step": 23823 + }, + { + "epoch": 65.45054945054945, + "grad_norm": 26.112207412719727, + "learning_rate": 1.7274725274725274e-05, + "loss": 0.7242, + "step": 23824 + }, + { + "epoch": 65.4532967032967, + "grad_norm": 21.069400787353516, + "learning_rate": 1.7273351648351648e-05, + "loss": 0.5161, + "step": 23825 + }, + { + "epoch": 65.45604395604396, + "grad_norm": 17.236000061035156, + "learning_rate": 1.727197802197802e-05, + "loss": 0.2232, + "step": 23826 + }, + { + "epoch": 65.45879120879121, + "grad_norm": 3.424452066421509, + "learning_rate": 1.7270604395604398e-05, + "loss": 0.0502, + "step": 23827 + }, + { + "epoch": 65.46153846153847, + "grad_norm": 8.942002296447754, + "learning_rate": 1.726923076923077e-05, + "loss": 0.0897, + "step": 23828 + }, + { + "epoch": 65.46428571428571, + "grad_norm": 4.475400447845459, + "learning_rate": 1.7267857142857145e-05, + "loss": 0.0672, + "step": 23829 + }, + { + "epoch": 65.46703296703296, + "grad_norm": 15.102354049682617, + "learning_rate": 1.7266483516483518e-05, + "loss": 0.3163, + "step": 23830 + }, + { + "epoch": 65.46978021978022, + "grad_norm": 15.884710311889648, + "learning_rate": 1.726510989010989e-05, + "loss": 0.5614, + "step": 23831 + }, + { + "epoch": 65.47252747252747, + "grad_norm": 3.4047560691833496, + "learning_rate": 1.7263736263736262e-05, + "loss": 0.0279, + "step": 23832 + }, + { + "epoch": 65.47527472527473, + "grad_norm": 25.512319564819336, + "learning_rate": 1.726236263736264e-05, + "loss": 0.5886, + "step": 23833 + }, + { + "epoch": 65.47802197802197, + "grad_norm": 13.351862907409668, + "learning_rate": 1.7260989010989012e-05, + "loss": 0.2737, + "step": 23834 + }, + { + "epoch": 65.48076923076923, + "grad_norm": 14.472771644592285, + "learning_rate": 1.7259615384615385e-05, + "loss": 0.454, + "step": 23835 + }, + { + "epoch": 65.48351648351648, + "grad_norm": 10.39320182800293, + "learning_rate": 1.725824175824176e-05, + "loss": 0.3179, + "step": 23836 + }, + { + "epoch": 65.48626373626374, + "grad_norm": 6.589404582977295, + "learning_rate": 1.7256868131868132e-05, + "loss": 0.0526, + "step": 23837 + }, + { + "epoch": 65.48901098901099, + "grad_norm": 17.268627166748047, + "learning_rate": 1.7255494505494506e-05, + "loss": 0.3052, + "step": 23838 + }, + { + "epoch": 65.49175824175825, + "grad_norm": 20.525178909301758, + "learning_rate": 1.725412087912088e-05, + "loss": 0.3432, + "step": 23839 + }, + { + "epoch": 65.49450549450549, + "grad_norm": 11.868157386779785, + "learning_rate": 1.7252747252747252e-05, + "loss": 0.2049, + "step": 23840 + }, + { + "epoch": 65.49725274725274, + "grad_norm": 17.292665481567383, + "learning_rate": 1.7251373626373626e-05, + "loss": 0.4248, + "step": 23841 + }, + { + "epoch": 65.5, + "grad_norm": 12.031220436096191, + "learning_rate": 1.725e-05, + "loss": 0.23, + "step": 23842 + }, + { + "epoch": 65.50274725274726, + "grad_norm": 14.159268379211426, + "learning_rate": 1.7248626373626376e-05, + "loss": 0.2082, + "step": 23843 + }, + { + "epoch": 65.50549450549451, + "grad_norm": 21.099632263183594, + "learning_rate": 1.724725274725275e-05, + "loss": 0.3252, + "step": 23844 + }, + { + "epoch": 65.50824175824175, + "grad_norm": 16.954856872558594, + "learning_rate": 1.7245879120879123e-05, + "loss": 0.3272, + "step": 23845 + }, + { + "epoch": 65.51098901098901, + "grad_norm": 2.307379722595215, + "learning_rate": 1.7244505494505496e-05, + "loss": 0.0205, + "step": 23846 + }, + { + "epoch": 65.51373626373626, + "grad_norm": 3.184609889984131, + "learning_rate": 1.7243131868131866e-05, + "loss": 0.0374, + "step": 23847 + }, + { + "epoch": 65.51648351648352, + "grad_norm": 12.975719451904297, + "learning_rate": 1.7241758241758243e-05, + "loss": 0.2139, + "step": 23848 + }, + { + "epoch": 65.51923076923077, + "grad_norm": 12.690787315368652, + "learning_rate": 1.7240384615384616e-05, + "loss": 0.1709, + "step": 23849 + }, + { + "epoch": 65.52197802197803, + "grad_norm": 9.565864562988281, + "learning_rate": 1.723901098901099e-05, + "loss": 0.1434, + "step": 23850 + }, + { + "epoch": 65.52472527472527, + "grad_norm": 5.611612796783447, + "learning_rate": 1.7237637362637363e-05, + "loss": 0.0623, + "step": 23851 + }, + { + "epoch": 65.52747252747253, + "grad_norm": 11.507109642028809, + "learning_rate": 1.7236263736263737e-05, + "loss": 0.3096, + "step": 23852 + }, + { + "epoch": 65.53021978021978, + "grad_norm": 17.916072845458984, + "learning_rate": 1.723489010989011e-05, + "loss": 0.3649, + "step": 23853 + }, + { + "epoch": 65.53296703296704, + "grad_norm": 5.568484306335449, + "learning_rate": 1.7233516483516483e-05, + "loss": 0.0467, + "step": 23854 + }, + { + "epoch": 65.53571428571429, + "grad_norm": 8.801352500915527, + "learning_rate": 1.7232142857142857e-05, + "loss": 0.1139, + "step": 23855 + }, + { + "epoch": 65.53846153846153, + "grad_norm": 8.381497383117676, + "learning_rate": 1.723076923076923e-05, + "loss": 0.0995, + "step": 23856 + }, + { + "epoch": 65.54120879120879, + "grad_norm": 12.19053840637207, + "learning_rate": 1.7229395604395604e-05, + "loss": 0.2484, + "step": 23857 + }, + { + "epoch": 65.54395604395604, + "grad_norm": 9.705509185791016, + "learning_rate": 1.722802197802198e-05, + "loss": 0.2667, + "step": 23858 + }, + { + "epoch": 65.5467032967033, + "grad_norm": 14.0128812789917, + "learning_rate": 1.7226648351648354e-05, + "loss": 0.464, + "step": 23859 + }, + { + "epoch": 65.54945054945055, + "grad_norm": 8.695771217346191, + "learning_rate": 1.7225274725274727e-05, + "loss": 0.193, + "step": 23860 + }, + { + "epoch": 65.5521978021978, + "grad_norm": 7.357226371765137, + "learning_rate": 1.72239010989011e-05, + "loss": 0.0917, + "step": 23861 + }, + { + "epoch": 65.55494505494505, + "grad_norm": 26.44147491455078, + "learning_rate": 1.722252747252747e-05, + "loss": 0.4703, + "step": 23862 + }, + { + "epoch": 65.5576923076923, + "grad_norm": 6.79311466217041, + "learning_rate": 1.7221153846153847e-05, + "loss": 0.1061, + "step": 23863 + }, + { + "epoch": 65.56043956043956, + "grad_norm": 16.272066116333008, + "learning_rate": 1.721978021978022e-05, + "loss": 0.4354, + "step": 23864 + }, + { + "epoch": 65.56318681318682, + "grad_norm": 25.102081298828125, + "learning_rate": 1.7218406593406594e-05, + "loss": 0.5832, + "step": 23865 + }, + { + "epoch": 65.56593406593407, + "grad_norm": 14.721436500549316, + "learning_rate": 1.7217032967032968e-05, + "loss": 0.1945, + "step": 23866 + }, + { + "epoch": 65.56868131868131, + "grad_norm": 8.66125202178955, + "learning_rate": 1.721565934065934e-05, + "loss": 0.0986, + "step": 23867 + }, + { + "epoch": 65.57142857142857, + "grad_norm": 12.776544570922852, + "learning_rate": 1.7214285714285715e-05, + "loss": 0.1718, + "step": 23868 + }, + { + "epoch": 65.57417582417582, + "grad_norm": 12.868233680725098, + "learning_rate": 1.7212912087912088e-05, + "loss": 0.1562, + "step": 23869 + }, + { + "epoch": 65.57692307692308, + "grad_norm": 15.904420852661133, + "learning_rate": 1.721153846153846e-05, + "loss": 0.3507, + "step": 23870 + }, + { + "epoch": 65.57967032967034, + "grad_norm": 9.714677810668945, + "learning_rate": 1.7210164835164835e-05, + "loss": 0.1264, + "step": 23871 + }, + { + "epoch": 65.58241758241758, + "grad_norm": 7.463742733001709, + "learning_rate": 1.7208791208791208e-05, + "loss": 0.14, + "step": 23872 + }, + { + "epoch": 65.58516483516483, + "grad_norm": 16.164459228515625, + "learning_rate": 1.7207417582417585e-05, + "loss": 0.2932, + "step": 23873 + }, + { + "epoch": 65.58791208791209, + "grad_norm": 15.770454406738281, + "learning_rate": 1.720604395604396e-05, + "loss": 0.3324, + "step": 23874 + }, + { + "epoch": 65.59065934065934, + "grad_norm": 2.746713876724243, + "learning_rate": 1.7204670329670332e-05, + "loss": 0.0491, + "step": 23875 + }, + { + "epoch": 65.5934065934066, + "grad_norm": 1.2702770233154297, + "learning_rate": 1.7203296703296705e-05, + "loss": 0.0119, + "step": 23876 + }, + { + "epoch": 65.59615384615384, + "grad_norm": 12.250171661376953, + "learning_rate": 1.7201923076923075e-05, + "loss": 0.1733, + "step": 23877 + }, + { + "epoch": 65.5989010989011, + "grad_norm": 17.121204376220703, + "learning_rate": 1.7200549450549452e-05, + "loss": 0.4376, + "step": 23878 + }, + { + "epoch": 65.60164835164835, + "grad_norm": 14.457601547241211, + "learning_rate": 1.7199175824175825e-05, + "loss": 0.1799, + "step": 23879 + }, + { + "epoch": 65.6043956043956, + "grad_norm": 12.106547355651855, + "learning_rate": 1.71978021978022e-05, + "loss": 0.418, + "step": 23880 + }, + { + "epoch": 65.60714285714286, + "grad_norm": 5.092884063720703, + "learning_rate": 1.7196428571428572e-05, + "loss": 0.0618, + "step": 23881 + }, + { + "epoch": 65.60989010989012, + "grad_norm": 9.011465072631836, + "learning_rate": 1.7195054945054946e-05, + "loss": 0.156, + "step": 23882 + }, + { + "epoch": 65.61263736263736, + "grad_norm": 7.265961647033691, + "learning_rate": 1.719368131868132e-05, + "loss": 0.1454, + "step": 23883 + }, + { + "epoch": 65.61538461538461, + "grad_norm": 12.460071563720703, + "learning_rate": 1.7192307692307692e-05, + "loss": 0.1921, + "step": 23884 + }, + { + "epoch": 65.61813186813187, + "grad_norm": 12.33376693725586, + "learning_rate": 1.7190934065934066e-05, + "loss": 0.3791, + "step": 23885 + }, + { + "epoch": 65.62087912087912, + "grad_norm": 16.3375186920166, + "learning_rate": 1.718956043956044e-05, + "loss": 0.1997, + "step": 23886 + }, + { + "epoch": 65.62362637362638, + "grad_norm": 6.718636989593506, + "learning_rate": 1.7188186813186813e-05, + "loss": 0.0874, + "step": 23887 + }, + { + "epoch": 65.62637362637362, + "grad_norm": 13.89725399017334, + "learning_rate": 1.718681318681319e-05, + "loss": 0.4891, + "step": 23888 + }, + { + "epoch": 65.62912087912088, + "grad_norm": 4.795475482940674, + "learning_rate": 1.7185439560439563e-05, + "loss": 0.0945, + "step": 23889 + }, + { + "epoch": 65.63186813186813, + "grad_norm": 16.09710693359375, + "learning_rate": 1.7184065934065936e-05, + "loss": 0.4779, + "step": 23890 + }, + { + "epoch": 65.63461538461539, + "grad_norm": 17.140869140625, + "learning_rate": 1.718269230769231e-05, + "loss": 0.2573, + "step": 23891 + }, + { + "epoch": 65.63736263736264, + "grad_norm": 9.650593757629395, + "learning_rate": 1.718131868131868e-05, + "loss": 0.1869, + "step": 23892 + }, + { + "epoch": 65.64010989010988, + "grad_norm": 13.101499557495117, + "learning_rate": 1.7179945054945056e-05, + "loss": 0.1623, + "step": 23893 + }, + { + "epoch": 65.64285714285714, + "grad_norm": 11.908868789672852, + "learning_rate": 1.717857142857143e-05, + "loss": 0.3167, + "step": 23894 + }, + { + "epoch": 65.6456043956044, + "grad_norm": 10.216605186462402, + "learning_rate": 1.7177197802197803e-05, + "loss": 0.2403, + "step": 23895 + }, + { + "epoch": 65.64835164835165, + "grad_norm": 6.272260665893555, + "learning_rate": 1.7175824175824177e-05, + "loss": 0.0735, + "step": 23896 + }, + { + "epoch": 65.6510989010989, + "grad_norm": 11.322107315063477, + "learning_rate": 1.717445054945055e-05, + "loss": 0.175, + "step": 23897 + }, + { + "epoch": 65.65384615384616, + "grad_norm": 10.698258399963379, + "learning_rate": 1.7173076923076924e-05, + "loss": 0.2724, + "step": 23898 + }, + { + "epoch": 65.6565934065934, + "grad_norm": 12.127379417419434, + "learning_rate": 1.7171703296703297e-05, + "loss": 0.2823, + "step": 23899 + }, + { + "epoch": 65.65934065934066, + "grad_norm": 11.683250427246094, + "learning_rate": 1.717032967032967e-05, + "loss": 0.1552, + "step": 23900 + }, + { + "epoch": 65.66208791208791, + "grad_norm": 9.083328247070312, + "learning_rate": 1.7168956043956044e-05, + "loss": 0.2517, + "step": 23901 + }, + { + "epoch": 65.66483516483517, + "grad_norm": 18.99822998046875, + "learning_rate": 1.7167582417582417e-05, + "loss": 0.3703, + "step": 23902 + }, + { + "epoch": 65.66758241758242, + "grad_norm": 4.789574146270752, + "learning_rate": 1.7166208791208794e-05, + "loss": 0.0867, + "step": 23903 + }, + { + "epoch": 65.67032967032966, + "grad_norm": 24.254920959472656, + "learning_rate": 1.7164835164835167e-05, + "loss": 0.8142, + "step": 23904 + }, + { + "epoch": 65.67307692307692, + "grad_norm": 11.12809944152832, + "learning_rate": 1.716346153846154e-05, + "loss": 0.3188, + "step": 23905 + }, + { + "epoch": 65.67582417582418, + "grad_norm": 13.25826358795166, + "learning_rate": 1.716208791208791e-05, + "loss": 0.202, + "step": 23906 + }, + { + "epoch": 65.67857142857143, + "grad_norm": 20.11405372619629, + "learning_rate": 1.7160714285714284e-05, + "loss": 0.2869, + "step": 23907 + }, + { + "epoch": 65.68131868131869, + "grad_norm": 28.062847137451172, + "learning_rate": 1.715934065934066e-05, + "loss": 0.5673, + "step": 23908 + }, + { + "epoch": 65.68406593406593, + "grad_norm": 8.908262252807617, + "learning_rate": 1.7157967032967034e-05, + "loss": 0.1532, + "step": 23909 + }, + { + "epoch": 65.68681318681318, + "grad_norm": 7.849819660186768, + "learning_rate": 1.7156593406593408e-05, + "loss": 0.096, + "step": 23910 + }, + { + "epoch": 65.68956043956044, + "grad_norm": 14.731237411499023, + "learning_rate": 1.715521978021978e-05, + "loss": 0.2565, + "step": 23911 + }, + { + "epoch": 65.6923076923077, + "grad_norm": 19.308063507080078, + "learning_rate": 1.7153846153846155e-05, + "loss": 0.4334, + "step": 23912 + }, + { + "epoch": 65.69505494505495, + "grad_norm": 3.937056303024292, + "learning_rate": 1.7152472527472528e-05, + "loss": 0.0786, + "step": 23913 + }, + { + "epoch": 65.6978021978022, + "grad_norm": 10.709877967834473, + "learning_rate": 1.71510989010989e-05, + "loss": 0.1323, + "step": 23914 + }, + { + "epoch": 65.70054945054945, + "grad_norm": 8.878875732421875, + "learning_rate": 1.7149725274725275e-05, + "loss": 0.1843, + "step": 23915 + }, + { + "epoch": 65.7032967032967, + "grad_norm": 8.630937576293945, + "learning_rate": 1.7148351648351648e-05, + "loss": 0.2736, + "step": 23916 + }, + { + "epoch": 65.70604395604396, + "grad_norm": 4.941340923309326, + "learning_rate": 1.714697802197802e-05, + "loss": 0.0444, + "step": 23917 + }, + { + "epoch": 65.70879120879121, + "grad_norm": 4.056732177734375, + "learning_rate": 1.71456043956044e-05, + "loss": 0.0996, + "step": 23918 + }, + { + "epoch": 65.71153846153847, + "grad_norm": 8.288723945617676, + "learning_rate": 1.7144230769230772e-05, + "loss": 0.2521, + "step": 23919 + }, + { + "epoch": 65.71428571428571, + "grad_norm": 9.3808012008667, + "learning_rate": 1.7142857142857145e-05, + "loss": 0.2052, + "step": 23920 + }, + { + "epoch": 65.71703296703296, + "grad_norm": 8.370959281921387, + "learning_rate": 1.7141483516483515e-05, + "loss": 0.2056, + "step": 23921 + }, + { + "epoch": 65.71978021978022, + "grad_norm": 22.318422317504883, + "learning_rate": 1.714010989010989e-05, + "loss": 0.3887, + "step": 23922 + }, + { + "epoch": 65.72252747252747, + "grad_norm": 8.530580520629883, + "learning_rate": 1.7138736263736265e-05, + "loss": 0.1411, + "step": 23923 + }, + { + "epoch": 65.72527472527473, + "grad_norm": 6.3600077629089355, + "learning_rate": 1.713736263736264e-05, + "loss": 0.0789, + "step": 23924 + }, + { + "epoch": 65.72802197802197, + "grad_norm": 20.44805145263672, + "learning_rate": 1.7135989010989012e-05, + "loss": 0.362, + "step": 23925 + }, + { + "epoch": 65.73076923076923, + "grad_norm": 12.191545486450195, + "learning_rate": 1.7134615384615386e-05, + "loss": 0.3166, + "step": 23926 + }, + { + "epoch": 65.73351648351648, + "grad_norm": 20.65435218811035, + "learning_rate": 1.713324175824176e-05, + "loss": 0.4106, + "step": 23927 + }, + { + "epoch": 65.73626373626374, + "grad_norm": 4.711272716522217, + "learning_rate": 1.7131868131868132e-05, + "loss": 0.0865, + "step": 23928 + }, + { + "epoch": 65.73901098901099, + "grad_norm": 15.92041301727295, + "learning_rate": 1.7130494505494506e-05, + "loss": 0.4937, + "step": 23929 + }, + { + "epoch": 65.74175824175825, + "grad_norm": 17.001556396484375, + "learning_rate": 1.712912087912088e-05, + "loss": 0.444, + "step": 23930 + }, + { + "epoch": 65.74450549450549, + "grad_norm": 11.80575180053711, + "learning_rate": 1.7127747252747253e-05, + "loss": 0.1539, + "step": 23931 + }, + { + "epoch": 65.74725274725274, + "grad_norm": 9.885555267333984, + "learning_rate": 1.7126373626373626e-05, + "loss": 0.2041, + "step": 23932 + }, + { + "epoch": 65.75, + "grad_norm": 1.5415451526641846, + "learning_rate": 1.7125000000000003e-05, + "loss": 0.015, + "step": 23933 + }, + { + "epoch": 65.75274725274726, + "grad_norm": 11.370535850524902, + "learning_rate": 1.7123626373626376e-05, + "loss": 0.2373, + "step": 23934 + }, + { + "epoch": 65.75549450549451, + "grad_norm": 13.669954299926758, + "learning_rate": 1.712225274725275e-05, + "loss": 0.2382, + "step": 23935 + }, + { + "epoch": 65.75824175824175, + "grad_norm": 6.470427989959717, + "learning_rate": 1.712087912087912e-05, + "loss": 0.1591, + "step": 23936 + }, + { + "epoch": 65.76098901098901, + "grad_norm": 2.8400485515594482, + "learning_rate": 1.7119505494505493e-05, + "loss": 0.0484, + "step": 23937 + }, + { + "epoch": 65.76373626373626, + "grad_norm": 11.113178253173828, + "learning_rate": 1.711813186813187e-05, + "loss": 0.1798, + "step": 23938 + }, + { + "epoch": 65.76648351648352, + "grad_norm": 8.444149017333984, + "learning_rate": 1.7116758241758243e-05, + "loss": 0.2524, + "step": 23939 + }, + { + "epoch": 65.76923076923077, + "grad_norm": 9.008517265319824, + "learning_rate": 1.7115384615384617e-05, + "loss": 0.2746, + "step": 23940 + }, + { + "epoch": 65.77197802197803, + "grad_norm": 14.807811737060547, + "learning_rate": 1.711401098901099e-05, + "loss": 0.3326, + "step": 23941 + }, + { + "epoch": 65.77472527472527, + "grad_norm": 26.151336669921875, + "learning_rate": 1.7112637362637364e-05, + "loss": 0.7304, + "step": 23942 + }, + { + "epoch": 65.77747252747253, + "grad_norm": 9.635923385620117, + "learning_rate": 1.7111263736263737e-05, + "loss": 0.1771, + "step": 23943 + }, + { + "epoch": 65.78021978021978, + "grad_norm": 8.655516624450684, + "learning_rate": 1.710989010989011e-05, + "loss": 0.1778, + "step": 23944 + }, + { + "epoch": 65.78296703296704, + "grad_norm": 6.721618175506592, + "learning_rate": 1.7108516483516484e-05, + "loss": 0.1787, + "step": 23945 + }, + { + "epoch": 65.78571428571429, + "grad_norm": 8.90371036529541, + "learning_rate": 1.7107142857142857e-05, + "loss": 0.2603, + "step": 23946 + }, + { + "epoch": 65.78846153846153, + "grad_norm": 24.195892333984375, + "learning_rate": 1.710576923076923e-05, + "loss": 0.8036, + "step": 23947 + }, + { + "epoch": 65.79120879120879, + "grad_norm": 15.650989532470703, + "learning_rate": 1.7104395604395604e-05, + "loss": 0.3411, + "step": 23948 + }, + { + "epoch": 65.79395604395604, + "grad_norm": 12.825435638427734, + "learning_rate": 1.710302197802198e-05, + "loss": 0.2192, + "step": 23949 + }, + { + "epoch": 65.7967032967033, + "grad_norm": 12.187623023986816, + "learning_rate": 1.7101648351648354e-05, + "loss": 0.1255, + "step": 23950 + }, + { + "epoch": 65.79945054945055, + "grad_norm": 18.973859786987305, + "learning_rate": 1.7100274725274724e-05, + "loss": 0.5459, + "step": 23951 + }, + { + "epoch": 65.8021978021978, + "grad_norm": 2.7671849727630615, + "learning_rate": 1.7098901098901098e-05, + "loss": 0.0432, + "step": 23952 + }, + { + "epoch": 65.80494505494505, + "grad_norm": 10.861839294433594, + "learning_rate": 1.709752747252747e-05, + "loss": 0.1121, + "step": 23953 + }, + { + "epoch": 65.8076923076923, + "grad_norm": 9.544500350952148, + "learning_rate": 1.7096153846153848e-05, + "loss": 0.176, + "step": 23954 + }, + { + "epoch": 65.81043956043956, + "grad_norm": 21.454421997070312, + "learning_rate": 1.709478021978022e-05, + "loss": 0.4283, + "step": 23955 + }, + { + "epoch": 65.81318681318682, + "grad_norm": 11.233359336853027, + "learning_rate": 1.7093406593406595e-05, + "loss": 0.2059, + "step": 23956 + }, + { + "epoch": 65.81593406593407, + "grad_norm": 17.830419540405273, + "learning_rate": 1.7092032967032968e-05, + "loss": 0.3819, + "step": 23957 + }, + { + "epoch": 65.81868131868131, + "grad_norm": 9.591200828552246, + "learning_rate": 1.709065934065934e-05, + "loss": 0.216, + "step": 23958 + }, + { + "epoch": 65.82142857142857, + "grad_norm": 9.418740272521973, + "learning_rate": 1.7089285714285715e-05, + "loss": 0.1459, + "step": 23959 + }, + { + "epoch": 65.82417582417582, + "grad_norm": 0.6534411907196045, + "learning_rate": 1.7087912087912088e-05, + "loss": 0.0084, + "step": 23960 + }, + { + "epoch": 65.82692307692308, + "grad_norm": 7.156965732574463, + "learning_rate": 1.708653846153846e-05, + "loss": 0.1282, + "step": 23961 + }, + { + "epoch": 65.82967032967034, + "grad_norm": 7.859875679016113, + "learning_rate": 1.7085164835164835e-05, + "loss": 0.0861, + "step": 23962 + }, + { + "epoch": 65.83241758241758, + "grad_norm": 5.378879070281982, + "learning_rate": 1.708379120879121e-05, + "loss": 0.0579, + "step": 23963 + }, + { + "epoch": 65.83516483516483, + "grad_norm": 6.479548454284668, + "learning_rate": 1.7082417582417585e-05, + "loss": 0.1073, + "step": 23964 + }, + { + "epoch": 65.83791208791209, + "grad_norm": 20.30757713317871, + "learning_rate": 1.708104395604396e-05, + "loss": 0.3145, + "step": 23965 + }, + { + "epoch": 65.84065934065934, + "grad_norm": 12.591293334960938, + "learning_rate": 1.707967032967033e-05, + "loss": 0.1616, + "step": 23966 + }, + { + "epoch": 65.8434065934066, + "grad_norm": 8.083179473876953, + "learning_rate": 1.7078296703296702e-05, + "loss": 0.1184, + "step": 23967 + }, + { + "epoch": 65.84615384615384, + "grad_norm": 13.313417434692383, + "learning_rate": 1.7076923076923076e-05, + "loss": 0.2881, + "step": 23968 + }, + { + "epoch": 65.8489010989011, + "grad_norm": 24.61526870727539, + "learning_rate": 1.7075549450549452e-05, + "loss": 0.3467, + "step": 23969 + }, + { + "epoch": 65.85164835164835, + "grad_norm": 13.931982040405273, + "learning_rate": 1.7074175824175826e-05, + "loss": 0.233, + "step": 23970 + }, + { + "epoch": 65.8543956043956, + "grad_norm": 16.21027946472168, + "learning_rate": 1.70728021978022e-05, + "loss": 0.3716, + "step": 23971 + }, + { + "epoch": 65.85714285714286, + "grad_norm": 6.0291218757629395, + "learning_rate": 1.7071428571428573e-05, + "loss": 0.0654, + "step": 23972 + }, + { + "epoch": 65.85989010989012, + "grad_norm": 10.19442081451416, + "learning_rate": 1.7070054945054946e-05, + "loss": 0.1633, + "step": 23973 + }, + { + "epoch": 65.86263736263736, + "grad_norm": 17.41166114807129, + "learning_rate": 1.706868131868132e-05, + "loss": 0.5366, + "step": 23974 + }, + { + "epoch": 65.86538461538461, + "grad_norm": 11.722028732299805, + "learning_rate": 1.7067307692307693e-05, + "loss": 0.1581, + "step": 23975 + }, + { + "epoch": 65.86813186813187, + "grad_norm": 12.262102127075195, + "learning_rate": 1.7065934065934066e-05, + "loss": 0.2277, + "step": 23976 + }, + { + "epoch": 65.87087912087912, + "grad_norm": 26.344362258911133, + "learning_rate": 1.706456043956044e-05, + "loss": 0.3838, + "step": 23977 + }, + { + "epoch": 65.87362637362638, + "grad_norm": 5.752313137054443, + "learning_rate": 1.7063186813186813e-05, + "loss": 0.0604, + "step": 23978 + }, + { + "epoch": 65.87637362637362, + "grad_norm": 9.128525733947754, + "learning_rate": 1.706181318681319e-05, + "loss": 0.1849, + "step": 23979 + }, + { + "epoch": 65.87912087912088, + "grad_norm": 12.530045509338379, + "learning_rate": 1.7060439560439563e-05, + "loss": 0.2382, + "step": 23980 + }, + { + "epoch": 65.88186813186813, + "grad_norm": 18.796995162963867, + "learning_rate": 1.7059065934065933e-05, + "loss": 0.212, + "step": 23981 + }, + { + "epoch": 65.88461538461539, + "grad_norm": 11.888846397399902, + "learning_rate": 1.7057692307692307e-05, + "loss": 0.2957, + "step": 23982 + }, + { + "epoch": 65.88736263736264, + "grad_norm": 7.3617401123046875, + "learning_rate": 1.705631868131868e-05, + "loss": 0.118, + "step": 23983 + }, + { + "epoch": 65.89010989010988, + "grad_norm": 10.614693641662598, + "learning_rate": 1.7054945054945057e-05, + "loss": 0.1229, + "step": 23984 + }, + { + "epoch": 65.89285714285714, + "grad_norm": 9.875166893005371, + "learning_rate": 1.705357142857143e-05, + "loss": 0.1941, + "step": 23985 + }, + { + "epoch": 65.8956043956044, + "grad_norm": 12.1836519241333, + "learning_rate": 1.7052197802197804e-05, + "loss": 0.2016, + "step": 23986 + }, + { + "epoch": 65.89835164835165, + "grad_norm": 12.911211013793945, + "learning_rate": 1.7050824175824177e-05, + "loss": 0.1914, + "step": 23987 + }, + { + "epoch": 65.9010989010989, + "grad_norm": 25.734296798706055, + "learning_rate": 1.704945054945055e-05, + "loss": 0.6334, + "step": 23988 + }, + { + "epoch": 65.90384615384616, + "grad_norm": 6.349938869476318, + "learning_rate": 1.7048076923076924e-05, + "loss": 0.12, + "step": 23989 + }, + { + "epoch": 65.9065934065934, + "grad_norm": 5.275228023529053, + "learning_rate": 1.7046703296703297e-05, + "loss": 0.0673, + "step": 23990 + }, + { + "epoch": 65.90934065934066, + "grad_norm": 12.95989990234375, + "learning_rate": 1.704532967032967e-05, + "loss": 0.1779, + "step": 23991 + }, + { + "epoch": 65.91208791208791, + "grad_norm": 15.475417137145996, + "learning_rate": 1.7043956043956044e-05, + "loss": 0.1675, + "step": 23992 + }, + { + "epoch": 65.91483516483517, + "grad_norm": 18.558670043945312, + "learning_rate": 1.7042582417582417e-05, + "loss": 0.4187, + "step": 23993 + }, + { + "epoch": 65.91758241758242, + "grad_norm": 11.60758113861084, + "learning_rate": 1.7041208791208794e-05, + "loss": 0.1898, + "step": 23994 + }, + { + "epoch": 65.92032967032966, + "grad_norm": 8.05626392364502, + "learning_rate": 1.7039835164835168e-05, + "loss": 0.1914, + "step": 23995 + }, + { + "epoch": 65.92307692307692, + "grad_norm": 12.5924072265625, + "learning_rate": 1.7038461538461538e-05, + "loss": 0.2544, + "step": 23996 + }, + { + "epoch": 65.92582417582418, + "grad_norm": 10.407829284667969, + "learning_rate": 1.703708791208791e-05, + "loss": 0.2214, + "step": 23997 + }, + { + "epoch": 65.92857142857143, + "grad_norm": 18.380163192749023, + "learning_rate": 1.7035714285714285e-05, + "loss": 0.3652, + "step": 23998 + }, + { + "epoch": 65.93131868131869, + "grad_norm": 14.519978523254395, + "learning_rate": 1.703434065934066e-05, + "loss": 0.1564, + "step": 23999 + }, + { + "epoch": 65.93406593406593, + "grad_norm": 8.607497215270996, + "learning_rate": 1.7032967032967035e-05, + "loss": 0.0397, + "step": 24000 + }, + { + "epoch": 65.93681318681318, + "grad_norm": 19.052553176879883, + "learning_rate": 1.7031593406593408e-05, + "loss": 0.2228, + "step": 24001 + }, + { + "epoch": 65.93956043956044, + "grad_norm": 24.07705307006836, + "learning_rate": 1.703021978021978e-05, + "loss": 0.7955, + "step": 24002 + }, + { + "epoch": 65.9423076923077, + "grad_norm": 12.44604778289795, + "learning_rate": 1.7028846153846155e-05, + "loss": 0.2797, + "step": 24003 + }, + { + "epoch": 65.94505494505495, + "grad_norm": 17.40606117248535, + "learning_rate": 1.702747252747253e-05, + "loss": 0.4396, + "step": 24004 + }, + { + "epoch": 65.9478021978022, + "grad_norm": 15.633919715881348, + "learning_rate": 1.7026098901098902e-05, + "loss": 0.2605, + "step": 24005 + }, + { + "epoch": 65.95054945054945, + "grad_norm": 7.15366792678833, + "learning_rate": 1.7024725274725275e-05, + "loss": 0.2014, + "step": 24006 + }, + { + "epoch": 65.9532967032967, + "grad_norm": 10.327659606933594, + "learning_rate": 1.702335164835165e-05, + "loss": 0.1571, + "step": 24007 + }, + { + "epoch": 65.95604395604396, + "grad_norm": 6.944056987762451, + "learning_rate": 1.7021978021978022e-05, + "loss": 0.0506, + "step": 24008 + }, + { + "epoch": 65.95879120879121, + "grad_norm": 4.70759391784668, + "learning_rate": 1.70206043956044e-05, + "loss": 0.0581, + "step": 24009 + }, + { + "epoch": 65.96153846153847, + "grad_norm": 8.520628929138184, + "learning_rate": 1.7019230769230772e-05, + "loss": 0.3203, + "step": 24010 + }, + { + "epoch": 65.96428571428571, + "grad_norm": 14.565634727478027, + "learning_rate": 1.7017857142857142e-05, + "loss": 0.3323, + "step": 24011 + }, + { + "epoch": 65.96703296703296, + "grad_norm": 5.23042631149292, + "learning_rate": 1.7016483516483516e-05, + "loss": 0.0895, + "step": 24012 + }, + { + "epoch": 65.96978021978022, + "grad_norm": 15.769078254699707, + "learning_rate": 1.701510989010989e-05, + "loss": 0.3809, + "step": 24013 + }, + { + "epoch": 65.97252747252747, + "grad_norm": 10.550079345703125, + "learning_rate": 1.7013736263736266e-05, + "loss": 0.2199, + "step": 24014 + }, + { + "epoch": 65.97527472527473, + "grad_norm": 8.742097854614258, + "learning_rate": 1.701236263736264e-05, + "loss": 0.1768, + "step": 24015 + }, + { + "epoch": 65.97802197802197, + "grad_norm": 10.199941635131836, + "learning_rate": 1.7010989010989013e-05, + "loss": 0.2995, + "step": 24016 + }, + { + "epoch": 65.98076923076923, + "grad_norm": 17.482728958129883, + "learning_rate": 1.7009615384615386e-05, + "loss": 0.5467, + "step": 24017 + }, + { + "epoch": 65.98351648351648, + "grad_norm": 7.067507266998291, + "learning_rate": 1.700824175824176e-05, + "loss": 0.0992, + "step": 24018 + }, + { + "epoch": 65.98626373626374, + "grad_norm": 9.815210342407227, + "learning_rate": 1.7006868131868133e-05, + "loss": 0.2345, + "step": 24019 + }, + { + "epoch": 65.98901098901099, + "grad_norm": 2.688784599304199, + "learning_rate": 1.7005494505494506e-05, + "loss": 0.0321, + "step": 24020 + }, + { + "epoch": 65.99175824175825, + "grad_norm": 8.488547325134277, + "learning_rate": 1.700412087912088e-05, + "loss": 0.1404, + "step": 24021 + }, + { + "epoch": 65.99450549450549, + "grad_norm": 12.766180992126465, + "learning_rate": 1.7002747252747253e-05, + "loss": 0.1097, + "step": 24022 + }, + { + "epoch": 65.99725274725274, + "grad_norm": 16.8944091796875, + "learning_rate": 1.7001373626373626e-05, + "loss": 0.2878, + "step": 24023 + }, + { + "epoch": 66.0, + "grad_norm": 51.113006591796875, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.5195, + "step": 24024 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.3953168044077135, + "eval_f1": 0.32456388184806767, + "eval_f1_DuraRiadoRio_64x64": 0.052980132450331126, + "eval_f1_Mole_64x64": 0.3352601156069364, + "eval_f1_Quebrado_64x64": 0.2085889570552147, + "eval_f1_RiadoRio_64x64": 0.5857988165680473, + "eval_f1_RioFechado_64x64": 0.44019138755980863, + "eval_loss": 7.494670391082764, + "eval_precision": 0.656591912104838, + "eval_precision_DuraRiadoRio_64x64": 0.5714285714285714, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.8947368421052632, + "eval_precision_RiadoRio_64x64": 0.532258064516129, + "eval_precision_RioFechado_64x64": 0.2845360824742268, + "eval_recall": 0.3940737995222799, + "eval_recall_DuraRiadoRio_64x64": 0.027777777777777776, + "eval_recall_Mole_64x64": 0.2013888888888889, + "eval_recall_Quebrado_64x64": 0.11805555555555555, + "eval_recall_RiadoRio_64x64": 0.6513157894736842, + "eval_recall_RioFechado_64x64": 0.971830985915493, + "eval_runtime": 1.7138, + "eval_samples_per_second": 423.63, + "eval_steps_per_second": 26.842, + "step": 24024 + }, + { + "epoch": 66.00274725274726, + "grad_norm": 2.662806987762451, + "learning_rate": 1.6998626373626377e-05, + "loss": 0.0437, + "step": 24025 + }, + { + "epoch": 66.00549450549451, + "grad_norm": 16.651451110839844, + "learning_rate": 1.6997252747252747e-05, + "loss": 0.5371, + "step": 24026 + }, + { + "epoch": 66.00824175824175, + "grad_norm": 7.638054370880127, + "learning_rate": 1.699587912087912e-05, + "loss": 0.0962, + "step": 24027 + }, + { + "epoch": 66.01098901098901, + "grad_norm": 21.088022232055664, + "learning_rate": 1.6994505494505493e-05, + "loss": 0.5904, + "step": 24028 + }, + { + "epoch": 66.01373626373626, + "grad_norm": 15.588183403015137, + "learning_rate": 1.699313186813187e-05, + "loss": 0.2534, + "step": 24029 + }, + { + "epoch": 66.01648351648352, + "grad_norm": 8.651721000671387, + "learning_rate": 1.6991758241758244e-05, + "loss": 0.0605, + "step": 24030 + }, + { + "epoch": 66.01923076923077, + "grad_norm": 13.586865425109863, + "learning_rate": 1.6990384615384617e-05, + "loss": 0.3165, + "step": 24031 + }, + { + "epoch": 66.02197802197803, + "grad_norm": 20.35411262512207, + "learning_rate": 1.698901098901099e-05, + "loss": 0.3723, + "step": 24032 + }, + { + "epoch": 66.02472527472527, + "grad_norm": 7.862538814544678, + "learning_rate": 1.6987637362637364e-05, + "loss": 0.0757, + "step": 24033 + }, + { + "epoch": 66.02747252747253, + "grad_norm": 12.447115898132324, + "learning_rate": 1.6986263736263737e-05, + "loss": 0.3382, + "step": 24034 + }, + { + "epoch": 66.03021978021978, + "grad_norm": 11.129426002502441, + "learning_rate": 1.698489010989011e-05, + "loss": 0.1845, + "step": 24035 + }, + { + "epoch": 66.03296703296704, + "grad_norm": 12.660420417785645, + "learning_rate": 1.6983516483516484e-05, + "loss": 0.2887, + "step": 24036 + }, + { + "epoch": 66.03571428571429, + "grad_norm": 2.1500372886657715, + "learning_rate": 1.6982142857142858e-05, + "loss": 0.025, + "step": 24037 + }, + { + "epoch": 66.03846153846153, + "grad_norm": 16.419910430908203, + "learning_rate": 1.698076923076923e-05, + "loss": 0.2749, + "step": 24038 + }, + { + "epoch": 66.04120879120879, + "grad_norm": 12.439788818359375, + "learning_rate": 1.6979395604395608e-05, + "loss": 0.0766, + "step": 24039 + }, + { + "epoch": 66.04395604395604, + "grad_norm": 18.222517013549805, + "learning_rate": 1.697802197802198e-05, + "loss": 0.4702, + "step": 24040 + }, + { + "epoch": 66.0467032967033, + "grad_norm": 5.5331339836120605, + "learning_rate": 1.697664835164835e-05, + "loss": 0.0812, + "step": 24041 + }, + { + "epoch": 66.04945054945055, + "grad_norm": 5.1931233406066895, + "learning_rate": 1.6975274725274725e-05, + "loss": 0.0456, + "step": 24042 + }, + { + "epoch": 66.0521978021978, + "grad_norm": 20.445714950561523, + "learning_rate": 1.6973901098901098e-05, + "loss": 0.314, + "step": 24043 + }, + { + "epoch": 66.05494505494505, + "grad_norm": 4.425958156585693, + "learning_rate": 1.6972527472527475e-05, + "loss": 0.0569, + "step": 24044 + }, + { + "epoch": 66.0576923076923, + "grad_norm": 17.88526725769043, + "learning_rate": 1.6971153846153848e-05, + "loss": 0.3236, + "step": 24045 + }, + { + "epoch": 66.06043956043956, + "grad_norm": 14.55229663848877, + "learning_rate": 1.696978021978022e-05, + "loss": 0.2091, + "step": 24046 + }, + { + "epoch": 66.06318681318682, + "grad_norm": 14.164923667907715, + "learning_rate": 1.6968406593406595e-05, + "loss": 0.4835, + "step": 24047 + }, + { + "epoch": 66.06593406593407, + "grad_norm": 19.840742111206055, + "learning_rate": 1.696703296703297e-05, + "loss": 0.5279, + "step": 24048 + }, + { + "epoch": 66.06868131868131, + "grad_norm": 10.104036331176758, + "learning_rate": 1.6965659340659342e-05, + "loss": 0.1393, + "step": 24049 + }, + { + "epoch": 66.07142857142857, + "grad_norm": 18.465591430664062, + "learning_rate": 1.6964285714285715e-05, + "loss": 0.402, + "step": 24050 + }, + { + "epoch": 66.07417582417582, + "grad_norm": 9.248054504394531, + "learning_rate": 1.696291208791209e-05, + "loss": 0.1251, + "step": 24051 + }, + { + "epoch": 66.07692307692308, + "grad_norm": 7.567388534545898, + "learning_rate": 1.6961538461538462e-05, + "loss": 0.0777, + "step": 24052 + }, + { + "epoch": 66.07967032967034, + "grad_norm": 11.170299530029297, + "learning_rate": 1.6960164835164835e-05, + "loss": 0.1675, + "step": 24053 + }, + { + "epoch": 66.08241758241758, + "grad_norm": 11.463724136352539, + "learning_rate": 1.6958791208791212e-05, + "loss": 0.15, + "step": 24054 + }, + { + "epoch": 66.08516483516483, + "grad_norm": 16.14664077758789, + "learning_rate": 1.6957417582417586e-05, + "loss": 0.3381, + "step": 24055 + }, + { + "epoch": 66.08791208791209, + "grad_norm": 25.341720581054688, + "learning_rate": 1.6956043956043956e-05, + "loss": 0.1513, + "step": 24056 + }, + { + "epoch": 66.09065934065934, + "grad_norm": 17.02794075012207, + "learning_rate": 1.695467032967033e-05, + "loss": 0.5421, + "step": 24057 + }, + { + "epoch": 66.0934065934066, + "grad_norm": 15.0161771774292, + "learning_rate": 1.6953296703296702e-05, + "loss": 0.2611, + "step": 24058 + }, + { + "epoch": 66.09615384615384, + "grad_norm": 21.31981658935547, + "learning_rate": 1.6951923076923076e-05, + "loss": 0.4177, + "step": 24059 + }, + { + "epoch": 66.0989010989011, + "grad_norm": 15.279703140258789, + "learning_rate": 1.6950549450549453e-05, + "loss": 0.3169, + "step": 24060 + }, + { + "epoch": 66.10164835164835, + "grad_norm": 2.056955337524414, + "learning_rate": 1.6949175824175826e-05, + "loss": 0.0299, + "step": 24061 + }, + { + "epoch": 66.1043956043956, + "grad_norm": 6.816380977630615, + "learning_rate": 1.69478021978022e-05, + "loss": 0.1439, + "step": 24062 + }, + { + "epoch": 66.10714285714286, + "grad_norm": 5.707094669342041, + "learning_rate": 1.6946428571428573e-05, + "loss": 0.0676, + "step": 24063 + }, + { + "epoch": 66.10989010989012, + "grad_norm": 14.090936660766602, + "learning_rate": 1.6945054945054943e-05, + "loss": 0.386, + "step": 24064 + }, + { + "epoch": 66.11263736263736, + "grad_norm": 8.342535018920898, + "learning_rate": 1.694368131868132e-05, + "loss": 0.1217, + "step": 24065 + }, + { + "epoch": 66.11538461538461, + "grad_norm": 9.466864585876465, + "learning_rate": 1.6942307692307693e-05, + "loss": 0.1819, + "step": 24066 + }, + { + "epoch": 66.11813186813187, + "grad_norm": 6.435956001281738, + "learning_rate": 1.6940934065934067e-05, + "loss": 0.0926, + "step": 24067 + }, + { + "epoch": 66.12087912087912, + "grad_norm": 13.239033699035645, + "learning_rate": 1.693956043956044e-05, + "loss": 0.207, + "step": 24068 + }, + { + "epoch": 66.12362637362638, + "grad_norm": 5.498947620391846, + "learning_rate": 1.6938186813186813e-05, + "loss": 0.0838, + "step": 24069 + }, + { + "epoch": 66.12637362637362, + "grad_norm": 16.523441314697266, + "learning_rate": 1.693681318681319e-05, + "loss": 0.3072, + "step": 24070 + }, + { + "epoch": 66.12912087912088, + "grad_norm": 17.68167495727539, + "learning_rate": 1.693543956043956e-05, + "loss": 0.5141, + "step": 24071 + }, + { + "epoch": 66.13186813186813, + "grad_norm": 16.662065505981445, + "learning_rate": 1.6934065934065934e-05, + "loss": 0.2497, + "step": 24072 + }, + { + "epoch": 66.13461538461539, + "grad_norm": 4.360153675079346, + "learning_rate": 1.6932692307692307e-05, + "loss": 0.0315, + "step": 24073 + }, + { + "epoch": 66.13736263736264, + "grad_norm": 12.660449981689453, + "learning_rate": 1.693131868131868e-05, + "loss": 0.306, + "step": 24074 + }, + { + "epoch": 66.14010989010988, + "grad_norm": 13.20985221862793, + "learning_rate": 1.6929945054945057e-05, + "loss": 0.5098, + "step": 24075 + }, + { + "epoch": 66.14285714285714, + "grad_norm": 19.533357620239258, + "learning_rate": 1.692857142857143e-05, + "loss": 0.4897, + "step": 24076 + }, + { + "epoch": 66.1456043956044, + "grad_norm": 10.615779876708984, + "learning_rate": 1.6927197802197804e-05, + "loss": 0.0919, + "step": 24077 + }, + { + "epoch": 66.14835164835165, + "grad_norm": 16.22401237487793, + "learning_rate": 1.6925824175824177e-05, + "loss": 0.3322, + "step": 24078 + }, + { + "epoch": 66.1510989010989, + "grad_norm": 2.098790168762207, + "learning_rate": 1.6924450549450547e-05, + "loss": 0.0284, + "step": 24079 + }, + { + "epoch": 66.15384615384616, + "grad_norm": 4.716526031494141, + "learning_rate": 1.6923076923076924e-05, + "loss": 0.0957, + "step": 24080 + }, + { + "epoch": 66.1565934065934, + "grad_norm": 17.002946853637695, + "learning_rate": 1.6921703296703298e-05, + "loss": 0.537, + "step": 24081 + }, + { + "epoch": 66.15934065934066, + "grad_norm": 9.180018424987793, + "learning_rate": 1.692032967032967e-05, + "loss": 0.1386, + "step": 24082 + }, + { + "epoch": 66.16208791208791, + "grad_norm": 13.296189308166504, + "learning_rate": 1.6918956043956044e-05, + "loss": 0.3147, + "step": 24083 + }, + { + "epoch": 66.16483516483517, + "grad_norm": 29.20854949951172, + "learning_rate": 1.6917582417582418e-05, + "loss": 0.7769, + "step": 24084 + }, + { + "epoch": 66.16758241758242, + "grad_norm": 14.093253135681152, + "learning_rate": 1.6916208791208795e-05, + "loss": 0.3932, + "step": 24085 + }, + { + "epoch": 66.17032967032966, + "grad_norm": 3.754258871078491, + "learning_rate": 1.6914835164835165e-05, + "loss": 0.0485, + "step": 24086 + }, + { + "epoch": 66.17307692307692, + "grad_norm": 9.855661392211914, + "learning_rate": 1.6913461538461538e-05, + "loss": 0.248, + "step": 24087 + }, + { + "epoch": 66.17582417582418, + "grad_norm": 7.56883430480957, + "learning_rate": 1.691208791208791e-05, + "loss": 0.1153, + "step": 24088 + }, + { + "epoch": 66.17857142857143, + "grad_norm": 11.864703178405762, + "learning_rate": 1.6910714285714285e-05, + "loss": 0.261, + "step": 24089 + }, + { + "epoch": 66.18131868131869, + "grad_norm": 15.279314994812012, + "learning_rate": 1.690934065934066e-05, + "loss": 0.3361, + "step": 24090 + }, + { + "epoch": 66.18406593406593, + "grad_norm": 21.163352966308594, + "learning_rate": 1.6907967032967035e-05, + "loss": 0.3967, + "step": 24091 + }, + { + "epoch": 66.18681318681318, + "grad_norm": 23.882265090942383, + "learning_rate": 1.690659340659341e-05, + "loss": 0.5344, + "step": 24092 + }, + { + "epoch": 66.18956043956044, + "grad_norm": 3.168148994445801, + "learning_rate": 1.6905219780219782e-05, + "loss": 0.0346, + "step": 24093 + }, + { + "epoch": 66.1923076923077, + "grad_norm": 17.174041748046875, + "learning_rate": 1.6903846153846152e-05, + "loss": 0.3625, + "step": 24094 + }, + { + "epoch": 66.19505494505495, + "grad_norm": 9.220351219177246, + "learning_rate": 1.690247252747253e-05, + "loss": 0.1499, + "step": 24095 + }, + { + "epoch": 66.1978021978022, + "grad_norm": 10.06676959991455, + "learning_rate": 1.6901098901098902e-05, + "loss": 0.2231, + "step": 24096 + }, + { + "epoch": 66.20054945054945, + "grad_norm": 4.333787441253662, + "learning_rate": 1.6899725274725276e-05, + "loss": 0.0528, + "step": 24097 + }, + { + "epoch": 66.2032967032967, + "grad_norm": 17.196046829223633, + "learning_rate": 1.689835164835165e-05, + "loss": 0.3698, + "step": 24098 + }, + { + "epoch": 66.20604395604396, + "grad_norm": 7.695697784423828, + "learning_rate": 1.6896978021978022e-05, + "loss": 0.1641, + "step": 24099 + }, + { + "epoch": 66.20879120879121, + "grad_norm": 4.0716471672058105, + "learning_rate": 1.68956043956044e-05, + "loss": 0.0614, + "step": 24100 + }, + { + "epoch": 66.21153846153847, + "grad_norm": 4.388802528381348, + "learning_rate": 1.689423076923077e-05, + "loss": 0.0506, + "step": 24101 + }, + { + "epoch": 66.21428571428571, + "grad_norm": 8.394695281982422, + "learning_rate": 1.6892857142857143e-05, + "loss": 0.0929, + "step": 24102 + }, + { + "epoch": 66.21703296703296, + "grad_norm": 13.129633903503418, + "learning_rate": 1.6891483516483516e-05, + "loss": 0.2472, + "step": 24103 + }, + { + "epoch": 66.21978021978022, + "grad_norm": 6.40693473815918, + "learning_rate": 1.689010989010989e-05, + "loss": 0.091, + "step": 24104 + }, + { + "epoch": 66.22252747252747, + "grad_norm": 9.609192848205566, + "learning_rate": 1.6888736263736266e-05, + "loss": 0.1993, + "step": 24105 + }, + { + "epoch": 66.22527472527473, + "grad_norm": 22.05345344543457, + "learning_rate": 1.688736263736264e-05, + "loss": 0.9894, + "step": 24106 + }, + { + "epoch": 66.22802197802197, + "grad_norm": 16.016664505004883, + "learning_rate": 1.6885989010989013e-05, + "loss": 0.4402, + "step": 24107 + }, + { + "epoch": 66.23076923076923, + "grad_norm": 1.4321095943450928, + "learning_rate": 1.6884615384615386e-05, + "loss": 0.0196, + "step": 24108 + }, + { + "epoch": 66.23351648351648, + "grad_norm": 11.582387924194336, + "learning_rate": 1.6883241758241756e-05, + "loss": 0.1884, + "step": 24109 + }, + { + "epoch": 66.23626373626374, + "grad_norm": 13.017916679382324, + "learning_rate": 1.6881868131868133e-05, + "loss": 0.1508, + "step": 24110 + }, + { + "epoch": 66.23901098901099, + "grad_norm": 6.930899143218994, + "learning_rate": 1.6880494505494507e-05, + "loss": 0.1312, + "step": 24111 + }, + { + "epoch": 66.24175824175825, + "grad_norm": 8.874621391296387, + "learning_rate": 1.687912087912088e-05, + "loss": 0.1597, + "step": 24112 + }, + { + "epoch": 66.24450549450549, + "grad_norm": 9.0726957321167, + "learning_rate": 1.6877747252747253e-05, + "loss": 0.1298, + "step": 24113 + }, + { + "epoch": 66.24725274725274, + "grad_norm": 9.92884635925293, + "learning_rate": 1.6876373626373627e-05, + "loss": 0.2362, + "step": 24114 + }, + { + "epoch": 66.25, + "grad_norm": 12.272010803222656, + "learning_rate": 1.6875000000000004e-05, + "loss": 0.239, + "step": 24115 + }, + { + "epoch": 66.25274725274726, + "grad_norm": 6.345081329345703, + "learning_rate": 1.6873626373626374e-05, + "loss": 0.1092, + "step": 24116 + }, + { + "epoch": 66.25549450549451, + "grad_norm": 10.589208602905273, + "learning_rate": 1.6872252747252747e-05, + "loss": 0.1387, + "step": 24117 + }, + { + "epoch": 66.25824175824175, + "grad_norm": 17.695606231689453, + "learning_rate": 1.687087912087912e-05, + "loss": 0.4697, + "step": 24118 + }, + { + "epoch": 66.26098901098901, + "grad_norm": 23.39122200012207, + "learning_rate": 1.6869505494505494e-05, + "loss": 0.4879, + "step": 24119 + }, + { + "epoch": 66.26373626373626, + "grad_norm": 8.28425407409668, + "learning_rate": 1.686813186813187e-05, + "loss": 0.1174, + "step": 24120 + }, + { + "epoch": 66.26648351648352, + "grad_norm": 5.155109882354736, + "learning_rate": 1.6866758241758244e-05, + "loss": 0.105, + "step": 24121 + }, + { + "epoch": 66.26923076923077, + "grad_norm": 9.167926788330078, + "learning_rate": 1.6865384615384617e-05, + "loss": 0.1298, + "step": 24122 + }, + { + "epoch": 66.27197802197803, + "grad_norm": 14.810372352600098, + "learning_rate": 1.686401098901099e-05, + "loss": 0.3132, + "step": 24123 + }, + { + "epoch": 66.27472527472527, + "grad_norm": 13.307365417480469, + "learning_rate": 1.686263736263736e-05, + "loss": 0.2469, + "step": 24124 + }, + { + "epoch": 66.27747252747253, + "grad_norm": 18.816085815429688, + "learning_rate": 1.6861263736263738e-05, + "loss": 0.6355, + "step": 24125 + }, + { + "epoch": 66.28021978021978, + "grad_norm": 13.489545822143555, + "learning_rate": 1.685989010989011e-05, + "loss": 0.1696, + "step": 24126 + }, + { + "epoch": 66.28296703296704, + "grad_norm": 5.4579010009765625, + "learning_rate": 1.6858516483516484e-05, + "loss": 0.0518, + "step": 24127 + }, + { + "epoch": 66.28571428571429, + "grad_norm": 5.969086170196533, + "learning_rate": 1.6857142857142858e-05, + "loss": 0.1006, + "step": 24128 + }, + { + "epoch": 66.28846153846153, + "grad_norm": 5.8415751457214355, + "learning_rate": 1.685576923076923e-05, + "loss": 0.0885, + "step": 24129 + }, + { + "epoch": 66.29120879120879, + "grad_norm": 13.616491317749023, + "learning_rate": 1.6854395604395605e-05, + "loss": 0.2729, + "step": 24130 + }, + { + "epoch": 66.29395604395604, + "grad_norm": 14.14544677734375, + "learning_rate": 1.6853021978021978e-05, + "loss": 0.4847, + "step": 24131 + }, + { + "epoch": 66.2967032967033, + "grad_norm": 17.23800277709961, + "learning_rate": 1.685164835164835e-05, + "loss": 0.4377, + "step": 24132 + }, + { + "epoch": 66.29945054945055, + "grad_norm": 6.550265312194824, + "learning_rate": 1.6850274725274725e-05, + "loss": 0.1089, + "step": 24133 + }, + { + "epoch": 66.3021978021978, + "grad_norm": 8.573355674743652, + "learning_rate": 1.68489010989011e-05, + "loss": 0.1825, + "step": 24134 + }, + { + "epoch": 66.30494505494505, + "grad_norm": 11.614031791687012, + "learning_rate": 1.6847527472527475e-05, + "loss": 0.3024, + "step": 24135 + }, + { + "epoch": 66.3076923076923, + "grad_norm": 4.074169158935547, + "learning_rate": 1.684615384615385e-05, + "loss": 0.0623, + "step": 24136 + }, + { + "epoch": 66.31043956043956, + "grad_norm": 13.131340980529785, + "learning_rate": 1.6844780219780222e-05, + "loss": 0.3027, + "step": 24137 + }, + { + "epoch": 66.31318681318682, + "grad_norm": 11.155202865600586, + "learning_rate": 1.6843406593406595e-05, + "loss": 0.2573, + "step": 24138 + }, + { + "epoch": 66.31593406593407, + "grad_norm": 6.6271209716796875, + "learning_rate": 1.6842032967032965e-05, + "loss": 0.1708, + "step": 24139 + }, + { + "epoch": 66.31868131868131, + "grad_norm": 7.1841936111450195, + "learning_rate": 1.6840659340659342e-05, + "loss": 0.1178, + "step": 24140 + }, + { + "epoch": 66.32142857142857, + "grad_norm": 13.893446922302246, + "learning_rate": 1.6839285714285716e-05, + "loss": 0.2028, + "step": 24141 + }, + { + "epoch": 66.32417582417582, + "grad_norm": 20.827003479003906, + "learning_rate": 1.683791208791209e-05, + "loss": 0.5642, + "step": 24142 + }, + { + "epoch": 66.32692307692308, + "grad_norm": 12.42609691619873, + "learning_rate": 1.6836538461538462e-05, + "loss": 0.1461, + "step": 24143 + }, + { + "epoch": 66.32967032967034, + "grad_norm": 5.534986972808838, + "learning_rate": 1.6835164835164836e-05, + "loss": 0.0368, + "step": 24144 + }, + { + "epoch": 66.33241758241758, + "grad_norm": 10.052825927734375, + "learning_rate": 1.683379120879121e-05, + "loss": 0.1686, + "step": 24145 + }, + { + "epoch": 66.33516483516483, + "grad_norm": 10.622224807739258, + "learning_rate": 1.6832417582417583e-05, + "loss": 0.2586, + "step": 24146 + }, + { + "epoch": 66.33791208791209, + "grad_norm": 9.819803237915039, + "learning_rate": 1.6831043956043956e-05, + "loss": 0.1132, + "step": 24147 + }, + { + "epoch": 66.34065934065934, + "grad_norm": 11.389691352844238, + "learning_rate": 1.682967032967033e-05, + "loss": 0.3177, + "step": 24148 + }, + { + "epoch": 66.3434065934066, + "grad_norm": 10.863724708557129, + "learning_rate": 1.6828296703296703e-05, + "loss": 0.1201, + "step": 24149 + }, + { + "epoch": 66.34615384615384, + "grad_norm": 9.678563117980957, + "learning_rate": 1.682692307692308e-05, + "loss": 0.1384, + "step": 24150 + }, + { + "epoch": 66.3489010989011, + "grad_norm": 5.42617130279541, + "learning_rate": 1.6825549450549453e-05, + "loss": 0.04, + "step": 24151 + }, + { + "epoch": 66.35164835164835, + "grad_norm": 7.2817182540893555, + "learning_rate": 1.6824175824175826e-05, + "loss": 0.0742, + "step": 24152 + }, + { + "epoch": 66.3543956043956, + "grad_norm": 13.324227333068848, + "learning_rate": 1.68228021978022e-05, + "loss": 0.2513, + "step": 24153 + }, + { + "epoch": 66.35714285714286, + "grad_norm": 10.520397186279297, + "learning_rate": 1.682142857142857e-05, + "loss": 0.184, + "step": 24154 + }, + { + "epoch": 66.35989010989012, + "grad_norm": 12.81907844543457, + "learning_rate": 1.6820054945054947e-05, + "loss": 0.2933, + "step": 24155 + }, + { + "epoch": 66.36263736263736, + "grad_norm": 12.849538803100586, + "learning_rate": 1.681868131868132e-05, + "loss": 0.424, + "step": 24156 + }, + { + "epoch": 66.36538461538461, + "grad_norm": 17.441436767578125, + "learning_rate": 1.6817307692307693e-05, + "loss": 0.2473, + "step": 24157 + }, + { + "epoch": 66.36813186813187, + "grad_norm": 15.114358901977539, + "learning_rate": 1.6815934065934067e-05, + "loss": 0.2837, + "step": 24158 + }, + { + "epoch": 66.37087912087912, + "grad_norm": 7.0959553718566895, + "learning_rate": 1.681456043956044e-05, + "loss": 0.0985, + "step": 24159 + }, + { + "epoch": 66.37362637362638, + "grad_norm": 33.29113006591797, + "learning_rate": 1.6813186813186814e-05, + "loss": 0.7996, + "step": 24160 + }, + { + "epoch": 66.37637362637362, + "grad_norm": 16.108455657958984, + "learning_rate": 1.6811813186813187e-05, + "loss": 0.4441, + "step": 24161 + }, + { + "epoch": 66.37912087912088, + "grad_norm": 16.770832061767578, + "learning_rate": 1.681043956043956e-05, + "loss": 0.2293, + "step": 24162 + }, + { + "epoch": 66.38186813186813, + "grad_norm": 9.890972137451172, + "learning_rate": 1.6809065934065934e-05, + "loss": 0.1396, + "step": 24163 + }, + { + "epoch": 66.38461538461539, + "grad_norm": 9.786373138427734, + "learning_rate": 1.6807692307692307e-05, + "loss": 0.0977, + "step": 24164 + }, + { + "epoch": 66.38736263736264, + "grad_norm": 26.591859817504883, + "learning_rate": 1.6806318681318684e-05, + "loss": 0.8325, + "step": 24165 + }, + { + "epoch": 66.39010989010988, + "grad_norm": 6.974837303161621, + "learning_rate": 1.6804945054945058e-05, + "loss": 0.1019, + "step": 24166 + }, + { + "epoch": 66.39285714285714, + "grad_norm": 1.013816237449646, + "learning_rate": 1.680357142857143e-05, + "loss": 0.0139, + "step": 24167 + }, + { + "epoch": 66.3956043956044, + "grad_norm": 5.824398994445801, + "learning_rate": 1.6802197802197804e-05, + "loss": 0.0749, + "step": 24168 + }, + { + "epoch": 66.39835164835165, + "grad_norm": 10.011160850524902, + "learning_rate": 1.6800824175824174e-05, + "loss": 0.2319, + "step": 24169 + }, + { + "epoch": 66.4010989010989, + "grad_norm": 15.753628730773926, + "learning_rate": 1.679945054945055e-05, + "loss": 0.4765, + "step": 24170 + }, + { + "epoch": 66.40384615384616, + "grad_norm": 3.62227725982666, + "learning_rate": 1.6798076923076925e-05, + "loss": 0.0583, + "step": 24171 + }, + { + "epoch": 66.4065934065934, + "grad_norm": 11.451849937438965, + "learning_rate": 1.6796703296703298e-05, + "loss": 0.362, + "step": 24172 + }, + { + "epoch": 66.40934065934066, + "grad_norm": 9.36434555053711, + "learning_rate": 1.679532967032967e-05, + "loss": 0.0869, + "step": 24173 + }, + { + "epoch": 66.41208791208791, + "grad_norm": 13.19161605834961, + "learning_rate": 1.6793956043956045e-05, + "loss": 0.1118, + "step": 24174 + }, + { + "epoch": 66.41483516483517, + "grad_norm": 6.8019022941589355, + "learning_rate": 1.6792582417582418e-05, + "loss": 0.1709, + "step": 24175 + }, + { + "epoch": 66.41758241758242, + "grad_norm": 12.132014274597168, + "learning_rate": 1.679120879120879e-05, + "loss": 0.2235, + "step": 24176 + }, + { + "epoch": 66.42032967032966, + "grad_norm": 7.765744686126709, + "learning_rate": 1.6789835164835165e-05, + "loss": 0.1372, + "step": 24177 + }, + { + "epoch": 66.42307692307692, + "grad_norm": 15.282699584960938, + "learning_rate": 1.678846153846154e-05, + "loss": 0.3114, + "step": 24178 + }, + { + "epoch": 66.42582417582418, + "grad_norm": 15.618497848510742, + "learning_rate": 1.6787087912087912e-05, + "loss": 0.2231, + "step": 24179 + }, + { + "epoch": 66.42857142857143, + "grad_norm": 30.049543380737305, + "learning_rate": 1.6785714285714285e-05, + "loss": 1.151, + "step": 24180 + }, + { + "epoch": 66.43131868131869, + "grad_norm": 12.003924369812012, + "learning_rate": 1.6784340659340662e-05, + "loss": 0.2515, + "step": 24181 + }, + { + "epoch": 66.43406593406593, + "grad_norm": 16.658496856689453, + "learning_rate": 1.6782967032967035e-05, + "loss": 0.2745, + "step": 24182 + }, + { + "epoch": 66.43681318681318, + "grad_norm": 6.007382392883301, + "learning_rate": 1.678159340659341e-05, + "loss": 0.1608, + "step": 24183 + }, + { + "epoch": 66.43956043956044, + "grad_norm": 2.241994857788086, + "learning_rate": 1.678021978021978e-05, + "loss": 0.0294, + "step": 24184 + }, + { + "epoch": 66.4423076923077, + "grad_norm": 10.282574653625488, + "learning_rate": 1.6778846153846152e-05, + "loss": 0.2149, + "step": 24185 + }, + { + "epoch": 66.44505494505495, + "grad_norm": 17.23691749572754, + "learning_rate": 1.677747252747253e-05, + "loss": 0.7844, + "step": 24186 + }, + { + "epoch": 66.4478021978022, + "grad_norm": 7.419919013977051, + "learning_rate": 1.6776098901098902e-05, + "loss": 0.0658, + "step": 24187 + }, + { + "epoch": 66.45054945054945, + "grad_norm": 10.100696563720703, + "learning_rate": 1.6774725274725276e-05, + "loss": 0.2017, + "step": 24188 + }, + { + "epoch": 66.4532967032967, + "grad_norm": 6.010370254516602, + "learning_rate": 1.677335164835165e-05, + "loss": 0.2537, + "step": 24189 + }, + { + "epoch": 66.45604395604396, + "grad_norm": 16.702219009399414, + "learning_rate": 1.6771978021978023e-05, + "loss": 0.4593, + "step": 24190 + }, + { + "epoch": 66.45879120879121, + "grad_norm": 12.785371780395508, + "learning_rate": 1.6770604395604396e-05, + "loss": 0.3641, + "step": 24191 + }, + { + "epoch": 66.46153846153847, + "grad_norm": 17.593219757080078, + "learning_rate": 1.676923076923077e-05, + "loss": 0.2583, + "step": 24192 + }, + { + "epoch": 66.46428571428571, + "grad_norm": 4.403104782104492, + "learning_rate": 1.6767857142857143e-05, + "loss": 0.0436, + "step": 24193 + }, + { + "epoch": 66.46703296703296, + "grad_norm": 4.70558500289917, + "learning_rate": 1.6766483516483516e-05, + "loss": 0.0855, + "step": 24194 + }, + { + "epoch": 66.46978021978022, + "grad_norm": 23.44442367553711, + "learning_rate": 1.676510989010989e-05, + "loss": 0.3614, + "step": 24195 + }, + { + "epoch": 66.47252747252747, + "grad_norm": 14.669761657714844, + "learning_rate": 1.6763736263736266e-05, + "loss": 0.3463, + "step": 24196 + }, + { + "epoch": 66.47527472527473, + "grad_norm": 22.046497344970703, + "learning_rate": 1.676236263736264e-05, + "loss": 0.7399, + "step": 24197 + }, + { + "epoch": 66.47802197802197, + "grad_norm": 9.325408935546875, + "learning_rate": 1.6760989010989013e-05, + "loss": 0.1232, + "step": 24198 + }, + { + "epoch": 66.48076923076923, + "grad_norm": 16.255632400512695, + "learning_rate": 1.6759615384615383e-05, + "loss": 0.2959, + "step": 24199 + }, + { + "epoch": 66.48351648351648, + "grad_norm": 10.980995178222656, + "learning_rate": 1.6758241758241757e-05, + "loss": 0.2042, + "step": 24200 + }, + { + "epoch": 66.48626373626374, + "grad_norm": 19.909387588500977, + "learning_rate": 1.6756868131868134e-05, + "loss": 0.3433, + "step": 24201 + }, + { + "epoch": 66.48901098901099, + "grad_norm": 7.6663899421691895, + "learning_rate": 1.6755494505494507e-05, + "loss": 0.1173, + "step": 24202 + }, + { + "epoch": 66.49175824175825, + "grad_norm": 14.600703239440918, + "learning_rate": 1.675412087912088e-05, + "loss": 0.3752, + "step": 24203 + }, + { + "epoch": 66.49450549450549, + "grad_norm": 8.624876976013184, + "learning_rate": 1.6752747252747254e-05, + "loss": 0.1504, + "step": 24204 + }, + { + "epoch": 66.49725274725274, + "grad_norm": 14.81829833984375, + "learning_rate": 1.6751373626373627e-05, + "loss": 0.1416, + "step": 24205 + }, + { + "epoch": 66.5, + "grad_norm": 12.634321212768555, + "learning_rate": 1.675e-05, + "loss": 0.2946, + "step": 24206 + }, + { + "epoch": 66.50274725274726, + "grad_norm": 20.057483673095703, + "learning_rate": 1.6748626373626374e-05, + "loss": 0.5354, + "step": 24207 + }, + { + "epoch": 66.50549450549451, + "grad_norm": 17.268760681152344, + "learning_rate": 1.6747252747252747e-05, + "loss": 0.4939, + "step": 24208 + }, + { + "epoch": 66.50824175824175, + "grad_norm": 8.50231647491455, + "learning_rate": 1.674587912087912e-05, + "loss": 0.1096, + "step": 24209 + }, + { + "epoch": 66.51098901098901, + "grad_norm": 6.087785720825195, + "learning_rate": 1.6744505494505494e-05, + "loss": 0.1021, + "step": 24210 + }, + { + "epoch": 66.51373626373626, + "grad_norm": 15.874465942382812, + "learning_rate": 1.674313186813187e-05, + "loss": 0.2386, + "step": 24211 + }, + { + "epoch": 66.51648351648352, + "grad_norm": 3.5327134132385254, + "learning_rate": 1.6741758241758244e-05, + "loss": 0.0588, + "step": 24212 + }, + { + "epoch": 66.51923076923077, + "grad_norm": 8.054304122924805, + "learning_rate": 1.6740384615384618e-05, + "loss": 0.1718, + "step": 24213 + }, + { + "epoch": 66.52197802197803, + "grad_norm": 17.05556869506836, + "learning_rate": 1.6739010989010988e-05, + "loss": 0.4561, + "step": 24214 + }, + { + "epoch": 66.52472527472527, + "grad_norm": 2.9011757373809814, + "learning_rate": 1.673763736263736e-05, + "loss": 0.0425, + "step": 24215 + }, + { + "epoch": 66.52747252747253, + "grad_norm": 17.274206161499023, + "learning_rate": 1.6736263736263738e-05, + "loss": 0.4572, + "step": 24216 + }, + { + "epoch": 66.53021978021978, + "grad_norm": 16.880922317504883, + "learning_rate": 1.673489010989011e-05, + "loss": 0.2506, + "step": 24217 + }, + { + "epoch": 66.53296703296704, + "grad_norm": 12.745655059814453, + "learning_rate": 1.6733516483516485e-05, + "loss": 0.3807, + "step": 24218 + }, + { + "epoch": 66.53571428571429, + "grad_norm": 8.26811408996582, + "learning_rate": 1.6732142857142858e-05, + "loss": 0.0853, + "step": 24219 + }, + { + "epoch": 66.53846153846153, + "grad_norm": 15.531513214111328, + "learning_rate": 1.673076923076923e-05, + "loss": 0.3495, + "step": 24220 + }, + { + "epoch": 66.54120879120879, + "grad_norm": 7.4059343338012695, + "learning_rate": 1.6729395604395605e-05, + "loss": 0.0869, + "step": 24221 + }, + { + "epoch": 66.54395604395604, + "grad_norm": 9.556861877441406, + "learning_rate": 1.672802197802198e-05, + "loss": 0.1971, + "step": 24222 + }, + { + "epoch": 66.5467032967033, + "grad_norm": 6.967145919799805, + "learning_rate": 1.6726648351648352e-05, + "loss": 0.0529, + "step": 24223 + }, + { + "epoch": 66.54945054945055, + "grad_norm": 10.023334503173828, + "learning_rate": 1.6725274725274725e-05, + "loss": 0.2976, + "step": 24224 + }, + { + "epoch": 66.5521978021978, + "grad_norm": 19.36716651916504, + "learning_rate": 1.67239010989011e-05, + "loss": 0.4895, + "step": 24225 + }, + { + "epoch": 66.55494505494505, + "grad_norm": 8.000229835510254, + "learning_rate": 1.6722527472527475e-05, + "loss": 0.2237, + "step": 24226 + }, + { + "epoch": 66.5576923076923, + "grad_norm": 9.457845687866211, + "learning_rate": 1.672115384615385e-05, + "loss": 0.1582, + "step": 24227 + }, + { + "epoch": 66.56043956043956, + "grad_norm": 8.392292976379395, + "learning_rate": 1.671978021978022e-05, + "loss": 0.1257, + "step": 24228 + }, + { + "epoch": 66.56318681318682, + "grad_norm": 22.39362144470215, + "learning_rate": 1.6718406593406592e-05, + "loss": 0.6898, + "step": 24229 + }, + { + "epoch": 66.56593406593407, + "grad_norm": 10.427874565124512, + "learning_rate": 1.6717032967032966e-05, + "loss": 0.1398, + "step": 24230 + }, + { + "epoch": 66.56868131868131, + "grad_norm": 2.1190671920776367, + "learning_rate": 1.6715659340659343e-05, + "loss": 0.0253, + "step": 24231 + }, + { + "epoch": 66.57142857142857, + "grad_norm": 5.03966760635376, + "learning_rate": 1.6714285714285716e-05, + "loss": 0.075, + "step": 24232 + }, + { + "epoch": 66.57417582417582, + "grad_norm": 8.29718017578125, + "learning_rate": 1.671291208791209e-05, + "loss": 0.1325, + "step": 24233 + }, + { + "epoch": 66.57692307692308, + "grad_norm": 20.18770980834961, + "learning_rate": 1.6711538461538463e-05, + "loss": 0.1865, + "step": 24234 + }, + { + "epoch": 66.57967032967034, + "grad_norm": 23.78361701965332, + "learning_rate": 1.6710164835164836e-05, + "loss": 0.5536, + "step": 24235 + }, + { + "epoch": 66.58241758241758, + "grad_norm": 4.162525177001953, + "learning_rate": 1.670879120879121e-05, + "loss": 0.068, + "step": 24236 + }, + { + "epoch": 66.58516483516483, + "grad_norm": 6.1390228271484375, + "learning_rate": 1.6707417582417583e-05, + "loss": 0.0886, + "step": 24237 + }, + { + "epoch": 66.58791208791209, + "grad_norm": 7.170651435852051, + "learning_rate": 1.6706043956043956e-05, + "loss": 0.0918, + "step": 24238 + }, + { + "epoch": 66.59065934065934, + "grad_norm": 6.2290472984313965, + "learning_rate": 1.670467032967033e-05, + "loss": 0.0882, + "step": 24239 + }, + { + "epoch": 66.5934065934066, + "grad_norm": 16.626705169677734, + "learning_rate": 1.6703296703296703e-05, + "loss": 0.4288, + "step": 24240 + }, + { + "epoch": 66.59615384615384, + "grad_norm": 15.157344818115234, + "learning_rate": 1.670192307692308e-05, + "loss": 0.213, + "step": 24241 + }, + { + "epoch": 66.5989010989011, + "grad_norm": 19.774904251098633, + "learning_rate": 1.6700549450549453e-05, + "loss": 0.4319, + "step": 24242 + }, + { + "epoch": 66.60164835164835, + "grad_norm": 16.03277015686035, + "learning_rate": 1.6699175824175823e-05, + "loss": 0.2893, + "step": 24243 + }, + { + "epoch": 66.6043956043956, + "grad_norm": 6.923455715179443, + "learning_rate": 1.6697802197802197e-05, + "loss": 0.1749, + "step": 24244 + }, + { + "epoch": 66.60714285714286, + "grad_norm": 18.102293014526367, + "learning_rate": 1.669642857142857e-05, + "loss": 0.46, + "step": 24245 + }, + { + "epoch": 66.60989010989012, + "grad_norm": 14.106287956237793, + "learning_rate": 1.6695054945054947e-05, + "loss": 0.3448, + "step": 24246 + }, + { + "epoch": 66.61263736263736, + "grad_norm": 8.931817054748535, + "learning_rate": 1.669368131868132e-05, + "loss": 0.1386, + "step": 24247 + }, + { + "epoch": 66.61538461538461, + "grad_norm": 5.230246067047119, + "learning_rate": 1.6692307692307694e-05, + "loss": 0.0491, + "step": 24248 + }, + { + "epoch": 66.61813186813187, + "grad_norm": 19.52317237854004, + "learning_rate": 1.6690934065934067e-05, + "loss": 0.6483, + "step": 24249 + }, + { + "epoch": 66.62087912087912, + "grad_norm": 3.550985336303711, + "learning_rate": 1.668956043956044e-05, + "loss": 0.0653, + "step": 24250 + }, + { + "epoch": 66.62362637362638, + "grad_norm": 15.970634460449219, + "learning_rate": 1.6688186813186814e-05, + "loss": 0.3819, + "step": 24251 + }, + { + "epoch": 66.62637362637362, + "grad_norm": 6.6024980545043945, + "learning_rate": 1.6686813186813187e-05, + "loss": 0.1291, + "step": 24252 + }, + { + "epoch": 66.62912087912088, + "grad_norm": 12.427111625671387, + "learning_rate": 1.668543956043956e-05, + "loss": 0.2612, + "step": 24253 + }, + { + "epoch": 66.63186813186813, + "grad_norm": 17.733097076416016, + "learning_rate": 1.6684065934065934e-05, + "loss": 0.3323, + "step": 24254 + }, + { + "epoch": 66.63461538461539, + "grad_norm": 10.38109302520752, + "learning_rate": 1.6682692307692308e-05, + "loss": 0.3643, + "step": 24255 + }, + { + "epoch": 66.63736263736264, + "grad_norm": 11.835598945617676, + "learning_rate": 1.6681318681318684e-05, + "loss": 0.2679, + "step": 24256 + }, + { + "epoch": 66.64010989010988, + "grad_norm": 18.929054260253906, + "learning_rate": 1.6679945054945058e-05, + "loss": 0.2792, + "step": 24257 + }, + { + "epoch": 66.64285714285714, + "grad_norm": 2.896188497543335, + "learning_rate": 1.6678571428571428e-05, + "loss": 0.0473, + "step": 24258 + }, + { + "epoch": 66.6456043956044, + "grad_norm": 26.027559280395508, + "learning_rate": 1.66771978021978e-05, + "loss": 0.523, + "step": 24259 + }, + { + "epoch": 66.64835164835165, + "grad_norm": 11.802626609802246, + "learning_rate": 1.6675824175824175e-05, + "loss": 0.2028, + "step": 24260 + }, + { + "epoch": 66.6510989010989, + "grad_norm": 4.938651084899902, + "learning_rate": 1.667445054945055e-05, + "loss": 0.0903, + "step": 24261 + }, + { + "epoch": 66.65384615384616, + "grad_norm": 4.939132213592529, + "learning_rate": 1.6673076923076925e-05, + "loss": 0.0584, + "step": 24262 + }, + { + "epoch": 66.6565934065934, + "grad_norm": 7.089598178863525, + "learning_rate": 1.66717032967033e-05, + "loss": 0.1878, + "step": 24263 + }, + { + "epoch": 66.65934065934066, + "grad_norm": 12.400973320007324, + "learning_rate": 1.6670329670329672e-05, + "loss": 0.3611, + "step": 24264 + }, + { + "epoch": 66.66208791208791, + "grad_norm": 3.475738286972046, + "learning_rate": 1.6668956043956045e-05, + "loss": 0.0485, + "step": 24265 + }, + { + "epoch": 66.66483516483517, + "grad_norm": 13.658805847167969, + "learning_rate": 1.666758241758242e-05, + "loss": 0.333, + "step": 24266 + }, + { + "epoch": 66.66758241758242, + "grad_norm": 3.421369791030884, + "learning_rate": 1.6666208791208792e-05, + "loss": 0.0393, + "step": 24267 + }, + { + "epoch": 66.67032967032966, + "grad_norm": 10.240153312683105, + "learning_rate": 1.6664835164835165e-05, + "loss": 0.2484, + "step": 24268 + }, + { + "epoch": 66.67307692307692, + "grad_norm": 15.394254684448242, + "learning_rate": 1.666346153846154e-05, + "loss": 0.3004, + "step": 24269 + }, + { + "epoch": 66.67582417582418, + "grad_norm": 4.430144309997559, + "learning_rate": 1.6662087912087912e-05, + "loss": 0.0892, + "step": 24270 + }, + { + "epoch": 66.67857142857143, + "grad_norm": 14.806602478027344, + "learning_rate": 1.666071428571429e-05, + "loss": 0.2563, + "step": 24271 + }, + { + "epoch": 66.68131868131869, + "grad_norm": 8.420989036560059, + "learning_rate": 1.6659340659340662e-05, + "loss": 0.2031, + "step": 24272 + }, + { + "epoch": 66.68406593406593, + "grad_norm": 5.431443214416504, + "learning_rate": 1.6657967032967032e-05, + "loss": 0.0864, + "step": 24273 + }, + { + "epoch": 66.68681318681318, + "grad_norm": 12.535348892211914, + "learning_rate": 1.6656593406593406e-05, + "loss": 0.3936, + "step": 24274 + }, + { + "epoch": 66.68956043956044, + "grad_norm": 26.828954696655273, + "learning_rate": 1.665521978021978e-05, + "loss": 0.7115, + "step": 24275 + }, + { + "epoch": 66.6923076923077, + "grad_norm": 20.99148941040039, + "learning_rate": 1.6653846153846156e-05, + "loss": 0.8493, + "step": 24276 + }, + { + "epoch": 66.69505494505495, + "grad_norm": 18.776203155517578, + "learning_rate": 1.665247252747253e-05, + "loss": 0.3583, + "step": 24277 + }, + { + "epoch": 66.6978021978022, + "grad_norm": 8.313679695129395, + "learning_rate": 1.6651098901098903e-05, + "loss": 0.1656, + "step": 24278 + }, + { + "epoch": 66.70054945054945, + "grad_norm": 16.885581970214844, + "learning_rate": 1.6649725274725276e-05, + "loss": 0.5405, + "step": 24279 + }, + { + "epoch": 66.7032967032967, + "grad_norm": 6.2224440574646, + "learning_rate": 1.664835164835165e-05, + "loss": 0.1385, + "step": 24280 + }, + { + "epoch": 66.70604395604396, + "grad_norm": 8.348402976989746, + "learning_rate": 1.6646978021978023e-05, + "loss": 0.2204, + "step": 24281 + }, + { + "epoch": 66.70879120879121, + "grad_norm": 14.60255241394043, + "learning_rate": 1.6645604395604396e-05, + "loss": 0.3125, + "step": 24282 + }, + { + "epoch": 66.71153846153847, + "grad_norm": 14.360513687133789, + "learning_rate": 1.664423076923077e-05, + "loss": 0.3405, + "step": 24283 + }, + { + "epoch": 66.71428571428571, + "grad_norm": 6.623615741729736, + "learning_rate": 1.6642857142857143e-05, + "loss": 0.0981, + "step": 24284 + }, + { + "epoch": 66.71703296703296, + "grad_norm": 10.898383140563965, + "learning_rate": 1.6641483516483517e-05, + "loss": 0.25, + "step": 24285 + }, + { + "epoch": 66.71978021978022, + "grad_norm": 6.755305290222168, + "learning_rate": 1.664010989010989e-05, + "loss": 0.0924, + "step": 24286 + }, + { + "epoch": 66.72252747252747, + "grad_norm": 4.298361778259277, + "learning_rate": 1.6638736263736267e-05, + "loss": 0.0601, + "step": 24287 + }, + { + "epoch": 66.72527472527473, + "grad_norm": 24.223478317260742, + "learning_rate": 1.6637362637362637e-05, + "loss": 0.4299, + "step": 24288 + }, + { + "epoch": 66.72802197802197, + "grad_norm": 18.611417770385742, + "learning_rate": 1.663598901098901e-05, + "loss": 0.5843, + "step": 24289 + }, + { + "epoch": 66.73076923076923, + "grad_norm": 12.396949768066406, + "learning_rate": 1.6634615384615384e-05, + "loss": 0.1099, + "step": 24290 + }, + { + "epoch": 66.73351648351648, + "grad_norm": 20.74062156677246, + "learning_rate": 1.6633241758241757e-05, + "loss": 0.3978, + "step": 24291 + }, + { + "epoch": 66.73626373626374, + "grad_norm": 9.366076469421387, + "learning_rate": 1.6631868131868134e-05, + "loss": 0.2035, + "step": 24292 + }, + { + "epoch": 66.73901098901099, + "grad_norm": 14.958521842956543, + "learning_rate": 1.6630494505494507e-05, + "loss": 0.6899, + "step": 24293 + }, + { + "epoch": 66.74175824175825, + "grad_norm": 9.943636894226074, + "learning_rate": 1.662912087912088e-05, + "loss": 0.2031, + "step": 24294 + }, + { + "epoch": 66.74450549450549, + "grad_norm": 2.1828088760375977, + "learning_rate": 1.6627747252747254e-05, + "loss": 0.0292, + "step": 24295 + }, + { + "epoch": 66.74725274725274, + "grad_norm": 21.271238327026367, + "learning_rate": 1.6626373626373628e-05, + "loss": 0.8841, + "step": 24296 + }, + { + "epoch": 66.75, + "grad_norm": 3.2080953121185303, + "learning_rate": 1.6625e-05, + "loss": 0.0359, + "step": 24297 + }, + { + "epoch": 66.75274725274726, + "grad_norm": 6.351064205169678, + "learning_rate": 1.6623626373626374e-05, + "loss": 0.1838, + "step": 24298 + }, + { + "epoch": 66.75549450549451, + "grad_norm": 1.8989261388778687, + "learning_rate": 1.6622252747252748e-05, + "loss": 0.0248, + "step": 24299 + }, + { + "epoch": 66.75824175824175, + "grad_norm": 29.611238479614258, + "learning_rate": 1.662087912087912e-05, + "loss": 1.0624, + "step": 24300 + }, + { + "epoch": 66.76098901098901, + "grad_norm": 19.1588191986084, + "learning_rate": 1.6619505494505495e-05, + "loss": 0.4875, + "step": 24301 + }, + { + "epoch": 66.76373626373626, + "grad_norm": 13.06722354888916, + "learning_rate": 1.661813186813187e-05, + "loss": 0.2632, + "step": 24302 + }, + { + "epoch": 66.76648351648352, + "grad_norm": 7.6401567459106445, + "learning_rate": 1.661675824175824e-05, + "loss": 0.1442, + "step": 24303 + }, + { + "epoch": 66.76923076923077, + "grad_norm": 5.62652063369751, + "learning_rate": 1.6615384615384615e-05, + "loss": 0.0926, + "step": 24304 + }, + { + "epoch": 66.77197802197803, + "grad_norm": 9.343013763427734, + "learning_rate": 1.6614010989010988e-05, + "loss": 0.19, + "step": 24305 + }, + { + "epoch": 66.77472527472527, + "grad_norm": 6.451746940612793, + "learning_rate": 1.661263736263736e-05, + "loss": 0.1332, + "step": 24306 + }, + { + "epoch": 66.77747252747253, + "grad_norm": 7.70482873916626, + "learning_rate": 1.661126373626374e-05, + "loss": 0.1472, + "step": 24307 + }, + { + "epoch": 66.78021978021978, + "grad_norm": 24.608787536621094, + "learning_rate": 1.6609890109890112e-05, + "loss": 0.5451, + "step": 24308 + }, + { + "epoch": 66.78296703296704, + "grad_norm": 6.754331588745117, + "learning_rate": 1.6608516483516485e-05, + "loss": 0.1049, + "step": 24309 + }, + { + "epoch": 66.78571428571429, + "grad_norm": 9.436671257019043, + "learning_rate": 1.660714285714286e-05, + "loss": 0.1107, + "step": 24310 + }, + { + "epoch": 66.78846153846153, + "grad_norm": 23.14813804626465, + "learning_rate": 1.660576923076923e-05, + "loss": 0.7349, + "step": 24311 + }, + { + "epoch": 66.79120879120879, + "grad_norm": 8.26268482208252, + "learning_rate": 1.6604395604395605e-05, + "loss": 0.1063, + "step": 24312 + }, + { + "epoch": 66.79395604395604, + "grad_norm": 9.495816230773926, + "learning_rate": 1.660302197802198e-05, + "loss": 0.1522, + "step": 24313 + }, + { + "epoch": 66.7967032967033, + "grad_norm": 5.516950607299805, + "learning_rate": 1.6601648351648352e-05, + "loss": 0.091, + "step": 24314 + }, + { + "epoch": 66.79945054945055, + "grad_norm": 15.773036003112793, + "learning_rate": 1.6600274725274726e-05, + "loss": 0.4532, + "step": 24315 + }, + { + "epoch": 66.8021978021978, + "grad_norm": 18.443111419677734, + "learning_rate": 1.65989010989011e-05, + "loss": 0.3666, + "step": 24316 + }, + { + "epoch": 66.80494505494505, + "grad_norm": 16.613191604614258, + "learning_rate": 1.6597527472527476e-05, + "loss": 0.3156, + "step": 24317 + }, + { + "epoch": 66.8076923076923, + "grad_norm": 13.748327255249023, + "learning_rate": 1.6596153846153846e-05, + "loss": 0.3476, + "step": 24318 + }, + { + "epoch": 66.81043956043956, + "grad_norm": 18.194700241088867, + "learning_rate": 1.659478021978022e-05, + "loss": 0.3696, + "step": 24319 + }, + { + "epoch": 66.81318681318682, + "grad_norm": 24.34139060974121, + "learning_rate": 1.6593406593406593e-05, + "loss": 0.6241, + "step": 24320 + }, + { + "epoch": 66.81593406593407, + "grad_norm": 19.2459774017334, + "learning_rate": 1.6592032967032966e-05, + "loss": 0.4278, + "step": 24321 + }, + { + "epoch": 66.81868131868131, + "grad_norm": 13.502911567687988, + "learning_rate": 1.6590659340659343e-05, + "loss": 0.2895, + "step": 24322 + }, + { + "epoch": 66.82142857142857, + "grad_norm": 14.89370059967041, + "learning_rate": 1.6589285714285716e-05, + "loss": 0.2522, + "step": 24323 + }, + { + "epoch": 66.82417582417582, + "grad_norm": 6.9641289710998535, + "learning_rate": 1.658791208791209e-05, + "loss": 0.1013, + "step": 24324 + }, + { + "epoch": 66.82692307692308, + "grad_norm": 23.359813690185547, + "learning_rate": 1.6586538461538463e-05, + "loss": 0.5236, + "step": 24325 + }, + { + "epoch": 66.82967032967034, + "grad_norm": 4.283267021179199, + "learning_rate": 1.6585164835164833e-05, + "loss": 0.112, + "step": 24326 + }, + { + "epoch": 66.83241758241758, + "grad_norm": 14.237489700317383, + "learning_rate": 1.658379120879121e-05, + "loss": 0.2394, + "step": 24327 + }, + { + "epoch": 66.83516483516483, + "grad_norm": 14.559786796569824, + "learning_rate": 1.6582417582417583e-05, + "loss": 0.2598, + "step": 24328 + }, + { + "epoch": 66.83791208791209, + "grad_norm": 22.60686683654785, + "learning_rate": 1.6581043956043957e-05, + "loss": 0.4219, + "step": 24329 + }, + { + "epoch": 66.84065934065934, + "grad_norm": 12.832897186279297, + "learning_rate": 1.657967032967033e-05, + "loss": 0.3116, + "step": 24330 + }, + { + "epoch": 66.8434065934066, + "grad_norm": 10.826713562011719, + "learning_rate": 1.6578296703296704e-05, + "loss": 0.2269, + "step": 24331 + }, + { + "epoch": 66.84615384615384, + "grad_norm": 9.670783996582031, + "learning_rate": 1.657692307692308e-05, + "loss": 0.1956, + "step": 24332 + }, + { + "epoch": 66.8489010989011, + "grad_norm": 10.577378273010254, + "learning_rate": 1.657554945054945e-05, + "loss": 0.1434, + "step": 24333 + }, + { + "epoch": 66.85164835164835, + "grad_norm": 9.896589279174805, + "learning_rate": 1.6574175824175824e-05, + "loss": 0.3041, + "step": 24334 + }, + { + "epoch": 66.8543956043956, + "grad_norm": 5.205019474029541, + "learning_rate": 1.6572802197802197e-05, + "loss": 0.1518, + "step": 24335 + }, + { + "epoch": 66.85714285714286, + "grad_norm": 4.296961307525635, + "learning_rate": 1.657142857142857e-05, + "loss": 0.0308, + "step": 24336 + }, + { + "epoch": 66.85989010989012, + "grad_norm": 11.668726921081543, + "learning_rate": 1.6570054945054947e-05, + "loss": 0.2398, + "step": 24337 + }, + { + "epoch": 66.86263736263736, + "grad_norm": 13.916604042053223, + "learning_rate": 1.656868131868132e-05, + "loss": 0.2347, + "step": 24338 + }, + { + "epoch": 66.86538461538461, + "grad_norm": 16.425952911376953, + "learning_rate": 1.6567307692307694e-05, + "loss": 0.7178, + "step": 24339 + }, + { + "epoch": 66.86813186813187, + "grad_norm": 5.9109954833984375, + "learning_rate": 1.6565934065934068e-05, + "loss": 0.0706, + "step": 24340 + }, + { + "epoch": 66.87087912087912, + "grad_norm": 16.03849220275879, + "learning_rate": 1.6564560439560438e-05, + "loss": 0.3289, + "step": 24341 + }, + { + "epoch": 66.87362637362638, + "grad_norm": 14.752239227294922, + "learning_rate": 1.6563186813186814e-05, + "loss": 0.3192, + "step": 24342 + }, + { + "epoch": 66.87637362637362, + "grad_norm": 6.893368721008301, + "learning_rate": 1.6561813186813188e-05, + "loss": 0.0912, + "step": 24343 + }, + { + "epoch": 66.87912087912088, + "grad_norm": 15.380313873291016, + "learning_rate": 1.656043956043956e-05, + "loss": 0.4109, + "step": 24344 + }, + { + "epoch": 66.88186813186813, + "grad_norm": 11.407469749450684, + "learning_rate": 1.6559065934065935e-05, + "loss": 0.1408, + "step": 24345 + }, + { + "epoch": 66.88461538461539, + "grad_norm": 12.372218132019043, + "learning_rate": 1.6557692307692308e-05, + "loss": 0.3755, + "step": 24346 + }, + { + "epoch": 66.88736263736264, + "grad_norm": 15.02672004699707, + "learning_rate": 1.6556318681318685e-05, + "loss": 0.2378, + "step": 24347 + }, + { + "epoch": 66.89010989010988, + "grad_norm": 7.141339302062988, + "learning_rate": 1.6554945054945055e-05, + "loss": 0.1422, + "step": 24348 + }, + { + "epoch": 66.89285714285714, + "grad_norm": 16.912471771240234, + "learning_rate": 1.6553571428571428e-05, + "loss": 0.3981, + "step": 24349 + }, + { + "epoch": 66.8956043956044, + "grad_norm": 11.580483436584473, + "learning_rate": 1.65521978021978e-05, + "loss": 0.1263, + "step": 24350 + }, + { + "epoch": 66.89835164835165, + "grad_norm": 10.907649040222168, + "learning_rate": 1.6550824175824175e-05, + "loss": 0.2334, + "step": 24351 + }, + { + "epoch": 66.9010989010989, + "grad_norm": 9.36141300201416, + "learning_rate": 1.6549450549450552e-05, + "loss": 0.2533, + "step": 24352 + }, + { + "epoch": 66.90384615384616, + "grad_norm": 8.144742965698242, + "learning_rate": 1.6548076923076925e-05, + "loss": 0.1337, + "step": 24353 + }, + { + "epoch": 66.9065934065934, + "grad_norm": 23.32516098022461, + "learning_rate": 1.65467032967033e-05, + "loss": 0.4232, + "step": 24354 + }, + { + "epoch": 66.90934065934066, + "grad_norm": 22.798372268676758, + "learning_rate": 1.6545329670329672e-05, + "loss": 0.551, + "step": 24355 + }, + { + "epoch": 66.91208791208791, + "grad_norm": 17.310169219970703, + "learning_rate": 1.6543956043956042e-05, + "loss": 0.5589, + "step": 24356 + }, + { + "epoch": 66.91483516483517, + "grad_norm": 16.05255699157715, + "learning_rate": 1.654258241758242e-05, + "loss": 0.2833, + "step": 24357 + }, + { + "epoch": 66.91758241758242, + "grad_norm": 5.654644966125488, + "learning_rate": 1.6541208791208792e-05, + "loss": 0.1393, + "step": 24358 + }, + { + "epoch": 66.92032967032966, + "grad_norm": 10.364599227905273, + "learning_rate": 1.6539835164835166e-05, + "loss": 0.1156, + "step": 24359 + }, + { + "epoch": 66.92307692307692, + "grad_norm": 7.076319217681885, + "learning_rate": 1.653846153846154e-05, + "loss": 0.0686, + "step": 24360 + }, + { + "epoch": 66.92582417582418, + "grad_norm": 17.93438720703125, + "learning_rate": 1.6537087912087913e-05, + "loss": 0.4738, + "step": 24361 + }, + { + "epoch": 66.92857142857143, + "grad_norm": 2.307246208190918, + "learning_rate": 1.653571428571429e-05, + "loss": 0.0209, + "step": 24362 + }, + { + "epoch": 66.93131868131869, + "grad_norm": 11.665212631225586, + "learning_rate": 1.653434065934066e-05, + "loss": 0.185, + "step": 24363 + }, + { + "epoch": 66.93406593406593, + "grad_norm": 11.475922584533691, + "learning_rate": 1.6532967032967033e-05, + "loss": 0.1948, + "step": 24364 + }, + { + "epoch": 66.93681318681318, + "grad_norm": 7.657863616943359, + "learning_rate": 1.6531593406593406e-05, + "loss": 0.1289, + "step": 24365 + }, + { + "epoch": 66.93956043956044, + "grad_norm": 7.657604694366455, + "learning_rate": 1.653021978021978e-05, + "loss": 0.14, + "step": 24366 + }, + { + "epoch": 66.9423076923077, + "grad_norm": 7.7361297607421875, + "learning_rate": 1.6528846153846156e-05, + "loss": 0.106, + "step": 24367 + }, + { + "epoch": 66.94505494505495, + "grad_norm": 14.543883323669434, + "learning_rate": 1.652747252747253e-05, + "loss": 0.4252, + "step": 24368 + }, + { + "epoch": 66.9478021978022, + "grad_norm": 7.231820106506348, + "learning_rate": 1.6526098901098903e-05, + "loss": 0.098, + "step": 24369 + }, + { + "epoch": 66.95054945054945, + "grad_norm": 11.90561580657959, + "learning_rate": 1.6524725274725277e-05, + "loss": 0.2846, + "step": 24370 + }, + { + "epoch": 66.9532967032967, + "grad_norm": 8.98583984375, + "learning_rate": 1.6523351648351647e-05, + "loss": 0.2155, + "step": 24371 + }, + { + "epoch": 66.95604395604396, + "grad_norm": 11.64077091217041, + "learning_rate": 1.6521978021978023e-05, + "loss": 0.2606, + "step": 24372 + }, + { + "epoch": 66.95879120879121, + "grad_norm": 16.700742721557617, + "learning_rate": 1.6520604395604397e-05, + "loss": 0.3662, + "step": 24373 + }, + { + "epoch": 66.96153846153847, + "grad_norm": 5.376322269439697, + "learning_rate": 1.651923076923077e-05, + "loss": 0.0857, + "step": 24374 + }, + { + "epoch": 66.96428571428571, + "grad_norm": 16.188976287841797, + "learning_rate": 1.6517857142857144e-05, + "loss": 0.2227, + "step": 24375 + }, + { + "epoch": 66.96703296703296, + "grad_norm": 18.266088485717773, + "learning_rate": 1.6516483516483517e-05, + "loss": 0.5525, + "step": 24376 + }, + { + "epoch": 66.96978021978022, + "grad_norm": 13.721569061279297, + "learning_rate": 1.6515109890109894e-05, + "loss": 0.3565, + "step": 24377 + }, + { + "epoch": 66.97252747252747, + "grad_norm": 11.1149320602417, + "learning_rate": 1.6513736263736264e-05, + "loss": 0.1908, + "step": 24378 + }, + { + "epoch": 66.97527472527473, + "grad_norm": 18.713294982910156, + "learning_rate": 1.6512362637362637e-05, + "loss": 0.2549, + "step": 24379 + }, + { + "epoch": 66.97802197802197, + "grad_norm": 7.932097911834717, + "learning_rate": 1.651098901098901e-05, + "loss": 0.182, + "step": 24380 + }, + { + "epoch": 66.98076923076923, + "grad_norm": 6.534134864807129, + "learning_rate": 1.6509615384615384e-05, + "loss": 0.0448, + "step": 24381 + }, + { + "epoch": 66.98351648351648, + "grad_norm": 15.754515647888184, + "learning_rate": 1.650824175824176e-05, + "loss": 0.615, + "step": 24382 + }, + { + "epoch": 66.98626373626374, + "grad_norm": 14.318574905395508, + "learning_rate": 1.6506868131868134e-05, + "loss": 0.2097, + "step": 24383 + }, + { + "epoch": 66.98901098901099, + "grad_norm": 8.171542167663574, + "learning_rate": 1.6505494505494508e-05, + "loss": 0.1104, + "step": 24384 + }, + { + "epoch": 66.99175824175825, + "grad_norm": 7.708734035491943, + "learning_rate": 1.650412087912088e-05, + "loss": 0.2157, + "step": 24385 + }, + { + "epoch": 66.99450549450549, + "grad_norm": 3.551900625228882, + "learning_rate": 1.650274725274725e-05, + "loss": 0.0438, + "step": 24386 + }, + { + "epoch": 66.99725274725274, + "grad_norm": 20.366104125976562, + "learning_rate": 1.6501373626373628e-05, + "loss": 0.6306, + "step": 24387 + }, + { + "epoch": 67.0, + "grad_norm": 7.378732681274414, + "learning_rate": 1.65e-05, + "loss": 0.0658, + "step": 24388 + }, + { + "epoch": 67.0, + "eval_accuracy": 0.8319559228650137, + "eval_f1": 0.8335180390882243, + "eval_f1_DuraRiadoRio_64x64": 0.8451612903225807, + "eval_f1_Mole_64x64": 0.9044117647058824, + "eval_f1_Quebrado_64x64": 0.7881040892193308, + "eval_f1_RiadoRio_64x64": 0.7371794871794872, + "eval_f1_RioFechado_64x64": 0.8927335640138409, + "eval_loss": 0.5079649090766907, + "eval_precision": 0.8388790293828375, + "eval_precision_DuraRiadoRio_64x64": 0.7891566265060241, + "eval_precision_Mole_64x64": 0.9609375, + "eval_precision_Quebrado_64x64": 0.848, + "eval_precision_RiadoRio_64x64": 0.71875, + "eval_precision_RioFechado_64x64": 0.8775510204081632, + "eval_recall": 0.8330059303187547, + "eval_recall_DuraRiadoRio_64x64": 0.9097222222222222, + "eval_recall_Mole_64x64": 0.8541666666666666, + "eval_recall_Quebrado_64x64": 0.7361111111111112, + "eval_recall_RiadoRio_64x64": 0.756578947368421, + "eval_recall_RioFechado_64x64": 0.9084507042253521, + "eval_runtime": 1.8504, + "eval_samples_per_second": 392.356, + "eval_steps_per_second": 24.86, + "step": 24388 + }, + { + "epoch": 67.00274725274726, + "grad_norm": 1.4769421815872192, + "learning_rate": 1.6498626373626375e-05, + "loss": 0.0208, + "step": 24389 + }, + { + "epoch": 67.00549450549451, + "grad_norm": 15.93852424621582, + "learning_rate": 1.6497252747252748e-05, + "loss": 0.1918, + "step": 24390 + }, + { + "epoch": 67.00824175824175, + "grad_norm": 9.889260292053223, + "learning_rate": 1.649587912087912e-05, + "loss": 0.2437, + "step": 24391 + }, + { + "epoch": 67.01098901098901, + "grad_norm": 19.856201171875, + "learning_rate": 1.6494505494505498e-05, + "loss": 0.3996, + "step": 24392 + }, + { + "epoch": 67.01373626373626, + "grad_norm": 20.80209732055664, + "learning_rate": 1.6493131868131868e-05, + "loss": 0.3447, + "step": 24393 + }, + { + "epoch": 67.01648351648352, + "grad_norm": 4.037868976593018, + "learning_rate": 1.6491758241758242e-05, + "loss": 0.0665, + "step": 24394 + }, + { + "epoch": 67.01923076923077, + "grad_norm": 17.26386260986328, + "learning_rate": 1.6490384615384615e-05, + "loss": 0.3856, + "step": 24395 + }, + { + "epoch": 67.02197802197803, + "grad_norm": 7.434548377990723, + "learning_rate": 1.648901098901099e-05, + "loss": 0.114, + "step": 24396 + }, + { + "epoch": 67.02472527472527, + "grad_norm": 5.444586753845215, + "learning_rate": 1.6487637362637365e-05, + "loss": 0.0832, + "step": 24397 + }, + { + "epoch": 67.02747252747253, + "grad_norm": 14.595475196838379, + "learning_rate": 1.648626373626374e-05, + "loss": 0.3283, + "step": 24398 + }, + { + "epoch": 67.03021978021978, + "grad_norm": 17.967945098876953, + "learning_rate": 1.6484890109890112e-05, + "loss": 0.3725, + "step": 24399 + }, + { + "epoch": 67.03296703296704, + "grad_norm": 6.264486312866211, + "learning_rate": 1.6483516483516486e-05, + "loss": 0.0817, + "step": 24400 + }, + { + "epoch": 67.03571428571429, + "grad_norm": 11.959267616271973, + "learning_rate": 1.6482142857142856e-05, + "loss": 0.1783, + "step": 24401 + }, + { + "epoch": 67.03846153846153, + "grad_norm": 13.078739166259766, + "learning_rate": 1.648076923076923e-05, + "loss": 0.2409, + "step": 24402 + }, + { + "epoch": 67.04120879120879, + "grad_norm": 25.558317184448242, + "learning_rate": 1.6479395604395606e-05, + "loss": 0.4148, + "step": 24403 + }, + { + "epoch": 67.04395604395604, + "grad_norm": 5.294405460357666, + "learning_rate": 1.647802197802198e-05, + "loss": 0.0486, + "step": 24404 + }, + { + "epoch": 67.0467032967033, + "grad_norm": 15.300326347351074, + "learning_rate": 1.6476648351648353e-05, + "loss": 0.3137, + "step": 24405 + }, + { + "epoch": 67.04945054945055, + "grad_norm": 27.92144012451172, + "learning_rate": 1.6475274725274726e-05, + "loss": 1.2479, + "step": 24406 + }, + { + "epoch": 67.0521978021978, + "grad_norm": 16.499643325805664, + "learning_rate": 1.64739010989011e-05, + "loss": 0.3062, + "step": 24407 + }, + { + "epoch": 67.05494505494505, + "grad_norm": 15.16910171508789, + "learning_rate": 1.6472527472527473e-05, + "loss": 0.4385, + "step": 24408 + }, + { + "epoch": 67.0576923076923, + "grad_norm": 18.719980239868164, + "learning_rate": 1.6471153846153846e-05, + "loss": 0.5969, + "step": 24409 + }, + { + "epoch": 67.06043956043956, + "grad_norm": 15.192085266113281, + "learning_rate": 1.646978021978022e-05, + "loss": 0.3097, + "step": 24410 + }, + { + "epoch": 67.06318681318682, + "grad_norm": 9.472198486328125, + "learning_rate": 1.6468406593406593e-05, + "loss": 0.1753, + "step": 24411 + }, + { + "epoch": 67.06593406593407, + "grad_norm": 6.418234348297119, + "learning_rate": 1.6467032967032966e-05, + "loss": 0.0874, + "step": 24412 + }, + { + "epoch": 67.06868131868131, + "grad_norm": 17.51590919494629, + "learning_rate": 1.6465659340659343e-05, + "loss": 0.3723, + "step": 24413 + }, + { + "epoch": 67.07142857142857, + "grad_norm": 4.070910930633545, + "learning_rate": 1.6464285714285717e-05, + "loss": 0.0817, + "step": 24414 + }, + { + "epoch": 67.07417582417582, + "grad_norm": 8.312202453613281, + "learning_rate": 1.646291208791209e-05, + "loss": 0.1854, + "step": 24415 + }, + { + "epoch": 67.07692307692308, + "grad_norm": 21.035066604614258, + "learning_rate": 1.646153846153846e-05, + "loss": 0.3802, + "step": 24416 + }, + { + "epoch": 67.07967032967034, + "grad_norm": 12.842308044433594, + "learning_rate": 1.6460164835164833e-05, + "loss": 0.1606, + "step": 24417 + }, + { + "epoch": 67.08241758241758, + "grad_norm": 4.709959506988525, + "learning_rate": 1.645879120879121e-05, + "loss": 0.0306, + "step": 24418 + }, + { + "epoch": 67.08516483516483, + "grad_norm": 11.051206588745117, + "learning_rate": 1.6457417582417584e-05, + "loss": 0.2994, + "step": 24419 + }, + { + "epoch": 67.08791208791209, + "grad_norm": 13.28238582611084, + "learning_rate": 1.6456043956043957e-05, + "loss": 0.2741, + "step": 24420 + }, + { + "epoch": 67.09065934065934, + "grad_norm": 24.538074493408203, + "learning_rate": 1.645467032967033e-05, + "loss": 0.342, + "step": 24421 + }, + { + "epoch": 67.0934065934066, + "grad_norm": 24.766441345214844, + "learning_rate": 1.6453296703296704e-05, + "loss": 0.8176, + "step": 24422 + }, + { + "epoch": 67.09615384615384, + "grad_norm": 6.384857177734375, + "learning_rate": 1.6451923076923077e-05, + "loss": 0.1232, + "step": 24423 + }, + { + "epoch": 67.0989010989011, + "grad_norm": 12.936208724975586, + "learning_rate": 1.645054945054945e-05, + "loss": 0.2719, + "step": 24424 + }, + { + "epoch": 67.10164835164835, + "grad_norm": 9.647928237915039, + "learning_rate": 1.6449175824175824e-05, + "loss": 0.1274, + "step": 24425 + }, + { + "epoch": 67.1043956043956, + "grad_norm": 8.478320121765137, + "learning_rate": 1.6447802197802198e-05, + "loss": 0.1879, + "step": 24426 + }, + { + "epoch": 67.10714285714286, + "grad_norm": 4.525652885437012, + "learning_rate": 1.644642857142857e-05, + "loss": 0.0808, + "step": 24427 + }, + { + "epoch": 67.10989010989012, + "grad_norm": 20.8129940032959, + "learning_rate": 1.6445054945054948e-05, + "loss": 0.7517, + "step": 24428 + }, + { + "epoch": 67.11263736263736, + "grad_norm": 5.650303840637207, + "learning_rate": 1.644368131868132e-05, + "loss": 0.0496, + "step": 24429 + }, + { + "epoch": 67.11538461538461, + "grad_norm": 7.567859649658203, + "learning_rate": 1.6442307692307695e-05, + "loss": 0.1275, + "step": 24430 + }, + { + "epoch": 67.11813186813187, + "grad_norm": 10.471475601196289, + "learning_rate": 1.6440934065934065e-05, + "loss": 0.3177, + "step": 24431 + }, + { + "epoch": 67.12087912087912, + "grad_norm": 10.587319374084473, + "learning_rate": 1.6439560439560438e-05, + "loss": 0.1525, + "step": 24432 + }, + { + "epoch": 67.12362637362638, + "grad_norm": 10.482976913452148, + "learning_rate": 1.6438186813186815e-05, + "loss": 0.2133, + "step": 24433 + }, + { + "epoch": 67.12637362637362, + "grad_norm": 19.167753219604492, + "learning_rate": 1.6436813186813188e-05, + "loss": 0.4026, + "step": 24434 + }, + { + "epoch": 67.12912087912088, + "grad_norm": 3.928373336791992, + "learning_rate": 1.643543956043956e-05, + "loss": 0.0438, + "step": 24435 + }, + { + "epoch": 67.13186813186813, + "grad_norm": 17.78227996826172, + "learning_rate": 1.6434065934065935e-05, + "loss": 0.3095, + "step": 24436 + }, + { + "epoch": 67.13461538461539, + "grad_norm": 9.831026077270508, + "learning_rate": 1.643269230769231e-05, + "loss": 0.1831, + "step": 24437 + }, + { + "epoch": 67.13736263736264, + "grad_norm": 15.71291732788086, + "learning_rate": 1.6431318681318682e-05, + "loss": 0.4219, + "step": 24438 + }, + { + "epoch": 67.14010989010988, + "grad_norm": 9.523904800415039, + "learning_rate": 1.6429945054945055e-05, + "loss": 0.1704, + "step": 24439 + }, + { + "epoch": 67.14285714285714, + "grad_norm": 15.074344635009766, + "learning_rate": 1.642857142857143e-05, + "loss": 0.2261, + "step": 24440 + }, + { + "epoch": 67.1456043956044, + "grad_norm": 13.949922561645508, + "learning_rate": 1.6427197802197802e-05, + "loss": 0.2082, + "step": 24441 + }, + { + "epoch": 67.14835164835165, + "grad_norm": 12.204066276550293, + "learning_rate": 1.6425824175824175e-05, + "loss": 0.2572, + "step": 24442 + }, + { + "epoch": 67.1510989010989, + "grad_norm": 11.990873336791992, + "learning_rate": 1.6424450549450552e-05, + "loss": 0.106, + "step": 24443 + }, + { + "epoch": 67.15384615384616, + "grad_norm": 4.392578601837158, + "learning_rate": 1.6423076923076926e-05, + "loss": 0.0722, + "step": 24444 + }, + { + "epoch": 67.1565934065934, + "grad_norm": 19.263330459594727, + "learning_rate": 1.64217032967033e-05, + "loss": 0.5568, + "step": 24445 + }, + { + "epoch": 67.15934065934066, + "grad_norm": 19.030555725097656, + "learning_rate": 1.642032967032967e-05, + "loss": 0.6418, + "step": 24446 + }, + { + "epoch": 67.16208791208791, + "grad_norm": 12.225584983825684, + "learning_rate": 1.6418956043956042e-05, + "loss": 0.5607, + "step": 24447 + }, + { + "epoch": 67.16483516483517, + "grad_norm": 9.4949312210083, + "learning_rate": 1.641758241758242e-05, + "loss": 0.2378, + "step": 24448 + }, + { + "epoch": 67.16758241758242, + "grad_norm": 19.92646026611328, + "learning_rate": 1.6416208791208793e-05, + "loss": 0.502, + "step": 24449 + }, + { + "epoch": 67.17032967032966, + "grad_norm": 2.696805238723755, + "learning_rate": 1.6414835164835166e-05, + "loss": 0.0288, + "step": 24450 + }, + { + "epoch": 67.17307692307692, + "grad_norm": 4.533792495727539, + "learning_rate": 1.641346153846154e-05, + "loss": 0.0739, + "step": 24451 + }, + { + "epoch": 67.17582417582418, + "grad_norm": 4.836893081665039, + "learning_rate": 1.6412087912087913e-05, + "loss": 0.0551, + "step": 24452 + }, + { + "epoch": 67.17857142857143, + "grad_norm": 6.868083477020264, + "learning_rate": 1.6410714285714286e-05, + "loss": 0.2037, + "step": 24453 + }, + { + "epoch": 67.18131868131869, + "grad_norm": 12.344161033630371, + "learning_rate": 1.640934065934066e-05, + "loss": 0.1138, + "step": 24454 + }, + { + "epoch": 67.18406593406593, + "grad_norm": 6.80355167388916, + "learning_rate": 1.6407967032967033e-05, + "loss": 0.1003, + "step": 24455 + }, + { + "epoch": 67.18681318681318, + "grad_norm": 15.221240043640137, + "learning_rate": 1.6406593406593406e-05, + "loss": 0.4046, + "step": 24456 + }, + { + "epoch": 67.18956043956044, + "grad_norm": 9.09788990020752, + "learning_rate": 1.640521978021978e-05, + "loss": 0.0962, + "step": 24457 + }, + { + "epoch": 67.1923076923077, + "grad_norm": 11.558603286743164, + "learning_rate": 1.6403846153846157e-05, + "loss": 0.3021, + "step": 24458 + }, + { + "epoch": 67.19505494505495, + "grad_norm": 11.051397323608398, + "learning_rate": 1.640247252747253e-05, + "loss": 0.1698, + "step": 24459 + }, + { + "epoch": 67.1978021978022, + "grad_norm": 8.809773445129395, + "learning_rate": 1.6401098901098903e-05, + "loss": 0.0962, + "step": 24460 + }, + { + "epoch": 67.20054945054945, + "grad_norm": 17.138614654541016, + "learning_rate": 1.6399725274725274e-05, + "loss": 0.4886, + "step": 24461 + }, + { + "epoch": 67.2032967032967, + "grad_norm": 10.492338180541992, + "learning_rate": 1.6398351648351647e-05, + "loss": 0.1426, + "step": 24462 + }, + { + "epoch": 67.20604395604396, + "grad_norm": 11.517288208007812, + "learning_rate": 1.6396978021978024e-05, + "loss": 0.1422, + "step": 24463 + }, + { + "epoch": 67.20879120879121, + "grad_norm": 12.461196899414062, + "learning_rate": 1.6395604395604397e-05, + "loss": 0.1939, + "step": 24464 + }, + { + "epoch": 67.21153846153847, + "grad_norm": 4.163658142089844, + "learning_rate": 1.639423076923077e-05, + "loss": 0.1042, + "step": 24465 + }, + { + "epoch": 67.21428571428571, + "grad_norm": 15.78242301940918, + "learning_rate": 1.6392857142857144e-05, + "loss": 0.3473, + "step": 24466 + }, + { + "epoch": 67.21703296703296, + "grad_norm": 21.82394027709961, + "learning_rate": 1.6391483516483517e-05, + "loss": 0.3665, + "step": 24467 + }, + { + "epoch": 67.21978021978022, + "grad_norm": 11.995532989501953, + "learning_rate": 1.639010989010989e-05, + "loss": 0.1914, + "step": 24468 + }, + { + "epoch": 67.22252747252747, + "grad_norm": 14.788900375366211, + "learning_rate": 1.6388736263736264e-05, + "loss": 0.3019, + "step": 24469 + }, + { + "epoch": 67.22527472527473, + "grad_norm": 11.501447677612305, + "learning_rate": 1.6387362637362638e-05, + "loss": 0.2425, + "step": 24470 + }, + { + "epoch": 67.22802197802197, + "grad_norm": 7.262992858886719, + "learning_rate": 1.638598901098901e-05, + "loss": 0.1067, + "step": 24471 + }, + { + "epoch": 67.23076923076923, + "grad_norm": 24.37325668334961, + "learning_rate": 1.6384615384615384e-05, + "loss": 0.73, + "step": 24472 + }, + { + "epoch": 67.23351648351648, + "grad_norm": 12.902223587036133, + "learning_rate": 1.638324175824176e-05, + "loss": 0.1171, + "step": 24473 + }, + { + "epoch": 67.23626373626374, + "grad_norm": 15.242663383483887, + "learning_rate": 1.6381868131868135e-05, + "loss": 0.2923, + "step": 24474 + }, + { + "epoch": 67.23901098901099, + "grad_norm": 11.085000991821289, + "learning_rate": 1.6380494505494508e-05, + "loss": 0.2216, + "step": 24475 + }, + { + "epoch": 67.24175824175825, + "grad_norm": 1.5556758642196655, + "learning_rate": 1.6379120879120878e-05, + "loss": 0.015, + "step": 24476 + }, + { + "epoch": 67.24450549450549, + "grad_norm": 11.736861228942871, + "learning_rate": 1.637774725274725e-05, + "loss": 0.2517, + "step": 24477 + }, + { + "epoch": 67.24725274725274, + "grad_norm": 16.98922348022461, + "learning_rate": 1.6376373626373628e-05, + "loss": 0.264, + "step": 24478 + }, + { + "epoch": 67.25, + "grad_norm": 6.643080711364746, + "learning_rate": 1.6375e-05, + "loss": 0.1327, + "step": 24479 + }, + { + "epoch": 67.25274725274726, + "grad_norm": 2.620820999145508, + "learning_rate": 1.6373626373626375e-05, + "loss": 0.0408, + "step": 24480 + }, + { + "epoch": 67.25549450549451, + "grad_norm": 7.007717609405518, + "learning_rate": 1.637225274725275e-05, + "loss": 0.0964, + "step": 24481 + }, + { + "epoch": 67.25824175824175, + "grad_norm": 12.493502616882324, + "learning_rate": 1.6370879120879122e-05, + "loss": 0.2208, + "step": 24482 + }, + { + "epoch": 67.26098901098901, + "grad_norm": 5.902337074279785, + "learning_rate": 1.6369505494505495e-05, + "loss": 0.1188, + "step": 24483 + }, + { + "epoch": 67.26373626373626, + "grad_norm": 10.78665828704834, + "learning_rate": 1.636813186813187e-05, + "loss": 0.1737, + "step": 24484 + }, + { + "epoch": 67.26648351648352, + "grad_norm": 18.58342933654785, + "learning_rate": 1.6366758241758242e-05, + "loss": 0.3797, + "step": 24485 + }, + { + "epoch": 67.26923076923077, + "grad_norm": 3.078662872314453, + "learning_rate": 1.6365384615384615e-05, + "loss": 0.0321, + "step": 24486 + }, + { + "epoch": 67.27197802197803, + "grad_norm": 6.04484748840332, + "learning_rate": 1.636401098901099e-05, + "loss": 0.0709, + "step": 24487 + }, + { + "epoch": 67.27472527472527, + "grad_norm": 12.448830604553223, + "learning_rate": 1.6362637362637366e-05, + "loss": 0.1154, + "step": 24488 + }, + { + "epoch": 67.27747252747253, + "grad_norm": 18.489154815673828, + "learning_rate": 1.636126373626374e-05, + "loss": 0.4723, + "step": 24489 + }, + { + "epoch": 67.28021978021978, + "grad_norm": 4.079322338104248, + "learning_rate": 1.6359890109890112e-05, + "loss": 0.0337, + "step": 24490 + }, + { + "epoch": 67.28296703296704, + "grad_norm": 11.0010986328125, + "learning_rate": 1.6358516483516482e-05, + "loss": 0.136, + "step": 24491 + }, + { + "epoch": 67.28571428571429, + "grad_norm": 15.034907341003418, + "learning_rate": 1.6357142857142856e-05, + "loss": 0.3376, + "step": 24492 + }, + { + "epoch": 67.28846153846153, + "grad_norm": 6.790104866027832, + "learning_rate": 1.6355769230769233e-05, + "loss": 0.0786, + "step": 24493 + }, + { + "epoch": 67.29120879120879, + "grad_norm": 11.173002243041992, + "learning_rate": 1.6354395604395606e-05, + "loss": 0.2408, + "step": 24494 + }, + { + "epoch": 67.29395604395604, + "grad_norm": 11.020898818969727, + "learning_rate": 1.635302197802198e-05, + "loss": 0.1325, + "step": 24495 + }, + { + "epoch": 67.2967032967033, + "grad_norm": 20.96384620666504, + "learning_rate": 1.6351648351648353e-05, + "loss": 0.3165, + "step": 24496 + }, + { + "epoch": 67.29945054945055, + "grad_norm": 15.905679702758789, + "learning_rate": 1.6350274725274726e-05, + "loss": 0.4041, + "step": 24497 + }, + { + "epoch": 67.3021978021978, + "grad_norm": 9.700841903686523, + "learning_rate": 1.63489010989011e-05, + "loss": 0.1204, + "step": 24498 + }, + { + "epoch": 67.30494505494505, + "grad_norm": 15.694123268127441, + "learning_rate": 1.6347527472527473e-05, + "loss": 0.2605, + "step": 24499 + }, + { + "epoch": 67.3076923076923, + "grad_norm": 3.361039638519287, + "learning_rate": 1.6346153846153847e-05, + "loss": 0.0921, + "step": 24500 + }, + { + "epoch": 67.31043956043956, + "grad_norm": 19.132740020751953, + "learning_rate": 1.634478021978022e-05, + "loss": 0.5646, + "step": 24501 + }, + { + "epoch": 67.31318681318682, + "grad_norm": 11.520052909851074, + "learning_rate": 1.6343406593406593e-05, + "loss": 0.4616, + "step": 24502 + }, + { + "epoch": 67.31593406593407, + "grad_norm": 8.499619483947754, + "learning_rate": 1.634203296703297e-05, + "loss": 0.0869, + "step": 24503 + }, + { + "epoch": 67.31868131868131, + "grad_norm": 13.124518394470215, + "learning_rate": 1.6340659340659344e-05, + "loss": 0.41, + "step": 24504 + }, + { + "epoch": 67.32142857142857, + "grad_norm": 19.737382888793945, + "learning_rate": 1.6339285714285717e-05, + "loss": 0.53, + "step": 24505 + }, + { + "epoch": 67.32417582417582, + "grad_norm": 8.709512710571289, + "learning_rate": 1.6337912087912087e-05, + "loss": 0.1051, + "step": 24506 + }, + { + "epoch": 67.32692307692308, + "grad_norm": 18.97745132446289, + "learning_rate": 1.633653846153846e-05, + "loss": 0.4372, + "step": 24507 + }, + { + "epoch": 67.32967032967034, + "grad_norm": 8.562945365905762, + "learning_rate": 1.6335164835164837e-05, + "loss": 0.2384, + "step": 24508 + }, + { + "epoch": 67.33241758241758, + "grad_norm": 14.777900695800781, + "learning_rate": 1.633379120879121e-05, + "loss": 0.2305, + "step": 24509 + }, + { + "epoch": 67.33516483516483, + "grad_norm": 19.15964698791504, + "learning_rate": 1.6332417582417584e-05, + "loss": 0.3111, + "step": 24510 + }, + { + "epoch": 67.33791208791209, + "grad_norm": 9.343419075012207, + "learning_rate": 1.6331043956043957e-05, + "loss": 0.2374, + "step": 24511 + }, + { + "epoch": 67.34065934065934, + "grad_norm": 10.203739166259766, + "learning_rate": 1.632967032967033e-05, + "loss": 0.2751, + "step": 24512 + }, + { + "epoch": 67.3434065934066, + "grad_norm": 12.320937156677246, + "learning_rate": 1.6328296703296704e-05, + "loss": 0.2464, + "step": 24513 + }, + { + "epoch": 67.34615384615384, + "grad_norm": 16.923240661621094, + "learning_rate": 1.6326923076923078e-05, + "loss": 0.2954, + "step": 24514 + }, + { + "epoch": 67.3489010989011, + "grad_norm": 20.378028869628906, + "learning_rate": 1.632554945054945e-05, + "loss": 0.5949, + "step": 24515 + }, + { + "epoch": 67.35164835164835, + "grad_norm": 17.18183708190918, + "learning_rate": 1.6324175824175824e-05, + "loss": 0.3155, + "step": 24516 + }, + { + "epoch": 67.3543956043956, + "grad_norm": 7.802654266357422, + "learning_rate": 1.6322802197802198e-05, + "loss": 0.1783, + "step": 24517 + }, + { + "epoch": 67.35714285714286, + "grad_norm": 12.830674171447754, + "learning_rate": 1.632142857142857e-05, + "loss": 0.1752, + "step": 24518 + }, + { + "epoch": 67.35989010989012, + "grad_norm": 19.736167907714844, + "learning_rate": 1.6320054945054948e-05, + "loss": 0.8162, + "step": 24519 + }, + { + "epoch": 67.36263736263736, + "grad_norm": 10.769834518432617, + "learning_rate": 1.631868131868132e-05, + "loss": 0.105, + "step": 24520 + }, + { + "epoch": 67.36538461538461, + "grad_norm": 8.522506713867188, + "learning_rate": 1.631730769230769e-05, + "loss": 0.1795, + "step": 24521 + }, + { + "epoch": 67.36813186813187, + "grad_norm": 9.805267333984375, + "learning_rate": 1.6315934065934065e-05, + "loss": 0.149, + "step": 24522 + }, + { + "epoch": 67.37087912087912, + "grad_norm": 17.22586441040039, + "learning_rate": 1.6314560439560438e-05, + "loss": 0.3658, + "step": 24523 + }, + { + "epoch": 67.37362637362638, + "grad_norm": 10.904672622680664, + "learning_rate": 1.6313186813186815e-05, + "loss": 0.1386, + "step": 24524 + }, + { + "epoch": 67.37637362637362, + "grad_norm": 11.491667747497559, + "learning_rate": 1.631181318681319e-05, + "loss": 0.1614, + "step": 24525 + }, + { + "epoch": 67.37912087912088, + "grad_norm": 3.8999557495117188, + "learning_rate": 1.6310439560439562e-05, + "loss": 0.057, + "step": 24526 + }, + { + "epoch": 67.38186813186813, + "grad_norm": 19.31629753112793, + "learning_rate": 1.6309065934065935e-05, + "loss": 0.7003, + "step": 24527 + }, + { + "epoch": 67.38461538461539, + "grad_norm": 17.388277053833008, + "learning_rate": 1.630769230769231e-05, + "loss": 0.1929, + "step": 24528 + }, + { + "epoch": 67.38736263736264, + "grad_norm": 13.856075286865234, + "learning_rate": 1.6306318681318682e-05, + "loss": 0.2668, + "step": 24529 + }, + { + "epoch": 67.39010989010988, + "grad_norm": 11.963129997253418, + "learning_rate": 1.6304945054945056e-05, + "loss": 0.1245, + "step": 24530 + }, + { + "epoch": 67.39285714285714, + "grad_norm": 7.229351043701172, + "learning_rate": 1.630357142857143e-05, + "loss": 0.1755, + "step": 24531 + }, + { + "epoch": 67.3956043956044, + "grad_norm": 9.054078102111816, + "learning_rate": 1.6302197802197802e-05, + "loss": 0.1579, + "step": 24532 + }, + { + "epoch": 67.39835164835165, + "grad_norm": 14.478964805603027, + "learning_rate": 1.6300824175824176e-05, + "loss": 0.251, + "step": 24533 + }, + { + "epoch": 67.4010989010989, + "grad_norm": 15.717703819274902, + "learning_rate": 1.6299450549450553e-05, + "loss": 0.4814, + "step": 24534 + }, + { + "epoch": 67.40384615384616, + "grad_norm": 12.235260009765625, + "learning_rate": 1.6298076923076923e-05, + "loss": 0.3451, + "step": 24535 + }, + { + "epoch": 67.4065934065934, + "grad_norm": 10.919517517089844, + "learning_rate": 1.6296703296703296e-05, + "loss": 0.1913, + "step": 24536 + }, + { + "epoch": 67.40934065934066, + "grad_norm": 6.449135780334473, + "learning_rate": 1.629532967032967e-05, + "loss": 0.0838, + "step": 24537 + }, + { + "epoch": 67.41208791208791, + "grad_norm": 17.832763671875, + "learning_rate": 1.6293956043956043e-05, + "loss": 0.2772, + "step": 24538 + }, + { + "epoch": 67.41483516483517, + "grad_norm": 7.901849746704102, + "learning_rate": 1.629258241758242e-05, + "loss": 0.1984, + "step": 24539 + }, + { + "epoch": 67.41758241758242, + "grad_norm": 15.424992561340332, + "learning_rate": 1.6291208791208793e-05, + "loss": 0.166, + "step": 24540 + }, + { + "epoch": 67.42032967032966, + "grad_norm": 15.378352165222168, + "learning_rate": 1.6289835164835166e-05, + "loss": 0.3133, + "step": 24541 + }, + { + "epoch": 67.42307692307692, + "grad_norm": 18.22676658630371, + "learning_rate": 1.628846153846154e-05, + "loss": 0.4989, + "step": 24542 + }, + { + "epoch": 67.42582417582418, + "grad_norm": 18.84351921081543, + "learning_rate": 1.6287087912087913e-05, + "loss": 0.321, + "step": 24543 + }, + { + "epoch": 67.42857142857143, + "grad_norm": 14.320701599121094, + "learning_rate": 1.6285714285714287e-05, + "loss": 0.3013, + "step": 24544 + }, + { + "epoch": 67.43131868131869, + "grad_norm": 18.38020133972168, + "learning_rate": 1.628434065934066e-05, + "loss": 0.415, + "step": 24545 + }, + { + "epoch": 67.43406593406593, + "grad_norm": 19.275253295898438, + "learning_rate": 1.6282967032967033e-05, + "loss": 0.3883, + "step": 24546 + }, + { + "epoch": 67.43681318681318, + "grad_norm": 16.1021671295166, + "learning_rate": 1.6281593406593407e-05, + "loss": 0.2051, + "step": 24547 + }, + { + "epoch": 67.43956043956044, + "grad_norm": 4.978922367095947, + "learning_rate": 1.628021978021978e-05, + "loss": 0.0916, + "step": 24548 + }, + { + "epoch": 67.4423076923077, + "grad_norm": 19.193979263305664, + "learning_rate": 1.6278846153846157e-05, + "loss": 0.4323, + "step": 24549 + }, + { + "epoch": 67.44505494505495, + "grad_norm": 12.08098316192627, + "learning_rate": 1.6277472527472527e-05, + "loss": 0.2012, + "step": 24550 + }, + { + "epoch": 67.4478021978022, + "grad_norm": 3.516693353652954, + "learning_rate": 1.62760989010989e-05, + "loss": 0.0276, + "step": 24551 + }, + { + "epoch": 67.45054945054945, + "grad_norm": 5.697373867034912, + "learning_rate": 1.6274725274725274e-05, + "loss": 0.0721, + "step": 24552 + }, + { + "epoch": 67.4532967032967, + "grad_norm": 3.927590847015381, + "learning_rate": 1.6273351648351647e-05, + "loss": 0.0676, + "step": 24553 + }, + { + "epoch": 67.45604395604396, + "grad_norm": 5.64178991317749, + "learning_rate": 1.6271978021978024e-05, + "loss": 0.0735, + "step": 24554 + }, + { + "epoch": 67.45879120879121, + "grad_norm": 15.835885047912598, + "learning_rate": 1.6270604395604397e-05, + "loss": 0.2966, + "step": 24555 + }, + { + "epoch": 67.46153846153847, + "grad_norm": 12.839850425720215, + "learning_rate": 1.626923076923077e-05, + "loss": 0.3355, + "step": 24556 + }, + { + "epoch": 67.46428571428571, + "grad_norm": 12.808119773864746, + "learning_rate": 1.6267857142857144e-05, + "loss": 0.1925, + "step": 24557 + }, + { + "epoch": 67.46703296703296, + "grad_norm": 6.909031867980957, + "learning_rate": 1.6266483516483518e-05, + "loss": 0.076, + "step": 24558 + }, + { + "epoch": 67.46978021978022, + "grad_norm": 9.803757667541504, + "learning_rate": 1.626510989010989e-05, + "loss": 0.1482, + "step": 24559 + }, + { + "epoch": 67.47252747252747, + "grad_norm": 6.243087291717529, + "learning_rate": 1.6263736263736265e-05, + "loss": 0.0749, + "step": 24560 + }, + { + "epoch": 67.47527472527473, + "grad_norm": 10.878157615661621, + "learning_rate": 1.6262362637362638e-05, + "loss": 0.2135, + "step": 24561 + }, + { + "epoch": 67.47802197802197, + "grad_norm": 5.814761161804199, + "learning_rate": 1.626098901098901e-05, + "loss": 0.1413, + "step": 24562 + }, + { + "epoch": 67.48076923076923, + "grad_norm": 7.017789363861084, + "learning_rate": 1.6259615384615385e-05, + "loss": 0.105, + "step": 24563 + }, + { + "epoch": 67.48351648351648, + "grad_norm": 7.5468549728393555, + "learning_rate": 1.625824175824176e-05, + "loss": 0.0911, + "step": 24564 + }, + { + "epoch": 67.48626373626374, + "grad_norm": 17.48237419128418, + "learning_rate": 1.625686813186813e-05, + "loss": 0.5513, + "step": 24565 + }, + { + "epoch": 67.48901098901099, + "grad_norm": 20.148624420166016, + "learning_rate": 1.6255494505494505e-05, + "loss": 0.4007, + "step": 24566 + }, + { + "epoch": 67.49175824175825, + "grad_norm": 7.611449718475342, + "learning_rate": 1.625412087912088e-05, + "loss": 0.1463, + "step": 24567 + }, + { + "epoch": 67.49450549450549, + "grad_norm": 12.211085319519043, + "learning_rate": 1.6252747252747252e-05, + "loss": 0.2659, + "step": 24568 + }, + { + "epoch": 67.49725274725274, + "grad_norm": 11.457291603088379, + "learning_rate": 1.625137362637363e-05, + "loss": 0.3172, + "step": 24569 + }, + { + "epoch": 67.5, + "grad_norm": 14.0787992477417, + "learning_rate": 1.6250000000000002e-05, + "loss": 0.2557, + "step": 24570 + }, + { + "epoch": 67.50274725274726, + "grad_norm": 12.478188514709473, + "learning_rate": 1.6248626373626375e-05, + "loss": 0.3219, + "step": 24571 + }, + { + "epoch": 67.50549450549451, + "grad_norm": 7.884701251983643, + "learning_rate": 1.624725274725275e-05, + "loss": 0.2294, + "step": 24572 + }, + { + "epoch": 67.50824175824175, + "grad_norm": 12.664645195007324, + "learning_rate": 1.6245879120879122e-05, + "loss": 0.2618, + "step": 24573 + }, + { + "epoch": 67.51098901098901, + "grad_norm": 15.712546348571777, + "learning_rate": 1.6244505494505496e-05, + "loss": 0.1799, + "step": 24574 + }, + { + "epoch": 67.51373626373626, + "grad_norm": 9.498730659484863, + "learning_rate": 1.624313186813187e-05, + "loss": 0.1862, + "step": 24575 + }, + { + "epoch": 67.51648351648352, + "grad_norm": 12.517109870910645, + "learning_rate": 1.6241758241758242e-05, + "loss": 0.1475, + "step": 24576 + }, + { + "epoch": 67.51923076923077, + "grad_norm": 10.568746566772461, + "learning_rate": 1.6240384615384616e-05, + "loss": 0.2305, + "step": 24577 + }, + { + "epoch": 67.52197802197803, + "grad_norm": 11.889580726623535, + "learning_rate": 1.623901098901099e-05, + "loss": 0.2087, + "step": 24578 + }, + { + "epoch": 67.52472527472527, + "grad_norm": 16.55701446533203, + "learning_rate": 1.6237637362637366e-05, + "loss": 0.2629, + "step": 24579 + }, + { + "epoch": 67.52747252747253, + "grad_norm": 22.30695152282715, + "learning_rate": 1.6236263736263736e-05, + "loss": 0.5586, + "step": 24580 + }, + { + "epoch": 67.53021978021978, + "grad_norm": 6.547029495239258, + "learning_rate": 1.623489010989011e-05, + "loss": 0.1864, + "step": 24581 + }, + { + "epoch": 67.53296703296704, + "grad_norm": 5.927203178405762, + "learning_rate": 1.6233516483516483e-05, + "loss": 0.1, + "step": 24582 + }, + { + "epoch": 67.53571428571429, + "grad_norm": 6.839993476867676, + "learning_rate": 1.6232142857142856e-05, + "loss": 0.1993, + "step": 24583 + }, + { + "epoch": 67.53846153846153, + "grad_norm": 12.938111305236816, + "learning_rate": 1.6230769230769233e-05, + "loss": 0.248, + "step": 24584 + }, + { + "epoch": 67.54120879120879, + "grad_norm": 10.882969856262207, + "learning_rate": 1.6229395604395606e-05, + "loss": 0.1662, + "step": 24585 + }, + { + "epoch": 67.54395604395604, + "grad_norm": 11.506296157836914, + "learning_rate": 1.622802197802198e-05, + "loss": 0.1485, + "step": 24586 + }, + { + "epoch": 67.5467032967033, + "grad_norm": 4.773288249969482, + "learning_rate": 1.6226648351648353e-05, + "loss": 0.0774, + "step": 24587 + }, + { + "epoch": 67.54945054945055, + "grad_norm": 19.4005069732666, + "learning_rate": 1.6225274725274727e-05, + "loss": 0.2875, + "step": 24588 + }, + { + "epoch": 67.5521978021978, + "grad_norm": 8.04853630065918, + "learning_rate": 1.62239010989011e-05, + "loss": 0.125, + "step": 24589 + }, + { + "epoch": 67.55494505494505, + "grad_norm": 15.493684768676758, + "learning_rate": 1.6222527472527473e-05, + "loss": 0.3128, + "step": 24590 + }, + { + "epoch": 67.5576923076923, + "grad_norm": 13.110466003417969, + "learning_rate": 1.6221153846153847e-05, + "loss": 0.38, + "step": 24591 + }, + { + "epoch": 67.56043956043956, + "grad_norm": 12.739221572875977, + "learning_rate": 1.621978021978022e-05, + "loss": 0.1994, + "step": 24592 + }, + { + "epoch": 67.56318681318682, + "grad_norm": 12.95974349975586, + "learning_rate": 1.6218406593406594e-05, + "loss": 0.3121, + "step": 24593 + }, + { + "epoch": 67.56593406593407, + "grad_norm": 10.757070541381836, + "learning_rate": 1.621703296703297e-05, + "loss": 0.2512, + "step": 24594 + }, + { + "epoch": 67.56868131868131, + "grad_norm": 8.41551399230957, + "learning_rate": 1.621565934065934e-05, + "loss": 0.1254, + "step": 24595 + }, + { + "epoch": 67.57142857142857, + "grad_norm": 12.948114395141602, + "learning_rate": 1.6214285714285714e-05, + "loss": 0.4387, + "step": 24596 + }, + { + "epoch": 67.57417582417582, + "grad_norm": 3.6994078159332275, + "learning_rate": 1.6212912087912087e-05, + "loss": 0.0598, + "step": 24597 + }, + { + "epoch": 67.57692307692308, + "grad_norm": 12.690781593322754, + "learning_rate": 1.621153846153846e-05, + "loss": 0.2592, + "step": 24598 + }, + { + "epoch": 67.57967032967034, + "grad_norm": 18.82907485961914, + "learning_rate": 1.6210164835164838e-05, + "loss": 0.2825, + "step": 24599 + }, + { + "epoch": 67.58241758241758, + "grad_norm": 10.25150203704834, + "learning_rate": 1.620879120879121e-05, + "loss": 0.2061, + "step": 24600 + }, + { + "epoch": 67.58516483516483, + "grad_norm": 3.7423624992370605, + "learning_rate": 1.6207417582417584e-05, + "loss": 0.0491, + "step": 24601 + }, + { + "epoch": 67.58791208791209, + "grad_norm": 18.06998634338379, + "learning_rate": 1.6206043956043958e-05, + "loss": 0.4104, + "step": 24602 + }, + { + "epoch": 67.59065934065934, + "grad_norm": 23.346229553222656, + "learning_rate": 1.620467032967033e-05, + "loss": 0.3318, + "step": 24603 + }, + { + "epoch": 67.5934065934066, + "grad_norm": 5.9372239112854, + "learning_rate": 1.6203296703296705e-05, + "loss": 0.1014, + "step": 24604 + }, + { + "epoch": 67.59615384615384, + "grad_norm": 8.95969009399414, + "learning_rate": 1.6201923076923078e-05, + "loss": 0.1101, + "step": 24605 + }, + { + "epoch": 67.5989010989011, + "grad_norm": 5.865720748901367, + "learning_rate": 1.620054945054945e-05, + "loss": 0.0812, + "step": 24606 + }, + { + "epoch": 67.60164835164835, + "grad_norm": 6.538366317749023, + "learning_rate": 1.6199175824175825e-05, + "loss": 0.1184, + "step": 24607 + }, + { + "epoch": 67.6043956043956, + "grad_norm": 14.103170394897461, + "learning_rate": 1.6197802197802198e-05, + "loss": 0.2562, + "step": 24608 + }, + { + "epoch": 67.60714285714286, + "grad_norm": 22.418378829956055, + "learning_rate": 1.6196428571428575e-05, + "loss": 0.4162, + "step": 24609 + }, + { + "epoch": 67.60989010989012, + "grad_norm": 7.95340633392334, + "learning_rate": 1.6195054945054945e-05, + "loss": 0.09, + "step": 24610 + }, + { + "epoch": 67.61263736263736, + "grad_norm": 8.684959411621094, + "learning_rate": 1.619368131868132e-05, + "loss": 0.1796, + "step": 24611 + }, + { + "epoch": 67.61538461538461, + "grad_norm": 5.508207321166992, + "learning_rate": 1.6192307692307692e-05, + "loss": 0.1329, + "step": 24612 + }, + { + "epoch": 67.61813186813187, + "grad_norm": 7.645790100097656, + "learning_rate": 1.6190934065934065e-05, + "loss": 0.1998, + "step": 24613 + }, + { + "epoch": 67.62087912087912, + "grad_norm": 14.036916732788086, + "learning_rate": 1.6189560439560442e-05, + "loss": 0.4314, + "step": 24614 + }, + { + "epoch": 67.62362637362638, + "grad_norm": 4.5212483406066895, + "learning_rate": 1.6188186813186815e-05, + "loss": 0.0806, + "step": 24615 + }, + { + "epoch": 67.62637362637362, + "grad_norm": 12.0789213180542, + "learning_rate": 1.618681318681319e-05, + "loss": 0.2227, + "step": 24616 + }, + { + "epoch": 67.62912087912088, + "grad_norm": 20.742645263671875, + "learning_rate": 1.6185439560439562e-05, + "loss": 0.566, + "step": 24617 + }, + { + "epoch": 67.63186813186813, + "grad_norm": 8.996530532836914, + "learning_rate": 1.6184065934065936e-05, + "loss": 0.0806, + "step": 24618 + }, + { + "epoch": 67.63461538461539, + "grad_norm": 7.410346508026123, + "learning_rate": 1.618269230769231e-05, + "loss": 0.2331, + "step": 24619 + }, + { + "epoch": 67.63736263736264, + "grad_norm": 6.884849548339844, + "learning_rate": 1.6181318681318682e-05, + "loss": 0.0795, + "step": 24620 + }, + { + "epoch": 67.64010989010988, + "grad_norm": 10.435259819030762, + "learning_rate": 1.6179945054945056e-05, + "loss": 0.1457, + "step": 24621 + }, + { + "epoch": 67.64285714285714, + "grad_norm": 17.18873405456543, + "learning_rate": 1.617857142857143e-05, + "loss": 0.397, + "step": 24622 + }, + { + "epoch": 67.6456043956044, + "grad_norm": 17.0318603515625, + "learning_rate": 1.6177197802197803e-05, + "loss": 0.2717, + "step": 24623 + }, + { + "epoch": 67.64835164835165, + "grad_norm": 4.531194686889648, + "learning_rate": 1.617582417582418e-05, + "loss": 0.0789, + "step": 24624 + }, + { + "epoch": 67.6510989010989, + "grad_norm": 13.018150329589844, + "learning_rate": 1.617445054945055e-05, + "loss": 0.2719, + "step": 24625 + }, + { + "epoch": 67.65384615384616, + "grad_norm": 6.359016418457031, + "learning_rate": 1.6173076923076923e-05, + "loss": 0.1548, + "step": 24626 + }, + { + "epoch": 67.6565934065934, + "grad_norm": 6.248198509216309, + "learning_rate": 1.6171703296703296e-05, + "loss": 0.0718, + "step": 24627 + }, + { + "epoch": 67.65934065934066, + "grad_norm": 4.041712760925293, + "learning_rate": 1.617032967032967e-05, + "loss": 0.0629, + "step": 24628 + }, + { + "epoch": 67.66208791208791, + "grad_norm": 3.9553468227386475, + "learning_rate": 1.6168956043956043e-05, + "loss": 0.0625, + "step": 24629 + }, + { + "epoch": 67.66483516483517, + "grad_norm": 13.177157402038574, + "learning_rate": 1.616758241758242e-05, + "loss": 0.3186, + "step": 24630 + }, + { + "epoch": 67.66758241758242, + "grad_norm": 11.769366264343262, + "learning_rate": 1.6166208791208793e-05, + "loss": 0.1784, + "step": 24631 + }, + { + "epoch": 67.67032967032966, + "grad_norm": 11.917999267578125, + "learning_rate": 1.6164835164835167e-05, + "loss": 0.2635, + "step": 24632 + }, + { + "epoch": 67.67307692307692, + "grad_norm": 13.265623092651367, + "learning_rate": 1.6163461538461537e-05, + "loss": 0.2767, + "step": 24633 + }, + { + "epoch": 67.67582417582418, + "grad_norm": 7.137042045593262, + "learning_rate": 1.616208791208791e-05, + "loss": 0.092, + "step": 24634 + }, + { + "epoch": 67.67857142857143, + "grad_norm": 11.126485824584961, + "learning_rate": 1.6160714285714287e-05, + "loss": 0.1149, + "step": 24635 + }, + { + "epoch": 67.68131868131869, + "grad_norm": 16.52980613708496, + "learning_rate": 1.615934065934066e-05, + "loss": 0.2771, + "step": 24636 + }, + { + "epoch": 67.68406593406593, + "grad_norm": 8.246644020080566, + "learning_rate": 1.6157967032967034e-05, + "loss": 0.1588, + "step": 24637 + }, + { + "epoch": 67.68681318681318, + "grad_norm": 14.642388343811035, + "learning_rate": 1.6156593406593407e-05, + "loss": 0.204, + "step": 24638 + }, + { + "epoch": 67.68956043956044, + "grad_norm": 6.525252819061279, + "learning_rate": 1.615521978021978e-05, + "loss": 0.132, + "step": 24639 + }, + { + "epoch": 67.6923076923077, + "grad_norm": 3.0761544704437256, + "learning_rate": 1.6153846153846154e-05, + "loss": 0.0347, + "step": 24640 + }, + { + "epoch": 67.69505494505495, + "grad_norm": 19.677663803100586, + "learning_rate": 1.6152472527472527e-05, + "loss": 0.6037, + "step": 24641 + }, + { + "epoch": 67.6978021978022, + "grad_norm": 6.643566608428955, + "learning_rate": 1.61510989010989e-05, + "loss": 0.1351, + "step": 24642 + }, + { + "epoch": 67.70054945054945, + "grad_norm": 7.075283050537109, + "learning_rate": 1.6149725274725274e-05, + "loss": 0.1769, + "step": 24643 + }, + { + "epoch": 67.7032967032967, + "grad_norm": 18.877004623413086, + "learning_rate": 1.6148351648351648e-05, + "loss": 0.4863, + "step": 24644 + }, + { + "epoch": 67.70604395604396, + "grad_norm": 9.84197998046875, + "learning_rate": 1.6146978021978024e-05, + "loss": 0.1697, + "step": 24645 + }, + { + "epoch": 67.70879120879121, + "grad_norm": 13.88454818725586, + "learning_rate": 1.6145604395604398e-05, + "loss": 0.2363, + "step": 24646 + }, + { + "epoch": 67.71153846153847, + "grad_norm": 8.206109046936035, + "learning_rate": 1.614423076923077e-05, + "loss": 0.1426, + "step": 24647 + }, + { + "epoch": 67.71428571428571, + "grad_norm": 8.058145523071289, + "learning_rate": 1.614285714285714e-05, + "loss": 0.1158, + "step": 24648 + }, + { + "epoch": 67.71703296703296, + "grad_norm": 11.85200309753418, + "learning_rate": 1.6141483516483515e-05, + "loss": 0.2642, + "step": 24649 + }, + { + "epoch": 67.71978021978022, + "grad_norm": 8.791322708129883, + "learning_rate": 1.614010989010989e-05, + "loss": 0.1519, + "step": 24650 + }, + { + "epoch": 67.72252747252747, + "grad_norm": 19.66067886352539, + "learning_rate": 1.6138736263736265e-05, + "loss": 0.3578, + "step": 24651 + }, + { + "epoch": 67.72527472527473, + "grad_norm": 16.183908462524414, + "learning_rate": 1.6137362637362638e-05, + "loss": 0.3028, + "step": 24652 + }, + { + "epoch": 67.72802197802197, + "grad_norm": 11.234647750854492, + "learning_rate": 1.613598901098901e-05, + "loss": 0.2208, + "step": 24653 + }, + { + "epoch": 67.73076923076923, + "grad_norm": 17.10330581665039, + "learning_rate": 1.6134615384615385e-05, + "loss": 0.4764, + "step": 24654 + }, + { + "epoch": 67.73351648351648, + "grad_norm": 7.398573398590088, + "learning_rate": 1.613324175824176e-05, + "loss": 0.1098, + "step": 24655 + }, + { + "epoch": 67.73626373626374, + "grad_norm": 2.8269355297088623, + "learning_rate": 1.6131868131868132e-05, + "loss": 0.0491, + "step": 24656 + }, + { + "epoch": 67.73901098901099, + "grad_norm": 18.46109390258789, + "learning_rate": 1.6130494505494505e-05, + "loss": 0.4448, + "step": 24657 + }, + { + "epoch": 67.74175824175825, + "grad_norm": 17.86366081237793, + "learning_rate": 1.612912087912088e-05, + "loss": 0.4162, + "step": 24658 + }, + { + "epoch": 67.74450549450549, + "grad_norm": 7.873173713684082, + "learning_rate": 1.6127747252747252e-05, + "loss": 0.1809, + "step": 24659 + }, + { + "epoch": 67.74725274725274, + "grad_norm": 11.223529815673828, + "learning_rate": 1.612637362637363e-05, + "loss": 0.2162, + "step": 24660 + }, + { + "epoch": 67.75, + "grad_norm": 13.125858306884766, + "learning_rate": 1.6125000000000002e-05, + "loss": 0.2497, + "step": 24661 + }, + { + "epoch": 67.75274725274726, + "grad_norm": 18.04589080810547, + "learning_rate": 1.6123626373626376e-05, + "loss": 0.343, + "step": 24662 + }, + { + "epoch": 67.75549450549451, + "grad_norm": 17.82352638244629, + "learning_rate": 1.6122252747252746e-05, + "loss": 0.3406, + "step": 24663 + }, + { + "epoch": 67.75824175824175, + "grad_norm": 10.062251091003418, + "learning_rate": 1.612087912087912e-05, + "loss": 0.1678, + "step": 24664 + }, + { + "epoch": 67.76098901098901, + "grad_norm": 13.722976684570312, + "learning_rate": 1.6119505494505496e-05, + "loss": 0.1181, + "step": 24665 + }, + { + "epoch": 67.76373626373626, + "grad_norm": 7.881528854370117, + "learning_rate": 1.611813186813187e-05, + "loss": 0.1049, + "step": 24666 + }, + { + "epoch": 67.76648351648352, + "grad_norm": 22.746597290039062, + "learning_rate": 1.6116758241758243e-05, + "loss": 0.3487, + "step": 24667 + }, + { + "epoch": 67.76923076923077, + "grad_norm": 6.404820919036865, + "learning_rate": 1.6115384615384616e-05, + "loss": 0.0769, + "step": 24668 + }, + { + "epoch": 67.77197802197803, + "grad_norm": 14.925851821899414, + "learning_rate": 1.611401098901099e-05, + "loss": 0.2022, + "step": 24669 + }, + { + "epoch": 67.77472527472527, + "grad_norm": 4.416627407073975, + "learning_rate": 1.6112637362637363e-05, + "loss": 0.0736, + "step": 24670 + }, + { + "epoch": 67.77747252747253, + "grad_norm": 4.345456600189209, + "learning_rate": 1.6111263736263736e-05, + "loss": 0.0479, + "step": 24671 + }, + { + "epoch": 67.78021978021978, + "grad_norm": 12.645809173583984, + "learning_rate": 1.610989010989011e-05, + "loss": 0.2467, + "step": 24672 + }, + { + "epoch": 67.78296703296704, + "grad_norm": 12.676104545593262, + "learning_rate": 1.6108516483516483e-05, + "loss": 0.1863, + "step": 24673 + }, + { + "epoch": 67.78571428571429, + "grad_norm": 11.579300880432129, + "learning_rate": 1.6107142857142857e-05, + "loss": 0.3235, + "step": 24674 + }, + { + "epoch": 67.78846153846153, + "grad_norm": 10.782461166381836, + "learning_rate": 1.6105769230769233e-05, + "loss": 0.3027, + "step": 24675 + }, + { + "epoch": 67.79120879120879, + "grad_norm": 9.922651290893555, + "learning_rate": 1.6104395604395607e-05, + "loss": 0.2375, + "step": 24676 + }, + { + "epoch": 67.79395604395604, + "grad_norm": 5.958675384521484, + "learning_rate": 1.610302197802198e-05, + "loss": 0.0662, + "step": 24677 + }, + { + "epoch": 67.7967032967033, + "grad_norm": 19.14789581298828, + "learning_rate": 1.610164835164835e-05, + "loss": 0.2927, + "step": 24678 + }, + { + "epoch": 67.79945054945055, + "grad_norm": 12.864039421081543, + "learning_rate": 1.6100274725274724e-05, + "loss": 0.1878, + "step": 24679 + }, + { + "epoch": 67.8021978021978, + "grad_norm": 1.9046934843063354, + "learning_rate": 1.60989010989011e-05, + "loss": 0.0239, + "step": 24680 + }, + { + "epoch": 67.80494505494505, + "grad_norm": 7.5359625816345215, + "learning_rate": 1.6097527472527474e-05, + "loss": 0.2116, + "step": 24681 + }, + { + "epoch": 67.8076923076923, + "grad_norm": 18.722200393676758, + "learning_rate": 1.6096153846153847e-05, + "loss": 0.2911, + "step": 24682 + }, + { + "epoch": 67.81043956043956, + "grad_norm": 20.658540725708008, + "learning_rate": 1.609478021978022e-05, + "loss": 0.2638, + "step": 24683 + }, + { + "epoch": 67.81318681318682, + "grad_norm": 14.221049308776855, + "learning_rate": 1.6093406593406594e-05, + "loss": 0.116, + "step": 24684 + }, + { + "epoch": 67.81593406593407, + "grad_norm": 7.033794403076172, + "learning_rate": 1.6092032967032967e-05, + "loss": 0.1036, + "step": 24685 + }, + { + "epoch": 67.81868131868131, + "grad_norm": 14.275592803955078, + "learning_rate": 1.609065934065934e-05, + "loss": 0.2726, + "step": 24686 + }, + { + "epoch": 67.82142857142857, + "grad_norm": 5.8567094802856445, + "learning_rate": 1.6089285714285714e-05, + "loss": 0.0624, + "step": 24687 + }, + { + "epoch": 67.82417582417582, + "grad_norm": 9.737309455871582, + "learning_rate": 1.6087912087912088e-05, + "loss": 0.0971, + "step": 24688 + }, + { + "epoch": 67.82692307692308, + "grad_norm": 11.534887313842773, + "learning_rate": 1.608653846153846e-05, + "loss": 0.3036, + "step": 24689 + }, + { + "epoch": 67.82967032967034, + "grad_norm": 9.611673355102539, + "learning_rate": 1.6085164835164838e-05, + "loss": 0.2608, + "step": 24690 + }, + { + "epoch": 67.83241758241758, + "grad_norm": 7.770574569702148, + "learning_rate": 1.608379120879121e-05, + "loss": 0.1681, + "step": 24691 + }, + { + "epoch": 67.83516483516483, + "grad_norm": 2.775991678237915, + "learning_rate": 1.6082417582417585e-05, + "loss": 0.0387, + "step": 24692 + }, + { + "epoch": 67.83791208791209, + "grad_norm": 21.064525604248047, + "learning_rate": 1.6081043956043955e-05, + "loss": 0.3133, + "step": 24693 + }, + { + "epoch": 67.84065934065934, + "grad_norm": 14.006980895996094, + "learning_rate": 1.6079670329670328e-05, + "loss": 0.2465, + "step": 24694 + }, + { + "epoch": 67.8434065934066, + "grad_norm": 1.6419641971588135, + "learning_rate": 1.6078296703296705e-05, + "loss": 0.019, + "step": 24695 + }, + { + "epoch": 67.84615384615384, + "grad_norm": 11.234192848205566, + "learning_rate": 1.607692307692308e-05, + "loss": 0.1712, + "step": 24696 + }, + { + "epoch": 67.8489010989011, + "grad_norm": 14.59529972076416, + "learning_rate": 1.6075549450549452e-05, + "loss": 0.2844, + "step": 24697 + }, + { + "epoch": 67.85164835164835, + "grad_norm": 3.5304360389709473, + "learning_rate": 1.6074175824175825e-05, + "loss": 0.0339, + "step": 24698 + }, + { + "epoch": 67.8543956043956, + "grad_norm": 17.562667846679688, + "learning_rate": 1.60728021978022e-05, + "loss": 0.3919, + "step": 24699 + }, + { + "epoch": 67.85714285714286, + "grad_norm": 9.961017608642578, + "learning_rate": 1.6071428571428572e-05, + "loss": 0.3451, + "step": 24700 + }, + { + "epoch": 67.85989010989012, + "grad_norm": 10.804559707641602, + "learning_rate": 1.6070054945054945e-05, + "loss": 0.1767, + "step": 24701 + }, + { + "epoch": 67.86263736263736, + "grad_norm": 11.504283905029297, + "learning_rate": 1.606868131868132e-05, + "loss": 0.0955, + "step": 24702 + }, + { + "epoch": 67.86538461538461, + "grad_norm": 19.019163131713867, + "learning_rate": 1.6067307692307692e-05, + "loss": 0.7377, + "step": 24703 + }, + { + "epoch": 67.86813186813187, + "grad_norm": 11.207161903381348, + "learning_rate": 1.6065934065934066e-05, + "loss": 0.138, + "step": 24704 + }, + { + "epoch": 67.87087912087912, + "grad_norm": 22.489015579223633, + "learning_rate": 1.6064560439560442e-05, + "loss": 0.6184, + "step": 24705 + }, + { + "epoch": 67.87362637362638, + "grad_norm": 12.145280838012695, + "learning_rate": 1.6063186813186816e-05, + "loss": 0.2031, + "step": 24706 + }, + { + "epoch": 67.87637362637362, + "grad_norm": 3.611436367034912, + "learning_rate": 1.606181318681319e-05, + "loss": 0.0385, + "step": 24707 + }, + { + "epoch": 67.87912087912088, + "grad_norm": 7.245014667510986, + "learning_rate": 1.606043956043956e-05, + "loss": 0.0946, + "step": 24708 + }, + { + "epoch": 67.88186813186813, + "grad_norm": 7.630512714385986, + "learning_rate": 1.6059065934065933e-05, + "loss": 0.1619, + "step": 24709 + }, + { + "epoch": 67.88461538461539, + "grad_norm": 7.652864456176758, + "learning_rate": 1.605769230769231e-05, + "loss": 0.1474, + "step": 24710 + }, + { + "epoch": 67.88736263736264, + "grad_norm": 12.108570098876953, + "learning_rate": 1.6056318681318683e-05, + "loss": 0.3034, + "step": 24711 + }, + { + "epoch": 67.89010989010988, + "grad_norm": 11.1168212890625, + "learning_rate": 1.6054945054945056e-05, + "loss": 0.2149, + "step": 24712 + }, + { + "epoch": 67.89285714285714, + "grad_norm": 8.693269729614258, + "learning_rate": 1.605357142857143e-05, + "loss": 0.1953, + "step": 24713 + }, + { + "epoch": 67.8956043956044, + "grad_norm": 16.365047454833984, + "learning_rate": 1.6052197802197803e-05, + "loss": 0.2575, + "step": 24714 + }, + { + "epoch": 67.89835164835165, + "grad_norm": 8.660493850708008, + "learning_rate": 1.6050824175824176e-05, + "loss": 0.1126, + "step": 24715 + }, + { + "epoch": 67.9010989010989, + "grad_norm": 14.510007858276367, + "learning_rate": 1.604945054945055e-05, + "loss": 0.2594, + "step": 24716 + }, + { + "epoch": 67.90384615384616, + "grad_norm": 13.01220703125, + "learning_rate": 1.6048076923076923e-05, + "loss": 0.1562, + "step": 24717 + }, + { + "epoch": 67.9065934065934, + "grad_norm": 6.419704914093018, + "learning_rate": 1.6046703296703297e-05, + "loss": 0.1476, + "step": 24718 + }, + { + "epoch": 67.90934065934066, + "grad_norm": 4.245439052581787, + "learning_rate": 1.604532967032967e-05, + "loss": 0.0472, + "step": 24719 + }, + { + "epoch": 67.91208791208791, + "grad_norm": 8.658614158630371, + "learning_rate": 1.6043956043956047e-05, + "loss": 0.1832, + "step": 24720 + }, + { + "epoch": 67.91483516483517, + "grad_norm": 13.004898071289062, + "learning_rate": 1.604258241758242e-05, + "loss": 0.2907, + "step": 24721 + }, + { + "epoch": 67.91758241758242, + "grad_norm": 11.557031631469727, + "learning_rate": 1.6041208791208794e-05, + "loss": 0.1077, + "step": 24722 + }, + { + "epoch": 67.92032967032966, + "grad_norm": 11.180861473083496, + "learning_rate": 1.6039835164835164e-05, + "loss": 0.1439, + "step": 24723 + }, + { + "epoch": 67.92307692307692, + "grad_norm": 3.5160043239593506, + "learning_rate": 1.6038461538461537e-05, + "loss": 0.0529, + "step": 24724 + }, + { + "epoch": 67.92582417582418, + "grad_norm": 6.77316951751709, + "learning_rate": 1.6037087912087914e-05, + "loss": 0.0966, + "step": 24725 + }, + { + "epoch": 67.92857142857143, + "grad_norm": 8.044245719909668, + "learning_rate": 1.6035714285714287e-05, + "loss": 0.1503, + "step": 24726 + }, + { + "epoch": 67.93131868131869, + "grad_norm": 10.640108108520508, + "learning_rate": 1.603434065934066e-05, + "loss": 0.1906, + "step": 24727 + }, + { + "epoch": 67.93406593406593, + "grad_norm": 25.727941513061523, + "learning_rate": 1.6032967032967034e-05, + "loss": 0.5381, + "step": 24728 + }, + { + "epoch": 67.93681318681318, + "grad_norm": 7.369847297668457, + "learning_rate": 1.6031593406593408e-05, + "loss": 0.2994, + "step": 24729 + }, + { + "epoch": 67.93956043956044, + "grad_norm": 10.090991020202637, + "learning_rate": 1.603021978021978e-05, + "loss": 0.1704, + "step": 24730 + }, + { + "epoch": 67.9423076923077, + "grad_norm": 31.535093307495117, + "learning_rate": 1.6028846153846154e-05, + "loss": 0.7549, + "step": 24731 + }, + { + "epoch": 67.94505494505495, + "grad_norm": 1.2802289724349976, + "learning_rate": 1.6027472527472528e-05, + "loss": 0.0211, + "step": 24732 + }, + { + "epoch": 67.9478021978022, + "grad_norm": 7.781015872955322, + "learning_rate": 1.60260989010989e-05, + "loss": 0.0781, + "step": 24733 + }, + { + "epoch": 67.95054945054945, + "grad_norm": 14.042900085449219, + "learning_rate": 1.6024725274725275e-05, + "loss": 0.4979, + "step": 24734 + }, + { + "epoch": 67.9532967032967, + "grad_norm": 2.4217894077301025, + "learning_rate": 1.602335164835165e-05, + "loss": 0.0263, + "step": 24735 + }, + { + "epoch": 67.95604395604396, + "grad_norm": 13.327292442321777, + "learning_rate": 1.6021978021978025e-05, + "loss": 0.3156, + "step": 24736 + }, + { + "epoch": 67.95879120879121, + "grad_norm": 14.023025512695312, + "learning_rate": 1.6020604395604398e-05, + "loss": 0.3326, + "step": 24737 + }, + { + "epoch": 67.96153846153847, + "grad_norm": 13.060999870300293, + "learning_rate": 1.6019230769230768e-05, + "loss": 0.2642, + "step": 24738 + }, + { + "epoch": 67.96428571428571, + "grad_norm": 13.922826766967773, + "learning_rate": 1.601785714285714e-05, + "loss": 0.2098, + "step": 24739 + }, + { + "epoch": 67.96703296703296, + "grad_norm": 13.941475868225098, + "learning_rate": 1.601648351648352e-05, + "loss": 0.2139, + "step": 24740 + }, + { + "epoch": 67.96978021978022, + "grad_norm": 16.803430557250977, + "learning_rate": 1.6015109890109892e-05, + "loss": 0.4992, + "step": 24741 + }, + { + "epoch": 67.97252747252747, + "grad_norm": 7.178408145904541, + "learning_rate": 1.6013736263736265e-05, + "loss": 0.1301, + "step": 24742 + }, + { + "epoch": 67.97527472527473, + "grad_norm": 11.235255241394043, + "learning_rate": 1.601236263736264e-05, + "loss": 0.091, + "step": 24743 + }, + { + "epoch": 67.97802197802197, + "grad_norm": 9.959951400756836, + "learning_rate": 1.6010989010989012e-05, + "loss": 0.1384, + "step": 24744 + }, + { + "epoch": 67.98076923076923, + "grad_norm": 15.39102840423584, + "learning_rate": 1.6009615384615385e-05, + "loss": 0.4248, + "step": 24745 + }, + { + "epoch": 67.98351648351648, + "grad_norm": 15.38549518585205, + "learning_rate": 1.600824175824176e-05, + "loss": 0.2961, + "step": 24746 + }, + { + "epoch": 67.98626373626374, + "grad_norm": 12.850394248962402, + "learning_rate": 1.6006868131868132e-05, + "loss": 0.2717, + "step": 24747 + }, + { + "epoch": 67.98901098901099, + "grad_norm": 11.227350234985352, + "learning_rate": 1.6005494505494506e-05, + "loss": 0.2025, + "step": 24748 + }, + { + "epoch": 67.99175824175825, + "grad_norm": 21.567306518554688, + "learning_rate": 1.600412087912088e-05, + "loss": 0.6729, + "step": 24749 + }, + { + "epoch": 67.99450549450549, + "grad_norm": 7.066049098968506, + "learning_rate": 1.6002747252747252e-05, + "loss": 0.1365, + "step": 24750 + }, + { + "epoch": 67.99725274725274, + "grad_norm": 8.640445709228516, + "learning_rate": 1.600137362637363e-05, + "loss": 0.2438, + "step": 24751 + }, + { + "epoch": 68.0, + "grad_norm": 78.49259948730469, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.3597, + "step": 24752 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.7575757575757576, + "eval_f1": 0.7412981738458179, + "eval_f1_DuraRiadoRio_64x64": 0.46808510638297873, + "eval_f1_Mole_64x64": 0.8034188034188035, + "eval_f1_Quebrado_64x64": 0.8791946308724832, + "eval_f1_RiadoRio_64x64": 0.6494252873563219, + "eval_f1_RioFechado_64x64": 0.9063670411985019, + "eval_loss": 1.2537258863449097, + "eval_precision": 0.8152678766368208, + "eval_precision_DuraRiadoRio_64x64": 1.0, + "eval_precision_Mole_64x64": 0.6811594202898551, + "eval_precision_Quebrado_64x64": 0.8506493506493507, + "eval_precision_RiadoRio_64x64": 0.576530612244898, + "eval_precision_RioFechado_64x64": 0.968, + "eval_recall": 0.7579956346264722, + "eval_recall_DuraRiadoRio_64x64": 0.3055555555555556, + "eval_recall_Mole_64x64": 0.9791666666666666, + "eval_recall_Quebrado_64x64": 0.9097222222222222, + "eval_recall_RiadoRio_64x64": 0.743421052631579, + "eval_recall_RioFechado_64x64": 0.852112676056338, + "eval_runtime": 1.7289, + "eval_samples_per_second": 419.915, + "eval_steps_per_second": 26.606, + "step": 24752 + }, + { + "epoch": 68.00274725274726, + "grad_norm": 7.228466510772705, + "learning_rate": 1.5998626373626373e-05, + "loss": 0.1288, + "step": 24753 + }, + { + "epoch": 68.00549450549451, + "grad_norm": 11.4791898727417, + "learning_rate": 1.5997252747252746e-05, + "loss": 0.3348, + "step": 24754 + }, + { + "epoch": 68.00824175824175, + "grad_norm": 13.114482879638672, + "learning_rate": 1.599587912087912e-05, + "loss": 0.1876, + "step": 24755 + }, + { + "epoch": 68.01098901098901, + "grad_norm": 17.63150405883789, + "learning_rate": 1.5994505494505496e-05, + "loss": 0.4654, + "step": 24756 + }, + { + "epoch": 68.01373626373626, + "grad_norm": 21.451580047607422, + "learning_rate": 1.599313186813187e-05, + "loss": 0.3551, + "step": 24757 + }, + { + "epoch": 68.01648351648352, + "grad_norm": 4.692785739898682, + "learning_rate": 1.5991758241758243e-05, + "loss": 0.075, + "step": 24758 + }, + { + "epoch": 68.01923076923077, + "grad_norm": 10.354865074157715, + "learning_rate": 1.5990384615384617e-05, + "loss": 0.3547, + "step": 24759 + }, + { + "epoch": 68.02197802197803, + "grad_norm": 9.83481216430664, + "learning_rate": 1.598901098901099e-05, + "loss": 0.1649, + "step": 24760 + }, + { + "epoch": 68.02472527472527, + "grad_norm": 16.62119483947754, + "learning_rate": 1.5987637362637363e-05, + "loss": 0.3793, + "step": 24761 + }, + { + "epoch": 68.02747252747253, + "grad_norm": 14.14745044708252, + "learning_rate": 1.5986263736263737e-05, + "loss": 0.42, + "step": 24762 + }, + { + "epoch": 68.03021978021978, + "grad_norm": 19.83915138244629, + "learning_rate": 1.598489010989011e-05, + "loss": 0.1711, + "step": 24763 + }, + { + "epoch": 68.03296703296704, + "grad_norm": 2.196948289871216, + "learning_rate": 1.5983516483516484e-05, + "loss": 0.0449, + "step": 24764 + }, + { + "epoch": 68.03571428571429, + "grad_norm": 8.40568733215332, + "learning_rate": 1.5982142857142857e-05, + "loss": 0.2061, + "step": 24765 + }, + { + "epoch": 68.03846153846153, + "grad_norm": 8.44288158416748, + "learning_rate": 1.5980769230769234e-05, + "loss": 0.1822, + "step": 24766 + }, + { + "epoch": 68.04120879120879, + "grad_norm": 6.90822696685791, + "learning_rate": 1.5979395604395607e-05, + "loss": 0.1471, + "step": 24767 + }, + { + "epoch": 68.04395604395604, + "grad_norm": 11.951050758361816, + "learning_rate": 1.5978021978021977e-05, + "loss": 0.2477, + "step": 24768 + }, + { + "epoch": 68.0467032967033, + "grad_norm": 10.14046573638916, + "learning_rate": 1.597664835164835e-05, + "loss": 0.0695, + "step": 24769 + }, + { + "epoch": 68.04945054945055, + "grad_norm": 18.66660499572754, + "learning_rate": 1.5975274725274724e-05, + "loss": 0.6526, + "step": 24770 + }, + { + "epoch": 68.0521978021978, + "grad_norm": 14.114568710327148, + "learning_rate": 1.59739010989011e-05, + "loss": 0.27, + "step": 24771 + }, + { + "epoch": 68.05494505494505, + "grad_norm": 12.263769149780273, + "learning_rate": 1.5972527472527474e-05, + "loss": 0.2246, + "step": 24772 + }, + { + "epoch": 68.0576923076923, + "grad_norm": 8.860492706298828, + "learning_rate": 1.5971153846153848e-05, + "loss": 0.1647, + "step": 24773 + }, + { + "epoch": 68.06043956043956, + "grad_norm": 22.386760711669922, + "learning_rate": 1.596978021978022e-05, + "loss": 0.3926, + "step": 24774 + }, + { + "epoch": 68.06318681318682, + "grad_norm": 1.1723639965057373, + "learning_rate": 1.5968406593406594e-05, + "loss": 0.018, + "step": 24775 + }, + { + "epoch": 68.06593406593407, + "grad_norm": 14.574381828308105, + "learning_rate": 1.5967032967032968e-05, + "loss": 0.385, + "step": 24776 + }, + { + "epoch": 68.06868131868131, + "grad_norm": 13.029108047485352, + "learning_rate": 1.596565934065934e-05, + "loss": 0.265, + "step": 24777 + }, + { + "epoch": 68.07142857142857, + "grad_norm": 11.011748313903809, + "learning_rate": 1.5964285714285715e-05, + "loss": 0.142, + "step": 24778 + }, + { + "epoch": 68.07417582417582, + "grad_norm": 14.420860290527344, + "learning_rate": 1.5962912087912088e-05, + "loss": 0.184, + "step": 24779 + }, + { + "epoch": 68.07692307692308, + "grad_norm": 13.510429382324219, + "learning_rate": 1.596153846153846e-05, + "loss": 0.29, + "step": 24780 + }, + { + "epoch": 68.07967032967034, + "grad_norm": 22.4318904876709, + "learning_rate": 1.5960164835164838e-05, + "loss": 0.43, + "step": 24781 + }, + { + "epoch": 68.08241758241758, + "grad_norm": 17.20271873474121, + "learning_rate": 1.595879120879121e-05, + "loss": 0.3245, + "step": 24782 + }, + { + "epoch": 68.08516483516483, + "grad_norm": 14.282580375671387, + "learning_rate": 1.595741758241758e-05, + "loss": 0.3579, + "step": 24783 + }, + { + "epoch": 68.08791208791209, + "grad_norm": 16.57023048400879, + "learning_rate": 1.5956043956043955e-05, + "loss": 0.3564, + "step": 24784 + }, + { + "epoch": 68.09065934065934, + "grad_norm": 15.139365196228027, + "learning_rate": 1.595467032967033e-05, + "loss": 0.2975, + "step": 24785 + }, + { + "epoch": 68.0934065934066, + "grad_norm": 12.306109428405762, + "learning_rate": 1.5953296703296705e-05, + "loss": 0.2489, + "step": 24786 + }, + { + "epoch": 68.09615384615384, + "grad_norm": 11.444133758544922, + "learning_rate": 1.595192307692308e-05, + "loss": 0.3128, + "step": 24787 + }, + { + "epoch": 68.0989010989011, + "grad_norm": 16.36076545715332, + "learning_rate": 1.5950549450549452e-05, + "loss": 0.1961, + "step": 24788 + }, + { + "epoch": 68.10164835164835, + "grad_norm": 18.80946922302246, + "learning_rate": 1.5949175824175825e-05, + "loss": 0.5168, + "step": 24789 + }, + { + "epoch": 68.1043956043956, + "grad_norm": 18.387954711914062, + "learning_rate": 1.59478021978022e-05, + "loss": 0.4098, + "step": 24790 + }, + { + "epoch": 68.10714285714286, + "grad_norm": 7.341878414154053, + "learning_rate": 1.5946428571428572e-05, + "loss": 0.1791, + "step": 24791 + }, + { + "epoch": 68.10989010989012, + "grad_norm": 13.96921443939209, + "learning_rate": 1.5945054945054946e-05, + "loss": 0.3211, + "step": 24792 + }, + { + "epoch": 68.11263736263736, + "grad_norm": 7.419088840484619, + "learning_rate": 1.594368131868132e-05, + "loss": 0.1737, + "step": 24793 + }, + { + "epoch": 68.11538461538461, + "grad_norm": 8.521535873413086, + "learning_rate": 1.5942307692307693e-05, + "loss": 0.1909, + "step": 24794 + }, + { + "epoch": 68.11813186813187, + "grad_norm": 15.194236755371094, + "learning_rate": 1.5940934065934066e-05, + "loss": 0.3463, + "step": 24795 + }, + { + "epoch": 68.12087912087912, + "grad_norm": 12.06139087677002, + "learning_rate": 1.5939560439560443e-05, + "loss": 0.2756, + "step": 24796 + }, + { + "epoch": 68.12362637362638, + "grad_norm": 10.106279373168945, + "learning_rate": 1.5938186813186816e-05, + "loss": 0.2303, + "step": 24797 + }, + { + "epoch": 68.12637362637362, + "grad_norm": 12.711678504943848, + "learning_rate": 1.5936813186813186e-05, + "loss": 0.1729, + "step": 24798 + }, + { + "epoch": 68.12912087912088, + "grad_norm": 17.90772247314453, + "learning_rate": 1.593543956043956e-05, + "loss": 0.171, + "step": 24799 + }, + { + "epoch": 68.13186813186813, + "grad_norm": 24.292255401611328, + "learning_rate": 1.5934065934065933e-05, + "loss": 0.6919, + "step": 24800 + }, + { + "epoch": 68.13461538461539, + "grad_norm": 16.230262756347656, + "learning_rate": 1.593269230769231e-05, + "loss": 0.2413, + "step": 24801 + }, + { + "epoch": 68.13736263736264, + "grad_norm": 19.341047286987305, + "learning_rate": 1.5931318681318683e-05, + "loss": 0.6482, + "step": 24802 + }, + { + "epoch": 68.14010989010988, + "grad_norm": 18.60413360595703, + "learning_rate": 1.5929945054945057e-05, + "loss": 0.255, + "step": 24803 + }, + { + "epoch": 68.14285714285714, + "grad_norm": 16.75487518310547, + "learning_rate": 1.592857142857143e-05, + "loss": 0.2619, + "step": 24804 + }, + { + "epoch": 68.1456043956044, + "grad_norm": 14.525622367858887, + "learning_rate": 1.5927197802197803e-05, + "loss": 0.3728, + "step": 24805 + }, + { + "epoch": 68.14835164835165, + "grad_norm": 7.182401180267334, + "learning_rate": 1.5925824175824177e-05, + "loss": 0.1373, + "step": 24806 + }, + { + "epoch": 68.1510989010989, + "grad_norm": 4.710933685302734, + "learning_rate": 1.592445054945055e-05, + "loss": 0.1297, + "step": 24807 + }, + { + "epoch": 68.15384615384616, + "grad_norm": 14.943096160888672, + "learning_rate": 1.5923076923076924e-05, + "loss": 0.1614, + "step": 24808 + }, + { + "epoch": 68.1565934065934, + "grad_norm": 9.466023445129395, + "learning_rate": 1.5921703296703297e-05, + "loss": 0.2189, + "step": 24809 + }, + { + "epoch": 68.15934065934066, + "grad_norm": 2.8992578983306885, + "learning_rate": 1.592032967032967e-05, + "loss": 0.0497, + "step": 24810 + }, + { + "epoch": 68.16208791208791, + "grad_norm": 3.0771946907043457, + "learning_rate": 1.5918956043956047e-05, + "loss": 0.0488, + "step": 24811 + }, + { + "epoch": 68.16483516483517, + "grad_norm": 12.764701843261719, + "learning_rate": 1.591758241758242e-05, + "loss": 0.2612, + "step": 24812 + }, + { + "epoch": 68.16758241758242, + "grad_norm": 9.648213386535645, + "learning_rate": 1.591620879120879e-05, + "loss": 0.112, + "step": 24813 + }, + { + "epoch": 68.17032967032966, + "grad_norm": 12.22984504699707, + "learning_rate": 1.5914835164835164e-05, + "loss": 0.2665, + "step": 24814 + }, + { + "epoch": 68.17307692307692, + "grad_norm": 15.0717134475708, + "learning_rate": 1.5913461538461537e-05, + "loss": 0.3986, + "step": 24815 + }, + { + "epoch": 68.17582417582418, + "grad_norm": 7.066655158996582, + "learning_rate": 1.5912087912087914e-05, + "loss": 0.2585, + "step": 24816 + }, + { + "epoch": 68.17857142857143, + "grad_norm": 5.853249549865723, + "learning_rate": 1.5910714285714288e-05, + "loss": 0.0535, + "step": 24817 + }, + { + "epoch": 68.18131868131869, + "grad_norm": 13.717832565307617, + "learning_rate": 1.590934065934066e-05, + "loss": 0.1679, + "step": 24818 + }, + { + "epoch": 68.18406593406593, + "grad_norm": 8.650181770324707, + "learning_rate": 1.5907967032967034e-05, + "loss": 0.1932, + "step": 24819 + }, + { + "epoch": 68.18681318681318, + "grad_norm": 2.2286221981048584, + "learning_rate": 1.5906593406593408e-05, + "loss": 0.026, + "step": 24820 + }, + { + "epoch": 68.18956043956044, + "grad_norm": 15.593953132629395, + "learning_rate": 1.590521978021978e-05, + "loss": 0.3431, + "step": 24821 + }, + { + "epoch": 68.1923076923077, + "grad_norm": 13.72451400756836, + "learning_rate": 1.5903846153846155e-05, + "loss": 0.4941, + "step": 24822 + }, + { + "epoch": 68.19505494505495, + "grad_norm": 24.045045852661133, + "learning_rate": 1.5902472527472528e-05, + "loss": 0.6297, + "step": 24823 + }, + { + "epoch": 68.1978021978022, + "grad_norm": 22.601015090942383, + "learning_rate": 1.59010989010989e-05, + "loss": 0.2483, + "step": 24824 + }, + { + "epoch": 68.20054945054945, + "grad_norm": 23.38080596923828, + "learning_rate": 1.5899725274725275e-05, + "loss": 0.6993, + "step": 24825 + }, + { + "epoch": 68.2032967032967, + "grad_norm": 10.009970664978027, + "learning_rate": 1.5898351648351652e-05, + "loss": 0.2776, + "step": 24826 + }, + { + "epoch": 68.20604395604396, + "grad_norm": 8.771913528442383, + "learning_rate": 1.5896978021978025e-05, + "loss": 0.1018, + "step": 24827 + }, + { + "epoch": 68.20879120879121, + "grad_norm": 10.925840377807617, + "learning_rate": 1.5895604395604395e-05, + "loss": 0.1844, + "step": 24828 + }, + { + "epoch": 68.21153846153847, + "grad_norm": 7.947921276092529, + "learning_rate": 1.589423076923077e-05, + "loss": 0.2641, + "step": 24829 + }, + { + "epoch": 68.21428571428571, + "grad_norm": 9.76512622833252, + "learning_rate": 1.5892857142857142e-05, + "loss": 0.1833, + "step": 24830 + }, + { + "epoch": 68.21703296703296, + "grad_norm": 17.29886817932129, + "learning_rate": 1.589148351648352e-05, + "loss": 0.3179, + "step": 24831 + }, + { + "epoch": 68.21978021978022, + "grad_norm": 9.111281394958496, + "learning_rate": 1.5890109890109892e-05, + "loss": 0.2713, + "step": 24832 + }, + { + "epoch": 68.22252747252747, + "grad_norm": 9.793639183044434, + "learning_rate": 1.5888736263736266e-05, + "loss": 0.3186, + "step": 24833 + }, + { + "epoch": 68.22527472527473, + "grad_norm": 11.853888511657715, + "learning_rate": 1.588736263736264e-05, + "loss": 0.3162, + "step": 24834 + }, + { + "epoch": 68.22802197802197, + "grad_norm": 18.797082901000977, + "learning_rate": 1.5885989010989012e-05, + "loss": 0.2584, + "step": 24835 + }, + { + "epoch": 68.23076923076923, + "grad_norm": 8.722921371459961, + "learning_rate": 1.5884615384615386e-05, + "loss": 0.0964, + "step": 24836 + }, + { + "epoch": 68.23351648351648, + "grad_norm": 25.057598114013672, + "learning_rate": 1.588324175824176e-05, + "loss": 0.4752, + "step": 24837 + }, + { + "epoch": 68.23626373626374, + "grad_norm": 9.260397911071777, + "learning_rate": 1.5881868131868133e-05, + "loss": 0.1379, + "step": 24838 + }, + { + "epoch": 68.23901098901099, + "grad_norm": 8.373618125915527, + "learning_rate": 1.5880494505494506e-05, + "loss": 0.1539, + "step": 24839 + }, + { + "epoch": 68.24175824175825, + "grad_norm": 15.604473114013672, + "learning_rate": 1.587912087912088e-05, + "loss": 0.1818, + "step": 24840 + }, + { + "epoch": 68.24450549450549, + "grad_norm": 25.24138069152832, + "learning_rate": 1.5877747252747256e-05, + "loss": 0.923, + "step": 24841 + }, + { + "epoch": 68.24725274725274, + "grad_norm": 2.9300544261932373, + "learning_rate": 1.5876373626373626e-05, + "loss": 0.0385, + "step": 24842 + }, + { + "epoch": 68.25, + "grad_norm": 4.813348293304443, + "learning_rate": 1.5875e-05, + "loss": 0.1418, + "step": 24843 + }, + { + "epoch": 68.25274725274726, + "grad_norm": 6.178951263427734, + "learning_rate": 1.5873626373626373e-05, + "loss": 0.0666, + "step": 24844 + }, + { + "epoch": 68.25549450549451, + "grad_norm": 16.53649139404297, + "learning_rate": 1.5872252747252746e-05, + "loss": 0.5935, + "step": 24845 + }, + { + "epoch": 68.25824175824175, + "grad_norm": 5.307562828063965, + "learning_rate": 1.5870879120879123e-05, + "loss": 0.1012, + "step": 24846 + }, + { + "epoch": 68.26098901098901, + "grad_norm": 6.561450004577637, + "learning_rate": 1.5869505494505497e-05, + "loss": 0.1593, + "step": 24847 + }, + { + "epoch": 68.26373626373626, + "grad_norm": 11.369571685791016, + "learning_rate": 1.586813186813187e-05, + "loss": 0.1987, + "step": 24848 + }, + { + "epoch": 68.26648351648352, + "grad_norm": 15.129934310913086, + "learning_rate": 1.5866758241758243e-05, + "loss": 0.1658, + "step": 24849 + }, + { + "epoch": 68.26923076923077, + "grad_norm": 14.11625862121582, + "learning_rate": 1.5865384615384617e-05, + "loss": 0.3163, + "step": 24850 + }, + { + "epoch": 68.27197802197803, + "grad_norm": 13.02206039428711, + "learning_rate": 1.586401098901099e-05, + "loss": 0.2564, + "step": 24851 + }, + { + "epoch": 68.27472527472527, + "grad_norm": 9.116456031799316, + "learning_rate": 1.5862637362637364e-05, + "loss": 0.2812, + "step": 24852 + }, + { + "epoch": 68.27747252747253, + "grad_norm": 11.10724925994873, + "learning_rate": 1.5861263736263737e-05, + "loss": 0.2169, + "step": 24853 + }, + { + "epoch": 68.28021978021978, + "grad_norm": 6.476485729217529, + "learning_rate": 1.585989010989011e-05, + "loss": 0.0872, + "step": 24854 + }, + { + "epoch": 68.28296703296704, + "grad_norm": 4.419343948364258, + "learning_rate": 1.5858516483516484e-05, + "loss": 0.0734, + "step": 24855 + }, + { + "epoch": 68.28571428571429, + "grad_norm": 15.419188499450684, + "learning_rate": 1.5857142857142857e-05, + "loss": 0.1883, + "step": 24856 + }, + { + "epoch": 68.28846153846153, + "grad_norm": 16.814796447753906, + "learning_rate": 1.585576923076923e-05, + "loss": 0.3821, + "step": 24857 + }, + { + "epoch": 68.29120879120879, + "grad_norm": 13.383411407470703, + "learning_rate": 1.5854395604395604e-05, + "loss": 0.1625, + "step": 24858 + }, + { + "epoch": 68.29395604395604, + "grad_norm": 4.505424976348877, + "learning_rate": 1.5853021978021978e-05, + "loss": 0.0514, + "step": 24859 + }, + { + "epoch": 68.2967032967033, + "grad_norm": 3.9537465572357178, + "learning_rate": 1.585164835164835e-05, + "loss": 0.0353, + "step": 24860 + }, + { + "epoch": 68.29945054945055, + "grad_norm": 20.165451049804688, + "learning_rate": 1.5850274725274724e-05, + "loss": 0.7675, + "step": 24861 + }, + { + "epoch": 68.3021978021978, + "grad_norm": 15.835775375366211, + "learning_rate": 1.58489010989011e-05, + "loss": 0.4485, + "step": 24862 + }, + { + "epoch": 68.30494505494505, + "grad_norm": 5.505848407745361, + "learning_rate": 1.5847527472527475e-05, + "loss": 0.0518, + "step": 24863 + }, + { + "epoch": 68.3076923076923, + "grad_norm": 11.84652328491211, + "learning_rate": 1.5846153846153848e-05, + "loss": 0.1508, + "step": 24864 + }, + { + "epoch": 68.31043956043956, + "grad_norm": 8.795327186584473, + "learning_rate": 1.584478021978022e-05, + "loss": 0.1635, + "step": 24865 + }, + { + "epoch": 68.31318681318682, + "grad_norm": 11.559540748596191, + "learning_rate": 1.584340659340659e-05, + "loss": 0.1868, + "step": 24866 + }, + { + "epoch": 68.31593406593407, + "grad_norm": 16.243896484375, + "learning_rate": 1.5842032967032968e-05, + "loss": 0.4608, + "step": 24867 + }, + { + "epoch": 68.31868131868131, + "grad_norm": 13.31679630279541, + "learning_rate": 1.584065934065934e-05, + "loss": 0.2307, + "step": 24868 + }, + { + "epoch": 68.32142857142857, + "grad_norm": 18.431949615478516, + "learning_rate": 1.5839285714285715e-05, + "loss": 0.3346, + "step": 24869 + }, + { + "epoch": 68.32417582417582, + "grad_norm": 17.76839256286621, + "learning_rate": 1.583791208791209e-05, + "loss": 0.4899, + "step": 24870 + }, + { + "epoch": 68.32692307692308, + "grad_norm": 7.759730339050293, + "learning_rate": 1.5836538461538462e-05, + "loss": 0.1189, + "step": 24871 + }, + { + "epoch": 68.32967032967034, + "grad_norm": 18.574827194213867, + "learning_rate": 1.5835164835164835e-05, + "loss": 0.5813, + "step": 24872 + }, + { + "epoch": 68.33241758241758, + "grad_norm": 10.710659980773926, + "learning_rate": 1.583379120879121e-05, + "loss": 0.2609, + "step": 24873 + }, + { + "epoch": 68.33516483516483, + "grad_norm": 3.459756374359131, + "learning_rate": 1.5832417582417582e-05, + "loss": 0.0315, + "step": 24874 + }, + { + "epoch": 68.33791208791209, + "grad_norm": 9.30509090423584, + "learning_rate": 1.5831043956043955e-05, + "loss": 0.1494, + "step": 24875 + }, + { + "epoch": 68.34065934065934, + "grad_norm": 2.829771041870117, + "learning_rate": 1.582967032967033e-05, + "loss": 0.0812, + "step": 24876 + }, + { + "epoch": 68.3434065934066, + "grad_norm": 5.598073959350586, + "learning_rate": 1.5828296703296706e-05, + "loss": 0.0738, + "step": 24877 + }, + { + "epoch": 68.34615384615384, + "grad_norm": 8.16425609588623, + "learning_rate": 1.582692307692308e-05, + "loss": 0.1097, + "step": 24878 + }, + { + "epoch": 68.3489010989011, + "grad_norm": 12.640165328979492, + "learning_rate": 1.5825549450549452e-05, + "loss": 0.2595, + "step": 24879 + }, + { + "epoch": 68.35164835164835, + "grad_norm": 5.555089473724365, + "learning_rate": 1.5824175824175826e-05, + "loss": 0.1476, + "step": 24880 + }, + { + "epoch": 68.3543956043956, + "grad_norm": 9.369080543518066, + "learning_rate": 1.5822802197802196e-05, + "loss": 0.152, + "step": 24881 + }, + { + "epoch": 68.35714285714286, + "grad_norm": 21.819555282592773, + "learning_rate": 1.5821428571428573e-05, + "loss": 0.8017, + "step": 24882 + }, + { + "epoch": 68.35989010989012, + "grad_norm": 7.056588649749756, + "learning_rate": 1.5820054945054946e-05, + "loss": 0.0705, + "step": 24883 + }, + { + "epoch": 68.36263736263736, + "grad_norm": 20.260189056396484, + "learning_rate": 1.581868131868132e-05, + "loss": 0.4133, + "step": 24884 + }, + { + "epoch": 68.36538461538461, + "grad_norm": 9.392644882202148, + "learning_rate": 1.5817307692307693e-05, + "loss": 0.155, + "step": 24885 + }, + { + "epoch": 68.36813186813187, + "grad_norm": 26.03388023376465, + "learning_rate": 1.5815934065934066e-05, + "loss": 0.7489, + "step": 24886 + }, + { + "epoch": 68.37087912087912, + "grad_norm": 19.87959861755371, + "learning_rate": 1.581456043956044e-05, + "loss": 0.7545, + "step": 24887 + }, + { + "epoch": 68.37362637362638, + "grad_norm": 13.013964653015137, + "learning_rate": 1.5813186813186813e-05, + "loss": 0.1726, + "step": 24888 + }, + { + "epoch": 68.37637362637362, + "grad_norm": 8.23564338684082, + "learning_rate": 1.5811813186813187e-05, + "loss": 0.2063, + "step": 24889 + }, + { + "epoch": 68.37912087912088, + "grad_norm": 17.754728317260742, + "learning_rate": 1.581043956043956e-05, + "loss": 0.6011, + "step": 24890 + }, + { + "epoch": 68.38186813186813, + "grad_norm": 10.596891403198242, + "learning_rate": 1.5809065934065933e-05, + "loss": 0.1888, + "step": 24891 + }, + { + "epoch": 68.38461538461539, + "grad_norm": 9.853372573852539, + "learning_rate": 1.580769230769231e-05, + "loss": 0.122, + "step": 24892 + }, + { + "epoch": 68.38736263736264, + "grad_norm": 19.1171875, + "learning_rate": 1.5806318681318684e-05, + "loss": 0.274, + "step": 24893 + }, + { + "epoch": 68.39010989010988, + "grad_norm": 4.361262798309326, + "learning_rate": 1.5804945054945057e-05, + "loss": 0.0839, + "step": 24894 + }, + { + "epoch": 68.39285714285714, + "grad_norm": 9.140325546264648, + "learning_rate": 1.580357142857143e-05, + "loss": 0.1774, + "step": 24895 + }, + { + "epoch": 68.3956043956044, + "grad_norm": 3.388979911804199, + "learning_rate": 1.58021978021978e-05, + "loss": 0.044, + "step": 24896 + }, + { + "epoch": 68.39835164835165, + "grad_norm": 14.621384620666504, + "learning_rate": 1.5800824175824177e-05, + "loss": 0.3384, + "step": 24897 + }, + { + "epoch": 68.4010989010989, + "grad_norm": 6.9333930015563965, + "learning_rate": 1.579945054945055e-05, + "loss": 0.0841, + "step": 24898 + }, + { + "epoch": 68.40384615384616, + "grad_norm": 9.099474906921387, + "learning_rate": 1.5798076923076924e-05, + "loss": 0.1235, + "step": 24899 + }, + { + "epoch": 68.4065934065934, + "grad_norm": 6.085608959197998, + "learning_rate": 1.5796703296703297e-05, + "loss": 0.1254, + "step": 24900 + }, + { + "epoch": 68.40934065934066, + "grad_norm": 17.10777473449707, + "learning_rate": 1.579532967032967e-05, + "loss": 0.2351, + "step": 24901 + }, + { + "epoch": 68.41208791208791, + "grad_norm": 11.802122116088867, + "learning_rate": 1.5793956043956044e-05, + "loss": 0.1417, + "step": 24902 + }, + { + "epoch": 68.41483516483517, + "grad_norm": 6.454465389251709, + "learning_rate": 1.5792582417582418e-05, + "loss": 0.1735, + "step": 24903 + }, + { + "epoch": 68.41758241758242, + "grad_norm": 17.227977752685547, + "learning_rate": 1.579120879120879e-05, + "loss": 0.3702, + "step": 24904 + }, + { + "epoch": 68.42032967032966, + "grad_norm": 4.927837371826172, + "learning_rate": 1.5789835164835164e-05, + "loss": 0.0692, + "step": 24905 + }, + { + "epoch": 68.42307692307692, + "grad_norm": 19.88868522644043, + "learning_rate": 1.5788461538461538e-05, + "loss": 0.6799, + "step": 24906 + }, + { + "epoch": 68.42582417582418, + "grad_norm": 10.55674934387207, + "learning_rate": 1.5787087912087915e-05, + "loss": 0.1148, + "step": 24907 + }, + { + "epoch": 68.42857142857143, + "grad_norm": 14.389018058776855, + "learning_rate": 1.5785714285714288e-05, + "loss": 0.3493, + "step": 24908 + }, + { + "epoch": 68.43131868131869, + "grad_norm": 3.6585376262664795, + "learning_rate": 1.578434065934066e-05, + "loss": 0.0864, + "step": 24909 + }, + { + "epoch": 68.43406593406593, + "grad_norm": 8.658700942993164, + "learning_rate": 1.5782967032967035e-05, + "loss": 0.2073, + "step": 24910 + }, + { + "epoch": 68.43681318681318, + "grad_norm": 16.429561614990234, + "learning_rate": 1.5781593406593405e-05, + "loss": 0.3178, + "step": 24911 + }, + { + "epoch": 68.43956043956044, + "grad_norm": 8.906757354736328, + "learning_rate": 1.578021978021978e-05, + "loss": 0.0579, + "step": 24912 + }, + { + "epoch": 68.4423076923077, + "grad_norm": 16.486774444580078, + "learning_rate": 1.5778846153846155e-05, + "loss": 0.1992, + "step": 24913 + }, + { + "epoch": 68.44505494505495, + "grad_norm": 11.162713050842285, + "learning_rate": 1.577747252747253e-05, + "loss": 0.2174, + "step": 24914 + }, + { + "epoch": 68.4478021978022, + "grad_norm": 11.328230857849121, + "learning_rate": 1.5776098901098902e-05, + "loss": 0.1262, + "step": 24915 + }, + { + "epoch": 68.45054945054945, + "grad_norm": 6.965075492858887, + "learning_rate": 1.5774725274725275e-05, + "loss": 0.1556, + "step": 24916 + }, + { + "epoch": 68.4532967032967, + "grad_norm": 6.25892448425293, + "learning_rate": 1.577335164835165e-05, + "loss": 0.1084, + "step": 24917 + }, + { + "epoch": 68.45604395604396, + "grad_norm": 9.608381271362305, + "learning_rate": 1.5771978021978022e-05, + "loss": 0.16, + "step": 24918 + }, + { + "epoch": 68.45879120879121, + "grad_norm": 23.231611251831055, + "learning_rate": 1.5770604395604395e-05, + "loss": 0.6118, + "step": 24919 + }, + { + "epoch": 68.46153846153847, + "grad_norm": 10.284024238586426, + "learning_rate": 1.576923076923077e-05, + "loss": 0.1744, + "step": 24920 + }, + { + "epoch": 68.46428571428571, + "grad_norm": 8.506628036499023, + "learning_rate": 1.5767857142857142e-05, + "loss": 0.2276, + "step": 24921 + }, + { + "epoch": 68.46703296703296, + "grad_norm": 14.832330703735352, + "learning_rate": 1.576648351648352e-05, + "loss": 0.1637, + "step": 24922 + }, + { + "epoch": 68.46978021978022, + "grad_norm": 15.603397369384766, + "learning_rate": 1.5765109890109893e-05, + "loss": 0.1836, + "step": 24923 + }, + { + "epoch": 68.47252747252747, + "grad_norm": 10.945256233215332, + "learning_rate": 1.5763736263736266e-05, + "loss": 0.1679, + "step": 24924 + }, + { + "epoch": 68.47527472527473, + "grad_norm": 22.027721405029297, + "learning_rate": 1.576236263736264e-05, + "loss": 0.667, + "step": 24925 + }, + { + "epoch": 68.47802197802197, + "grad_norm": 8.808906555175781, + "learning_rate": 1.576098901098901e-05, + "loss": 0.1031, + "step": 24926 + }, + { + "epoch": 68.48076923076923, + "grad_norm": 5.4848456382751465, + "learning_rate": 1.5759615384615386e-05, + "loss": 0.084, + "step": 24927 + }, + { + "epoch": 68.48351648351648, + "grad_norm": 20.541566848754883, + "learning_rate": 1.575824175824176e-05, + "loss": 0.5377, + "step": 24928 + }, + { + "epoch": 68.48626373626374, + "grad_norm": 6.6604790687561035, + "learning_rate": 1.5756868131868133e-05, + "loss": 0.1238, + "step": 24929 + }, + { + "epoch": 68.48901098901099, + "grad_norm": 5.659639358520508, + "learning_rate": 1.5755494505494506e-05, + "loss": 0.0456, + "step": 24930 + }, + { + "epoch": 68.49175824175825, + "grad_norm": 4.952286243438721, + "learning_rate": 1.575412087912088e-05, + "loss": 0.0672, + "step": 24931 + }, + { + "epoch": 68.49450549450549, + "grad_norm": 10.213984489440918, + "learning_rate": 1.5752747252747253e-05, + "loss": 0.1088, + "step": 24932 + }, + { + "epoch": 68.49725274725274, + "grad_norm": 7.353777885437012, + "learning_rate": 1.5751373626373627e-05, + "loss": 0.2303, + "step": 24933 + }, + { + "epoch": 68.5, + "grad_norm": 12.77501106262207, + "learning_rate": 1.575e-05, + "loss": 0.1409, + "step": 24934 + }, + { + "epoch": 68.50274725274726, + "grad_norm": 8.349843978881836, + "learning_rate": 1.5748626373626373e-05, + "loss": 0.3873, + "step": 24935 + }, + { + "epoch": 68.50549450549451, + "grad_norm": 3.255681037902832, + "learning_rate": 1.5747252747252747e-05, + "loss": 0.0716, + "step": 24936 + }, + { + "epoch": 68.50824175824175, + "grad_norm": 9.15450382232666, + "learning_rate": 1.5745879120879124e-05, + "loss": 0.1586, + "step": 24937 + }, + { + "epoch": 68.51098901098901, + "grad_norm": 20.378646850585938, + "learning_rate": 1.5744505494505497e-05, + "loss": 0.4236, + "step": 24938 + }, + { + "epoch": 68.51373626373626, + "grad_norm": 18.72340202331543, + "learning_rate": 1.574313186813187e-05, + "loss": 0.2625, + "step": 24939 + }, + { + "epoch": 68.51648351648352, + "grad_norm": 12.979904174804688, + "learning_rate": 1.574175824175824e-05, + "loss": 0.3963, + "step": 24940 + }, + { + "epoch": 68.51923076923077, + "grad_norm": 11.210864067077637, + "learning_rate": 1.5740384615384614e-05, + "loss": 0.19, + "step": 24941 + }, + { + "epoch": 68.52197802197803, + "grad_norm": 2.392939567565918, + "learning_rate": 1.573901098901099e-05, + "loss": 0.0347, + "step": 24942 + }, + { + "epoch": 68.52472527472527, + "grad_norm": 18.449596405029297, + "learning_rate": 1.5737637362637364e-05, + "loss": 0.267, + "step": 24943 + }, + { + "epoch": 68.52747252747253, + "grad_norm": 10.917518615722656, + "learning_rate": 1.5736263736263737e-05, + "loss": 0.1689, + "step": 24944 + }, + { + "epoch": 68.53021978021978, + "grad_norm": 10.274697303771973, + "learning_rate": 1.573489010989011e-05, + "loss": 0.3858, + "step": 24945 + }, + { + "epoch": 68.53296703296704, + "grad_norm": 14.681514739990234, + "learning_rate": 1.5733516483516484e-05, + "loss": 0.2607, + "step": 24946 + }, + { + "epoch": 68.53571428571429, + "grad_norm": 3.3855948448181152, + "learning_rate": 1.5732142857142858e-05, + "loss": 0.0375, + "step": 24947 + }, + { + "epoch": 68.53846153846153, + "grad_norm": 11.19382095336914, + "learning_rate": 1.573076923076923e-05, + "loss": 0.1838, + "step": 24948 + }, + { + "epoch": 68.54120879120879, + "grad_norm": 14.530982971191406, + "learning_rate": 1.5729395604395604e-05, + "loss": 0.2901, + "step": 24949 + }, + { + "epoch": 68.54395604395604, + "grad_norm": 9.047096252441406, + "learning_rate": 1.5728021978021978e-05, + "loss": 0.1855, + "step": 24950 + }, + { + "epoch": 68.5467032967033, + "grad_norm": 13.723735809326172, + "learning_rate": 1.572664835164835e-05, + "loss": 0.3121, + "step": 24951 + }, + { + "epoch": 68.54945054945055, + "grad_norm": 3.521451473236084, + "learning_rate": 1.5725274725274728e-05, + "loss": 0.0624, + "step": 24952 + }, + { + "epoch": 68.5521978021978, + "grad_norm": 7.114954471588135, + "learning_rate": 1.57239010989011e-05, + "loss": 0.0891, + "step": 24953 + }, + { + "epoch": 68.55494505494505, + "grad_norm": 22.070178985595703, + "learning_rate": 1.5722527472527475e-05, + "loss": 0.6342, + "step": 24954 + }, + { + "epoch": 68.5576923076923, + "grad_norm": 20.582401275634766, + "learning_rate": 1.5721153846153845e-05, + "loss": 0.5798, + "step": 24955 + }, + { + "epoch": 68.56043956043956, + "grad_norm": 9.239893913269043, + "learning_rate": 1.571978021978022e-05, + "loss": 0.2003, + "step": 24956 + }, + { + "epoch": 68.56318681318682, + "grad_norm": 6.741466522216797, + "learning_rate": 1.5718406593406595e-05, + "loss": 0.1325, + "step": 24957 + }, + { + "epoch": 68.56593406593407, + "grad_norm": 12.643102645874023, + "learning_rate": 1.571703296703297e-05, + "loss": 0.2428, + "step": 24958 + }, + { + "epoch": 68.56868131868131, + "grad_norm": 3.4786577224731445, + "learning_rate": 1.5715659340659342e-05, + "loss": 0.0697, + "step": 24959 + }, + { + "epoch": 68.57142857142857, + "grad_norm": 6.580837726593018, + "learning_rate": 1.5714285714285715e-05, + "loss": 0.0611, + "step": 24960 + }, + { + "epoch": 68.57417582417582, + "grad_norm": 24.27728271484375, + "learning_rate": 1.571291208791209e-05, + "loss": 0.6254, + "step": 24961 + }, + { + "epoch": 68.57692307692308, + "grad_norm": 13.509820938110352, + "learning_rate": 1.5711538461538462e-05, + "loss": 0.2344, + "step": 24962 + }, + { + "epoch": 68.57967032967034, + "grad_norm": 11.1402006149292, + "learning_rate": 1.5710164835164836e-05, + "loss": 0.2558, + "step": 24963 + }, + { + "epoch": 68.58241758241758, + "grad_norm": 18.32025146484375, + "learning_rate": 1.570879120879121e-05, + "loss": 0.3982, + "step": 24964 + }, + { + "epoch": 68.58516483516483, + "grad_norm": 5.01671838760376, + "learning_rate": 1.5707417582417582e-05, + "loss": 0.1074, + "step": 24965 + }, + { + "epoch": 68.58791208791209, + "grad_norm": 10.78010082244873, + "learning_rate": 1.5706043956043956e-05, + "loss": 0.1719, + "step": 24966 + }, + { + "epoch": 68.59065934065934, + "grad_norm": 17.12200355529785, + "learning_rate": 1.5704670329670333e-05, + "loss": 0.5146, + "step": 24967 + }, + { + "epoch": 68.5934065934066, + "grad_norm": 7.967151641845703, + "learning_rate": 1.5703296703296706e-05, + "loss": 0.1842, + "step": 24968 + }, + { + "epoch": 68.59615384615384, + "grad_norm": 6.264787197113037, + "learning_rate": 1.570192307692308e-05, + "loss": 0.103, + "step": 24969 + }, + { + "epoch": 68.5989010989011, + "grad_norm": 23.609437942504883, + "learning_rate": 1.570054945054945e-05, + "loss": 0.5579, + "step": 24970 + }, + { + "epoch": 68.60164835164835, + "grad_norm": 10.458821296691895, + "learning_rate": 1.5699175824175823e-05, + "loss": 0.1099, + "step": 24971 + }, + { + "epoch": 68.6043956043956, + "grad_norm": 12.563165664672852, + "learning_rate": 1.5697802197802196e-05, + "loss": 0.2739, + "step": 24972 + }, + { + "epoch": 68.60714285714286, + "grad_norm": 11.311140060424805, + "learning_rate": 1.5696428571428573e-05, + "loss": 0.2664, + "step": 24973 + }, + { + "epoch": 68.60989010989012, + "grad_norm": 16.222270965576172, + "learning_rate": 1.5695054945054946e-05, + "loss": 0.3026, + "step": 24974 + }, + { + "epoch": 68.61263736263736, + "grad_norm": 23.38773536682129, + "learning_rate": 1.569368131868132e-05, + "loss": 0.3277, + "step": 24975 + }, + { + "epoch": 68.61538461538461, + "grad_norm": 6.646293640136719, + "learning_rate": 1.5692307692307693e-05, + "loss": 0.1005, + "step": 24976 + }, + { + "epoch": 68.61813186813187, + "grad_norm": 12.660002708435059, + "learning_rate": 1.5690934065934067e-05, + "loss": 0.2501, + "step": 24977 + }, + { + "epoch": 68.62087912087912, + "grad_norm": 23.10896110534668, + "learning_rate": 1.568956043956044e-05, + "loss": 0.372, + "step": 24978 + }, + { + "epoch": 68.62362637362638, + "grad_norm": 8.635844230651855, + "learning_rate": 1.5688186813186813e-05, + "loss": 0.1472, + "step": 24979 + }, + { + "epoch": 68.62637362637362, + "grad_norm": 2.6431009769439697, + "learning_rate": 1.5686813186813187e-05, + "loss": 0.0469, + "step": 24980 + }, + { + "epoch": 68.62912087912088, + "grad_norm": 0.9167957305908203, + "learning_rate": 1.568543956043956e-05, + "loss": 0.0137, + "step": 24981 + }, + { + "epoch": 68.63186813186813, + "grad_norm": 20.132970809936523, + "learning_rate": 1.5684065934065934e-05, + "loss": 0.5335, + "step": 24982 + }, + { + "epoch": 68.63461538461539, + "grad_norm": 5.411406993865967, + "learning_rate": 1.568269230769231e-05, + "loss": 0.0681, + "step": 24983 + }, + { + "epoch": 68.63736263736264, + "grad_norm": 8.685053825378418, + "learning_rate": 1.5681318681318684e-05, + "loss": 0.1424, + "step": 24984 + }, + { + "epoch": 68.64010989010988, + "grad_norm": 12.46972370147705, + "learning_rate": 1.5679945054945054e-05, + "loss": 0.1954, + "step": 24985 + }, + { + "epoch": 68.64285714285714, + "grad_norm": 11.756288528442383, + "learning_rate": 1.5678571428571427e-05, + "loss": 0.1445, + "step": 24986 + }, + { + "epoch": 68.6456043956044, + "grad_norm": 9.95073127746582, + "learning_rate": 1.56771978021978e-05, + "loss": 0.197, + "step": 24987 + }, + { + "epoch": 68.64835164835165, + "grad_norm": 12.198410987854004, + "learning_rate": 1.5675824175824177e-05, + "loss": 0.3957, + "step": 24988 + }, + { + "epoch": 68.6510989010989, + "grad_norm": 3.997882127761841, + "learning_rate": 1.567445054945055e-05, + "loss": 0.0478, + "step": 24989 + }, + { + "epoch": 68.65384615384616, + "grad_norm": 9.4777250289917, + "learning_rate": 1.5673076923076924e-05, + "loss": 0.1756, + "step": 24990 + }, + { + "epoch": 68.6565934065934, + "grad_norm": 18.657939910888672, + "learning_rate": 1.5671703296703298e-05, + "loss": 0.2942, + "step": 24991 + }, + { + "epoch": 68.65934065934066, + "grad_norm": 14.844047546386719, + "learning_rate": 1.567032967032967e-05, + "loss": 0.2875, + "step": 24992 + }, + { + "epoch": 68.66208791208791, + "grad_norm": 11.158498764038086, + "learning_rate": 1.5668956043956045e-05, + "loss": 0.1741, + "step": 24993 + }, + { + "epoch": 68.66483516483517, + "grad_norm": 7.262125015258789, + "learning_rate": 1.5667582417582418e-05, + "loss": 0.0772, + "step": 24994 + }, + { + "epoch": 68.66758241758242, + "grad_norm": 9.639004707336426, + "learning_rate": 1.566620879120879e-05, + "loss": 0.1056, + "step": 24995 + }, + { + "epoch": 68.67032967032966, + "grad_norm": 7.674405097961426, + "learning_rate": 1.5664835164835165e-05, + "loss": 0.0943, + "step": 24996 + }, + { + "epoch": 68.67307692307692, + "grad_norm": 20.533504486083984, + "learning_rate": 1.5663461538461538e-05, + "loss": 0.5781, + "step": 24997 + }, + { + "epoch": 68.67582417582418, + "grad_norm": 4.287011623382568, + "learning_rate": 1.5662087912087915e-05, + "loss": 0.0867, + "step": 24998 + }, + { + "epoch": 68.67857142857143, + "grad_norm": 17.763439178466797, + "learning_rate": 1.566071428571429e-05, + "loss": 0.3882, + "step": 24999 + }, + { + "epoch": 68.68131868131869, + "grad_norm": 3.8332371711730957, + "learning_rate": 1.565934065934066e-05, + "loss": 0.0472, + "step": 25000 + }, + { + "epoch": 68.68406593406593, + "grad_norm": 18.767414093017578, + "learning_rate": 1.5657967032967032e-05, + "loss": 0.4082, + "step": 25001 + }, + { + "epoch": 68.68681318681318, + "grad_norm": 8.820439338684082, + "learning_rate": 1.5656593406593405e-05, + "loss": 0.1668, + "step": 25002 + }, + { + "epoch": 68.68956043956044, + "grad_norm": 3.3871805667877197, + "learning_rate": 1.5655219780219782e-05, + "loss": 0.0419, + "step": 25003 + }, + { + "epoch": 68.6923076923077, + "grad_norm": 7.6346235275268555, + "learning_rate": 1.5653846153846155e-05, + "loss": 0.0895, + "step": 25004 + }, + { + "epoch": 68.69505494505495, + "grad_norm": 11.068148612976074, + "learning_rate": 1.565247252747253e-05, + "loss": 0.1484, + "step": 25005 + }, + { + "epoch": 68.6978021978022, + "grad_norm": 5.454277038574219, + "learning_rate": 1.5651098901098902e-05, + "loss": 0.1046, + "step": 25006 + }, + { + "epoch": 68.70054945054945, + "grad_norm": 19.42958641052246, + "learning_rate": 1.5649725274725276e-05, + "loss": 0.479, + "step": 25007 + }, + { + "epoch": 68.7032967032967, + "grad_norm": 12.072622299194336, + "learning_rate": 1.564835164835165e-05, + "loss": 0.3133, + "step": 25008 + }, + { + "epoch": 68.70604395604396, + "grad_norm": 11.950557708740234, + "learning_rate": 1.5646978021978022e-05, + "loss": 0.2364, + "step": 25009 + }, + { + "epoch": 68.70879120879121, + "grad_norm": 12.372354507446289, + "learning_rate": 1.5645604395604396e-05, + "loss": 0.2076, + "step": 25010 + }, + { + "epoch": 68.71153846153847, + "grad_norm": 20.80105972290039, + "learning_rate": 1.564423076923077e-05, + "loss": 0.3713, + "step": 25011 + }, + { + "epoch": 68.71428571428571, + "grad_norm": 6.676794528961182, + "learning_rate": 1.5642857142857143e-05, + "loss": 0.072, + "step": 25012 + }, + { + "epoch": 68.71703296703296, + "grad_norm": 17.94865608215332, + "learning_rate": 1.564148351648352e-05, + "loss": 0.3168, + "step": 25013 + }, + { + "epoch": 68.71978021978022, + "grad_norm": 6.230889320373535, + "learning_rate": 1.5640109890109893e-05, + "loss": 0.1384, + "step": 25014 + }, + { + "epoch": 68.72252747252747, + "grad_norm": 10.95080280303955, + "learning_rate": 1.5638736263736263e-05, + "loss": 0.1304, + "step": 25015 + }, + { + "epoch": 68.72527472527473, + "grad_norm": 19.1841983795166, + "learning_rate": 1.5637362637362636e-05, + "loss": 0.381, + "step": 25016 + }, + { + "epoch": 68.72802197802197, + "grad_norm": 1.9827468395233154, + "learning_rate": 1.563598901098901e-05, + "loss": 0.0245, + "step": 25017 + }, + { + "epoch": 68.73076923076923, + "grad_norm": 6.621016502380371, + "learning_rate": 1.5634615384615386e-05, + "loss": 0.1116, + "step": 25018 + }, + { + "epoch": 68.73351648351648, + "grad_norm": 8.685898780822754, + "learning_rate": 1.563324175824176e-05, + "loss": 0.2796, + "step": 25019 + }, + { + "epoch": 68.73626373626374, + "grad_norm": 13.961888313293457, + "learning_rate": 1.5631868131868133e-05, + "loss": 0.2522, + "step": 25020 + }, + { + "epoch": 68.73901098901099, + "grad_norm": 15.489175796508789, + "learning_rate": 1.5630494505494507e-05, + "loss": 0.2053, + "step": 25021 + }, + { + "epoch": 68.74175824175825, + "grad_norm": 6.399268627166748, + "learning_rate": 1.562912087912088e-05, + "loss": 0.0666, + "step": 25022 + }, + { + "epoch": 68.74450549450549, + "grad_norm": 13.063161849975586, + "learning_rate": 1.5627747252747254e-05, + "loss": 0.1471, + "step": 25023 + }, + { + "epoch": 68.74725274725274, + "grad_norm": 5.055651664733887, + "learning_rate": 1.5626373626373627e-05, + "loss": 0.0584, + "step": 25024 + }, + { + "epoch": 68.75, + "grad_norm": 9.492980003356934, + "learning_rate": 1.5625e-05, + "loss": 0.0996, + "step": 25025 + }, + { + "epoch": 68.75274725274726, + "grad_norm": 8.988906860351562, + "learning_rate": 1.5623626373626374e-05, + "loss": 0.148, + "step": 25026 + }, + { + "epoch": 68.75549450549451, + "grad_norm": 10.708349227905273, + "learning_rate": 1.5622252747252747e-05, + "loss": 0.1421, + "step": 25027 + }, + { + "epoch": 68.75824175824175, + "grad_norm": 9.299552917480469, + "learning_rate": 1.5620879120879124e-05, + "loss": 0.2494, + "step": 25028 + }, + { + "epoch": 68.76098901098901, + "grad_norm": 24.03949737548828, + "learning_rate": 1.5619505494505497e-05, + "loss": 0.7123, + "step": 25029 + }, + { + "epoch": 68.76373626373626, + "grad_norm": 4.335649490356445, + "learning_rate": 1.5618131868131867e-05, + "loss": 0.0371, + "step": 25030 + }, + { + "epoch": 68.76648351648352, + "grad_norm": 7.766294002532959, + "learning_rate": 1.561675824175824e-05, + "loss": 0.2757, + "step": 25031 + }, + { + "epoch": 68.76923076923077, + "grad_norm": 13.614584922790527, + "learning_rate": 1.5615384615384614e-05, + "loss": 0.2313, + "step": 25032 + }, + { + "epoch": 68.77197802197803, + "grad_norm": 8.364494323730469, + "learning_rate": 1.561401098901099e-05, + "loss": 0.2518, + "step": 25033 + }, + { + "epoch": 68.77472527472527, + "grad_norm": 3.636639356613159, + "learning_rate": 1.5612637362637364e-05, + "loss": 0.0509, + "step": 25034 + }, + { + "epoch": 68.77747252747253, + "grad_norm": 10.264347076416016, + "learning_rate": 1.5611263736263738e-05, + "loss": 0.1371, + "step": 25035 + }, + { + "epoch": 68.78021978021978, + "grad_norm": 9.631221771240234, + "learning_rate": 1.560989010989011e-05, + "loss": 0.2471, + "step": 25036 + }, + { + "epoch": 68.78296703296704, + "grad_norm": 33.28459930419922, + "learning_rate": 1.5608516483516485e-05, + "loss": 0.8401, + "step": 25037 + }, + { + "epoch": 68.78571428571429, + "grad_norm": 4.882171630859375, + "learning_rate": 1.5607142857142858e-05, + "loss": 0.1299, + "step": 25038 + }, + { + "epoch": 68.78846153846153, + "grad_norm": 16.315826416015625, + "learning_rate": 1.560576923076923e-05, + "loss": 0.3497, + "step": 25039 + }, + { + "epoch": 68.79120879120879, + "grad_norm": 9.777310371398926, + "learning_rate": 1.5604395604395605e-05, + "loss": 0.1487, + "step": 25040 + }, + { + "epoch": 68.79395604395604, + "grad_norm": 10.35673713684082, + "learning_rate": 1.5603021978021978e-05, + "loss": 0.069, + "step": 25041 + }, + { + "epoch": 68.7967032967033, + "grad_norm": 9.631498336791992, + "learning_rate": 1.560164835164835e-05, + "loss": 0.067, + "step": 25042 + }, + { + "epoch": 68.79945054945055, + "grad_norm": 7.310652256011963, + "learning_rate": 1.560027472527473e-05, + "loss": 0.0914, + "step": 25043 + }, + { + "epoch": 68.8021978021978, + "grad_norm": 9.771997451782227, + "learning_rate": 1.5598901098901102e-05, + "loss": 0.1228, + "step": 25044 + }, + { + "epoch": 68.80494505494505, + "grad_norm": 2.046704053878784, + "learning_rate": 1.5597527472527472e-05, + "loss": 0.0273, + "step": 25045 + }, + { + "epoch": 68.8076923076923, + "grad_norm": 19.332674026489258, + "learning_rate": 1.5596153846153845e-05, + "loss": 0.4592, + "step": 25046 + }, + { + "epoch": 68.81043956043956, + "grad_norm": 6.811877727508545, + "learning_rate": 1.559478021978022e-05, + "loss": 0.1144, + "step": 25047 + }, + { + "epoch": 68.81318681318682, + "grad_norm": 7.7047810554504395, + "learning_rate": 1.5593406593406595e-05, + "loss": 0.2159, + "step": 25048 + }, + { + "epoch": 68.81593406593407, + "grad_norm": 14.962210655212402, + "learning_rate": 1.559203296703297e-05, + "loss": 0.2579, + "step": 25049 + }, + { + "epoch": 68.81868131868131, + "grad_norm": 3.0204408168792725, + "learning_rate": 1.5590659340659342e-05, + "loss": 0.0419, + "step": 25050 + }, + { + "epoch": 68.82142857142857, + "grad_norm": 19.364797592163086, + "learning_rate": 1.5589285714285716e-05, + "loss": 0.4095, + "step": 25051 + }, + { + "epoch": 68.82417582417582, + "grad_norm": 14.636224746704102, + "learning_rate": 1.558791208791209e-05, + "loss": 0.2114, + "step": 25052 + }, + { + "epoch": 68.82692307692308, + "grad_norm": 12.131266593933105, + "learning_rate": 1.5586538461538462e-05, + "loss": 0.2066, + "step": 25053 + }, + { + "epoch": 68.82967032967034, + "grad_norm": 25.46122169494629, + "learning_rate": 1.5585164835164836e-05, + "loss": 0.6961, + "step": 25054 + }, + { + "epoch": 68.83241758241758, + "grad_norm": 11.37939167022705, + "learning_rate": 1.558379120879121e-05, + "loss": 0.2748, + "step": 25055 + }, + { + "epoch": 68.83516483516483, + "grad_norm": 22.813447952270508, + "learning_rate": 1.5582417582417583e-05, + "loss": 0.4295, + "step": 25056 + }, + { + "epoch": 68.83791208791209, + "grad_norm": 11.482446670532227, + "learning_rate": 1.5581043956043956e-05, + "loss": 0.1779, + "step": 25057 + }, + { + "epoch": 68.84065934065934, + "grad_norm": 10.259116172790527, + "learning_rate": 1.5579670329670333e-05, + "loss": 0.1472, + "step": 25058 + }, + { + "epoch": 68.8434065934066, + "grad_norm": 5.372138023376465, + "learning_rate": 1.5578296703296706e-05, + "loss": 0.0925, + "step": 25059 + }, + { + "epoch": 68.84615384615384, + "grad_norm": 8.64262866973877, + "learning_rate": 1.5576923076923076e-05, + "loss": 0.3438, + "step": 25060 + }, + { + "epoch": 68.8489010989011, + "grad_norm": 12.035534858703613, + "learning_rate": 1.557554945054945e-05, + "loss": 0.1686, + "step": 25061 + }, + { + "epoch": 68.85164835164835, + "grad_norm": 18.26066780090332, + "learning_rate": 1.5574175824175823e-05, + "loss": 0.4152, + "step": 25062 + }, + { + "epoch": 68.8543956043956, + "grad_norm": 10.806467056274414, + "learning_rate": 1.55728021978022e-05, + "loss": 0.2786, + "step": 25063 + }, + { + "epoch": 68.85714285714286, + "grad_norm": 10.720430374145508, + "learning_rate": 1.5571428571428573e-05, + "loss": 0.1895, + "step": 25064 + }, + { + "epoch": 68.85989010989012, + "grad_norm": 18.87067222595215, + "learning_rate": 1.5570054945054947e-05, + "loss": 0.4069, + "step": 25065 + }, + { + "epoch": 68.86263736263736, + "grad_norm": 8.975736618041992, + "learning_rate": 1.556868131868132e-05, + "loss": 0.1076, + "step": 25066 + }, + { + "epoch": 68.86538461538461, + "grad_norm": 18.573284149169922, + "learning_rate": 1.5567307692307694e-05, + "loss": 0.1686, + "step": 25067 + }, + { + "epoch": 68.86813186813187, + "grad_norm": 19.691791534423828, + "learning_rate": 1.5565934065934067e-05, + "loss": 0.3461, + "step": 25068 + }, + { + "epoch": 68.87087912087912, + "grad_norm": 13.509921073913574, + "learning_rate": 1.556456043956044e-05, + "loss": 0.3076, + "step": 25069 + }, + { + "epoch": 68.87362637362638, + "grad_norm": 10.012032508850098, + "learning_rate": 1.5563186813186814e-05, + "loss": 0.1644, + "step": 25070 + }, + { + "epoch": 68.87637362637362, + "grad_norm": 16.140159606933594, + "learning_rate": 1.5561813186813187e-05, + "loss": 0.4037, + "step": 25071 + }, + { + "epoch": 68.87912087912088, + "grad_norm": 3.7228081226348877, + "learning_rate": 1.556043956043956e-05, + "loss": 0.0451, + "step": 25072 + }, + { + "epoch": 68.88186813186813, + "grad_norm": 7.711812496185303, + "learning_rate": 1.5559065934065937e-05, + "loss": 0.1085, + "step": 25073 + }, + { + "epoch": 68.88461538461539, + "grad_norm": 8.636994361877441, + "learning_rate": 1.555769230769231e-05, + "loss": 0.163, + "step": 25074 + }, + { + "epoch": 68.88736263736264, + "grad_norm": 7.910884380340576, + "learning_rate": 1.555631868131868e-05, + "loss": 0.1452, + "step": 25075 + }, + { + "epoch": 68.89010989010988, + "grad_norm": 18.151065826416016, + "learning_rate": 1.5554945054945054e-05, + "loss": 0.383, + "step": 25076 + }, + { + "epoch": 68.89285714285714, + "grad_norm": 18.439918518066406, + "learning_rate": 1.5553571428571428e-05, + "loss": 0.3639, + "step": 25077 + }, + { + "epoch": 68.8956043956044, + "grad_norm": 3.7900819778442383, + "learning_rate": 1.5552197802197804e-05, + "loss": 0.0491, + "step": 25078 + }, + { + "epoch": 68.89835164835165, + "grad_norm": 15.786547660827637, + "learning_rate": 1.5550824175824178e-05, + "loss": 0.2096, + "step": 25079 + }, + { + "epoch": 68.9010989010989, + "grad_norm": 25.86520004272461, + "learning_rate": 1.554945054945055e-05, + "loss": 0.6422, + "step": 25080 + }, + { + "epoch": 68.90384615384616, + "grad_norm": 9.437960624694824, + "learning_rate": 1.5548076923076925e-05, + "loss": 0.1878, + "step": 25081 + }, + { + "epoch": 68.9065934065934, + "grad_norm": 9.09679126739502, + "learning_rate": 1.5546703296703298e-05, + "loss": 0.1647, + "step": 25082 + }, + { + "epoch": 68.90934065934066, + "grad_norm": 9.135047912597656, + "learning_rate": 1.5545329670329668e-05, + "loss": 0.281, + "step": 25083 + }, + { + "epoch": 68.91208791208791, + "grad_norm": 17.114356994628906, + "learning_rate": 1.5543956043956045e-05, + "loss": 0.4018, + "step": 25084 + }, + { + "epoch": 68.91483516483517, + "grad_norm": 7.407888412475586, + "learning_rate": 1.5542582417582418e-05, + "loss": 0.0859, + "step": 25085 + }, + { + "epoch": 68.91758241758242, + "grad_norm": 6.221116542816162, + "learning_rate": 1.554120879120879e-05, + "loss": 0.0693, + "step": 25086 + }, + { + "epoch": 68.92032967032966, + "grad_norm": 6.392224311828613, + "learning_rate": 1.5539835164835165e-05, + "loss": 0.0763, + "step": 25087 + }, + { + "epoch": 68.92307692307692, + "grad_norm": 16.8579158782959, + "learning_rate": 1.553846153846154e-05, + "loss": 0.5527, + "step": 25088 + }, + { + "epoch": 68.92582417582418, + "grad_norm": 6.970005035400391, + "learning_rate": 1.5537087912087915e-05, + "loss": 0.1046, + "step": 25089 + }, + { + "epoch": 68.92857142857143, + "grad_norm": 15.544456481933594, + "learning_rate": 1.5535714285714285e-05, + "loss": 0.4276, + "step": 25090 + }, + { + "epoch": 68.93131868131869, + "grad_norm": 7.724533557891846, + "learning_rate": 1.553434065934066e-05, + "loss": 0.0973, + "step": 25091 + }, + { + "epoch": 68.93406593406593, + "grad_norm": 13.626794815063477, + "learning_rate": 1.5532967032967032e-05, + "loss": 0.3347, + "step": 25092 + }, + { + "epoch": 68.93681318681318, + "grad_norm": 25.55303382873535, + "learning_rate": 1.5531593406593406e-05, + "loss": 0.6948, + "step": 25093 + }, + { + "epoch": 68.93956043956044, + "grad_norm": 13.530843734741211, + "learning_rate": 1.5530219780219782e-05, + "loss": 0.2296, + "step": 25094 + }, + { + "epoch": 68.9423076923077, + "grad_norm": 15.101147651672363, + "learning_rate": 1.5528846153846156e-05, + "loss": 0.3727, + "step": 25095 + }, + { + "epoch": 68.94505494505495, + "grad_norm": 11.075645446777344, + "learning_rate": 1.552747252747253e-05, + "loss": 0.2433, + "step": 25096 + }, + { + "epoch": 68.9478021978022, + "grad_norm": 8.617781639099121, + "learning_rate": 1.5526098901098903e-05, + "loss": 0.1991, + "step": 25097 + }, + { + "epoch": 68.95054945054945, + "grad_norm": 12.856979370117188, + "learning_rate": 1.5524725274725273e-05, + "loss": 0.1953, + "step": 25098 + }, + { + "epoch": 68.9532967032967, + "grad_norm": 12.933581352233887, + "learning_rate": 1.552335164835165e-05, + "loss": 0.2564, + "step": 25099 + }, + { + "epoch": 68.95604395604396, + "grad_norm": 3.7007899284362793, + "learning_rate": 1.5521978021978023e-05, + "loss": 0.0563, + "step": 25100 + }, + { + "epoch": 68.95879120879121, + "grad_norm": 13.378084182739258, + "learning_rate": 1.5520604395604396e-05, + "loss": 0.2764, + "step": 25101 + }, + { + "epoch": 68.96153846153847, + "grad_norm": 10.410385131835938, + "learning_rate": 1.551923076923077e-05, + "loss": 0.2456, + "step": 25102 + }, + { + "epoch": 68.96428571428571, + "grad_norm": 15.547652244567871, + "learning_rate": 1.5517857142857143e-05, + "loss": 0.3394, + "step": 25103 + }, + { + "epoch": 68.96703296703296, + "grad_norm": 11.62077808380127, + "learning_rate": 1.551648351648352e-05, + "loss": 0.2293, + "step": 25104 + }, + { + "epoch": 68.96978021978022, + "grad_norm": 25.335391998291016, + "learning_rate": 1.551510989010989e-05, + "loss": 0.3914, + "step": 25105 + }, + { + "epoch": 68.97252747252747, + "grad_norm": 17.57118797302246, + "learning_rate": 1.5513736263736263e-05, + "loss": 0.4125, + "step": 25106 + }, + { + "epoch": 68.97527472527473, + "grad_norm": 2.0983800888061523, + "learning_rate": 1.5512362637362637e-05, + "loss": 0.0157, + "step": 25107 + }, + { + "epoch": 68.97802197802197, + "grad_norm": 8.927053451538086, + "learning_rate": 1.551098901098901e-05, + "loss": 0.1741, + "step": 25108 + }, + { + "epoch": 68.98076923076923, + "grad_norm": 12.91060733795166, + "learning_rate": 1.5509615384615387e-05, + "loss": 0.1778, + "step": 25109 + }, + { + "epoch": 68.98351648351648, + "grad_norm": 12.447509765625, + "learning_rate": 1.550824175824176e-05, + "loss": 0.2921, + "step": 25110 + }, + { + "epoch": 68.98626373626374, + "grad_norm": 6.179123878479004, + "learning_rate": 1.5506868131868134e-05, + "loss": 0.091, + "step": 25111 + }, + { + "epoch": 68.98901098901099, + "grad_norm": 3.561732769012451, + "learning_rate": 1.5505494505494507e-05, + "loss": 0.0716, + "step": 25112 + }, + { + "epoch": 68.99175824175825, + "grad_norm": 12.490642547607422, + "learning_rate": 1.5504120879120877e-05, + "loss": 0.1765, + "step": 25113 + }, + { + "epoch": 68.99450549450549, + "grad_norm": 5.470922946929932, + "learning_rate": 1.5502747252747254e-05, + "loss": 0.0665, + "step": 25114 + }, + { + "epoch": 68.99725274725274, + "grad_norm": 9.239082336425781, + "learning_rate": 1.5501373626373627e-05, + "loss": 0.0868, + "step": 25115 + }, + { + "epoch": 69.0, + "grad_norm": 82.41600799560547, + "learning_rate": 1.55e-05, + "loss": 1.0185, + "step": 25116 + }, + { + "epoch": 69.0, + "eval_accuracy": 0.8236914600550964, + "eval_f1": 0.8282582146980019, + "eval_f1_DuraRiadoRio_64x64": 0.7962962962962963, + "eval_f1_Mole_64x64": 0.9010989010989011, + "eval_f1_Quebrado_64x64": 0.8469750889679716, + "eval_f1_RiadoRio_64x64": 0.6905537459283387, + "eval_f1_RioFechado_64x64": 0.9063670411985019, + "eval_loss": 0.6910470128059387, + "eval_precision": 0.8381278290375513, + "eval_precision_DuraRiadoRio_64x64": 0.7166666666666667, + "eval_precision_Mole_64x64": 0.9534883720930233, + "eval_precision_Quebrado_64x64": 0.8686131386861314, + "eval_precision_RiadoRio_64x64": 0.6838709677419355, + "eval_precision_RioFechado_64x64": 0.968, + "eval_recall": 0.8251739971995716, + "eval_recall_DuraRiadoRio_64x64": 0.8958333333333334, + "eval_recall_Mole_64x64": 0.8541666666666666, + "eval_recall_Quebrado_64x64": 0.8263888888888888, + "eval_recall_RiadoRio_64x64": 0.6973684210526315, + "eval_recall_RioFechado_64x64": 0.852112676056338, + "eval_runtime": 1.7586, + "eval_samples_per_second": 412.817, + "eval_steps_per_second": 26.156, + "step": 25116 + }, + { + "epoch": 69.00274725274726, + "grad_norm": 22.886926651000977, + "learning_rate": 1.5498626373626374e-05, + "loss": 0.3768, + "step": 25117 + }, + { + "epoch": 69.00549450549451, + "grad_norm": 19.1007022857666, + "learning_rate": 1.5497252747252747e-05, + "loss": 0.4125, + "step": 25118 + }, + { + "epoch": 69.00824175824175, + "grad_norm": 19.530399322509766, + "learning_rate": 1.5495879120879124e-05, + "loss": 0.5485, + "step": 25119 + }, + { + "epoch": 69.01098901098901, + "grad_norm": 9.358154296875, + "learning_rate": 1.5494505494505494e-05, + "loss": 0.157, + "step": 25120 + }, + { + "epoch": 69.01373626373626, + "grad_norm": 3.153512477874756, + "learning_rate": 1.5493131868131868e-05, + "loss": 0.0495, + "step": 25121 + }, + { + "epoch": 69.01648351648352, + "grad_norm": 4.239480495452881, + "learning_rate": 1.549175824175824e-05, + "loss": 0.0444, + "step": 25122 + }, + { + "epoch": 69.01923076923077, + "grad_norm": 7.810837268829346, + "learning_rate": 1.5490384615384615e-05, + "loss": 0.1465, + "step": 25123 + }, + { + "epoch": 69.02197802197803, + "grad_norm": 5.610079288482666, + "learning_rate": 1.548901098901099e-05, + "loss": 0.0974, + "step": 25124 + }, + { + "epoch": 69.02472527472527, + "grad_norm": 9.348443984985352, + "learning_rate": 1.5487637362637365e-05, + "loss": 0.1653, + "step": 25125 + }, + { + "epoch": 69.02747252747253, + "grad_norm": 7.384785175323486, + "learning_rate": 1.5486263736263738e-05, + "loss": 0.2911, + "step": 25126 + }, + { + "epoch": 69.03021978021978, + "grad_norm": 6.83676815032959, + "learning_rate": 1.548489010989011e-05, + "loss": 0.0899, + "step": 25127 + }, + { + "epoch": 69.03296703296704, + "grad_norm": 12.973618507385254, + "learning_rate": 1.548351648351648e-05, + "loss": 0.4394, + "step": 25128 + }, + { + "epoch": 69.03571428571429, + "grad_norm": 5.8781561851501465, + "learning_rate": 1.548214285714286e-05, + "loss": 0.1563, + "step": 25129 + }, + { + "epoch": 69.03846153846153, + "grad_norm": 2.425280809402466, + "learning_rate": 1.5480769230769232e-05, + "loss": 0.0492, + "step": 25130 + }, + { + "epoch": 69.04120879120879, + "grad_norm": 11.958342552185059, + "learning_rate": 1.5479395604395605e-05, + "loss": 0.235, + "step": 25131 + }, + { + "epoch": 69.04395604395604, + "grad_norm": 6.918518543243408, + "learning_rate": 1.547802197802198e-05, + "loss": 0.1112, + "step": 25132 + }, + { + "epoch": 69.0467032967033, + "grad_norm": 13.843003273010254, + "learning_rate": 1.5476648351648352e-05, + "loss": 0.2736, + "step": 25133 + }, + { + "epoch": 69.04945054945055, + "grad_norm": 4.366063117980957, + "learning_rate": 1.547527472527473e-05, + "loss": 0.0682, + "step": 25134 + }, + { + "epoch": 69.0521978021978, + "grad_norm": 17.7609806060791, + "learning_rate": 1.54739010989011e-05, + "loss": 0.3486, + "step": 25135 + }, + { + "epoch": 69.05494505494505, + "grad_norm": 14.071624755859375, + "learning_rate": 1.5472527472527472e-05, + "loss": 0.0842, + "step": 25136 + }, + { + "epoch": 69.0576923076923, + "grad_norm": 8.803666114807129, + "learning_rate": 1.5471153846153846e-05, + "loss": 0.1418, + "step": 25137 + }, + { + "epoch": 69.06043956043956, + "grad_norm": 8.62879467010498, + "learning_rate": 1.546978021978022e-05, + "loss": 0.1093, + "step": 25138 + }, + { + "epoch": 69.06318681318682, + "grad_norm": 17.979999542236328, + "learning_rate": 1.5468406593406596e-05, + "loss": 0.3806, + "step": 25139 + }, + { + "epoch": 69.06593406593407, + "grad_norm": 5.347019195556641, + "learning_rate": 1.546703296703297e-05, + "loss": 0.0861, + "step": 25140 + }, + { + "epoch": 69.06868131868131, + "grad_norm": 6.8440961837768555, + "learning_rate": 1.5465659340659343e-05, + "loss": 0.1399, + "step": 25141 + }, + { + "epoch": 69.07142857142857, + "grad_norm": 5.287960529327393, + "learning_rate": 1.5464285714285716e-05, + "loss": 0.0519, + "step": 25142 + }, + { + "epoch": 69.07417582417582, + "grad_norm": 6.246399879455566, + "learning_rate": 1.5462912087912086e-05, + "loss": 0.164, + "step": 25143 + }, + { + "epoch": 69.07692307692308, + "grad_norm": 1.7190316915512085, + "learning_rate": 1.5461538461538463e-05, + "loss": 0.025, + "step": 25144 + }, + { + "epoch": 69.07967032967034, + "grad_norm": 8.152278900146484, + "learning_rate": 1.5460164835164836e-05, + "loss": 0.1608, + "step": 25145 + }, + { + "epoch": 69.08241758241758, + "grad_norm": 11.729697227478027, + "learning_rate": 1.545879120879121e-05, + "loss": 0.2019, + "step": 25146 + }, + { + "epoch": 69.08516483516483, + "grad_norm": 26.362689971923828, + "learning_rate": 1.5457417582417583e-05, + "loss": 0.6033, + "step": 25147 + }, + { + "epoch": 69.08791208791209, + "grad_norm": 6.834974765777588, + "learning_rate": 1.5456043956043956e-05, + "loss": 0.1439, + "step": 25148 + }, + { + "epoch": 69.09065934065934, + "grad_norm": 17.737077713012695, + "learning_rate": 1.5454670329670333e-05, + "loss": 0.2658, + "step": 25149 + }, + { + "epoch": 69.0934065934066, + "grad_norm": 5.5642991065979, + "learning_rate": 1.5453296703296703e-05, + "loss": 0.0944, + "step": 25150 + }, + { + "epoch": 69.09615384615384, + "grad_norm": 12.698848724365234, + "learning_rate": 1.5451923076923077e-05, + "loss": 0.2493, + "step": 25151 + }, + { + "epoch": 69.0989010989011, + "grad_norm": 11.134429931640625, + "learning_rate": 1.545054945054945e-05, + "loss": 0.0983, + "step": 25152 + }, + { + "epoch": 69.10164835164835, + "grad_norm": 17.90713119506836, + "learning_rate": 1.5449175824175824e-05, + "loss": 0.2813, + "step": 25153 + }, + { + "epoch": 69.1043956043956, + "grad_norm": 17.130287170410156, + "learning_rate": 1.54478021978022e-05, + "loss": 0.3347, + "step": 25154 + }, + { + "epoch": 69.10714285714286, + "grad_norm": 11.53615665435791, + "learning_rate": 1.5446428571428574e-05, + "loss": 0.2992, + "step": 25155 + }, + { + "epoch": 69.10989010989012, + "grad_norm": 11.381462097167969, + "learning_rate": 1.5445054945054947e-05, + "loss": 0.2504, + "step": 25156 + }, + { + "epoch": 69.11263736263736, + "grad_norm": 8.010263442993164, + "learning_rate": 1.544368131868132e-05, + "loss": 0.1451, + "step": 25157 + }, + { + "epoch": 69.11538461538461, + "grad_norm": 7.663431644439697, + "learning_rate": 1.544230769230769e-05, + "loss": 0.0659, + "step": 25158 + }, + { + "epoch": 69.11813186813187, + "grad_norm": 2.566946506500244, + "learning_rate": 1.5440934065934067e-05, + "loss": 0.043, + "step": 25159 + }, + { + "epoch": 69.12087912087912, + "grad_norm": 14.580523490905762, + "learning_rate": 1.543956043956044e-05, + "loss": 0.4639, + "step": 25160 + }, + { + "epoch": 69.12362637362638, + "grad_norm": 16.30358123779297, + "learning_rate": 1.5438186813186814e-05, + "loss": 0.3767, + "step": 25161 + }, + { + "epoch": 69.12637362637362, + "grad_norm": 6.379891872406006, + "learning_rate": 1.5436813186813188e-05, + "loss": 0.0745, + "step": 25162 + }, + { + "epoch": 69.12912087912088, + "grad_norm": 11.200100898742676, + "learning_rate": 1.543543956043956e-05, + "loss": 0.268, + "step": 25163 + }, + { + "epoch": 69.13186813186813, + "grad_norm": 20.805980682373047, + "learning_rate": 1.5434065934065934e-05, + "loss": 0.2613, + "step": 25164 + }, + { + "epoch": 69.13461538461539, + "grad_norm": 17.969160079956055, + "learning_rate": 1.5432692307692308e-05, + "loss": 0.3573, + "step": 25165 + }, + { + "epoch": 69.13736263736264, + "grad_norm": 15.807844161987305, + "learning_rate": 1.543131868131868e-05, + "loss": 0.4485, + "step": 25166 + }, + { + "epoch": 69.14010989010988, + "grad_norm": 10.343911170959473, + "learning_rate": 1.5429945054945055e-05, + "loss": 0.3094, + "step": 25167 + }, + { + "epoch": 69.14285714285714, + "grad_norm": 17.929101943969727, + "learning_rate": 1.5428571428571428e-05, + "loss": 0.3661, + "step": 25168 + }, + { + "epoch": 69.1456043956044, + "grad_norm": 22.749210357666016, + "learning_rate": 1.5427197802197805e-05, + "loss": 0.5412, + "step": 25169 + }, + { + "epoch": 69.14835164835165, + "grad_norm": 7.727097511291504, + "learning_rate": 1.5425824175824178e-05, + "loss": 0.1339, + "step": 25170 + }, + { + "epoch": 69.1510989010989, + "grad_norm": 11.805909156799316, + "learning_rate": 1.542445054945055e-05, + "loss": 0.1957, + "step": 25171 + }, + { + "epoch": 69.15384615384616, + "grad_norm": 11.293828964233398, + "learning_rate": 1.5423076923076925e-05, + "loss": 0.2331, + "step": 25172 + }, + { + "epoch": 69.1565934065934, + "grad_norm": 2.2435765266418457, + "learning_rate": 1.5421703296703295e-05, + "loss": 0.0308, + "step": 25173 + }, + { + "epoch": 69.15934065934066, + "grad_norm": 14.925027847290039, + "learning_rate": 1.5420329670329672e-05, + "loss": 0.2885, + "step": 25174 + }, + { + "epoch": 69.16208791208791, + "grad_norm": 13.608716011047363, + "learning_rate": 1.5418956043956045e-05, + "loss": 0.2411, + "step": 25175 + }, + { + "epoch": 69.16483516483517, + "grad_norm": 19.2145938873291, + "learning_rate": 1.541758241758242e-05, + "loss": 0.3235, + "step": 25176 + }, + { + "epoch": 69.16758241758242, + "grad_norm": 4.185487270355225, + "learning_rate": 1.5416208791208792e-05, + "loss": 0.0666, + "step": 25177 + }, + { + "epoch": 69.17032967032966, + "grad_norm": 3.5368502140045166, + "learning_rate": 1.5414835164835165e-05, + "loss": 0.0384, + "step": 25178 + }, + { + "epoch": 69.17307692307692, + "grad_norm": 14.384841918945312, + "learning_rate": 1.541346153846154e-05, + "loss": 0.2946, + "step": 25179 + }, + { + "epoch": 69.17582417582418, + "grad_norm": 12.666875839233398, + "learning_rate": 1.5412087912087912e-05, + "loss": 0.2807, + "step": 25180 + }, + { + "epoch": 69.17857142857143, + "grad_norm": 11.126065254211426, + "learning_rate": 1.5410714285714286e-05, + "loss": 0.1727, + "step": 25181 + }, + { + "epoch": 69.18131868131869, + "grad_norm": 7.801193714141846, + "learning_rate": 1.540934065934066e-05, + "loss": 0.0766, + "step": 25182 + }, + { + "epoch": 69.18406593406593, + "grad_norm": 16.373924255371094, + "learning_rate": 1.5407967032967032e-05, + "loss": 0.5609, + "step": 25183 + }, + { + "epoch": 69.18681318681318, + "grad_norm": 19.095762252807617, + "learning_rate": 1.540659340659341e-05, + "loss": 0.1736, + "step": 25184 + }, + { + "epoch": 69.18956043956044, + "grad_norm": 16.34760093688965, + "learning_rate": 1.5405219780219783e-05, + "loss": 0.1534, + "step": 25185 + }, + { + "epoch": 69.1923076923077, + "grad_norm": 13.418335914611816, + "learning_rate": 1.5403846153846156e-05, + "loss": 0.2333, + "step": 25186 + }, + { + "epoch": 69.19505494505495, + "grad_norm": 5.7299981117248535, + "learning_rate": 1.540247252747253e-05, + "loss": 0.0692, + "step": 25187 + }, + { + "epoch": 69.1978021978022, + "grad_norm": 12.761887550354004, + "learning_rate": 1.54010989010989e-05, + "loss": 0.283, + "step": 25188 + }, + { + "epoch": 69.20054945054945, + "grad_norm": 7.908290386199951, + "learning_rate": 1.5399725274725276e-05, + "loss": 0.0883, + "step": 25189 + }, + { + "epoch": 69.2032967032967, + "grad_norm": 6.686834335327148, + "learning_rate": 1.539835164835165e-05, + "loss": 0.099, + "step": 25190 + }, + { + "epoch": 69.20604395604396, + "grad_norm": 11.510720252990723, + "learning_rate": 1.5396978021978023e-05, + "loss": 0.0996, + "step": 25191 + }, + { + "epoch": 69.20879120879121, + "grad_norm": 13.71786117553711, + "learning_rate": 1.5395604395604397e-05, + "loss": 0.3302, + "step": 25192 + }, + { + "epoch": 69.21153846153847, + "grad_norm": 9.601127624511719, + "learning_rate": 1.539423076923077e-05, + "loss": 0.1609, + "step": 25193 + }, + { + "epoch": 69.21428571428571, + "grad_norm": 30.856393814086914, + "learning_rate": 1.5392857142857143e-05, + "loss": 0.4699, + "step": 25194 + }, + { + "epoch": 69.21703296703296, + "grad_norm": 10.49673843383789, + "learning_rate": 1.5391483516483517e-05, + "loss": 0.262, + "step": 25195 + }, + { + "epoch": 69.21978021978022, + "grad_norm": 4.647485256195068, + "learning_rate": 1.539010989010989e-05, + "loss": 0.0439, + "step": 25196 + }, + { + "epoch": 69.22252747252747, + "grad_norm": 12.595906257629395, + "learning_rate": 1.5388736263736264e-05, + "loss": 0.2748, + "step": 25197 + }, + { + "epoch": 69.22527472527473, + "grad_norm": 7.89315128326416, + "learning_rate": 1.5387362637362637e-05, + "loss": 0.1611, + "step": 25198 + }, + { + "epoch": 69.22802197802197, + "grad_norm": 9.097402572631836, + "learning_rate": 1.538598901098901e-05, + "loss": 0.1753, + "step": 25199 + }, + { + "epoch": 69.23076923076923, + "grad_norm": 26.621845245361328, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.4933, + "step": 25200 + }, + { + "epoch": 69.23351648351648, + "grad_norm": 14.40670108795166, + "learning_rate": 1.538324175824176e-05, + "loss": 0.2434, + "step": 25201 + }, + { + "epoch": 69.23626373626374, + "grad_norm": 16.657621383666992, + "learning_rate": 1.5381868131868134e-05, + "loss": 0.3348, + "step": 25202 + }, + { + "epoch": 69.23901098901099, + "grad_norm": 5.935795307159424, + "learning_rate": 1.5380494505494504e-05, + "loss": 0.0611, + "step": 25203 + }, + { + "epoch": 69.24175824175825, + "grad_norm": 15.086737632751465, + "learning_rate": 1.5379120879120877e-05, + "loss": 0.427, + "step": 25204 + }, + { + "epoch": 69.24450549450549, + "grad_norm": 13.930211067199707, + "learning_rate": 1.5377747252747254e-05, + "loss": 0.118, + "step": 25205 + }, + { + "epoch": 69.24725274725274, + "grad_norm": 11.182841300964355, + "learning_rate": 1.5376373626373628e-05, + "loss": 0.2222, + "step": 25206 + }, + { + "epoch": 69.25, + "grad_norm": 11.605446815490723, + "learning_rate": 1.5375e-05, + "loss": 0.3571, + "step": 25207 + }, + { + "epoch": 69.25274725274726, + "grad_norm": 17.348648071289062, + "learning_rate": 1.5373626373626374e-05, + "loss": 0.5562, + "step": 25208 + }, + { + "epoch": 69.25549450549451, + "grad_norm": 8.668396949768066, + "learning_rate": 1.5372252747252748e-05, + "loss": 0.1604, + "step": 25209 + }, + { + "epoch": 69.25824175824175, + "grad_norm": 24.894739151000977, + "learning_rate": 1.537087912087912e-05, + "loss": 0.8464, + "step": 25210 + }, + { + "epoch": 69.26098901098901, + "grad_norm": 10.14136028289795, + "learning_rate": 1.5369505494505495e-05, + "loss": 0.1383, + "step": 25211 + }, + { + "epoch": 69.26373626373626, + "grad_norm": 15.108454704284668, + "learning_rate": 1.5368131868131868e-05, + "loss": 0.3164, + "step": 25212 + }, + { + "epoch": 69.26648351648352, + "grad_norm": 18.16861915588379, + "learning_rate": 1.536675824175824e-05, + "loss": 0.3183, + "step": 25213 + }, + { + "epoch": 69.26923076923077, + "grad_norm": 5.543912887573242, + "learning_rate": 1.5365384615384615e-05, + "loss": 0.1337, + "step": 25214 + }, + { + "epoch": 69.27197802197803, + "grad_norm": 13.677377700805664, + "learning_rate": 1.536401098901099e-05, + "loss": 0.3518, + "step": 25215 + }, + { + "epoch": 69.27472527472527, + "grad_norm": 9.2691011428833, + "learning_rate": 1.5362637362637365e-05, + "loss": 0.162, + "step": 25216 + }, + { + "epoch": 69.27747252747253, + "grad_norm": 12.20124340057373, + "learning_rate": 1.536126373626374e-05, + "loss": 0.2882, + "step": 25217 + }, + { + "epoch": 69.28021978021978, + "grad_norm": 16.881601333618164, + "learning_rate": 1.535989010989011e-05, + "loss": 0.2013, + "step": 25218 + }, + { + "epoch": 69.28296703296704, + "grad_norm": 6.93611478805542, + "learning_rate": 1.5358516483516482e-05, + "loss": 0.1408, + "step": 25219 + }, + { + "epoch": 69.28571428571429, + "grad_norm": 6.443593502044678, + "learning_rate": 1.535714285714286e-05, + "loss": 0.0706, + "step": 25220 + }, + { + "epoch": 69.28846153846153, + "grad_norm": 14.948397636413574, + "learning_rate": 1.5355769230769232e-05, + "loss": 0.4507, + "step": 25221 + }, + { + "epoch": 69.29120879120879, + "grad_norm": 13.070609092712402, + "learning_rate": 1.5354395604395606e-05, + "loss": 0.1965, + "step": 25222 + }, + { + "epoch": 69.29395604395604, + "grad_norm": 3.6470861434936523, + "learning_rate": 1.535302197802198e-05, + "loss": 0.0468, + "step": 25223 + }, + { + "epoch": 69.2967032967033, + "grad_norm": 10.671302795410156, + "learning_rate": 1.5351648351648352e-05, + "loss": 0.2458, + "step": 25224 + }, + { + "epoch": 69.29945054945055, + "grad_norm": 6.18196964263916, + "learning_rate": 1.5350274725274726e-05, + "loss": 0.0679, + "step": 25225 + }, + { + "epoch": 69.3021978021978, + "grad_norm": 8.9737548828125, + "learning_rate": 1.53489010989011e-05, + "loss": 0.2596, + "step": 25226 + }, + { + "epoch": 69.30494505494505, + "grad_norm": 10.540094375610352, + "learning_rate": 1.5347527472527473e-05, + "loss": 0.1309, + "step": 25227 + }, + { + "epoch": 69.3076923076923, + "grad_norm": 15.479621887207031, + "learning_rate": 1.5346153846153846e-05, + "loss": 0.5204, + "step": 25228 + }, + { + "epoch": 69.31043956043956, + "grad_norm": 12.607701301574707, + "learning_rate": 1.534478021978022e-05, + "loss": 0.1436, + "step": 25229 + }, + { + "epoch": 69.31318681318682, + "grad_norm": 4.621301651000977, + "learning_rate": 1.5343406593406596e-05, + "loss": 0.0729, + "step": 25230 + }, + { + "epoch": 69.31593406593407, + "grad_norm": 9.273360252380371, + "learning_rate": 1.534203296703297e-05, + "loss": 0.0702, + "step": 25231 + }, + { + "epoch": 69.31868131868131, + "grad_norm": 10.230072021484375, + "learning_rate": 1.5340659340659343e-05, + "loss": 0.1705, + "step": 25232 + }, + { + "epoch": 69.32142857142857, + "grad_norm": 12.848197937011719, + "learning_rate": 1.5339285714285713e-05, + "loss": 0.17, + "step": 25233 + }, + { + "epoch": 69.32417582417582, + "grad_norm": 20.185062408447266, + "learning_rate": 1.5337912087912086e-05, + "loss": 0.2588, + "step": 25234 + }, + { + "epoch": 69.32692307692308, + "grad_norm": 6.4677348136901855, + "learning_rate": 1.5336538461538463e-05, + "loss": 0.1458, + "step": 25235 + }, + { + "epoch": 69.32967032967034, + "grad_norm": 7.613990306854248, + "learning_rate": 1.5335164835164837e-05, + "loss": 0.15, + "step": 25236 + }, + { + "epoch": 69.33241758241758, + "grad_norm": 15.708455085754395, + "learning_rate": 1.533379120879121e-05, + "loss": 0.1674, + "step": 25237 + }, + { + "epoch": 69.33516483516483, + "grad_norm": 10.005884170532227, + "learning_rate": 1.5332417582417583e-05, + "loss": 0.1588, + "step": 25238 + }, + { + "epoch": 69.33791208791209, + "grad_norm": 20.257856369018555, + "learning_rate": 1.5331043956043957e-05, + "loss": 0.8764, + "step": 25239 + }, + { + "epoch": 69.34065934065934, + "grad_norm": 14.797104835510254, + "learning_rate": 1.532967032967033e-05, + "loss": 0.2015, + "step": 25240 + }, + { + "epoch": 69.3434065934066, + "grad_norm": 7.940303802490234, + "learning_rate": 1.5328296703296704e-05, + "loss": 0.0739, + "step": 25241 + }, + { + "epoch": 69.34615384615384, + "grad_norm": 7.447722911834717, + "learning_rate": 1.5326923076923077e-05, + "loss": 0.0943, + "step": 25242 + }, + { + "epoch": 69.3489010989011, + "grad_norm": 17.5155086517334, + "learning_rate": 1.532554945054945e-05, + "loss": 0.2892, + "step": 25243 + }, + { + "epoch": 69.35164835164835, + "grad_norm": 20.71033477783203, + "learning_rate": 1.5324175824175824e-05, + "loss": 0.5437, + "step": 25244 + }, + { + "epoch": 69.3543956043956, + "grad_norm": 8.960351943969727, + "learning_rate": 1.53228021978022e-05, + "loss": 0.2041, + "step": 25245 + }, + { + "epoch": 69.35714285714286, + "grad_norm": 14.319622039794922, + "learning_rate": 1.5321428571428574e-05, + "loss": 0.3115, + "step": 25246 + }, + { + "epoch": 69.35989010989012, + "grad_norm": 21.421560287475586, + "learning_rate": 1.5320054945054944e-05, + "loss": 0.1892, + "step": 25247 + }, + { + "epoch": 69.36263736263736, + "grad_norm": 13.648465156555176, + "learning_rate": 1.5318681318681317e-05, + "loss": 0.317, + "step": 25248 + }, + { + "epoch": 69.36538461538461, + "grad_norm": 15.312607765197754, + "learning_rate": 1.531730769230769e-05, + "loss": 0.2445, + "step": 25249 + }, + { + "epoch": 69.36813186813187, + "grad_norm": 5.771483898162842, + "learning_rate": 1.5315934065934068e-05, + "loss": 0.1205, + "step": 25250 + }, + { + "epoch": 69.37087912087912, + "grad_norm": 19.768985748291016, + "learning_rate": 1.531456043956044e-05, + "loss": 0.6296, + "step": 25251 + }, + { + "epoch": 69.37362637362638, + "grad_norm": 2.429983377456665, + "learning_rate": 1.5313186813186814e-05, + "loss": 0.0319, + "step": 25252 + }, + { + "epoch": 69.37637362637362, + "grad_norm": 23.56502914428711, + "learning_rate": 1.5311813186813188e-05, + "loss": 0.5909, + "step": 25253 + }, + { + "epoch": 69.37912087912088, + "grad_norm": 11.889330863952637, + "learning_rate": 1.531043956043956e-05, + "loss": 0.1638, + "step": 25254 + }, + { + "epoch": 69.38186813186813, + "grad_norm": 4.613020420074463, + "learning_rate": 1.5309065934065935e-05, + "loss": 0.0643, + "step": 25255 + }, + { + "epoch": 69.38461538461539, + "grad_norm": 6.541438579559326, + "learning_rate": 1.5307692307692308e-05, + "loss": 0.122, + "step": 25256 + }, + { + "epoch": 69.38736263736264, + "grad_norm": 7.30927038192749, + "learning_rate": 1.530631868131868e-05, + "loss": 0.1806, + "step": 25257 + }, + { + "epoch": 69.39010989010988, + "grad_norm": 10.745266914367676, + "learning_rate": 1.5304945054945055e-05, + "loss": 0.257, + "step": 25258 + }, + { + "epoch": 69.39285714285714, + "grad_norm": 14.812091827392578, + "learning_rate": 1.530357142857143e-05, + "loss": 0.1439, + "step": 25259 + }, + { + "epoch": 69.3956043956044, + "grad_norm": 10.178532600402832, + "learning_rate": 1.5302197802197805e-05, + "loss": 0.109, + "step": 25260 + }, + { + "epoch": 69.39835164835165, + "grad_norm": 6.535592555999756, + "learning_rate": 1.530082417582418e-05, + "loss": 0.0767, + "step": 25261 + }, + { + "epoch": 69.4010989010989, + "grad_norm": 5.1553802490234375, + "learning_rate": 1.529945054945055e-05, + "loss": 0.1105, + "step": 25262 + }, + { + "epoch": 69.40384615384616, + "grad_norm": 16.178131103515625, + "learning_rate": 1.5298076923076922e-05, + "loss": 0.2468, + "step": 25263 + }, + { + "epoch": 69.4065934065934, + "grad_norm": 3.8940067291259766, + "learning_rate": 1.5296703296703295e-05, + "loss": 0.0983, + "step": 25264 + }, + { + "epoch": 69.40934065934066, + "grad_norm": 8.6263427734375, + "learning_rate": 1.5295329670329672e-05, + "loss": 0.1536, + "step": 25265 + }, + { + "epoch": 69.41208791208791, + "grad_norm": 11.546161651611328, + "learning_rate": 1.5293956043956046e-05, + "loss": 0.1397, + "step": 25266 + }, + { + "epoch": 69.41483516483517, + "grad_norm": 20.05282211303711, + "learning_rate": 1.529258241758242e-05, + "loss": 0.3184, + "step": 25267 + }, + { + "epoch": 69.41758241758242, + "grad_norm": 11.57628345489502, + "learning_rate": 1.5291208791208792e-05, + "loss": 0.1088, + "step": 25268 + }, + { + "epoch": 69.42032967032966, + "grad_norm": 7.240428924560547, + "learning_rate": 1.5289835164835166e-05, + "loss": 0.0925, + "step": 25269 + }, + { + "epoch": 69.42307692307692, + "grad_norm": 5.737756729125977, + "learning_rate": 1.528846153846154e-05, + "loss": 0.0996, + "step": 25270 + }, + { + "epoch": 69.42582417582418, + "grad_norm": 9.672431945800781, + "learning_rate": 1.5287087912087913e-05, + "loss": 0.2707, + "step": 25271 + }, + { + "epoch": 69.42857142857143, + "grad_norm": 16.531848907470703, + "learning_rate": 1.5285714285714286e-05, + "loss": 0.3991, + "step": 25272 + }, + { + "epoch": 69.43131868131869, + "grad_norm": 10.991510391235352, + "learning_rate": 1.528434065934066e-05, + "loss": 0.3081, + "step": 25273 + }, + { + "epoch": 69.43406593406593, + "grad_norm": 4.653820991516113, + "learning_rate": 1.5282967032967033e-05, + "loss": 0.0776, + "step": 25274 + }, + { + "epoch": 69.43681318681318, + "grad_norm": 3.2245094776153564, + "learning_rate": 1.528159340659341e-05, + "loss": 0.0284, + "step": 25275 + }, + { + "epoch": 69.43956043956044, + "grad_norm": 7.874775409698486, + "learning_rate": 1.5280219780219783e-05, + "loss": 0.1417, + "step": 25276 + }, + { + "epoch": 69.4423076923077, + "grad_norm": 25.23811149597168, + "learning_rate": 1.5278846153846153e-05, + "loss": 1.0201, + "step": 25277 + }, + { + "epoch": 69.44505494505495, + "grad_norm": 9.445002555847168, + "learning_rate": 1.5277472527472526e-05, + "loss": 0.1161, + "step": 25278 + }, + { + "epoch": 69.4478021978022, + "grad_norm": 6.642879009246826, + "learning_rate": 1.52760989010989e-05, + "loss": 0.0789, + "step": 25279 + }, + { + "epoch": 69.45054945054945, + "grad_norm": 2.975511074066162, + "learning_rate": 1.5274725274725277e-05, + "loss": 0.0462, + "step": 25280 + }, + { + "epoch": 69.4532967032967, + "grad_norm": 9.038886070251465, + "learning_rate": 1.527335164835165e-05, + "loss": 0.1027, + "step": 25281 + }, + { + "epoch": 69.45604395604396, + "grad_norm": 26.926143646240234, + "learning_rate": 1.5271978021978023e-05, + "loss": 0.6009, + "step": 25282 + }, + { + "epoch": 69.45879120879121, + "grad_norm": 7.880935192108154, + "learning_rate": 1.5270604395604397e-05, + "loss": 0.1579, + "step": 25283 + }, + { + "epoch": 69.46153846153847, + "grad_norm": 16.846452713012695, + "learning_rate": 1.526923076923077e-05, + "loss": 0.2999, + "step": 25284 + }, + { + "epoch": 69.46428571428571, + "grad_norm": 11.620373725891113, + "learning_rate": 1.5267857142857144e-05, + "loss": 0.2009, + "step": 25285 + }, + { + "epoch": 69.46703296703296, + "grad_norm": 6.523848533630371, + "learning_rate": 1.5266483516483517e-05, + "loss": 0.078, + "step": 25286 + }, + { + "epoch": 69.46978021978022, + "grad_norm": 26.435447692871094, + "learning_rate": 1.526510989010989e-05, + "loss": 0.2671, + "step": 25287 + }, + { + "epoch": 69.47252747252747, + "grad_norm": 21.361278533935547, + "learning_rate": 1.5263736263736264e-05, + "loss": 0.292, + "step": 25288 + }, + { + "epoch": 69.47527472527473, + "grad_norm": 16.700054168701172, + "learning_rate": 1.5262362637362637e-05, + "loss": 0.6035, + "step": 25289 + }, + { + "epoch": 69.47802197802197, + "grad_norm": 16.366416931152344, + "learning_rate": 1.5260989010989014e-05, + "loss": 0.3365, + "step": 25290 + }, + { + "epoch": 69.48076923076923, + "grad_norm": 16.1931209564209, + "learning_rate": 1.5259615384615388e-05, + "loss": 0.3407, + "step": 25291 + }, + { + "epoch": 69.48351648351648, + "grad_norm": 5.404366970062256, + "learning_rate": 1.525824175824176e-05, + "loss": 0.116, + "step": 25292 + }, + { + "epoch": 69.48626373626374, + "grad_norm": 12.926548957824707, + "learning_rate": 1.5256868131868133e-05, + "loss": 0.375, + "step": 25293 + }, + { + "epoch": 69.48901098901099, + "grad_norm": 25.769376754760742, + "learning_rate": 1.5255494505494506e-05, + "loss": 0.7287, + "step": 25294 + }, + { + "epoch": 69.49175824175825, + "grad_norm": 6.66518497467041, + "learning_rate": 1.5254120879120881e-05, + "loss": 0.1177, + "step": 25295 + }, + { + "epoch": 69.49450549450549, + "grad_norm": 19.10594367980957, + "learning_rate": 1.5252747252747255e-05, + "loss": 0.4106, + "step": 25296 + }, + { + "epoch": 69.49725274725274, + "grad_norm": 16.762109756469727, + "learning_rate": 1.5251373626373628e-05, + "loss": 0.5203, + "step": 25297 + }, + { + "epoch": 69.5, + "grad_norm": 12.907292366027832, + "learning_rate": 1.525e-05, + "loss": 0.3237, + "step": 25298 + }, + { + "epoch": 69.50274725274726, + "grad_norm": 18.891368865966797, + "learning_rate": 1.5248626373626373e-05, + "loss": 0.4246, + "step": 25299 + }, + { + "epoch": 69.50549450549451, + "grad_norm": 11.032151222229004, + "learning_rate": 1.524725274725275e-05, + "loss": 0.269, + "step": 25300 + }, + { + "epoch": 69.50824175824175, + "grad_norm": 6.01408576965332, + "learning_rate": 1.5245879120879122e-05, + "loss": 0.0698, + "step": 25301 + }, + { + "epoch": 69.51098901098901, + "grad_norm": 2.731539487838745, + "learning_rate": 1.5244505494505495e-05, + "loss": 0.04, + "step": 25302 + }, + { + "epoch": 69.51373626373626, + "grad_norm": 21.647994995117188, + "learning_rate": 1.5243131868131868e-05, + "loss": 0.458, + "step": 25303 + }, + { + "epoch": 69.51648351648352, + "grad_norm": 14.634078979492188, + "learning_rate": 1.5241758241758242e-05, + "loss": 0.2329, + "step": 25304 + }, + { + "epoch": 69.51923076923077, + "grad_norm": 16.25418472290039, + "learning_rate": 1.5240384615384617e-05, + "loss": 0.2753, + "step": 25305 + }, + { + "epoch": 69.52197802197803, + "grad_norm": 12.782249450683594, + "learning_rate": 1.523901098901099e-05, + "loss": 0.1452, + "step": 25306 + }, + { + "epoch": 69.52472527472527, + "grad_norm": 17.895469665527344, + "learning_rate": 1.5237637362637364e-05, + "loss": 0.6086, + "step": 25307 + }, + { + "epoch": 69.52747252747253, + "grad_norm": 8.2146635055542, + "learning_rate": 1.5236263736263737e-05, + "loss": 0.0922, + "step": 25308 + }, + { + "epoch": 69.53021978021978, + "grad_norm": 15.405065536499023, + "learning_rate": 1.523489010989011e-05, + "loss": 0.3144, + "step": 25309 + }, + { + "epoch": 69.53296703296704, + "grad_norm": 5.33172607421875, + "learning_rate": 1.5233516483516482e-05, + "loss": 0.0799, + "step": 25310 + }, + { + "epoch": 69.53571428571429, + "grad_norm": 6.244354724884033, + "learning_rate": 1.5232142857142859e-05, + "loss": 0.1386, + "step": 25311 + }, + { + "epoch": 69.53846153846153, + "grad_norm": 2.649141311645508, + "learning_rate": 1.5230769230769232e-05, + "loss": 0.0226, + "step": 25312 + }, + { + "epoch": 69.54120879120879, + "grad_norm": 9.083720207214355, + "learning_rate": 1.5229395604395604e-05, + "loss": 0.243, + "step": 25313 + }, + { + "epoch": 69.54395604395604, + "grad_norm": 6.501982688903809, + "learning_rate": 1.5228021978021978e-05, + "loss": 0.0651, + "step": 25314 + }, + { + "epoch": 69.5467032967033, + "grad_norm": 11.058319091796875, + "learning_rate": 1.5226648351648351e-05, + "loss": 0.2074, + "step": 25315 + }, + { + "epoch": 69.54945054945055, + "grad_norm": 17.284191131591797, + "learning_rate": 1.5225274725274726e-05, + "loss": 0.2517, + "step": 25316 + }, + { + "epoch": 69.5521978021978, + "grad_norm": 14.05467414855957, + "learning_rate": 1.52239010989011e-05, + "loss": 0.4379, + "step": 25317 + }, + { + "epoch": 69.55494505494505, + "grad_norm": 11.825704574584961, + "learning_rate": 1.5222527472527473e-05, + "loss": 0.1424, + "step": 25318 + }, + { + "epoch": 69.5576923076923, + "grad_norm": 8.1368989944458, + "learning_rate": 1.5221153846153846e-05, + "loss": 0.1332, + "step": 25319 + }, + { + "epoch": 69.56043956043956, + "grad_norm": 6.863047122955322, + "learning_rate": 1.521978021978022e-05, + "loss": 0.0928, + "step": 25320 + }, + { + "epoch": 69.56318681318682, + "grad_norm": 7.462343215942383, + "learning_rate": 1.5218406593406595e-05, + "loss": 0.0842, + "step": 25321 + }, + { + "epoch": 69.56593406593407, + "grad_norm": 12.22755241394043, + "learning_rate": 1.5217032967032968e-05, + "loss": 0.3651, + "step": 25322 + }, + { + "epoch": 69.56868131868131, + "grad_norm": 16.50377082824707, + "learning_rate": 1.5215659340659342e-05, + "loss": 0.2178, + "step": 25323 + }, + { + "epoch": 69.57142857142857, + "grad_norm": 9.81043815612793, + "learning_rate": 1.5214285714285715e-05, + "loss": 0.1215, + "step": 25324 + }, + { + "epoch": 69.57417582417582, + "grad_norm": 4.39883279800415, + "learning_rate": 1.5212912087912087e-05, + "loss": 0.061, + "step": 25325 + }, + { + "epoch": 69.57692307692308, + "grad_norm": 8.169371604919434, + "learning_rate": 1.5211538461538464e-05, + "loss": 0.1557, + "step": 25326 + }, + { + "epoch": 69.57967032967034, + "grad_norm": 3.271723985671997, + "learning_rate": 1.5210164835164837e-05, + "loss": 0.0582, + "step": 25327 + }, + { + "epoch": 69.58241758241758, + "grad_norm": 7.303197860717773, + "learning_rate": 1.5208791208791209e-05, + "loss": 0.1559, + "step": 25328 + }, + { + "epoch": 69.58516483516483, + "grad_norm": 16.661273956298828, + "learning_rate": 1.5207417582417582e-05, + "loss": 0.5836, + "step": 25329 + }, + { + "epoch": 69.58791208791209, + "grad_norm": 13.367900848388672, + "learning_rate": 1.5206043956043955e-05, + "loss": 0.1511, + "step": 25330 + }, + { + "epoch": 69.59065934065934, + "grad_norm": 14.326262474060059, + "learning_rate": 1.520467032967033e-05, + "loss": 0.32, + "step": 25331 + }, + { + "epoch": 69.5934065934066, + "grad_norm": 14.518390655517578, + "learning_rate": 1.5203296703296704e-05, + "loss": 0.2066, + "step": 25332 + }, + { + "epoch": 69.59615384615384, + "grad_norm": 10.851221084594727, + "learning_rate": 1.5201923076923077e-05, + "loss": 0.1742, + "step": 25333 + }, + { + "epoch": 69.5989010989011, + "grad_norm": 7.282585144042969, + "learning_rate": 1.520054945054945e-05, + "loss": 0.1217, + "step": 25334 + }, + { + "epoch": 69.60164835164835, + "grad_norm": 8.338249206542969, + "learning_rate": 1.5199175824175824e-05, + "loss": 0.1266, + "step": 25335 + }, + { + "epoch": 69.6043956043956, + "grad_norm": 9.336589813232422, + "learning_rate": 1.51978021978022e-05, + "loss": 0.3118, + "step": 25336 + }, + { + "epoch": 69.60714285714286, + "grad_norm": 12.22751235961914, + "learning_rate": 1.5196428571428573e-05, + "loss": 0.1831, + "step": 25337 + }, + { + "epoch": 69.60989010989012, + "grad_norm": 12.646095275878906, + "learning_rate": 1.5195054945054946e-05, + "loss": 0.1345, + "step": 25338 + }, + { + "epoch": 69.61263736263736, + "grad_norm": 9.079813003540039, + "learning_rate": 1.519368131868132e-05, + "loss": 0.2441, + "step": 25339 + }, + { + "epoch": 69.61538461538461, + "grad_norm": 1.750046968460083, + "learning_rate": 1.5192307692307691e-05, + "loss": 0.0205, + "step": 25340 + }, + { + "epoch": 69.61813186813187, + "grad_norm": 11.460797309875488, + "learning_rate": 1.5190934065934068e-05, + "loss": 0.1434, + "step": 25341 + }, + { + "epoch": 69.62087912087912, + "grad_norm": 19.853317260742188, + "learning_rate": 1.5189560439560441e-05, + "loss": 0.4196, + "step": 25342 + }, + { + "epoch": 69.62362637362638, + "grad_norm": 15.350674629211426, + "learning_rate": 1.5188186813186813e-05, + "loss": 0.4148, + "step": 25343 + }, + { + "epoch": 69.62637362637362, + "grad_norm": 3.90427827835083, + "learning_rate": 1.5186813186813187e-05, + "loss": 0.0479, + "step": 25344 + }, + { + "epoch": 69.62912087912088, + "grad_norm": 14.342053413391113, + "learning_rate": 1.518543956043956e-05, + "loss": 0.3267, + "step": 25345 + }, + { + "epoch": 69.63186813186813, + "grad_norm": 12.153331756591797, + "learning_rate": 1.5184065934065935e-05, + "loss": 0.2618, + "step": 25346 + }, + { + "epoch": 69.63461538461539, + "grad_norm": 6.5964035987854, + "learning_rate": 1.5182692307692308e-05, + "loss": 0.1167, + "step": 25347 + }, + { + "epoch": 69.63736263736264, + "grad_norm": 7.8010663986206055, + "learning_rate": 1.5181318681318682e-05, + "loss": 0.1169, + "step": 25348 + }, + { + "epoch": 69.64010989010988, + "grad_norm": 14.506637573242188, + "learning_rate": 1.5179945054945055e-05, + "loss": 0.2019, + "step": 25349 + }, + { + "epoch": 69.64285714285714, + "grad_norm": 13.013964653015137, + "learning_rate": 1.5178571428571429e-05, + "loss": 0.2396, + "step": 25350 + }, + { + "epoch": 69.6456043956044, + "grad_norm": 11.143354415893555, + "learning_rate": 1.5177197802197804e-05, + "loss": 0.0632, + "step": 25351 + }, + { + "epoch": 69.64835164835165, + "grad_norm": 12.670439720153809, + "learning_rate": 1.5175824175824177e-05, + "loss": 0.1565, + "step": 25352 + }, + { + "epoch": 69.6510989010989, + "grad_norm": 4.574213027954102, + "learning_rate": 1.517445054945055e-05, + "loss": 0.0682, + "step": 25353 + }, + { + "epoch": 69.65384615384616, + "grad_norm": 13.706729888916016, + "learning_rate": 1.5173076923076924e-05, + "loss": 0.1866, + "step": 25354 + }, + { + "epoch": 69.6565934065934, + "grad_norm": 7.81367826461792, + "learning_rate": 1.5171703296703296e-05, + "loss": 0.128, + "step": 25355 + }, + { + "epoch": 69.65934065934066, + "grad_norm": 9.92506217956543, + "learning_rate": 1.5170329670329673e-05, + "loss": 0.33, + "step": 25356 + }, + { + "epoch": 69.66208791208791, + "grad_norm": 21.943077087402344, + "learning_rate": 1.5168956043956046e-05, + "loss": 0.5485, + "step": 25357 + }, + { + "epoch": 69.66483516483517, + "grad_norm": 13.234650611877441, + "learning_rate": 1.5167582417582418e-05, + "loss": 0.1916, + "step": 25358 + }, + { + "epoch": 69.66758241758242, + "grad_norm": 8.170272827148438, + "learning_rate": 1.5166208791208791e-05, + "loss": 0.1506, + "step": 25359 + }, + { + "epoch": 69.67032967032966, + "grad_norm": 10.301251411437988, + "learning_rate": 1.5164835164835164e-05, + "loss": 0.1498, + "step": 25360 + }, + { + "epoch": 69.67307692307692, + "grad_norm": 10.264328002929688, + "learning_rate": 1.516346153846154e-05, + "loss": 0.1256, + "step": 25361 + }, + { + "epoch": 69.67582417582418, + "grad_norm": 7.761999130249023, + "learning_rate": 1.5162087912087913e-05, + "loss": 0.1158, + "step": 25362 + }, + { + "epoch": 69.67857142857143, + "grad_norm": 5.191104412078857, + "learning_rate": 1.5160714285714286e-05, + "loss": 0.1012, + "step": 25363 + }, + { + "epoch": 69.68131868131869, + "grad_norm": 6.557402610778809, + "learning_rate": 1.515934065934066e-05, + "loss": 0.0998, + "step": 25364 + }, + { + "epoch": 69.68406593406593, + "grad_norm": 26.93250274658203, + "learning_rate": 1.5157967032967033e-05, + "loss": 0.9822, + "step": 25365 + }, + { + "epoch": 69.68681318681318, + "grad_norm": 8.105300903320312, + "learning_rate": 1.5156593406593408e-05, + "loss": 0.0813, + "step": 25366 + }, + { + "epoch": 69.68956043956044, + "grad_norm": 13.997793197631836, + "learning_rate": 1.5155219780219782e-05, + "loss": 0.1385, + "step": 25367 + }, + { + "epoch": 69.6923076923077, + "grad_norm": 12.041536331176758, + "learning_rate": 1.5153846153846155e-05, + "loss": 0.1521, + "step": 25368 + }, + { + "epoch": 69.69505494505495, + "grad_norm": 20.17198944091797, + "learning_rate": 1.5152472527472527e-05, + "loss": 0.2797, + "step": 25369 + }, + { + "epoch": 69.6978021978022, + "grad_norm": 11.276856422424316, + "learning_rate": 1.51510989010989e-05, + "loss": 0.1085, + "step": 25370 + }, + { + "epoch": 69.70054945054945, + "grad_norm": 9.617154121398926, + "learning_rate": 1.5149725274725277e-05, + "loss": 0.1072, + "step": 25371 + }, + { + "epoch": 69.7032967032967, + "grad_norm": 12.747849464416504, + "learning_rate": 1.514835164835165e-05, + "loss": 0.1999, + "step": 25372 + }, + { + "epoch": 69.70604395604396, + "grad_norm": 7.320565700531006, + "learning_rate": 1.5146978021978022e-05, + "loss": 0.1597, + "step": 25373 + }, + { + "epoch": 69.70879120879121, + "grad_norm": 12.097064971923828, + "learning_rate": 1.5145604395604396e-05, + "loss": 0.1219, + "step": 25374 + }, + { + "epoch": 69.71153846153847, + "grad_norm": 11.772876739501953, + "learning_rate": 1.5144230769230769e-05, + "loss": 0.0858, + "step": 25375 + }, + { + "epoch": 69.71428571428571, + "grad_norm": 5.090108871459961, + "learning_rate": 1.5142857142857144e-05, + "loss": 0.1353, + "step": 25376 + }, + { + "epoch": 69.71703296703296, + "grad_norm": 18.403030395507812, + "learning_rate": 1.5141483516483517e-05, + "loss": 0.2928, + "step": 25377 + }, + { + "epoch": 69.71978021978022, + "grad_norm": 11.94698429107666, + "learning_rate": 1.5140109890109891e-05, + "loss": 0.1568, + "step": 25378 + }, + { + "epoch": 69.72252747252747, + "grad_norm": 17.140914916992188, + "learning_rate": 1.5138736263736264e-05, + "loss": 0.6064, + "step": 25379 + }, + { + "epoch": 69.72527472527473, + "grad_norm": 13.761829376220703, + "learning_rate": 1.5137362637362638e-05, + "loss": 0.2398, + "step": 25380 + }, + { + "epoch": 69.72802197802197, + "grad_norm": 15.065114974975586, + "learning_rate": 1.5135989010989013e-05, + "loss": 0.2223, + "step": 25381 + }, + { + "epoch": 69.73076923076923, + "grad_norm": 9.416183471679688, + "learning_rate": 1.5134615384615386e-05, + "loss": 0.1049, + "step": 25382 + }, + { + "epoch": 69.73351648351648, + "grad_norm": 17.250858306884766, + "learning_rate": 1.513324175824176e-05, + "loss": 0.3081, + "step": 25383 + }, + { + "epoch": 69.73626373626374, + "grad_norm": 6.164388656616211, + "learning_rate": 1.5131868131868131e-05, + "loss": 0.1378, + "step": 25384 + }, + { + "epoch": 69.73901098901099, + "grad_norm": 2.135803699493408, + "learning_rate": 1.5130494505494505e-05, + "loss": 0.0278, + "step": 25385 + }, + { + "epoch": 69.74175824175825, + "grad_norm": 12.854836463928223, + "learning_rate": 1.5129120879120882e-05, + "loss": 0.1844, + "step": 25386 + }, + { + "epoch": 69.74450549450549, + "grad_norm": 20.539569854736328, + "learning_rate": 1.5127747252747255e-05, + "loss": 0.7444, + "step": 25387 + }, + { + "epoch": 69.74725274725274, + "grad_norm": 9.19870662689209, + "learning_rate": 1.5126373626373627e-05, + "loss": 0.0833, + "step": 25388 + }, + { + "epoch": 69.75, + "grad_norm": 13.710230827331543, + "learning_rate": 1.5125e-05, + "loss": 0.1645, + "step": 25389 + }, + { + "epoch": 69.75274725274726, + "grad_norm": 12.298460960388184, + "learning_rate": 1.5123626373626373e-05, + "loss": 0.2695, + "step": 25390 + }, + { + "epoch": 69.75549450549451, + "grad_norm": 14.674322128295898, + "learning_rate": 1.5122252747252749e-05, + "loss": 0.28, + "step": 25391 + }, + { + "epoch": 69.75824175824175, + "grad_norm": 12.082290649414062, + "learning_rate": 1.5120879120879122e-05, + "loss": 0.245, + "step": 25392 + }, + { + "epoch": 69.76098901098901, + "grad_norm": 13.41418170928955, + "learning_rate": 1.5119505494505495e-05, + "loss": 0.4319, + "step": 25393 + }, + { + "epoch": 69.76373626373626, + "grad_norm": 31.97579002380371, + "learning_rate": 1.5118131868131869e-05, + "loss": 1.1293, + "step": 25394 + }, + { + "epoch": 69.76648351648352, + "grad_norm": 19.823503494262695, + "learning_rate": 1.5116758241758242e-05, + "loss": 0.7427, + "step": 25395 + }, + { + "epoch": 69.76923076923077, + "grad_norm": 4.366357326507568, + "learning_rate": 1.5115384615384617e-05, + "loss": 0.0844, + "step": 25396 + }, + { + "epoch": 69.77197802197803, + "grad_norm": 18.476539611816406, + "learning_rate": 1.511401098901099e-05, + "loss": 0.2197, + "step": 25397 + }, + { + "epoch": 69.77472527472527, + "grad_norm": 11.257704734802246, + "learning_rate": 1.5112637362637364e-05, + "loss": 0.141, + "step": 25398 + }, + { + "epoch": 69.77747252747253, + "grad_norm": 10.136574745178223, + "learning_rate": 1.5111263736263736e-05, + "loss": 0.3332, + "step": 25399 + }, + { + "epoch": 69.78021978021978, + "grad_norm": 10.76024341583252, + "learning_rate": 1.510989010989011e-05, + "loss": 0.0838, + "step": 25400 + }, + { + "epoch": 69.78296703296704, + "grad_norm": 16.25613784790039, + "learning_rate": 1.5108516483516486e-05, + "loss": 0.449, + "step": 25401 + }, + { + "epoch": 69.78571428571429, + "grad_norm": 8.51618766784668, + "learning_rate": 1.510714285714286e-05, + "loss": 0.0903, + "step": 25402 + }, + { + "epoch": 69.78846153846153, + "grad_norm": 11.817804336547852, + "learning_rate": 1.5105769230769231e-05, + "loss": 0.2253, + "step": 25403 + }, + { + "epoch": 69.79120879120879, + "grad_norm": 11.580544471740723, + "learning_rate": 1.5104395604395605e-05, + "loss": 0.2275, + "step": 25404 + }, + { + "epoch": 69.79395604395604, + "grad_norm": 14.642047882080078, + "learning_rate": 1.5103021978021978e-05, + "loss": 0.3059, + "step": 25405 + }, + { + "epoch": 69.7967032967033, + "grad_norm": 9.228995323181152, + "learning_rate": 1.5101648351648353e-05, + "loss": 0.2356, + "step": 25406 + }, + { + "epoch": 69.79945054945055, + "grad_norm": 1.5983593463897705, + "learning_rate": 1.5100274725274726e-05, + "loss": 0.0247, + "step": 25407 + }, + { + "epoch": 69.8021978021978, + "grad_norm": 12.10537338256836, + "learning_rate": 1.50989010989011e-05, + "loss": 0.2431, + "step": 25408 + }, + { + "epoch": 69.80494505494505, + "grad_norm": 11.539921760559082, + "learning_rate": 1.5097527472527473e-05, + "loss": 0.1502, + "step": 25409 + }, + { + "epoch": 69.8076923076923, + "grad_norm": 13.816045761108398, + "learning_rate": 1.5096153846153847e-05, + "loss": 0.1783, + "step": 25410 + }, + { + "epoch": 69.81043956043956, + "grad_norm": 9.698244094848633, + "learning_rate": 1.5094780219780222e-05, + "loss": 0.1564, + "step": 25411 + }, + { + "epoch": 69.81318681318682, + "grad_norm": 5.238050937652588, + "learning_rate": 1.5093406593406595e-05, + "loss": 0.0802, + "step": 25412 + }, + { + "epoch": 69.81593406593407, + "grad_norm": 11.996493339538574, + "learning_rate": 1.5092032967032969e-05, + "loss": 0.2695, + "step": 25413 + }, + { + "epoch": 69.81868131868131, + "grad_norm": 4.313386917114258, + "learning_rate": 1.509065934065934e-05, + "loss": 0.1, + "step": 25414 + }, + { + "epoch": 69.82142857142857, + "grad_norm": 21.418743133544922, + "learning_rate": 1.5089285714285714e-05, + "loss": 0.4244, + "step": 25415 + }, + { + "epoch": 69.82417582417582, + "grad_norm": 14.633380889892578, + "learning_rate": 1.508791208791209e-05, + "loss": 0.1463, + "step": 25416 + }, + { + "epoch": 69.82692307692308, + "grad_norm": 13.367860794067383, + "learning_rate": 1.5086538461538464e-05, + "loss": 0.29, + "step": 25417 + }, + { + "epoch": 69.82967032967034, + "grad_norm": 10.377485275268555, + "learning_rate": 1.5085164835164836e-05, + "loss": 0.1655, + "step": 25418 + }, + { + "epoch": 69.83241758241758, + "grad_norm": 10.879386901855469, + "learning_rate": 1.5083791208791209e-05, + "loss": 0.2615, + "step": 25419 + }, + { + "epoch": 69.83516483516483, + "grad_norm": 12.193716049194336, + "learning_rate": 1.5082417582417582e-05, + "loss": 0.1752, + "step": 25420 + }, + { + "epoch": 69.83791208791209, + "grad_norm": 25.84554100036621, + "learning_rate": 1.5081043956043958e-05, + "loss": 0.5009, + "step": 25421 + }, + { + "epoch": 69.84065934065934, + "grad_norm": 25.039718627929688, + "learning_rate": 1.5079670329670331e-05, + "loss": 0.4367, + "step": 25422 + }, + { + "epoch": 69.8434065934066, + "grad_norm": 12.312240600585938, + "learning_rate": 1.5078296703296704e-05, + "loss": 0.3255, + "step": 25423 + }, + { + "epoch": 69.84615384615384, + "grad_norm": 5.825946807861328, + "learning_rate": 1.5076923076923078e-05, + "loss": 0.1186, + "step": 25424 + }, + { + "epoch": 69.8489010989011, + "grad_norm": 7.04674768447876, + "learning_rate": 1.5075549450549451e-05, + "loss": 0.1474, + "step": 25425 + }, + { + "epoch": 69.85164835164835, + "grad_norm": 16.523174285888672, + "learning_rate": 1.5074175824175823e-05, + "loss": 0.2892, + "step": 25426 + }, + { + "epoch": 69.8543956043956, + "grad_norm": 22.134389877319336, + "learning_rate": 1.50728021978022e-05, + "loss": 0.3318, + "step": 25427 + }, + { + "epoch": 69.85714285714286, + "grad_norm": 5.554014205932617, + "learning_rate": 1.5071428571428573e-05, + "loss": 0.1098, + "step": 25428 + }, + { + "epoch": 69.85989010989012, + "grad_norm": 11.7496976852417, + "learning_rate": 1.5070054945054945e-05, + "loss": 0.2711, + "step": 25429 + }, + { + "epoch": 69.86263736263736, + "grad_norm": 7.804085731506348, + "learning_rate": 1.5068681318681318e-05, + "loss": 0.0927, + "step": 25430 + }, + { + "epoch": 69.86538461538461, + "grad_norm": 13.809698104858398, + "learning_rate": 1.5067307692307692e-05, + "loss": 0.3576, + "step": 25431 + }, + { + "epoch": 69.86813186813187, + "grad_norm": 14.390130043029785, + "learning_rate": 1.5065934065934067e-05, + "loss": 0.199, + "step": 25432 + }, + { + "epoch": 69.87087912087912, + "grad_norm": 9.276681900024414, + "learning_rate": 1.506456043956044e-05, + "loss": 0.1016, + "step": 25433 + }, + { + "epoch": 69.87362637362638, + "grad_norm": 15.352032661437988, + "learning_rate": 1.5063186813186814e-05, + "loss": 0.166, + "step": 25434 + }, + { + "epoch": 69.87637362637362, + "grad_norm": 6.050793170928955, + "learning_rate": 1.5061813186813187e-05, + "loss": 0.1011, + "step": 25435 + }, + { + "epoch": 69.87912087912088, + "grad_norm": 7.035337924957275, + "learning_rate": 1.506043956043956e-05, + "loss": 0.1604, + "step": 25436 + }, + { + "epoch": 69.88186813186813, + "grad_norm": 2.237499713897705, + "learning_rate": 1.5059065934065935e-05, + "loss": 0.0323, + "step": 25437 + }, + { + "epoch": 69.88461538461539, + "grad_norm": 8.666818618774414, + "learning_rate": 1.5057692307692309e-05, + "loss": 0.252, + "step": 25438 + }, + { + "epoch": 69.88736263736264, + "grad_norm": 11.784215927124023, + "learning_rate": 1.5056318681318682e-05, + "loss": 0.2182, + "step": 25439 + }, + { + "epoch": 69.89010989010988, + "grad_norm": 8.053223609924316, + "learning_rate": 1.5054945054945056e-05, + "loss": 0.0908, + "step": 25440 + }, + { + "epoch": 69.89285714285714, + "grad_norm": 10.995832443237305, + "learning_rate": 1.5053571428571427e-05, + "loss": 0.1051, + "step": 25441 + }, + { + "epoch": 69.8956043956044, + "grad_norm": 9.252070426940918, + "learning_rate": 1.5052197802197804e-05, + "loss": 0.1352, + "step": 25442 + }, + { + "epoch": 69.89835164835165, + "grad_norm": 18.685819625854492, + "learning_rate": 1.5050824175824178e-05, + "loss": 0.4806, + "step": 25443 + }, + { + "epoch": 69.9010989010989, + "grad_norm": 6.8470282554626465, + "learning_rate": 1.504945054945055e-05, + "loss": 0.1291, + "step": 25444 + }, + { + "epoch": 69.90384615384616, + "grad_norm": 6.725849151611328, + "learning_rate": 1.5048076923076923e-05, + "loss": 0.0614, + "step": 25445 + }, + { + "epoch": 69.9065934065934, + "grad_norm": 11.148255348205566, + "learning_rate": 1.5046703296703296e-05, + "loss": 0.1865, + "step": 25446 + }, + { + "epoch": 69.90934065934066, + "grad_norm": 23.07481575012207, + "learning_rate": 1.5045329670329671e-05, + "loss": 0.1734, + "step": 25447 + }, + { + "epoch": 69.91208791208791, + "grad_norm": 9.61015510559082, + "learning_rate": 1.5043956043956045e-05, + "loss": 0.2524, + "step": 25448 + }, + { + "epoch": 69.91483516483517, + "grad_norm": 9.053045272827148, + "learning_rate": 1.5042582417582418e-05, + "loss": 0.1354, + "step": 25449 + }, + { + "epoch": 69.91758241758242, + "grad_norm": 9.766533851623535, + "learning_rate": 1.5041208791208791e-05, + "loss": 0.1961, + "step": 25450 + }, + { + "epoch": 69.92032967032966, + "grad_norm": 6.922056674957275, + "learning_rate": 1.5039835164835165e-05, + "loss": 0.0955, + "step": 25451 + }, + { + "epoch": 69.92307692307692, + "grad_norm": 13.305072784423828, + "learning_rate": 1.503846153846154e-05, + "loss": 0.2657, + "step": 25452 + }, + { + "epoch": 69.92582417582418, + "grad_norm": 11.712510108947754, + "learning_rate": 1.5037087912087913e-05, + "loss": 0.2399, + "step": 25453 + }, + { + "epoch": 69.92857142857143, + "grad_norm": 8.916828155517578, + "learning_rate": 1.5035714285714287e-05, + "loss": 0.1572, + "step": 25454 + }, + { + "epoch": 69.93131868131869, + "grad_norm": 10.59996223449707, + "learning_rate": 1.503434065934066e-05, + "loss": 0.1957, + "step": 25455 + }, + { + "epoch": 69.93406593406593, + "grad_norm": 10.290623664855957, + "learning_rate": 1.5032967032967032e-05, + "loss": 0.1468, + "step": 25456 + }, + { + "epoch": 69.93681318681318, + "grad_norm": 14.943807601928711, + "learning_rate": 1.5031593406593409e-05, + "loss": 0.2634, + "step": 25457 + }, + { + "epoch": 69.93956043956044, + "grad_norm": 12.002349853515625, + "learning_rate": 1.5030219780219782e-05, + "loss": 0.1714, + "step": 25458 + }, + { + "epoch": 69.9423076923077, + "grad_norm": 3.594270944595337, + "learning_rate": 1.5028846153846154e-05, + "loss": 0.047, + "step": 25459 + }, + { + "epoch": 69.94505494505495, + "grad_norm": 26.431629180908203, + "learning_rate": 1.5027472527472527e-05, + "loss": 0.5452, + "step": 25460 + }, + { + "epoch": 69.9478021978022, + "grad_norm": 8.34628963470459, + "learning_rate": 1.50260989010989e-05, + "loss": 0.0924, + "step": 25461 + }, + { + "epoch": 69.95054945054945, + "grad_norm": 11.720064163208008, + "learning_rate": 1.5024725274725276e-05, + "loss": 0.0895, + "step": 25462 + }, + { + "epoch": 69.9532967032967, + "grad_norm": 2.516853094100952, + "learning_rate": 1.5023351648351649e-05, + "loss": 0.0259, + "step": 25463 + }, + { + "epoch": 69.95604395604396, + "grad_norm": 15.139749526977539, + "learning_rate": 1.5021978021978022e-05, + "loss": 0.2264, + "step": 25464 + }, + { + "epoch": 69.95879120879121, + "grad_norm": 8.746621131896973, + "learning_rate": 1.5020604395604396e-05, + "loss": 0.1294, + "step": 25465 + }, + { + "epoch": 69.96153846153847, + "grad_norm": 6.9245524406433105, + "learning_rate": 1.501923076923077e-05, + "loss": 0.1069, + "step": 25466 + }, + { + "epoch": 69.96428571428571, + "grad_norm": 17.833528518676758, + "learning_rate": 1.5017857142857144e-05, + "loss": 0.3329, + "step": 25467 + }, + { + "epoch": 69.96703296703296, + "grad_norm": 6.761590480804443, + "learning_rate": 1.5016483516483518e-05, + "loss": 0.0714, + "step": 25468 + }, + { + "epoch": 69.96978021978022, + "grad_norm": 13.111509323120117, + "learning_rate": 1.5015109890109891e-05, + "loss": 0.3598, + "step": 25469 + }, + { + "epoch": 69.97252747252747, + "grad_norm": 15.555488586425781, + "learning_rate": 1.5013736263736265e-05, + "loss": 0.4399, + "step": 25470 + }, + { + "epoch": 69.97527472527473, + "grad_norm": 3.8718535900115967, + "learning_rate": 1.5012362637362636e-05, + "loss": 0.0421, + "step": 25471 + }, + { + "epoch": 69.97802197802197, + "grad_norm": 9.690410614013672, + "learning_rate": 1.5010989010989013e-05, + "loss": 0.3795, + "step": 25472 + }, + { + "epoch": 69.98076923076923, + "grad_norm": 5.0225982666015625, + "learning_rate": 1.5009615384615387e-05, + "loss": 0.0442, + "step": 25473 + }, + { + "epoch": 69.98351648351648, + "grad_norm": 15.428821563720703, + "learning_rate": 1.5008241758241758e-05, + "loss": 0.2472, + "step": 25474 + }, + { + "epoch": 69.98626373626374, + "grad_norm": 9.576910972595215, + "learning_rate": 1.5006868131868132e-05, + "loss": 0.1275, + "step": 25475 + }, + { + "epoch": 69.98901098901099, + "grad_norm": 9.447715759277344, + "learning_rate": 1.5005494505494505e-05, + "loss": 0.1208, + "step": 25476 + }, + { + "epoch": 69.99175824175825, + "grad_norm": 10.905956268310547, + "learning_rate": 1.500412087912088e-05, + "loss": 0.1102, + "step": 25477 + }, + { + "epoch": 69.99450549450549, + "grad_norm": 6.398440361022949, + "learning_rate": 1.5002747252747254e-05, + "loss": 0.0899, + "step": 25478 + }, + { + "epoch": 69.99725274725274, + "grad_norm": 20.080799102783203, + "learning_rate": 1.5001373626373627e-05, + "loss": 0.3531, + "step": 25479 + }, + { + "epoch": 70.0, + "grad_norm": 82.03520965576172, + "learning_rate": 1.5e-05, + "loss": 1.5741, + "step": 25480 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.5826446280991735, + "eval_f1": 0.5316815217969385, + "eval_f1_DuraRiadoRio_64x64": 0.27380952380952384, + "eval_f1_Mole_64x64": 0.19875776397515527, + "eval_f1_Quebrado_64x64": 0.8481012658227848, + "eval_f1_RiadoRio_64x64": 0.5038167938931297, + "eval_f1_RioFechado_64x64": 0.833922261484099, + "eval_loss": 4.0807061195373535, + "eval_precision": 0.7740595427329924, + "eval_precision_DuraRiadoRio_64x64": 0.9583333333333334, + "eval_precision_Mole_64x64": 0.9411764705882353, + "eval_precision_Quebrado_64x64": 0.7790697674418605, + "eval_precision_RiadoRio_64x64": 0.3548387096774194, + "eval_precision_RioFechado_64x64": 0.8368794326241135, + "eval_recall": 0.5801591714026852, + "eval_recall_DuraRiadoRio_64x64": 0.1597222222222222, + "eval_recall_Mole_64x64": 0.1111111111111111, + "eval_recall_Quebrado_64x64": 0.9305555555555556, + "eval_recall_RiadoRio_64x64": 0.868421052631579, + "eval_recall_RioFechado_64x64": 0.8309859154929577, + "eval_runtime": 1.8054, + "eval_samples_per_second": 402.127, + "eval_steps_per_second": 25.479, + "step": 25480 + }, + { + "epoch": 70.00274725274726, + "grad_norm": 20.305063247680664, + "learning_rate": 1.4998626373626374e-05, + "loss": 0.6673, + "step": 25481 + }, + { + "epoch": 70.00549450549451, + "grad_norm": 10.158546447753906, + "learning_rate": 1.4997252747252749e-05, + "loss": 0.2101, + "step": 25482 + }, + { + "epoch": 70.00824175824175, + "grad_norm": 20.437326431274414, + "learning_rate": 1.4995879120879122e-05, + "loss": 0.5578, + "step": 25483 + }, + { + "epoch": 70.01098901098901, + "grad_norm": 14.232653617858887, + "learning_rate": 1.4994505494505496e-05, + "loss": 0.2161, + "step": 25484 + }, + { + "epoch": 70.01373626373626, + "grad_norm": 7.685301780700684, + "learning_rate": 1.4993131868131869e-05, + "loss": 0.1285, + "step": 25485 + }, + { + "epoch": 70.01648351648352, + "grad_norm": 17.029279708862305, + "learning_rate": 1.499175824175824e-05, + "loss": 0.2226, + "step": 25486 + }, + { + "epoch": 70.01923076923077, + "grad_norm": 7.914189338684082, + "learning_rate": 1.4990384615384618e-05, + "loss": 0.1173, + "step": 25487 + }, + { + "epoch": 70.02197802197803, + "grad_norm": 5.1520843505859375, + "learning_rate": 1.4989010989010991e-05, + "loss": 0.0757, + "step": 25488 + }, + { + "epoch": 70.02472527472527, + "grad_norm": 11.426606178283691, + "learning_rate": 1.4987637362637363e-05, + "loss": 0.1285, + "step": 25489 + }, + { + "epoch": 70.02747252747253, + "grad_norm": 3.8999743461608887, + "learning_rate": 1.4986263736263736e-05, + "loss": 0.0645, + "step": 25490 + }, + { + "epoch": 70.03021978021978, + "grad_norm": 18.40715789794922, + "learning_rate": 1.498489010989011e-05, + "loss": 0.4661, + "step": 25491 + }, + { + "epoch": 70.03296703296704, + "grad_norm": 10.600918769836426, + "learning_rate": 1.4983516483516485e-05, + "loss": 0.1636, + "step": 25492 + }, + { + "epoch": 70.03571428571429, + "grad_norm": 7.826562881469727, + "learning_rate": 1.4982142857142858e-05, + "loss": 0.1091, + "step": 25493 + }, + { + "epoch": 70.03846153846153, + "grad_norm": 10.666518211364746, + "learning_rate": 1.4980769230769231e-05, + "loss": 0.1069, + "step": 25494 + }, + { + "epoch": 70.04120879120879, + "grad_norm": 10.262396812438965, + "learning_rate": 1.4979395604395605e-05, + "loss": 0.2356, + "step": 25495 + }, + { + "epoch": 70.04395604395604, + "grad_norm": 9.283329963684082, + "learning_rate": 1.4978021978021978e-05, + "loss": 0.1167, + "step": 25496 + }, + { + "epoch": 70.0467032967033, + "grad_norm": 15.127007484436035, + "learning_rate": 1.4976648351648353e-05, + "loss": 0.4171, + "step": 25497 + }, + { + "epoch": 70.04945054945055, + "grad_norm": 17.328508377075195, + "learning_rate": 1.4975274725274727e-05, + "loss": 0.248, + "step": 25498 + }, + { + "epoch": 70.0521978021978, + "grad_norm": 14.728544235229492, + "learning_rate": 1.49739010989011e-05, + "loss": 0.2826, + "step": 25499 + }, + { + "epoch": 70.05494505494505, + "grad_norm": 11.39664363861084, + "learning_rate": 1.4972527472527474e-05, + "loss": 0.2377, + "step": 25500 + }, + { + "epoch": 70.0576923076923, + "grad_norm": 6.367802619934082, + "learning_rate": 1.4971153846153845e-05, + "loss": 0.0542, + "step": 25501 + }, + { + "epoch": 70.06043956043956, + "grad_norm": 20.650239944458008, + "learning_rate": 1.4969780219780222e-05, + "loss": 0.6347, + "step": 25502 + }, + { + "epoch": 70.06318681318682, + "grad_norm": 6.575701713562012, + "learning_rate": 1.4968406593406596e-05, + "loss": 0.0998, + "step": 25503 + }, + { + "epoch": 70.06593406593407, + "grad_norm": 19.826580047607422, + "learning_rate": 1.4967032967032967e-05, + "loss": 0.5664, + "step": 25504 + }, + { + "epoch": 70.06868131868131, + "grad_norm": 4.132758617401123, + "learning_rate": 1.496565934065934e-05, + "loss": 0.0589, + "step": 25505 + }, + { + "epoch": 70.07142857142857, + "grad_norm": 9.87574291229248, + "learning_rate": 1.4964285714285714e-05, + "loss": 0.184, + "step": 25506 + }, + { + "epoch": 70.07417582417582, + "grad_norm": 12.622384071350098, + "learning_rate": 1.4962912087912089e-05, + "loss": 0.2313, + "step": 25507 + }, + { + "epoch": 70.07692307692308, + "grad_norm": 15.303569793701172, + "learning_rate": 1.4961538461538463e-05, + "loss": 0.2729, + "step": 25508 + }, + { + "epoch": 70.07967032967034, + "grad_norm": 7.477684497833252, + "learning_rate": 1.4960164835164836e-05, + "loss": 0.1124, + "step": 25509 + }, + { + "epoch": 70.08241758241758, + "grad_norm": 12.78520679473877, + "learning_rate": 1.495879120879121e-05, + "loss": 0.2251, + "step": 25510 + }, + { + "epoch": 70.08516483516483, + "grad_norm": 14.265064239501953, + "learning_rate": 1.4957417582417583e-05, + "loss": 0.2785, + "step": 25511 + }, + { + "epoch": 70.08791208791209, + "grad_norm": 16.41580581665039, + "learning_rate": 1.4956043956043958e-05, + "loss": 0.559, + "step": 25512 + }, + { + "epoch": 70.09065934065934, + "grad_norm": 12.493535041809082, + "learning_rate": 1.4954670329670331e-05, + "loss": 0.1967, + "step": 25513 + }, + { + "epoch": 70.0934065934066, + "grad_norm": 15.569284439086914, + "learning_rate": 1.4953296703296705e-05, + "loss": 0.2404, + "step": 25514 + }, + { + "epoch": 70.09615384615384, + "grad_norm": 10.018037796020508, + "learning_rate": 1.4951923076923078e-05, + "loss": 0.1402, + "step": 25515 + }, + { + "epoch": 70.0989010989011, + "grad_norm": 0.8418948650360107, + "learning_rate": 1.495054945054945e-05, + "loss": 0.0131, + "step": 25516 + }, + { + "epoch": 70.10164835164835, + "grad_norm": 6.5018792152404785, + "learning_rate": 1.4949175824175827e-05, + "loss": 0.1096, + "step": 25517 + }, + { + "epoch": 70.1043956043956, + "grad_norm": 18.24435043334961, + "learning_rate": 1.49478021978022e-05, + "loss": 0.4407, + "step": 25518 + }, + { + "epoch": 70.10714285714286, + "grad_norm": 5.496464729309082, + "learning_rate": 1.4946428571428572e-05, + "loss": 0.1178, + "step": 25519 + }, + { + "epoch": 70.10989010989012, + "grad_norm": 9.487055778503418, + "learning_rate": 1.4945054945054945e-05, + "loss": 0.1053, + "step": 25520 + }, + { + "epoch": 70.11263736263736, + "grad_norm": 1.9775136709213257, + "learning_rate": 1.4943681318681319e-05, + "loss": 0.0335, + "step": 25521 + }, + { + "epoch": 70.11538461538461, + "grad_norm": 12.850192070007324, + "learning_rate": 1.4942307692307694e-05, + "loss": 0.1549, + "step": 25522 + }, + { + "epoch": 70.11813186813187, + "grad_norm": 9.304197311401367, + "learning_rate": 1.4940934065934067e-05, + "loss": 0.3582, + "step": 25523 + }, + { + "epoch": 70.12087912087912, + "grad_norm": 18.788240432739258, + "learning_rate": 1.493956043956044e-05, + "loss": 0.3177, + "step": 25524 + }, + { + "epoch": 70.12362637362638, + "grad_norm": 3.7762339115142822, + "learning_rate": 1.4938186813186814e-05, + "loss": 0.0483, + "step": 25525 + }, + { + "epoch": 70.12637362637362, + "grad_norm": 7.3940510749816895, + "learning_rate": 1.4936813186813187e-05, + "loss": 0.1376, + "step": 25526 + }, + { + "epoch": 70.12912087912088, + "grad_norm": 9.160079956054688, + "learning_rate": 1.4935439560439562e-05, + "loss": 0.104, + "step": 25527 + }, + { + "epoch": 70.13186813186813, + "grad_norm": 12.631945610046387, + "learning_rate": 1.4934065934065936e-05, + "loss": 0.1928, + "step": 25528 + }, + { + "epoch": 70.13461538461539, + "grad_norm": 3.106351613998413, + "learning_rate": 1.493269230769231e-05, + "loss": 0.0407, + "step": 25529 + }, + { + "epoch": 70.13736263736264, + "grad_norm": 7.612825393676758, + "learning_rate": 1.4931318681318681e-05, + "loss": 0.129, + "step": 25530 + }, + { + "epoch": 70.14010989010988, + "grad_norm": 16.950044631958008, + "learning_rate": 1.4929945054945054e-05, + "loss": 0.2457, + "step": 25531 + }, + { + "epoch": 70.14285714285714, + "grad_norm": 7.156064033508301, + "learning_rate": 1.4928571428571431e-05, + "loss": 0.1044, + "step": 25532 + }, + { + "epoch": 70.1456043956044, + "grad_norm": 3.9178502559661865, + "learning_rate": 1.4927197802197804e-05, + "loss": 0.0485, + "step": 25533 + }, + { + "epoch": 70.14835164835165, + "grad_norm": 4.820893287658691, + "learning_rate": 1.4925824175824176e-05, + "loss": 0.0708, + "step": 25534 + }, + { + "epoch": 70.1510989010989, + "grad_norm": 11.450055122375488, + "learning_rate": 1.492445054945055e-05, + "loss": 0.1618, + "step": 25535 + }, + { + "epoch": 70.15384615384616, + "grad_norm": 22.23038673400879, + "learning_rate": 1.4923076923076923e-05, + "loss": 0.4495, + "step": 25536 + }, + { + "epoch": 70.1565934065934, + "grad_norm": 30.41498565673828, + "learning_rate": 1.4921703296703296e-05, + "loss": 0.3626, + "step": 25537 + }, + { + "epoch": 70.15934065934066, + "grad_norm": 11.979660987854004, + "learning_rate": 1.4920329670329672e-05, + "loss": 0.1577, + "step": 25538 + }, + { + "epoch": 70.16208791208791, + "grad_norm": 13.887977600097656, + "learning_rate": 1.4918956043956045e-05, + "loss": 0.1826, + "step": 25539 + }, + { + "epoch": 70.16483516483517, + "grad_norm": 17.115966796875, + "learning_rate": 1.4917582417582418e-05, + "loss": 0.4326, + "step": 25540 + }, + { + "epoch": 70.16758241758242, + "grad_norm": 8.011049270629883, + "learning_rate": 1.4916208791208792e-05, + "loss": 0.143, + "step": 25541 + }, + { + "epoch": 70.17032967032966, + "grad_norm": 12.256691932678223, + "learning_rate": 1.4914835164835163e-05, + "loss": 0.2829, + "step": 25542 + }, + { + "epoch": 70.17307692307692, + "grad_norm": 21.46158218383789, + "learning_rate": 1.491346153846154e-05, + "loss": 0.6261, + "step": 25543 + }, + { + "epoch": 70.17582417582418, + "grad_norm": 11.675475120544434, + "learning_rate": 1.4912087912087914e-05, + "loss": 0.225, + "step": 25544 + }, + { + "epoch": 70.17857142857143, + "grad_norm": 3.11639404296875, + "learning_rate": 1.4910714285714285e-05, + "loss": 0.0586, + "step": 25545 + }, + { + "epoch": 70.18131868131869, + "grad_norm": 10.918252944946289, + "learning_rate": 1.4909340659340659e-05, + "loss": 0.1483, + "step": 25546 + }, + { + "epoch": 70.18406593406593, + "grad_norm": 9.020679473876953, + "learning_rate": 1.4907967032967032e-05, + "loss": 0.1513, + "step": 25547 + }, + { + "epoch": 70.18681318681318, + "grad_norm": 4.052426815032959, + "learning_rate": 1.4906593406593409e-05, + "loss": 0.0775, + "step": 25548 + }, + { + "epoch": 70.18956043956044, + "grad_norm": 4.829541206359863, + "learning_rate": 1.490521978021978e-05, + "loss": 0.0706, + "step": 25549 + }, + { + "epoch": 70.1923076923077, + "grad_norm": 9.963586807250977, + "learning_rate": 1.4903846153846154e-05, + "loss": 0.1346, + "step": 25550 + }, + { + "epoch": 70.19505494505495, + "grad_norm": 17.189531326293945, + "learning_rate": 1.4902472527472528e-05, + "loss": 0.5652, + "step": 25551 + }, + { + "epoch": 70.1978021978022, + "grad_norm": 2.3817570209503174, + "learning_rate": 1.4901098901098901e-05, + "loss": 0.0222, + "step": 25552 + }, + { + "epoch": 70.20054945054945, + "grad_norm": 7.821382522583008, + "learning_rate": 1.4899725274725276e-05, + "loss": 0.2324, + "step": 25553 + }, + { + "epoch": 70.2032967032967, + "grad_norm": 11.651317596435547, + "learning_rate": 1.489835164835165e-05, + "loss": 0.1876, + "step": 25554 + }, + { + "epoch": 70.20604395604396, + "grad_norm": 13.969157218933105, + "learning_rate": 1.4896978021978023e-05, + "loss": 0.2299, + "step": 25555 + }, + { + "epoch": 70.20879120879121, + "grad_norm": 11.68218994140625, + "learning_rate": 1.4895604395604396e-05, + "loss": 0.4262, + "step": 25556 + }, + { + "epoch": 70.21153846153847, + "grad_norm": 2.9678544998168945, + "learning_rate": 1.4894230769230768e-05, + "loss": 0.0509, + "step": 25557 + }, + { + "epoch": 70.21428571428571, + "grad_norm": 4.472078800201416, + "learning_rate": 1.4892857142857145e-05, + "loss": 0.043, + "step": 25558 + }, + { + "epoch": 70.21703296703296, + "grad_norm": 17.65179443359375, + "learning_rate": 1.4891483516483518e-05, + "loss": 0.4627, + "step": 25559 + }, + { + "epoch": 70.21978021978022, + "grad_norm": 1.0228776931762695, + "learning_rate": 1.489010989010989e-05, + "loss": 0.0098, + "step": 25560 + }, + { + "epoch": 70.22252747252747, + "grad_norm": 19.277862548828125, + "learning_rate": 1.4888736263736263e-05, + "loss": 0.3186, + "step": 25561 + }, + { + "epoch": 70.22527472527473, + "grad_norm": 18.621532440185547, + "learning_rate": 1.4887362637362637e-05, + "loss": 0.3869, + "step": 25562 + }, + { + "epoch": 70.22802197802197, + "grad_norm": 8.239472389221191, + "learning_rate": 1.4885989010989013e-05, + "loss": 0.163, + "step": 25563 + }, + { + "epoch": 70.23076923076923, + "grad_norm": 11.163712501525879, + "learning_rate": 1.4884615384615385e-05, + "loss": 0.3782, + "step": 25564 + }, + { + "epoch": 70.23351648351648, + "grad_norm": 4.244811058044434, + "learning_rate": 1.4883241758241759e-05, + "loss": 0.0684, + "step": 25565 + }, + { + "epoch": 70.23626373626374, + "grad_norm": 8.839482307434082, + "learning_rate": 1.4881868131868132e-05, + "loss": 0.1433, + "step": 25566 + }, + { + "epoch": 70.23901098901099, + "grad_norm": 4.433534622192383, + "learning_rate": 1.4880494505494505e-05, + "loss": 0.0829, + "step": 25567 + }, + { + "epoch": 70.24175824175825, + "grad_norm": 8.111213684082031, + "learning_rate": 1.487912087912088e-05, + "loss": 0.0975, + "step": 25568 + }, + { + "epoch": 70.24450549450549, + "grad_norm": 25.114103317260742, + "learning_rate": 1.4877747252747254e-05, + "loss": 0.4751, + "step": 25569 + }, + { + "epoch": 70.24725274725274, + "grad_norm": 2.1014981269836426, + "learning_rate": 1.4876373626373627e-05, + "loss": 0.046, + "step": 25570 + }, + { + "epoch": 70.25, + "grad_norm": 14.13313102722168, + "learning_rate": 1.4875e-05, + "loss": 0.1967, + "step": 25571 + }, + { + "epoch": 70.25274725274726, + "grad_norm": 9.778238296508789, + "learning_rate": 1.4873626373626372e-05, + "loss": 0.2647, + "step": 25572 + }, + { + "epoch": 70.25549450549451, + "grad_norm": 11.920586585998535, + "learning_rate": 1.487225274725275e-05, + "loss": 0.145, + "step": 25573 + }, + { + "epoch": 70.25824175824175, + "grad_norm": 7.947422981262207, + "learning_rate": 1.4870879120879123e-05, + "loss": 0.116, + "step": 25574 + }, + { + "epoch": 70.26098901098901, + "grad_norm": 2.732649326324463, + "learning_rate": 1.4869505494505494e-05, + "loss": 0.0398, + "step": 25575 + }, + { + "epoch": 70.26373626373626, + "grad_norm": 6.586058616638184, + "learning_rate": 1.4868131868131868e-05, + "loss": 0.188, + "step": 25576 + }, + { + "epoch": 70.26648351648352, + "grad_norm": 13.098657608032227, + "learning_rate": 1.4866758241758241e-05, + "loss": 0.2715, + "step": 25577 + }, + { + "epoch": 70.26923076923077, + "grad_norm": 10.70850944519043, + "learning_rate": 1.4865384615384618e-05, + "loss": 0.4087, + "step": 25578 + }, + { + "epoch": 70.27197802197803, + "grad_norm": 14.064148902893066, + "learning_rate": 1.486401098901099e-05, + "loss": 0.5102, + "step": 25579 + }, + { + "epoch": 70.27472527472527, + "grad_norm": 13.280497550964355, + "learning_rate": 1.4862637362637363e-05, + "loss": 0.1508, + "step": 25580 + }, + { + "epoch": 70.27747252747253, + "grad_norm": 22.31624984741211, + "learning_rate": 1.4861263736263736e-05, + "loss": 0.4071, + "step": 25581 + }, + { + "epoch": 70.28021978021978, + "grad_norm": 5.566316604614258, + "learning_rate": 1.485989010989011e-05, + "loss": 0.0972, + "step": 25582 + }, + { + "epoch": 70.28296703296704, + "grad_norm": 10.513282775878906, + "learning_rate": 1.4858516483516485e-05, + "loss": 0.2299, + "step": 25583 + }, + { + "epoch": 70.28571428571429, + "grad_norm": 19.4814510345459, + "learning_rate": 1.4857142857142858e-05, + "loss": 0.5105, + "step": 25584 + }, + { + "epoch": 70.28846153846153, + "grad_norm": 12.238991737365723, + "learning_rate": 1.4855769230769232e-05, + "loss": 0.1447, + "step": 25585 + }, + { + "epoch": 70.29120879120879, + "grad_norm": 26.538606643676758, + "learning_rate": 1.4854395604395605e-05, + "loss": 0.614, + "step": 25586 + }, + { + "epoch": 70.29395604395604, + "grad_norm": 17.94916534423828, + "learning_rate": 1.4853021978021977e-05, + "loss": 0.4064, + "step": 25587 + }, + { + "epoch": 70.2967032967033, + "grad_norm": 12.860703468322754, + "learning_rate": 1.4851648351648354e-05, + "loss": 0.236, + "step": 25588 + }, + { + "epoch": 70.29945054945055, + "grad_norm": 24.696474075317383, + "learning_rate": 1.4850274725274727e-05, + "loss": 0.6076, + "step": 25589 + }, + { + "epoch": 70.3021978021978, + "grad_norm": 7.010581016540527, + "learning_rate": 1.4848901098901099e-05, + "loss": 0.1134, + "step": 25590 + }, + { + "epoch": 70.30494505494505, + "grad_norm": 9.011509895324707, + "learning_rate": 1.4847527472527472e-05, + "loss": 0.1846, + "step": 25591 + }, + { + "epoch": 70.3076923076923, + "grad_norm": 3.555912971496582, + "learning_rate": 1.4846153846153846e-05, + "loss": 0.0452, + "step": 25592 + }, + { + "epoch": 70.31043956043956, + "grad_norm": 14.296357154846191, + "learning_rate": 1.484478021978022e-05, + "loss": 0.2639, + "step": 25593 + }, + { + "epoch": 70.31318681318682, + "grad_norm": 17.648853302001953, + "learning_rate": 1.4843406593406594e-05, + "loss": 0.2957, + "step": 25594 + }, + { + "epoch": 70.31593406593407, + "grad_norm": 2.1107888221740723, + "learning_rate": 1.4842032967032968e-05, + "loss": 0.0222, + "step": 25595 + }, + { + "epoch": 70.31868131868131, + "grad_norm": 9.145669937133789, + "learning_rate": 1.4840659340659341e-05, + "loss": 0.2004, + "step": 25596 + }, + { + "epoch": 70.32142857142857, + "grad_norm": 2.974119186401367, + "learning_rate": 1.4839285714285714e-05, + "loss": 0.0276, + "step": 25597 + }, + { + "epoch": 70.32417582417582, + "grad_norm": 19.265975952148438, + "learning_rate": 1.483791208791209e-05, + "loss": 0.3051, + "step": 25598 + }, + { + "epoch": 70.32692307692308, + "grad_norm": 9.432746887207031, + "learning_rate": 1.4836538461538463e-05, + "loss": 0.3235, + "step": 25599 + }, + { + "epoch": 70.32967032967034, + "grad_norm": 12.921079635620117, + "learning_rate": 1.4835164835164836e-05, + "loss": 0.183, + "step": 25600 + }, + { + "epoch": 70.33241758241758, + "grad_norm": 6.435696601867676, + "learning_rate": 1.483379120879121e-05, + "loss": 0.0815, + "step": 25601 + }, + { + "epoch": 70.33516483516483, + "grad_norm": 12.640914916992188, + "learning_rate": 1.4832417582417581e-05, + "loss": 0.2134, + "step": 25602 + }, + { + "epoch": 70.33791208791209, + "grad_norm": 8.109391212463379, + "learning_rate": 1.4831043956043958e-05, + "loss": 0.2438, + "step": 25603 + }, + { + "epoch": 70.34065934065934, + "grad_norm": 14.598013877868652, + "learning_rate": 1.4829670329670332e-05, + "loss": 0.446, + "step": 25604 + }, + { + "epoch": 70.3434065934066, + "grad_norm": 12.06512451171875, + "learning_rate": 1.4828296703296703e-05, + "loss": 0.2879, + "step": 25605 + }, + { + "epoch": 70.34615384615384, + "grad_norm": 12.781218528747559, + "learning_rate": 1.4826923076923077e-05, + "loss": 0.1311, + "step": 25606 + }, + { + "epoch": 70.3489010989011, + "grad_norm": 4.384880065917969, + "learning_rate": 1.482554945054945e-05, + "loss": 0.0779, + "step": 25607 + }, + { + "epoch": 70.35164835164835, + "grad_norm": 9.199814796447754, + "learning_rate": 1.4824175824175825e-05, + "loss": 0.2858, + "step": 25608 + }, + { + "epoch": 70.3543956043956, + "grad_norm": 13.918079376220703, + "learning_rate": 1.4822802197802199e-05, + "loss": 0.436, + "step": 25609 + }, + { + "epoch": 70.35714285714286, + "grad_norm": 16.309099197387695, + "learning_rate": 1.4821428571428572e-05, + "loss": 0.5904, + "step": 25610 + }, + { + "epoch": 70.35989010989012, + "grad_norm": 8.74023723602295, + "learning_rate": 1.4820054945054945e-05, + "loss": 0.1045, + "step": 25611 + }, + { + "epoch": 70.36263736263736, + "grad_norm": 7.708005905151367, + "learning_rate": 1.4818681318681319e-05, + "loss": 0.1092, + "step": 25612 + }, + { + "epoch": 70.36538461538461, + "grad_norm": 8.425786972045898, + "learning_rate": 1.4817307692307694e-05, + "loss": 0.193, + "step": 25613 + }, + { + "epoch": 70.36813186813187, + "grad_norm": 11.907248497009277, + "learning_rate": 1.4815934065934067e-05, + "loss": 0.2315, + "step": 25614 + }, + { + "epoch": 70.37087912087912, + "grad_norm": 8.196855545043945, + "learning_rate": 1.481456043956044e-05, + "loss": 0.1131, + "step": 25615 + }, + { + "epoch": 70.37362637362638, + "grad_norm": 10.607873916625977, + "learning_rate": 1.4813186813186814e-05, + "loss": 0.1909, + "step": 25616 + }, + { + "epoch": 70.37637362637362, + "grad_norm": 2.3825020790100098, + "learning_rate": 1.4811813186813186e-05, + "loss": 0.02, + "step": 25617 + }, + { + "epoch": 70.37912087912088, + "grad_norm": 13.656895637512207, + "learning_rate": 1.4810439560439563e-05, + "loss": 0.3555, + "step": 25618 + }, + { + "epoch": 70.38186813186813, + "grad_norm": 11.795204162597656, + "learning_rate": 1.4809065934065936e-05, + "loss": 0.2715, + "step": 25619 + }, + { + "epoch": 70.38461538461539, + "grad_norm": 11.442008018493652, + "learning_rate": 1.4807692307692308e-05, + "loss": 0.2612, + "step": 25620 + }, + { + "epoch": 70.38736263736264, + "grad_norm": 23.132156372070312, + "learning_rate": 1.4806318681318681e-05, + "loss": 0.5169, + "step": 25621 + }, + { + "epoch": 70.39010989010988, + "grad_norm": 20.166664123535156, + "learning_rate": 1.4804945054945055e-05, + "loss": 0.7611, + "step": 25622 + }, + { + "epoch": 70.39285714285714, + "grad_norm": 6.030513286590576, + "learning_rate": 1.480357142857143e-05, + "loss": 0.0852, + "step": 25623 + }, + { + "epoch": 70.3956043956044, + "grad_norm": 4.7895097732543945, + "learning_rate": 1.4802197802197803e-05, + "loss": 0.1071, + "step": 25624 + }, + { + "epoch": 70.39835164835165, + "grad_norm": 9.415863037109375, + "learning_rate": 1.4800824175824177e-05, + "loss": 0.1775, + "step": 25625 + }, + { + "epoch": 70.4010989010989, + "grad_norm": 9.095451354980469, + "learning_rate": 1.479945054945055e-05, + "loss": 0.1367, + "step": 25626 + }, + { + "epoch": 70.40384615384616, + "grad_norm": 10.095773696899414, + "learning_rate": 1.4798076923076923e-05, + "loss": 0.1471, + "step": 25627 + }, + { + "epoch": 70.4065934065934, + "grad_norm": 4.7009453773498535, + "learning_rate": 1.4796703296703298e-05, + "loss": 0.1122, + "step": 25628 + }, + { + "epoch": 70.40934065934066, + "grad_norm": 11.165058135986328, + "learning_rate": 1.4795329670329672e-05, + "loss": 0.2587, + "step": 25629 + }, + { + "epoch": 70.41208791208791, + "grad_norm": 6.760613918304443, + "learning_rate": 1.4793956043956045e-05, + "loss": 0.0752, + "step": 25630 + }, + { + "epoch": 70.41483516483517, + "grad_norm": 11.365792274475098, + "learning_rate": 1.4792582417582419e-05, + "loss": 0.1061, + "step": 25631 + }, + { + "epoch": 70.41758241758242, + "grad_norm": 26.917293548583984, + "learning_rate": 1.479120879120879e-05, + "loss": 0.5057, + "step": 25632 + }, + { + "epoch": 70.42032967032966, + "grad_norm": 1.048510193824768, + "learning_rate": 1.4789835164835167e-05, + "loss": 0.0172, + "step": 25633 + }, + { + "epoch": 70.42307692307692, + "grad_norm": 2.150634765625, + "learning_rate": 1.478846153846154e-05, + "loss": 0.0372, + "step": 25634 + }, + { + "epoch": 70.42582417582418, + "grad_norm": 12.471132278442383, + "learning_rate": 1.4787087912087912e-05, + "loss": 0.2131, + "step": 25635 + }, + { + "epoch": 70.42857142857143, + "grad_norm": 13.542877197265625, + "learning_rate": 1.4785714285714286e-05, + "loss": 0.5697, + "step": 25636 + }, + { + "epoch": 70.43131868131869, + "grad_norm": 9.928943634033203, + "learning_rate": 1.4784340659340659e-05, + "loss": 0.2123, + "step": 25637 + }, + { + "epoch": 70.43406593406593, + "grad_norm": 11.263193130493164, + "learning_rate": 1.4782967032967034e-05, + "loss": 0.1528, + "step": 25638 + }, + { + "epoch": 70.43681318681318, + "grad_norm": 11.78093433380127, + "learning_rate": 1.4781593406593408e-05, + "loss": 0.1813, + "step": 25639 + }, + { + "epoch": 70.43956043956044, + "grad_norm": 7.285951614379883, + "learning_rate": 1.4780219780219781e-05, + "loss": 0.0782, + "step": 25640 + }, + { + "epoch": 70.4423076923077, + "grad_norm": 20.711322784423828, + "learning_rate": 1.4778846153846154e-05, + "loss": 0.3763, + "step": 25641 + }, + { + "epoch": 70.44505494505495, + "grad_norm": 5.62254524230957, + "learning_rate": 1.4777472527472528e-05, + "loss": 0.0716, + "step": 25642 + }, + { + "epoch": 70.4478021978022, + "grad_norm": 10.573080062866211, + "learning_rate": 1.4776098901098903e-05, + "loss": 0.18, + "step": 25643 + }, + { + "epoch": 70.45054945054945, + "grad_norm": 10.797492980957031, + "learning_rate": 1.4774725274725276e-05, + "loss": 0.2069, + "step": 25644 + }, + { + "epoch": 70.4532967032967, + "grad_norm": 10.908910751342773, + "learning_rate": 1.477335164835165e-05, + "loss": 0.1523, + "step": 25645 + }, + { + "epoch": 70.45604395604396, + "grad_norm": 5.095366954803467, + "learning_rate": 1.4771978021978023e-05, + "loss": 0.0465, + "step": 25646 + }, + { + "epoch": 70.45879120879121, + "grad_norm": 0.648027777671814, + "learning_rate": 1.4770604395604395e-05, + "loss": 0.0087, + "step": 25647 + }, + { + "epoch": 70.46153846153847, + "grad_norm": 9.124906539916992, + "learning_rate": 1.4769230769230772e-05, + "loss": 0.1797, + "step": 25648 + }, + { + "epoch": 70.46428571428571, + "grad_norm": 12.267311096191406, + "learning_rate": 1.4767857142857145e-05, + "loss": 0.2616, + "step": 25649 + }, + { + "epoch": 70.46703296703296, + "grad_norm": 8.763721466064453, + "learning_rate": 1.4766483516483517e-05, + "loss": 0.1608, + "step": 25650 + }, + { + "epoch": 70.46978021978022, + "grad_norm": 16.821557998657227, + "learning_rate": 1.476510989010989e-05, + "loss": 0.3228, + "step": 25651 + }, + { + "epoch": 70.47252747252747, + "grad_norm": 4.812289237976074, + "learning_rate": 1.4763736263736264e-05, + "loss": 0.1091, + "step": 25652 + }, + { + "epoch": 70.47527472527473, + "grad_norm": 14.00621223449707, + "learning_rate": 1.4762362637362637e-05, + "loss": 0.1993, + "step": 25653 + }, + { + "epoch": 70.47802197802197, + "grad_norm": 6.494765281677246, + "learning_rate": 1.4760989010989012e-05, + "loss": 0.1091, + "step": 25654 + }, + { + "epoch": 70.48076923076923, + "grad_norm": 11.480868339538574, + "learning_rate": 1.4759615384615386e-05, + "loss": 0.3704, + "step": 25655 + }, + { + "epoch": 70.48351648351648, + "grad_norm": 3.3889074325561523, + "learning_rate": 1.4758241758241759e-05, + "loss": 0.0445, + "step": 25656 + }, + { + "epoch": 70.48626373626374, + "grad_norm": 24.652795791625977, + "learning_rate": 1.4756868131868132e-05, + "loss": 0.4275, + "step": 25657 + }, + { + "epoch": 70.48901098901099, + "grad_norm": 9.180012702941895, + "learning_rate": 1.4755494505494504e-05, + "loss": 0.1417, + "step": 25658 + }, + { + "epoch": 70.49175824175825, + "grad_norm": 8.981736183166504, + "learning_rate": 1.4754120879120881e-05, + "loss": 0.0931, + "step": 25659 + }, + { + "epoch": 70.49450549450549, + "grad_norm": 19.195825576782227, + "learning_rate": 1.4752747252747254e-05, + "loss": 0.1681, + "step": 25660 + }, + { + "epoch": 70.49725274725274, + "grad_norm": 14.343439102172852, + "learning_rate": 1.4751373626373628e-05, + "loss": 0.3509, + "step": 25661 + }, + { + "epoch": 70.5, + "grad_norm": 8.053508758544922, + "learning_rate": 1.475e-05, + "loss": 0.1495, + "step": 25662 + }, + { + "epoch": 70.50274725274726, + "grad_norm": 6.6373138427734375, + "learning_rate": 1.4748626373626373e-05, + "loss": 0.1166, + "step": 25663 + }, + { + "epoch": 70.50549450549451, + "grad_norm": 6.188901901245117, + "learning_rate": 1.474725274725275e-05, + "loss": 0.098, + "step": 25664 + }, + { + "epoch": 70.50824175824175, + "grad_norm": 4.649137496948242, + "learning_rate": 1.4745879120879121e-05, + "loss": 0.0399, + "step": 25665 + }, + { + "epoch": 70.51098901098901, + "grad_norm": 10.504979133605957, + "learning_rate": 1.4744505494505495e-05, + "loss": 0.2694, + "step": 25666 + }, + { + "epoch": 70.51373626373626, + "grad_norm": 20.775434494018555, + "learning_rate": 1.4743131868131868e-05, + "loss": 0.323, + "step": 25667 + }, + { + "epoch": 70.51648351648352, + "grad_norm": 9.631049156188965, + "learning_rate": 1.4741758241758242e-05, + "loss": 0.3046, + "step": 25668 + }, + { + "epoch": 70.51923076923077, + "grad_norm": 5.3861494064331055, + "learning_rate": 1.4740384615384617e-05, + "loss": 0.0974, + "step": 25669 + }, + { + "epoch": 70.52197802197803, + "grad_norm": 22.324941635131836, + "learning_rate": 1.473901098901099e-05, + "loss": 0.4392, + "step": 25670 + }, + { + "epoch": 70.52472527472527, + "grad_norm": 5.9745635986328125, + "learning_rate": 1.4737637362637363e-05, + "loss": 0.0403, + "step": 25671 + }, + { + "epoch": 70.52747252747253, + "grad_norm": 11.084615707397461, + "learning_rate": 1.4736263736263737e-05, + "loss": 0.1698, + "step": 25672 + }, + { + "epoch": 70.53021978021978, + "grad_norm": 13.88633918762207, + "learning_rate": 1.4734890109890109e-05, + "loss": 0.1233, + "step": 25673 + }, + { + "epoch": 70.53296703296704, + "grad_norm": 20.518918991088867, + "learning_rate": 1.4733516483516485e-05, + "loss": 0.7276, + "step": 25674 + }, + { + "epoch": 70.53571428571429, + "grad_norm": 7.450631141662598, + "learning_rate": 1.4732142857142859e-05, + "loss": 0.0944, + "step": 25675 + }, + { + "epoch": 70.53846153846153, + "grad_norm": 9.098641395568848, + "learning_rate": 1.4730769230769232e-05, + "loss": 0.1689, + "step": 25676 + }, + { + "epoch": 70.54120879120879, + "grad_norm": 6.652166843414307, + "learning_rate": 1.4729395604395604e-05, + "loss": 0.091, + "step": 25677 + }, + { + "epoch": 70.54395604395604, + "grad_norm": 9.667003631591797, + "learning_rate": 1.4728021978021977e-05, + "loss": 0.1927, + "step": 25678 + }, + { + "epoch": 70.5467032967033, + "grad_norm": 20.050661087036133, + "learning_rate": 1.4726648351648354e-05, + "loss": 0.6376, + "step": 25679 + }, + { + "epoch": 70.54945054945055, + "grad_norm": 11.845052719116211, + "learning_rate": 1.4725274725274726e-05, + "loss": 0.2073, + "step": 25680 + }, + { + "epoch": 70.5521978021978, + "grad_norm": 20.87403106689453, + "learning_rate": 1.47239010989011e-05, + "loss": 0.3387, + "step": 25681 + }, + { + "epoch": 70.55494505494505, + "grad_norm": 11.052324295043945, + "learning_rate": 1.4722527472527473e-05, + "loss": 0.1204, + "step": 25682 + }, + { + "epoch": 70.5576923076923, + "grad_norm": 12.902168273925781, + "learning_rate": 1.4721153846153846e-05, + "loss": 0.2339, + "step": 25683 + }, + { + "epoch": 70.56043956043956, + "grad_norm": 9.987968444824219, + "learning_rate": 1.4719780219780221e-05, + "loss": 0.1508, + "step": 25684 + }, + { + "epoch": 70.56318681318682, + "grad_norm": 18.95937156677246, + "learning_rate": 1.4718406593406595e-05, + "loss": 0.381, + "step": 25685 + }, + { + "epoch": 70.56593406593407, + "grad_norm": 15.699207305908203, + "learning_rate": 1.4717032967032968e-05, + "loss": 0.3055, + "step": 25686 + }, + { + "epoch": 70.56868131868131, + "grad_norm": 10.41434383392334, + "learning_rate": 1.4715659340659341e-05, + "loss": 0.1797, + "step": 25687 + }, + { + "epoch": 70.57142857142857, + "grad_norm": 3.2566745281219482, + "learning_rate": 1.4714285714285713e-05, + "loss": 0.0621, + "step": 25688 + }, + { + "epoch": 70.57417582417582, + "grad_norm": 23.538166046142578, + "learning_rate": 1.471291208791209e-05, + "loss": 0.4333, + "step": 25689 + }, + { + "epoch": 70.57692307692308, + "grad_norm": 8.325530052185059, + "learning_rate": 1.4711538461538463e-05, + "loss": 0.2531, + "step": 25690 + }, + { + "epoch": 70.57967032967034, + "grad_norm": 1.947262167930603, + "learning_rate": 1.4710164835164835e-05, + "loss": 0.0286, + "step": 25691 + }, + { + "epoch": 70.58241758241758, + "grad_norm": 17.972700119018555, + "learning_rate": 1.4708791208791208e-05, + "loss": 0.3609, + "step": 25692 + }, + { + "epoch": 70.58516483516483, + "grad_norm": 11.681403160095215, + "learning_rate": 1.4707417582417582e-05, + "loss": 0.3709, + "step": 25693 + }, + { + "epoch": 70.58791208791209, + "grad_norm": 8.53368091583252, + "learning_rate": 1.4706043956043959e-05, + "loss": 0.1219, + "step": 25694 + }, + { + "epoch": 70.59065934065934, + "grad_norm": 13.316988945007324, + "learning_rate": 1.470467032967033e-05, + "loss": 0.1625, + "step": 25695 + }, + { + "epoch": 70.5934065934066, + "grad_norm": 24.218135833740234, + "learning_rate": 1.4703296703296704e-05, + "loss": 0.3744, + "step": 25696 + }, + { + "epoch": 70.59615384615384, + "grad_norm": 5.562161445617676, + "learning_rate": 1.4701923076923077e-05, + "loss": 0.122, + "step": 25697 + }, + { + "epoch": 70.5989010989011, + "grad_norm": 3.9449331760406494, + "learning_rate": 1.470054945054945e-05, + "loss": 0.0597, + "step": 25698 + }, + { + "epoch": 70.60164835164835, + "grad_norm": 3.68636155128479, + "learning_rate": 1.4699175824175826e-05, + "loss": 0.0533, + "step": 25699 + }, + { + "epoch": 70.6043956043956, + "grad_norm": 23.450899124145508, + "learning_rate": 1.4697802197802199e-05, + "loss": 0.621, + "step": 25700 + }, + { + "epoch": 70.60714285714286, + "grad_norm": 19.154380798339844, + "learning_rate": 1.4696428571428572e-05, + "loss": 0.5992, + "step": 25701 + }, + { + "epoch": 70.60989010989012, + "grad_norm": 6.727184772491455, + "learning_rate": 1.4695054945054946e-05, + "loss": 0.1173, + "step": 25702 + }, + { + "epoch": 70.61263736263736, + "grad_norm": 9.435885429382324, + "learning_rate": 1.4693681318681318e-05, + "loss": 0.2667, + "step": 25703 + }, + { + "epoch": 70.61538461538461, + "grad_norm": 13.538543701171875, + "learning_rate": 1.4692307692307694e-05, + "loss": 0.426, + "step": 25704 + }, + { + "epoch": 70.61813186813187, + "grad_norm": 10.060663223266602, + "learning_rate": 1.4690934065934068e-05, + "loss": 0.1902, + "step": 25705 + }, + { + "epoch": 70.62087912087912, + "grad_norm": 12.814940452575684, + "learning_rate": 1.468956043956044e-05, + "loss": 0.2744, + "step": 25706 + }, + { + "epoch": 70.62362637362638, + "grad_norm": 3.109463930130005, + "learning_rate": 1.4688186813186813e-05, + "loss": 0.0406, + "step": 25707 + }, + { + "epoch": 70.62637362637362, + "grad_norm": 9.992806434631348, + "learning_rate": 1.4686813186813186e-05, + "loss": 0.2394, + "step": 25708 + }, + { + "epoch": 70.62912087912088, + "grad_norm": 22.325448989868164, + "learning_rate": 1.4685439560439563e-05, + "loss": 0.3174, + "step": 25709 + }, + { + "epoch": 70.63186813186813, + "grad_norm": 15.965677261352539, + "learning_rate": 1.4684065934065935e-05, + "loss": 0.35, + "step": 25710 + }, + { + "epoch": 70.63461538461539, + "grad_norm": 14.097305297851562, + "learning_rate": 1.4682692307692308e-05, + "loss": 0.2415, + "step": 25711 + }, + { + "epoch": 70.63736263736264, + "grad_norm": 7.095414638519287, + "learning_rate": 1.4681318681318682e-05, + "loss": 0.201, + "step": 25712 + }, + { + "epoch": 70.64010989010988, + "grad_norm": 17.062973022460938, + "learning_rate": 1.4679945054945055e-05, + "loss": 0.3642, + "step": 25713 + }, + { + "epoch": 70.64285714285714, + "grad_norm": 7.926285266876221, + "learning_rate": 1.467857142857143e-05, + "loss": 0.1076, + "step": 25714 + }, + { + "epoch": 70.6456043956044, + "grad_norm": 38.93842697143555, + "learning_rate": 1.4677197802197803e-05, + "loss": 1.7603, + "step": 25715 + }, + { + "epoch": 70.64835164835165, + "grad_norm": 11.507332801818848, + "learning_rate": 1.4675824175824177e-05, + "loss": 0.1353, + "step": 25716 + }, + { + "epoch": 70.6510989010989, + "grad_norm": 15.161924362182617, + "learning_rate": 1.467445054945055e-05, + "loss": 0.4402, + "step": 25717 + }, + { + "epoch": 70.65384615384616, + "grad_norm": 17.137340545654297, + "learning_rate": 1.4673076923076922e-05, + "loss": 0.4515, + "step": 25718 + }, + { + "epoch": 70.6565934065934, + "grad_norm": 13.338667869567871, + "learning_rate": 1.4671703296703299e-05, + "loss": 0.2748, + "step": 25719 + }, + { + "epoch": 70.65934065934066, + "grad_norm": 4.576650142669678, + "learning_rate": 1.4670329670329672e-05, + "loss": 0.0408, + "step": 25720 + }, + { + "epoch": 70.66208791208791, + "grad_norm": 14.33267593383789, + "learning_rate": 1.4668956043956044e-05, + "loss": 0.2468, + "step": 25721 + }, + { + "epoch": 70.66483516483517, + "grad_norm": 17.293046951293945, + "learning_rate": 1.4667582417582417e-05, + "loss": 0.2681, + "step": 25722 + }, + { + "epoch": 70.66758241758242, + "grad_norm": 8.005074501037598, + "learning_rate": 1.466620879120879e-05, + "loss": 0.0673, + "step": 25723 + }, + { + "epoch": 70.67032967032966, + "grad_norm": 4.4538984298706055, + "learning_rate": 1.4664835164835168e-05, + "loss": 0.052, + "step": 25724 + }, + { + "epoch": 70.67307692307692, + "grad_norm": 20.646568298339844, + "learning_rate": 1.466346153846154e-05, + "loss": 0.2291, + "step": 25725 + }, + { + "epoch": 70.67582417582418, + "grad_norm": 14.084258079528809, + "learning_rate": 1.4662087912087913e-05, + "loss": 0.3706, + "step": 25726 + }, + { + "epoch": 70.67857142857143, + "grad_norm": 10.923208236694336, + "learning_rate": 1.4660714285714286e-05, + "loss": 0.1434, + "step": 25727 + }, + { + "epoch": 70.68131868131869, + "grad_norm": 18.141813278198242, + "learning_rate": 1.465934065934066e-05, + "loss": 0.4498, + "step": 25728 + }, + { + "epoch": 70.68406593406593, + "grad_norm": 10.936368942260742, + "learning_rate": 1.4657967032967035e-05, + "loss": 0.2187, + "step": 25729 + }, + { + "epoch": 70.68681318681318, + "grad_norm": 8.111032485961914, + "learning_rate": 1.4656593406593408e-05, + "loss": 0.1133, + "step": 25730 + }, + { + "epoch": 70.68956043956044, + "grad_norm": 5.329535007476807, + "learning_rate": 1.4655219780219781e-05, + "loss": 0.0479, + "step": 25731 + }, + { + "epoch": 70.6923076923077, + "grad_norm": 8.931798934936523, + "learning_rate": 1.4653846153846155e-05, + "loss": 0.1355, + "step": 25732 + }, + { + "epoch": 70.69505494505495, + "grad_norm": 11.240964889526367, + "learning_rate": 1.4652472527472527e-05, + "loss": 0.1512, + "step": 25733 + }, + { + "epoch": 70.6978021978022, + "grad_norm": 24.071195602416992, + "learning_rate": 1.4651098901098903e-05, + "loss": 0.4983, + "step": 25734 + }, + { + "epoch": 70.70054945054945, + "grad_norm": 6.389357566833496, + "learning_rate": 1.4649725274725277e-05, + "loss": 0.1202, + "step": 25735 + }, + { + "epoch": 70.7032967032967, + "grad_norm": 28.7552433013916, + "learning_rate": 1.4648351648351648e-05, + "loss": 0.7257, + "step": 25736 + }, + { + "epoch": 70.70604395604396, + "grad_norm": 8.089799880981445, + "learning_rate": 1.4646978021978022e-05, + "loss": 0.0893, + "step": 25737 + }, + { + "epoch": 70.70879120879121, + "grad_norm": 10.001612663269043, + "learning_rate": 1.4645604395604395e-05, + "loss": 0.2371, + "step": 25738 + }, + { + "epoch": 70.71153846153847, + "grad_norm": 1.9770193099975586, + "learning_rate": 1.4644230769230772e-05, + "loss": 0.0276, + "step": 25739 + }, + { + "epoch": 70.71428571428571, + "grad_norm": 1.9189873933792114, + "learning_rate": 1.4642857142857144e-05, + "loss": 0.0193, + "step": 25740 + }, + { + "epoch": 70.71703296703296, + "grad_norm": 11.875958442687988, + "learning_rate": 1.4641483516483517e-05, + "loss": 0.1737, + "step": 25741 + }, + { + "epoch": 70.71978021978022, + "grad_norm": 15.779895782470703, + "learning_rate": 1.464010989010989e-05, + "loss": 0.4912, + "step": 25742 + }, + { + "epoch": 70.72252747252747, + "grad_norm": 29.280271530151367, + "learning_rate": 1.4638736263736264e-05, + "loss": 0.3406, + "step": 25743 + }, + { + "epoch": 70.72527472527473, + "grad_norm": 17.982328414916992, + "learning_rate": 1.4637362637362639e-05, + "loss": 0.4245, + "step": 25744 + }, + { + "epoch": 70.72802197802197, + "grad_norm": 16.13945770263672, + "learning_rate": 1.4635989010989012e-05, + "loss": 0.4498, + "step": 25745 + }, + { + "epoch": 70.73076923076923, + "grad_norm": 10.104283332824707, + "learning_rate": 1.4634615384615386e-05, + "loss": 0.2681, + "step": 25746 + }, + { + "epoch": 70.73351648351648, + "grad_norm": 14.50088119506836, + "learning_rate": 1.463324175824176e-05, + "loss": 0.1814, + "step": 25747 + }, + { + "epoch": 70.73626373626374, + "grad_norm": 2.76242995262146, + "learning_rate": 1.4631868131868131e-05, + "loss": 0.0406, + "step": 25748 + }, + { + "epoch": 70.73901098901099, + "grad_norm": 11.04171085357666, + "learning_rate": 1.4630494505494508e-05, + "loss": 0.1829, + "step": 25749 + }, + { + "epoch": 70.74175824175825, + "grad_norm": 1.9074113368988037, + "learning_rate": 1.4629120879120881e-05, + "loss": 0.0199, + "step": 25750 + }, + { + "epoch": 70.74450549450549, + "grad_norm": 4.559991836547852, + "learning_rate": 1.4627747252747253e-05, + "loss": 0.0668, + "step": 25751 + }, + { + "epoch": 70.74725274725274, + "grad_norm": 12.864109992980957, + "learning_rate": 1.4626373626373626e-05, + "loss": 0.2837, + "step": 25752 + }, + { + "epoch": 70.75, + "grad_norm": 3.8002288341522217, + "learning_rate": 1.4625e-05, + "loss": 0.0418, + "step": 25753 + }, + { + "epoch": 70.75274725274726, + "grad_norm": 11.842147827148438, + "learning_rate": 1.4623626373626375e-05, + "loss": 0.4075, + "step": 25754 + }, + { + "epoch": 70.75549450549451, + "grad_norm": 19.777116775512695, + "learning_rate": 1.4622252747252748e-05, + "loss": 0.4442, + "step": 25755 + }, + { + "epoch": 70.75824175824175, + "grad_norm": 17.04688835144043, + "learning_rate": 1.4620879120879122e-05, + "loss": 0.2997, + "step": 25756 + }, + { + "epoch": 70.76098901098901, + "grad_norm": 15.020913124084473, + "learning_rate": 1.4619505494505495e-05, + "loss": 0.2987, + "step": 25757 + }, + { + "epoch": 70.76373626373626, + "grad_norm": 15.145751953125, + "learning_rate": 1.4618131868131868e-05, + "loss": 0.3412, + "step": 25758 + }, + { + "epoch": 70.76648351648352, + "grad_norm": 14.307884216308594, + "learning_rate": 1.4616758241758244e-05, + "loss": 0.2324, + "step": 25759 + }, + { + "epoch": 70.76923076923077, + "grad_norm": 20.94060707092285, + "learning_rate": 1.4615384615384617e-05, + "loss": 0.5209, + "step": 25760 + }, + { + "epoch": 70.77197802197803, + "grad_norm": 12.998785972595215, + "learning_rate": 1.461401098901099e-05, + "loss": 0.1294, + "step": 25761 + }, + { + "epoch": 70.77472527472527, + "grad_norm": 11.797782897949219, + "learning_rate": 1.4612637362637364e-05, + "loss": 0.1728, + "step": 25762 + }, + { + "epoch": 70.77747252747253, + "grad_norm": 16.818052291870117, + "learning_rate": 1.4611263736263735e-05, + "loss": 0.4503, + "step": 25763 + }, + { + "epoch": 70.78021978021978, + "grad_norm": 6.064773082733154, + "learning_rate": 1.4609890109890112e-05, + "loss": 0.1408, + "step": 25764 + }, + { + "epoch": 70.78296703296704, + "grad_norm": 12.982327461242676, + "learning_rate": 1.4608516483516486e-05, + "loss": 0.197, + "step": 25765 + }, + { + "epoch": 70.78571428571429, + "grad_norm": 11.24012279510498, + "learning_rate": 1.4607142857142857e-05, + "loss": 0.1336, + "step": 25766 + }, + { + "epoch": 70.78846153846153, + "grad_norm": 28.842126846313477, + "learning_rate": 1.460576923076923e-05, + "loss": 0.5135, + "step": 25767 + }, + { + "epoch": 70.79120879120879, + "grad_norm": 28.39802360534668, + "learning_rate": 1.4604395604395604e-05, + "loss": 0.6656, + "step": 25768 + }, + { + "epoch": 70.79395604395604, + "grad_norm": 14.167619705200195, + "learning_rate": 1.4603021978021978e-05, + "loss": 0.182, + "step": 25769 + }, + { + "epoch": 70.7967032967033, + "grad_norm": 18.23468589782715, + "learning_rate": 1.4601648351648353e-05, + "loss": 0.4046, + "step": 25770 + }, + { + "epoch": 70.79945054945055, + "grad_norm": 22.262901306152344, + "learning_rate": 1.4600274725274726e-05, + "loss": 0.5316, + "step": 25771 + }, + { + "epoch": 70.8021978021978, + "grad_norm": 27.50884246826172, + "learning_rate": 1.45989010989011e-05, + "loss": 1.2095, + "step": 25772 + }, + { + "epoch": 70.80494505494505, + "grad_norm": 9.846710205078125, + "learning_rate": 1.4597527472527473e-05, + "loss": 0.2115, + "step": 25773 + }, + { + "epoch": 70.8076923076923, + "grad_norm": 11.251726150512695, + "learning_rate": 1.4596153846153845e-05, + "loss": 0.2445, + "step": 25774 + }, + { + "epoch": 70.81043956043956, + "grad_norm": 24.25432586669922, + "learning_rate": 1.4594780219780221e-05, + "loss": 0.7401, + "step": 25775 + }, + { + "epoch": 70.81318681318682, + "grad_norm": 2.981001615524292, + "learning_rate": 1.4593406593406595e-05, + "loss": 0.0456, + "step": 25776 + }, + { + "epoch": 70.81593406593407, + "grad_norm": 4.792572021484375, + "learning_rate": 1.4592032967032968e-05, + "loss": 0.091, + "step": 25777 + }, + { + "epoch": 70.81868131868131, + "grad_norm": 22.268402099609375, + "learning_rate": 1.459065934065934e-05, + "loss": 0.4461, + "step": 25778 + }, + { + "epoch": 70.82142857142857, + "grad_norm": 5.573419094085693, + "learning_rate": 1.4589285714285713e-05, + "loss": 0.1177, + "step": 25779 + }, + { + "epoch": 70.82417582417582, + "grad_norm": 7.3001909255981445, + "learning_rate": 1.458791208791209e-05, + "loss": 0.1201, + "step": 25780 + }, + { + "epoch": 70.82692307692308, + "grad_norm": 11.944741249084473, + "learning_rate": 1.4586538461538462e-05, + "loss": 0.2609, + "step": 25781 + }, + { + "epoch": 70.82967032967034, + "grad_norm": 15.550314903259277, + "learning_rate": 1.4585164835164835e-05, + "loss": 0.5518, + "step": 25782 + }, + { + "epoch": 70.83241758241758, + "grad_norm": 11.952423095703125, + "learning_rate": 1.4583791208791209e-05, + "loss": 0.1484, + "step": 25783 + }, + { + "epoch": 70.83516483516483, + "grad_norm": 14.880545616149902, + "learning_rate": 1.4582417582417582e-05, + "loss": 0.2237, + "step": 25784 + }, + { + "epoch": 70.83791208791209, + "grad_norm": 14.355905532836914, + "learning_rate": 1.4581043956043957e-05, + "loss": 0.1294, + "step": 25785 + }, + { + "epoch": 70.84065934065934, + "grad_norm": 16.775672912597656, + "learning_rate": 1.457967032967033e-05, + "loss": 0.406, + "step": 25786 + }, + { + "epoch": 70.8434065934066, + "grad_norm": 20.55561637878418, + "learning_rate": 1.4578296703296704e-05, + "loss": 0.3233, + "step": 25787 + }, + { + "epoch": 70.84615384615384, + "grad_norm": 6.781336784362793, + "learning_rate": 1.4576923076923077e-05, + "loss": 0.093, + "step": 25788 + }, + { + "epoch": 70.8489010989011, + "grad_norm": 20.573713302612305, + "learning_rate": 1.457554945054945e-05, + "loss": 0.3226, + "step": 25789 + }, + { + "epoch": 70.85164835164835, + "grad_norm": 2.1984670162200928, + "learning_rate": 1.4574175824175826e-05, + "loss": 0.0197, + "step": 25790 + }, + { + "epoch": 70.8543956043956, + "grad_norm": 17.89335823059082, + "learning_rate": 1.45728021978022e-05, + "loss": 0.3696, + "step": 25791 + }, + { + "epoch": 70.85714285714286, + "grad_norm": 10.180304527282715, + "learning_rate": 1.4571428571428573e-05, + "loss": 0.1121, + "step": 25792 + }, + { + "epoch": 70.85989010989012, + "grad_norm": 16.261871337890625, + "learning_rate": 1.4570054945054944e-05, + "loss": 0.3286, + "step": 25793 + }, + { + "epoch": 70.86263736263736, + "grad_norm": 4.602844715118408, + "learning_rate": 1.4568681318681318e-05, + "loss": 0.084, + "step": 25794 + }, + { + "epoch": 70.86538461538461, + "grad_norm": 11.92237663269043, + "learning_rate": 1.4567307692307695e-05, + "loss": 0.2686, + "step": 25795 + }, + { + "epoch": 70.86813186813187, + "grad_norm": 17.475343704223633, + "learning_rate": 1.4565934065934066e-05, + "loss": 0.2909, + "step": 25796 + }, + { + "epoch": 70.87087912087912, + "grad_norm": 13.34220027923584, + "learning_rate": 1.456456043956044e-05, + "loss": 0.216, + "step": 25797 + }, + { + "epoch": 70.87362637362638, + "grad_norm": 3.567946434020996, + "learning_rate": 1.4563186813186813e-05, + "loss": 0.0498, + "step": 25798 + }, + { + "epoch": 70.87637362637362, + "grad_norm": 12.99227237701416, + "learning_rate": 1.4561813186813187e-05, + "loss": 0.3868, + "step": 25799 + }, + { + "epoch": 70.87912087912088, + "grad_norm": 16.889488220214844, + "learning_rate": 1.4560439560439562e-05, + "loss": 0.2101, + "step": 25800 + }, + { + "epoch": 70.88186813186813, + "grad_norm": 2.281898021697998, + "learning_rate": 1.4559065934065935e-05, + "loss": 0.0241, + "step": 25801 + }, + { + "epoch": 70.88461538461539, + "grad_norm": 9.955423355102539, + "learning_rate": 1.4557692307692309e-05, + "loss": 0.1847, + "step": 25802 + }, + { + "epoch": 70.88736263736264, + "grad_norm": 15.259366989135742, + "learning_rate": 1.4556318681318682e-05, + "loss": 0.321, + "step": 25803 + }, + { + "epoch": 70.89010989010988, + "grad_norm": 17.84905242919922, + "learning_rate": 1.4554945054945054e-05, + "loss": 0.3268, + "step": 25804 + }, + { + "epoch": 70.89285714285714, + "grad_norm": 15.648679733276367, + "learning_rate": 1.455357142857143e-05, + "loss": 0.3679, + "step": 25805 + }, + { + "epoch": 70.8956043956044, + "grad_norm": 10.98408317565918, + "learning_rate": 1.4552197802197804e-05, + "loss": 0.1961, + "step": 25806 + }, + { + "epoch": 70.89835164835165, + "grad_norm": 11.908578872680664, + "learning_rate": 1.4550824175824177e-05, + "loss": 0.4261, + "step": 25807 + }, + { + "epoch": 70.9010989010989, + "grad_norm": 23.848175048828125, + "learning_rate": 1.4549450549450549e-05, + "loss": 0.4915, + "step": 25808 + }, + { + "epoch": 70.90384615384616, + "grad_norm": 2.893003463745117, + "learning_rate": 1.4548076923076922e-05, + "loss": 0.0264, + "step": 25809 + }, + { + "epoch": 70.9065934065934, + "grad_norm": 12.037710189819336, + "learning_rate": 1.45467032967033e-05, + "loss": 0.1566, + "step": 25810 + }, + { + "epoch": 70.90934065934066, + "grad_norm": 3.447486639022827, + "learning_rate": 1.4545329670329671e-05, + "loss": 0.0321, + "step": 25811 + }, + { + "epoch": 70.91208791208791, + "grad_norm": 20.22467613220215, + "learning_rate": 1.4543956043956044e-05, + "loss": 0.3563, + "step": 25812 + }, + { + "epoch": 70.91483516483517, + "grad_norm": 15.694934844970703, + "learning_rate": 1.4542582417582418e-05, + "loss": 0.2845, + "step": 25813 + }, + { + "epoch": 70.91758241758242, + "grad_norm": 19.273681640625, + "learning_rate": 1.4541208791208791e-05, + "loss": 0.5232, + "step": 25814 + }, + { + "epoch": 70.92032967032966, + "grad_norm": 25.409589767456055, + "learning_rate": 1.4539835164835166e-05, + "loss": 0.4159, + "step": 25815 + }, + { + "epoch": 70.92307692307692, + "grad_norm": 8.625011444091797, + "learning_rate": 1.453846153846154e-05, + "loss": 0.1944, + "step": 25816 + }, + { + "epoch": 70.92582417582418, + "grad_norm": 2.6708974838256836, + "learning_rate": 1.4537087912087913e-05, + "loss": 0.0326, + "step": 25817 + }, + { + "epoch": 70.92857142857143, + "grad_norm": 7.052079677581787, + "learning_rate": 1.4535714285714286e-05, + "loss": 0.0966, + "step": 25818 + }, + { + "epoch": 70.93131868131869, + "grad_norm": 15.14441204071045, + "learning_rate": 1.4534340659340658e-05, + "loss": 0.2899, + "step": 25819 + }, + { + "epoch": 70.93406593406593, + "grad_norm": 15.797104835510254, + "learning_rate": 1.4532967032967035e-05, + "loss": 0.4207, + "step": 25820 + }, + { + "epoch": 70.93681318681318, + "grad_norm": 21.394575119018555, + "learning_rate": 1.4531593406593408e-05, + "loss": 0.3981, + "step": 25821 + }, + { + "epoch": 70.93956043956044, + "grad_norm": 10.296540260314941, + "learning_rate": 1.4530219780219782e-05, + "loss": 0.1551, + "step": 25822 + }, + { + "epoch": 70.9423076923077, + "grad_norm": 10.109373092651367, + "learning_rate": 1.4528846153846153e-05, + "loss": 0.1707, + "step": 25823 + }, + { + "epoch": 70.94505494505495, + "grad_norm": 16.84747314453125, + "learning_rate": 1.4527472527472527e-05, + "loss": 0.1766, + "step": 25824 + }, + { + "epoch": 70.9478021978022, + "grad_norm": 12.153663635253906, + "learning_rate": 1.4526098901098904e-05, + "loss": 0.4127, + "step": 25825 + }, + { + "epoch": 70.95054945054945, + "grad_norm": 9.799942016601562, + "learning_rate": 1.4524725274725275e-05, + "loss": 0.1362, + "step": 25826 + }, + { + "epoch": 70.9532967032967, + "grad_norm": 9.866085052490234, + "learning_rate": 1.4523351648351649e-05, + "loss": 0.1363, + "step": 25827 + }, + { + "epoch": 70.95604395604396, + "grad_norm": 28.37135124206543, + "learning_rate": 1.4521978021978022e-05, + "loss": 0.9494, + "step": 25828 + }, + { + "epoch": 70.95879120879121, + "grad_norm": 11.266860961914062, + "learning_rate": 1.4520604395604396e-05, + "loss": 0.226, + "step": 25829 + }, + { + "epoch": 70.96153846153847, + "grad_norm": 13.363266944885254, + "learning_rate": 1.451923076923077e-05, + "loss": 0.3704, + "step": 25830 + }, + { + "epoch": 70.96428571428571, + "grad_norm": 4.727382183074951, + "learning_rate": 1.4517857142857144e-05, + "loss": 0.073, + "step": 25831 + }, + { + "epoch": 70.96703296703296, + "grad_norm": 12.069958686828613, + "learning_rate": 1.4516483516483518e-05, + "loss": 0.201, + "step": 25832 + }, + { + "epoch": 70.96978021978022, + "grad_norm": 11.098583221435547, + "learning_rate": 1.4515109890109891e-05, + "loss": 0.3571, + "step": 25833 + }, + { + "epoch": 70.97252747252747, + "grad_norm": 18.59162139892578, + "learning_rate": 1.4513736263736263e-05, + "loss": 0.3848, + "step": 25834 + }, + { + "epoch": 70.97527472527473, + "grad_norm": 15.828341484069824, + "learning_rate": 1.451236263736264e-05, + "loss": 0.5359, + "step": 25835 + }, + { + "epoch": 70.97802197802197, + "grad_norm": 15.82421588897705, + "learning_rate": 1.4510989010989013e-05, + "loss": 0.2357, + "step": 25836 + }, + { + "epoch": 70.98076923076923, + "grad_norm": 6.137158393859863, + "learning_rate": 1.4509615384615385e-05, + "loss": 0.1389, + "step": 25837 + }, + { + "epoch": 70.98351648351648, + "grad_norm": 15.906641006469727, + "learning_rate": 1.4508241758241758e-05, + "loss": 0.3413, + "step": 25838 + }, + { + "epoch": 70.98626373626374, + "grad_norm": 7.685274600982666, + "learning_rate": 1.4506868131868131e-05, + "loss": 0.1053, + "step": 25839 + }, + { + "epoch": 70.98901098901099, + "grad_norm": 8.629964828491211, + "learning_rate": 1.4505494505494508e-05, + "loss": 0.2162, + "step": 25840 + }, + { + "epoch": 70.99175824175825, + "grad_norm": 10.359896659851074, + "learning_rate": 1.450412087912088e-05, + "loss": 0.2918, + "step": 25841 + }, + { + "epoch": 70.99450549450549, + "grad_norm": 10.031794548034668, + "learning_rate": 1.4502747252747253e-05, + "loss": 0.1899, + "step": 25842 + }, + { + "epoch": 70.99725274725274, + "grad_norm": 4.547440528869629, + "learning_rate": 1.4501373626373627e-05, + "loss": 0.1048, + "step": 25843 + }, + { + "epoch": 71.0, + "grad_norm": 6.093994617462158, + "learning_rate": 1.45e-05, + "loss": 0.0528, + "step": 25844 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.7644628099173554, + "eval_f1": 0.771528348680788, + "eval_f1_DuraRiadoRio_64x64": 0.7962382445141066, + "eval_f1_Mole_64x64": 0.8805970149253731, + "eval_f1_Quebrado_64x64": 0.7536231884057971, + "eval_f1_RiadoRio_64x64": 0.6460674157303371, + "eval_f1_RioFechado_64x64": 0.7811158798283262, + "eval_loss": 0.9494712948799133, + "eval_precision": 0.8057862934029917, + "eval_precision_DuraRiadoRio_64x64": 0.7257142857142858, + "eval_precision_Mole_64x64": 0.9516129032258065, + "eval_precision_Quebrado_64x64": 0.7878787878787878, + "eval_precision_RiadoRio_64x64": 0.5637254901960784, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.7642070257804134, + "eval_recall_DuraRiadoRio_64x64": 0.8819444444444444, + "eval_recall_Mole_64x64": 0.8194444444444444, + "eval_recall_Quebrado_64x64": 0.7222222222222222, + "eval_recall_RiadoRio_64x64": 0.756578947368421, + "eval_recall_RioFechado_64x64": 0.6408450704225352, + "eval_runtime": 1.7833, + "eval_samples_per_second": 407.103, + "eval_steps_per_second": 25.794, + "step": 25844 + }, + { + "epoch": 71.00274725274726, + "grad_norm": 2.6071841716766357, + "learning_rate": 1.4498626373626375e-05, + "loss": 0.0274, + "step": 25845 + }, + { + "epoch": 71.00549450549451, + "grad_norm": 3.9264097213745117, + "learning_rate": 1.4497252747252749e-05, + "loss": 0.0532, + "step": 25846 + }, + { + "epoch": 71.00824175824175, + "grad_norm": 10.083562850952148, + "learning_rate": 1.4495879120879122e-05, + "loss": 0.2069, + "step": 25847 + }, + { + "epoch": 71.01098901098901, + "grad_norm": 21.14406394958496, + "learning_rate": 1.4494505494505495e-05, + "loss": 0.4681, + "step": 25848 + }, + { + "epoch": 71.01373626373626, + "grad_norm": 13.245635986328125, + "learning_rate": 1.4493131868131867e-05, + "loss": 0.2782, + "step": 25849 + }, + { + "epoch": 71.01648351648352, + "grad_norm": 13.756875038146973, + "learning_rate": 1.4491758241758244e-05, + "loss": 0.2158, + "step": 25850 + }, + { + "epoch": 71.01923076923077, + "grad_norm": 15.199812889099121, + "learning_rate": 1.4490384615384617e-05, + "loss": 0.289, + "step": 25851 + }, + { + "epoch": 71.02197802197803, + "grad_norm": 8.492096900939941, + "learning_rate": 1.4489010989010989e-05, + "loss": 0.1022, + "step": 25852 + }, + { + "epoch": 71.02472527472527, + "grad_norm": 11.36677360534668, + "learning_rate": 1.4487637362637362e-05, + "loss": 0.1979, + "step": 25853 + }, + { + "epoch": 71.02747252747253, + "grad_norm": 16.23103141784668, + "learning_rate": 1.4486263736263736e-05, + "loss": 0.3637, + "step": 25854 + }, + { + "epoch": 71.03021978021978, + "grad_norm": 3.4862935543060303, + "learning_rate": 1.4484890109890113e-05, + "loss": 0.0867, + "step": 25855 + }, + { + "epoch": 71.03296703296704, + "grad_norm": 2.0268635749816895, + "learning_rate": 1.4483516483516484e-05, + "loss": 0.0123, + "step": 25856 + }, + { + "epoch": 71.03571428571429, + "grad_norm": 9.124114990234375, + "learning_rate": 1.4482142857142858e-05, + "loss": 0.1099, + "step": 25857 + }, + { + "epoch": 71.03846153846153, + "grad_norm": 18.43842124938965, + "learning_rate": 1.4480769230769231e-05, + "loss": 0.2685, + "step": 25858 + }, + { + "epoch": 71.04120879120879, + "grad_norm": 2.9656193256378174, + "learning_rate": 1.4479395604395605e-05, + "loss": 0.0477, + "step": 25859 + }, + { + "epoch": 71.04395604395604, + "grad_norm": 19.892436981201172, + "learning_rate": 1.447802197802198e-05, + "loss": 0.3991, + "step": 25860 + }, + { + "epoch": 71.0467032967033, + "grad_norm": 7.444554328918457, + "learning_rate": 1.4476648351648353e-05, + "loss": 0.0978, + "step": 25861 + }, + { + "epoch": 71.04945054945055, + "grad_norm": 15.26043701171875, + "learning_rate": 1.4475274725274726e-05, + "loss": 0.1465, + "step": 25862 + }, + { + "epoch": 71.0521978021978, + "grad_norm": 1.215435266494751, + "learning_rate": 1.44739010989011e-05, + "loss": 0.0184, + "step": 25863 + }, + { + "epoch": 71.05494505494505, + "grad_norm": 8.386655807495117, + "learning_rate": 1.4472527472527472e-05, + "loss": 0.1405, + "step": 25864 + }, + { + "epoch": 71.0576923076923, + "grad_norm": 6.532092571258545, + "learning_rate": 1.4471153846153848e-05, + "loss": 0.1479, + "step": 25865 + }, + { + "epoch": 71.06043956043956, + "grad_norm": 3.105964422225952, + "learning_rate": 1.4469780219780222e-05, + "loss": 0.0409, + "step": 25866 + }, + { + "epoch": 71.06318681318682, + "grad_norm": 14.253669738769531, + "learning_rate": 1.4468406593406594e-05, + "loss": 0.3368, + "step": 25867 + }, + { + "epoch": 71.06593406593407, + "grad_norm": 12.730896949768066, + "learning_rate": 1.4467032967032967e-05, + "loss": 0.2592, + "step": 25868 + }, + { + "epoch": 71.06868131868131, + "grad_norm": 9.539803504943848, + "learning_rate": 1.446565934065934e-05, + "loss": 0.1969, + "step": 25869 + }, + { + "epoch": 71.07142857142857, + "grad_norm": 10.118677139282227, + "learning_rate": 1.4464285714285717e-05, + "loss": 0.1286, + "step": 25870 + }, + { + "epoch": 71.07417582417582, + "grad_norm": 12.124258041381836, + "learning_rate": 1.4462912087912089e-05, + "loss": 0.2857, + "step": 25871 + }, + { + "epoch": 71.07692307692308, + "grad_norm": 13.059391975402832, + "learning_rate": 1.4461538461538462e-05, + "loss": 0.2873, + "step": 25872 + }, + { + "epoch": 71.07967032967034, + "grad_norm": 18.251731872558594, + "learning_rate": 1.4460164835164836e-05, + "loss": 0.2343, + "step": 25873 + }, + { + "epoch": 71.08241758241758, + "grad_norm": 6.645303726196289, + "learning_rate": 1.4458791208791209e-05, + "loss": 0.0683, + "step": 25874 + }, + { + "epoch": 71.08516483516483, + "grad_norm": 16.28232192993164, + "learning_rate": 1.4457417582417584e-05, + "loss": 0.386, + "step": 25875 + }, + { + "epoch": 71.08791208791209, + "grad_norm": 8.570528030395508, + "learning_rate": 1.4456043956043958e-05, + "loss": 0.0791, + "step": 25876 + }, + { + "epoch": 71.09065934065934, + "grad_norm": 9.411073684692383, + "learning_rate": 1.4454670329670331e-05, + "loss": 0.1825, + "step": 25877 + }, + { + "epoch": 71.0934065934066, + "grad_norm": 11.007532119750977, + "learning_rate": 1.4453296703296704e-05, + "loss": 0.2628, + "step": 25878 + }, + { + "epoch": 71.09615384615384, + "grad_norm": 2.1650032997131348, + "learning_rate": 1.4451923076923076e-05, + "loss": 0.0231, + "step": 25879 + }, + { + "epoch": 71.0989010989011, + "grad_norm": 4.611636161804199, + "learning_rate": 1.445054945054945e-05, + "loss": 0.0872, + "step": 25880 + }, + { + "epoch": 71.10164835164835, + "grad_norm": 3.391423225402832, + "learning_rate": 1.4449175824175826e-05, + "loss": 0.0711, + "step": 25881 + }, + { + "epoch": 71.1043956043956, + "grad_norm": 4.969552993774414, + "learning_rate": 1.4447802197802198e-05, + "loss": 0.0675, + "step": 25882 + }, + { + "epoch": 71.10714285714286, + "grad_norm": 10.78006649017334, + "learning_rate": 1.4446428571428571e-05, + "loss": 0.1418, + "step": 25883 + }, + { + "epoch": 71.10989010989012, + "grad_norm": 10.176061630249023, + "learning_rate": 1.4445054945054945e-05, + "loss": 0.1322, + "step": 25884 + }, + { + "epoch": 71.11263736263736, + "grad_norm": 2.9778850078582764, + "learning_rate": 1.4443681318681318e-05, + "loss": 0.043, + "step": 25885 + }, + { + "epoch": 71.11538461538461, + "grad_norm": 16.248151779174805, + "learning_rate": 1.4442307692307693e-05, + "loss": 0.24, + "step": 25886 + }, + { + "epoch": 71.11813186813187, + "grad_norm": 14.198140144348145, + "learning_rate": 1.4440934065934067e-05, + "loss": 0.3932, + "step": 25887 + }, + { + "epoch": 71.12087912087912, + "grad_norm": 10.890824317932129, + "learning_rate": 1.443956043956044e-05, + "loss": 0.1401, + "step": 25888 + }, + { + "epoch": 71.12362637362638, + "grad_norm": 23.00234603881836, + "learning_rate": 1.4438186813186814e-05, + "loss": 0.438, + "step": 25889 + }, + { + "epoch": 71.12637362637362, + "grad_norm": 8.044153213500977, + "learning_rate": 1.4436813186813187e-05, + "loss": 0.1861, + "step": 25890 + }, + { + "epoch": 71.12912087912088, + "grad_norm": 12.80051040649414, + "learning_rate": 1.4435439560439562e-05, + "loss": 0.1979, + "step": 25891 + }, + { + "epoch": 71.13186813186813, + "grad_norm": 13.881246566772461, + "learning_rate": 1.4434065934065935e-05, + "loss": 0.1797, + "step": 25892 + }, + { + "epoch": 71.13461538461539, + "grad_norm": 15.804576873779297, + "learning_rate": 1.4432692307692309e-05, + "loss": 0.5263, + "step": 25893 + }, + { + "epoch": 71.13736263736264, + "grad_norm": 16.40585708618164, + "learning_rate": 1.443131868131868e-05, + "loss": 0.4255, + "step": 25894 + }, + { + "epoch": 71.14010989010988, + "grad_norm": 13.077324867248535, + "learning_rate": 1.4429945054945054e-05, + "loss": 0.2, + "step": 25895 + }, + { + "epoch": 71.14285714285714, + "grad_norm": 10.670438766479492, + "learning_rate": 1.442857142857143e-05, + "loss": 0.1881, + "step": 25896 + }, + { + "epoch": 71.1456043956044, + "grad_norm": 18.585601806640625, + "learning_rate": 1.4427197802197803e-05, + "loss": 0.496, + "step": 25897 + }, + { + "epoch": 71.14835164835165, + "grad_norm": 10.892748832702637, + "learning_rate": 1.4425824175824176e-05, + "loss": 0.2161, + "step": 25898 + }, + { + "epoch": 71.1510989010989, + "grad_norm": 8.549703598022461, + "learning_rate": 1.442445054945055e-05, + "loss": 0.1257, + "step": 25899 + }, + { + "epoch": 71.15384615384616, + "grad_norm": 12.899301528930664, + "learning_rate": 1.4423076923076923e-05, + "loss": 0.1899, + "step": 25900 + }, + { + "epoch": 71.1565934065934, + "grad_norm": 9.526984214782715, + "learning_rate": 1.4421703296703298e-05, + "loss": 0.0876, + "step": 25901 + }, + { + "epoch": 71.15934065934066, + "grad_norm": 14.155988693237305, + "learning_rate": 1.4420329670329671e-05, + "loss": 0.36, + "step": 25902 + }, + { + "epoch": 71.16208791208791, + "grad_norm": 4.2222490310668945, + "learning_rate": 1.4418956043956045e-05, + "loss": 0.0848, + "step": 25903 + }, + { + "epoch": 71.16483516483517, + "grad_norm": 6.382292747497559, + "learning_rate": 1.4417582417582418e-05, + "loss": 0.0868, + "step": 25904 + }, + { + "epoch": 71.16758241758242, + "grad_norm": 14.476229667663574, + "learning_rate": 1.4416208791208791e-05, + "loss": 0.36, + "step": 25905 + }, + { + "epoch": 71.17032967032966, + "grad_norm": 13.910848617553711, + "learning_rate": 1.4414835164835167e-05, + "loss": 0.1657, + "step": 25906 + }, + { + "epoch": 71.17307692307692, + "grad_norm": 13.81302261352539, + "learning_rate": 1.441346153846154e-05, + "loss": 0.3245, + "step": 25907 + }, + { + "epoch": 71.17582417582418, + "grad_norm": 12.144865036010742, + "learning_rate": 1.4412087912087913e-05, + "loss": 0.5204, + "step": 25908 + }, + { + "epoch": 71.17857142857143, + "grad_norm": 11.849088668823242, + "learning_rate": 1.4410714285714285e-05, + "loss": 0.2126, + "step": 25909 + }, + { + "epoch": 71.18131868131869, + "grad_norm": 20.753814697265625, + "learning_rate": 1.4409340659340658e-05, + "loss": 0.5578, + "step": 25910 + }, + { + "epoch": 71.18406593406593, + "grad_norm": 16.339557647705078, + "learning_rate": 1.4407967032967035e-05, + "loss": 0.2739, + "step": 25911 + }, + { + "epoch": 71.18681318681318, + "grad_norm": 7.496325969696045, + "learning_rate": 1.4406593406593407e-05, + "loss": 0.0968, + "step": 25912 + }, + { + "epoch": 71.18956043956044, + "grad_norm": 17.75505256652832, + "learning_rate": 1.440521978021978e-05, + "loss": 0.4196, + "step": 25913 + }, + { + "epoch": 71.1923076923077, + "grad_norm": 14.695496559143066, + "learning_rate": 1.4403846153846154e-05, + "loss": 0.1918, + "step": 25914 + }, + { + "epoch": 71.19505494505495, + "grad_norm": 5.182712554931641, + "learning_rate": 1.4402472527472527e-05, + "loss": 0.0593, + "step": 25915 + }, + { + "epoch": 71.1978021978022, + "grad_norm": 7.883602619171143, + "learning_rate": 1.4401098901098902e-05, + "loss": 0.2168, + "step": 25916 + }, + { + "epoch": 71.20054945054945, + "grad_norm": 21.19040870666504, + "learning_rate": 1.4399725274725276e-05, + "loss": 0.4717, + "step": 25917 + }, + { + "epoch": 71.2032967032967, + "grad_norm": 2.0033206939697266, + "learning_rate": 1.4398351648351649e-05, + "loss": 0.0168, + "step": 25918 + }, + { + "epoch": 71.20604395604396, + "grad_norm": 22.66674041748047, + "learning_rate": 1.4396978021978023e-05, + "loss": 0.4719, + "step": 25919 + }, + { + "epoch": 71.20879120879121, + "grad_norm": 8.977972030639648, + "learning_rate": 1.4395604395604396e-05, + "loss": 0.1669, + "step": 25920 + }, + { + "epoch": 71.21153846153847, + "grad_norm": 36.40445327758789, + "learning_rate": 1.4394230769230771e-05, + "loss": 0.3558, + "step": 25921 + }, + { + "epoch": 71.21428571428571, + "grad_norm": 13.434433937072754, + "learning_rate": 1.4392857142857144e-05, + "loss": 0.1563, + "step": 25922 + }, + { + "epoch": 71.21703296703296, + "grad_norm": 12.633611679077148, + "learning_rate": 1.4391483516483518e-05, + "loss": 0.1063, + "step": 25923 + }, + { + "epoch": 71.21978021978022, + "grad_norm": 2.4802067279815674, + "learning_rate": 1.439010989010989e-05, + "loss": 0.0338, + "step": 25924 + }, + { + "epoch": 71.22252747252747, + "grad_norm": 2.8664638996124268, + "learning_rate": 1.4388736263736263e-05, + "loss": 0.0315, + "step": 25925 + }, + { + "epoch": 71.22527472527473, + "grad_norm": 10.897475242614746, + "learning_rate": 1.438736263736264e-05, + "loss": 0.2122, + "step": 25926 + }, + { + "epoch": 71.22802197802197, + "grad_norm": 2.9078407287597656, + "learning_rate": 1.4385989010989011e-05, + "loss": 0.0387, + "step": 25927 + }, + { + "epoch": 71.23076923076923, + "grad_norm": 4.642323970794678, + "learning_rate": 1.4384615384615385e-05, + "loss": 0.0497, + "step": 25928 + }, + { + "epoch": 71.23351648351648, + "grad_norm": 17.239261627197266, + "learning_rate": 1.4383241758241758e-05, + "loss": 0.4063, + "step": 25929 + }, + { + "epoch": 71.23626373626374, + "grad_norm": 12.376534461975098, + "learning_rate": 1.4381868131868132e-05, + "loss": 0.2234, + "step": 25930 + }, + { + "epoch": 71.23901098901099, + "grad_norm": 10.718749046325684, + "learning_rate": 1.4380494505494507e-05, + "loss": 0.1499, + "step": 25931 + }, + { + "epoch": 71.24175824175825, + "grad_norm": 15.472851753234863, + "learning_rate": 1.437912087912088e-05, + "loss": 0.3017, + "step": 25932 + }, + { + "epoch": 71.24450549450549, + "grad_norm": 5.8107380867004395, + "learning_rate": 1.4377747252747254e-05, + "loss": 0.0788, + "step": 25933 + }, + { + "epoch": 71.24725274725274, + "grad_norm": 7.790698528289795, + "learning_rate": 1.4376373626373627e-05, + "loss": 0.1141, + "step": 25934 + }, + { + "epoch": 71.25, + "grad_norm": 14.786703109741211, + "learning_rate": 1.4374999999999999e-05, + "loss": 0.1984, + "step": 25935 + }, + { + "epoch": 71.25274725274726, + "grad_norm": 14.587124824523926, + "learning_rate": 1.4373626373626376e-05, + "loss": 0.3022, + "step": 25936 + }, + { + "epoch": 71.25549450549451, + "grad_norm": 17.26869010925293, + "learning_rate": 1.4372252747252749e-05, + "loss": 0.4506, + "step": 25937 + }, + { + "epoch": 71.25824175824175, + "grad_norm": 26.612152099609375, + "learning_rate": 1.4370879120879122e-05, + "loss": 0.5518, + "step": 25938 + }, + { + "epoch": 71.26098901098901, + "grad_norm": 4.311793327331543, + "learning_rate": 1.4369505494505494e-05, + "loss": 0.0671, + "step": 25939 + }, + { + "epoch": 71.26373626373626, + "grad_norm": 15.747961044311523, + "learning_rate": 1.4368131868131867e-05, + "loss": 0.1836, + "step": 25940 + }, + { + "epoch": 71.26648351648352, + "grad_norm": 6.672854900360107, + "learning_rate": 1.4366758241758244e-05, + "loss": 0.0946, + "step": 25941 + }, + { + "epoch": 71.26923076923077, + "grad_norm": 22.738557815551758, + "learning_rate": 1.4365384615384616e-05, + "loss": 0.2993, + "step": 25942 + }, + { + "epoch": 71.27197802197803, + "grad_norm": 9.7010498046875, + "learning_rate": 1.436401098901099e-05, + "loss": 0.1277, + "step": 25943 + }, + { + "epoch": 71.27472527472527, + "grad_norm": 2.391082525253296, + "learning_rate": 1.4362637362637363e-05, + "loss": 0.0311, + "step": 25944 + }, + { + "epoch": 71.27747252747253, + "grad_norm": 3.5980823040008545, + "learning_rate": 1.4361263736263736e-05, + "loss": 0.0594, + "step": 25945 + }, + { + "epoch": 71.28021978021978, + "grad_norm": 10.4500150680542, + "learning_rate": 1.4359890109890111e-05, + "loss": 0.2004, + "step": 25946 + }, + { + "epoch": 71.28296703296704, + "grad_norm": 13.77543830871582, + "learning_rate": 1.4358516483516485e-05, + "loss": 0.2924, + "step": 25947 + }, + { + "epoch": 71.28571428571429, + "grad_norm": 11.82313346862793, + "learning_rate": 1.4357142857142858e-05, + "loss": 0.3062, + "step": 25948 + }, + { + "epoch": 71.28846153846153, + "grad_norm": 10.494369506835938, + "learning_rate": 1.4355769230769232e-05, + "loss": 0.2956, + "step": 25949 + }, + { + "epoch": 71.29120879120879, + "grad_norm": 26.198217391967773, + "learning_rate": 1.4354395604395603e-05, + "loss": 0.7919, + "step": 25950 + }, + { + "epoch": 71.29395604395604, + "grad_norm": 11.427735328674316, + "learning_rate": 1.435302197802198e-05, + "loss": 0.3965, + "step": 25951 + }, + { + "epoch": 71.2967032967033, + "grad_norm": 7.769602298736572, + "learning_rate": 1.4351648351648353e-05, + "loss": 0.08, + "step": 25952 + }, + { + "epoch": 71.29945054945055, + "grad_norm": 7.4774017333984375, + "learning_rate": 1.4350274725274727e-05, + "loss": 0.115, + "step": 25953 + }, + { + "epoch": 71.3021978021978, + "grad_norm": 14.684649467468262, + "learning_rate": 1.4348901098901099e-05, + "loss": 0.3612, + "step": 25954 + }, + { + "epoch": 71.30494505494505, + "grad_norm": 6.748587131500244, + "learning_rate": 1.4347527472527472e-05, + "loss": 0.1022, + "step": 25955 + }, + { + "epoch": 71.3076923076923, + "grad_norm": 15.187663078308105, + "learning_rate": 1.4346153846153849e-05, + "loss": 0.7149, + "step": 25956 + }, + { + "epoch": 71.31043956043956, + "grad_norm": 9.185528755187988, + "learning_rate": 1.434478021978022e-05, + "loss": 0.1747, + "step": 25957 + }, + { + "epoch": 71.31318681318682, + "grad_norm": 22.41590118408203, + "learning_rate": 1.4343406593406594e-05, + "loss": 0.5155, + "step": 25958 + }, + { + "epoch": 71.31593406593407, + "grad_norm": 10.076496124267578, + "learning_rate": 1.4342032967032967e-05, + "loss": 0.1703, + "step": 25959 + }, + { + "epoch": 71.31868131868131, + "grad_norm": 6.838897705078125, + "learning_rate": 1.434065934065934e-05, + "loss": 0.1166, + "step": 25960 + }, + { + "epoch": 71.32142857142857, + "grad_norm": 10.997161865234375, + "learning_rate": 1.4339285714285716e-05, + "loss": 0.2003, + "step": 25961 + }, + { + "epoch": 71.32417582417582, + "grad_norm": 15.199923515319824, + "learning_rate": 1.433791208791209e-05, + "loss": 0.4054, + "step": 25962 + }, + { + "epoch": 71.32692307692308, + "grad_norm": 12.364181518554688, + "learning_rate": 1.4336538461538463e-05, + "loss": 0.1857, + "step": 25963 + }, + { + "epoch": 71.32967032967034, + "grad_norm": 16.452234268188477, + "learning_rate": 1.4335164835164836e-05, + "loss": 0.2808, + "step": 25964 + }, + { + "epoch": 71.33241758241758, + "grad_norm": 20.682804107666016, + "learning_rate": 1.4333791208791208e-05, + "loss": 0.3398, + "step": 25965 + }, + { + "epoch": 71.33516483516483, + "grad_norm": 8.948836326599121, + "learning_rate": 1.4332417582417585e-05, + "loss": 0.1162, + "step": 25966 + }, + { + "epoch": 71.33791208791209, + "grad_norm": 9.174388885498047, + "learning_rate": 1.4331043956043958e-05, + "loss": 0.1081, + "step": 25967 + }, + { + "epoch": 71.34065934065934, + "grad_norm": 2.319936990737915, + "learning_rate": 1.4329670329670331e-05, + "loss": 0.025, + "step": 25968 + }, + { + "epoch": 71.3434065934066, + "grad_norm": 4.931246280670166, + "learning_rate": 1.4328296703296703e-05, + "loss": 0.1057, + "step": 25969 + }, + { + "epoch": 71.34615384615384, + "grad_norm": 9.906548500061035, + "learning_rate": 1.4326923076923076e-05, + "loss": 0.2115, + "step": 25970 + }, + { + "epoch": 71.3489010989011, + "grad_norm": 8.900980949401855, + "learning_rate": 1.4325549450549453e-05, + "loss": 0.1566, + "step": 25971 + }, + { + "epoch": 71.35164835164835, + "grad_norm": 12.39486312866211, + "learning_rate": 1.4324175824175825e-05, + "loss": 0.1649, + "step": 25972 + }, + { + "epoch": 71.3543956043956, + "grad_norm": 11.418166160583496, + "learning_rate": 1.4322802197802198e-05, + "loss": 0.1381, + "step": 25973 + }, + { + "epoch": 71.35714285714286, + "grad_norm": 10.960258483886719, + "learning_rate": 1.4321428571428572e-05, + "loss": 0.2506, + "step": 25974 + }, + { + "epoch": 71.35989010989012, + "grad_norm": 20.391141891479492, + "learning_rate": 1.4320054945054945e-05, + "loss": 0.4331, + "step": 25975 + }, + { + "epoch": 71.36263736263736, + "grad_norm": 17.98860740661621, + "learning_rate": 1.431868131868132e-05, + "loss": 0.2949, + "step": 25976 + }, + { + "epoch": 71.36538461538461, + "grad_norm": 18.460800170898438, + "learning_rate": 1.4317307692307694e-05, + "loss": 0.2879, + "step": 25977 + }, + { + "epoch": 71.36813186813187, + "grad_norm": 5.794438362121582, + "learning_rate": 1.4315934065934067e-05, + "loss": 0.0933, + "step": 25978 + }, + { + "epoch": 71.37087912087912, + "grad_norm": 12.50103759765625, + "learning_rate": 1.431456043956044e-05, + "loss": 0.0847, + "step": 25979 + }, + { + "epoch": 71.37362637362638, + "grad_norm": 1.3938312530517578, + "learning_rate": 1.4313186813186812e-05, + "loss": 0.0176, + "step": 25980 + }, + { + "epoch": 71.37637362637362, + "grad_norm": 11.605292320251465, + "learning_rate": 1.4311813186813189e-05, + "loss": 0.2707, + "step": 25981 + }, + { + "epoch": 71.37912087912088, + "grad_norm": 14.121129989624023, + "learning_rate": 1.4310439560439562e-05, + "loss": 0.3752, + "step": 25982 + }, + { + "epoch": 71.38186813186813, + "grad_norm": 13.200969696044922, + "learning_rate": 1.4309065934065936e-05, + "loss": 0.3092, + "step": 25983 + }, + { + "epoch": 71.38461538461539, + "grad_norm": 7.8222808837890625, + "learning_rate": 1.4307692307692308e-05, + "loss": 0.1796, + "step": 25984 + }, + { + "epoch": 71.38736263736264, + "grad_norm": 10.71796989440918, + "learning_rate": 1.4306318681318681e-05, + "loss": 0.138, + "step": 25985 + }, + { + "epoch": 71.39010989010988, + "grad_norm": 18.739961624145508, + "learning_rate": 1.4304945054945058e-05, + "loss": 0.236, + "step": 25986 + }, + { + "epoch": 71.39285714285714, + "grad_norm": 8.727302551269531, + "learning_rate": 1.430357142857143e-05, + "loss": 0.0958, + "step": 25987 + }, + { + "epoch": 71.3956043956044, + "grad_norm": 13.761465072631836, + "learning_rate": 1.4302197802197803e-05, + "loss": 0.1105, + "step": 25988 + }, + { + "epoch": 71.39835164835165, + "grad_norm": 14.372446060180664, + "learning_rate": 1.4300824175824176e-05, + "loss": 0.5012, + "step": 25989 + }, + { + "epoch": 71.4010989010989, + "grad_norm": 7.852530002593994, + "learning_rate": 1.429945054945055e-05, + "loss": 0.151, + "step": 25990 + }, + { + "epoch": 71.40384615384616, + "grad_norm": 12.011223793029785, + "learning_rate": 1.4298076923076925e-05, + "loss": 0.3106, + "step": 25991 + }, + { + "epoch": 71.4065934065934, + "grad_norm": 14.992741584777832, + "learning_rate": 1.4296703296703298e-05, + "loss": 0.3408, + "step": 25992 + }, + { + "epoch": 71.40934065934066, + "grad_norm": 14.527290344238281, + "learning_rate": 1.4295329670329672e-05, + "loss": 0.3151, + "step": 25993 + }, + { + "epoch": 71.41208791208791, + "grad_norm": 3.42028546333313, + "learning_rate": 1.4293956043956045e-05, + "loss": 0.0529, + "step": 25994 + }, + { + "epoch": 71.41483516483517, + "grad_norm": 3.91377592086792, + "learning_rate": 1.4292582417582417e-05, + "loss": 0.0445, + "step": 25995 + }, + { + "epoch": 71.41758241758242, + "grad_norm": 7.423717498779297, + "learning_rate": 1.429120879120879e-05, + "loss": 0.1187, + "step": 25996 + }, + { + "epoch": 71.42032967032966, + "grad_norm": 6.40971565246582, + "learning_rate": 1.4289835164835167e-05, + "loss": 0.1123, + "step": 25997 + }, + { + "epoch": 71.42307692307692, + "grad_norm": 8.68679141998291, + "learning_rate": 1.4288461538461539e-05, + "loss": 0.1305, + "step": 25998 + }, + { + "epoch": 71.42582417582418, + "grad_norm": 18.72132110595703, + "learning_rate": 1.4287087912087912e-05, + "loss": 0.1606, + "step": 25999 + }, + { + "epoch": 71.42857142857143, + "grad_norm": 4.882715702056885, + "learning_rate": 1.4285714285714285e-05, + "loss": 0.0864, + "step": 26000 + }, + { + "epoch": 71.43131868131869, + "grad_norm": 4.5153093338012695, + "learning_rate": 1.4284340659340659e-05, + "loss": 0.0573, + "step": 26001 + }, + { + "epoch": 71.43406593406593, + "grad_norm": 0.8404911756515503, + "learning_rate": 1.4282967032967034e-05, + "loss": 0.01, + "step": 26002 + }, + { + "epoch": 71.43681318681318, + "grad_norm": 2.4770193099975586, + "learning_rate": 1.4281593406593407e-05, + "loss": 0.0433, + "step": 26003 + }, + { + "epoch": 71.43956043956044, + "grad_norm": 26.269866943359375, + "learning_rate": 1.428021978021978e-05, + "loss": 0.3987, + "step": 26004 + }, + { + "epoch": 71.4423076923077, + "grad_norm": 19.885154724121094, + "learning_rate": 1.4278846153846154e-05, + "loss": 0.366, + "step": 26005 + }, + { + "epoch": 71.44505494505495, + "grad_norm": 7.118767738342285, + "learning_rate": 1.4277472527472528e-05, + "loss": 0.2165, + "step": 26006 + }, + { + "epoch": 71.4478021978022, + "grad_norm": 9.784117698669434, + "learning_rate": 1.4276098901098903e-05, + "loss": 0.1124, + "step": 26007 + }, + { + "epoch": 71.45054945054945, + "grad_norm": 11.298981666564941, + "learning_rate": 1.4274725274725276e-05, + "loss": 0.1904, + "step": 26008 + }, + { + "epoch": 71.4532967032967, + "grad_norm": 13.09029769897461, + "learning_rate": 1.427335164835165e-05, + "loss": 0.364, + "step": 26009 + }, + { + "epoch": 71.45604395604396, + "grad_norm": 3.8698198795318604, + "learning_rate": 1.4271978021978021e-05, + "loss": 0.0643, + "step": 26010 + }, + { + "epoch": 71.45879120879121, + "grad_norm": 2.368283748626709, + "learning_rate": 1.4270604395604395e-05, + "loss": 0.0243, + "step": 26011 + }, + { + "epoch": 71.46153846153847, + "grad_norm": 8.339917182922363, + "learning_rate": 1.4269230769230771e-05, + "loss": 0.2058, + "step": 26012 + }, + { + "epoch": 71.46428571428571, + "grad_norm": 16.786483764648438, + "learning_rate": 1.4267857142857143e-05, + "loss": 0.4474, + "step": 26013 + }, + { + "epoch": 71.46703296703296, + "grad_norm": 9.012896537780762, + "learning_rate": 1.4266483516483517e-05, + "loss": 0.1468, + "step": 26014 + }, + { + "epoch": 71.46978021978022, + "grad_norm": 12.795011520385742, + "learning_rate": 1.426510989010989e-05, + "loss": 0.1915, + "step": 26015 + }, + { + "epoch": 71.47252747252747, + "grad_norm": 5.502204418182373, + "learning_rate": 1.4263736263736263e-05, + "loss": 0.0634, + "step": 26016 + }, + { + "epoch": 71.47527472527473, + "grad_norm": 22.191238403320312, + "learning_rate": 1.4262362637362638e-05, + "loss": 0.7149, + "step": 26017 + }, + { + "epoch": 71.47802197802197, + "grad_norm": 14.571710586547852, + "learning_rate": 1.4260989010989012e-05, + "loss": 0.2381, + "step": 26018 + }, + { + "epoch": 71.48076923076923, + "grad_norm": 6.663760185241699, + "learning_rate": 1.4259615384615385e-05, + "loss": 0.1173, + "step": 26019 + }, + { + "epoch": 71.48351648351648, + "grad_norm": 9.52772331237793, + "learning_rate": 1.4258241758241759e-05, + "loss": 0.1695, + "step": 26020 + }, + { + "epoch": 71.48626373626374, + "grad_norm": 20.158451080322266, + "learning_rate": 1.4256868131868132e-05, + "loss": 0.3578, + "step": 26021 + }, + { + "epoch": 71.48901098901099, + "grad_norm": 10.327688217163086, + "learning_rate": 1.4255494505494507e-05, + "loss": 0.2875, + "step": 26022 + }, + { + "epoch": 71.49175824175825, + "grad_norm": 9.673733711242676, + "learning_rate": 1.425412087912088e-05, + "loss": 0.1603, + "step": 26023 + }, + { + "epoch": 71.49450549450549, + "grad_norm": 22.675779342651367, + "learning_rate": 1.4252747252747254e-05, + "loss": 0.549, + "step": 26024 + }, + { + "epoch": 71.49725274725274, + "grad_norm": 17.234182357788086, + "learning_rate": 1.4251373626373626e-05, + "loss": 0.3519, + "step": 26025 + }, + { + "epoch": 71.5, + "grad_norm": 12.456804275512695, + "learning_rate": 1.4249999999999999e-05, + "loss": 0.2189, + "step": 26026 + }, + { + "epoch": 71.50274725274726, + "grad_norm": 2.4694881439208984, + "learning_rate": 1.4248626373626376e-05, + "loss": 0.0437, + "step": 26027 + }, + { + "epoch": 71.50549450549451, + "grad_norm": 15.971388816833496, + "learning_rate": 1.4247252747252748e-05, + "loss": 0.2238, + "step": 26028 + }, + { + "epoch": 71.50824175824175, + "grad_norm": 8.280900001525879, + "learning_rate": 1.4245879120879121e-05, + "loss": 0.1194, + "step": 26029 + }, + { + "epoch": 71.51098901098901, + "grad_norm": 5.566013813018799, + "learning_rate": 1.4244505494505494e-05, + "loss": 0.0691, + "step": 26030 + }, + { + "epoch": 71.51373626373626, + "grad_norm": 16.52488136291504, + "learning_rate": 1.4243131868131868e-05, + "loss": 0.3537, + "step": 26031 + }, + { + "epoch": 71.51648351648352, + "grad_norm": 7.54959774017334, + "learning_rate": 1.4241758241758243e-05, + "loss": 0.1933, + "step": 26032 + }, + { + "epoch": 71.51923076923077, + "grad_norm": 7.922191143035889, + "learning_rate": 1.4240384615384616e-05, + "loss": 0.0993, + "step": 26033 + }, + { + "epoch": 71.52197802197803, + "grad_norm": 15.456591606140137, + "learning_rate": 1.423901098901099e-05, + "loss": 0.2563, + "step": 26034 + }, + { + "epoch": 71.52472527472527, + "grad_norm": 17.816822052001953, + "learning_rate": 1.4237637362637363e-05, + "loss": 0.3159, + "step": 26035 + }, + { + "epoch": 71.52747252747253, + "grad_norm": 7.338577747344971, + "learning_rate": 1.4236263736263737e-05, + "loss": 0.1844, + "step": 26036 + }, + { + "epoch": 71.53021978021978, + "grad_norm": 4.871097087860107, + "learning_rate": 1.4234890109890112e-05, + "loss": 0.0558, + "step": 26037 + }, + { + "epoch": 71.53296703296704, + "grad_norm": 13.102914810180664, + "learning_rate": 1.4233516483516485e-05, + "loss": 0.2412, + "step": 26038 + }, + { + "epoch": 71.53571428571429, + "grad_norm": 6.366760730743408, + "learning_rate": 1.4232142857142858e-05, + "loss": 0.0922, + "step": 26039 + }, + { + "epoch": 71.53846153846153, + "grad_norm": 4.47410774230957, + "learning_rate": 1.423076923076923e-05, + "loss": 0.0786, + "step": 26040 + }, + { + "epoch": 71.54120879120879, + "grad_norm": 21.019149780273438, + "learning_rate": 1.4229395604395604e-05, + "loss": 0.6024, + "step": 26041 + }, + { + "epoch": 71.54395604395604, + "grad_norm": 4.7660017013549805, + "learning_rate": 1.422802197802198e-05, + "loss": 0.0651, + "step": 26042 + }, + { + "epoch": 71.5467032967033, + "grad_norm": 8.512272834777832, + "learning_rate": 1.4226648351648352e-05, + "loss": 0.1648, + "step": 26043 + }, + { + "epoch": 71.54945054945055, + "grad_norm": 8.759800910949707, + "learning_rate": 1.4225274725274725e-05, + "loss": 0.3141, + "step": 26044 + }, + { + "epoch": 71.5521978021978, + "grad_norm": 17.037290573120117, + "learning_rate": 1.4223901098901099e-05, + "loss": 0.2101, + "step": 26045 + }, + { + "epoch": 71.55494505494505, + "grad_norm": 14.147469520568848, + "learning_rate": 1.4222527472527472e-05, + "loss": 0.1641, + "step": 26046 + }, + { + "epoch": 71.5576923076923, + "grad_norm": 9.703547477722168, + "learning_rate": 1.4221153846153847e-05, + "loss": 0.1717, + "step": 26047 + }, + { + "epoch": 71.56043956043956, + "grad_norm": 6.842838764190674, + "learning_rate": 1.421978021978022e-05, + "loss": 0.1012, + "step": 26048 + }, + { + "epoch": 71.56318681318682, + "grad_norm": 20.904321670532227, + "learning_rate": 1.4218406593406594e-05, + "loss": 0.4835, + "step": 26049 + }, + { + "epoch": 71.56593406593407, + "grad_norm": 18.920652389526367, + "learning_rate": 1.4217032967032968e-05, + "loss": 0.2493, + "step": 26050 + }, + { + "epoch": 71.56868131868131, + "grad_norm": 15.686016082763672, + "learning_rate": 1.4215659340659341e-05, + "loss": 0.3202, + "step": 26051 + }, + { + "epoch": 71.57142857142857, + "grad_norm": 8.723128318786621, + "learning_rate": 1.4214285714285716e-05, + "loss": 0.1072, + "step": 26052 + }, + { + "epoch": 71.57417582417582, + "grad_norm": 3.444995641708374, + "learning_rate": 1.421291208791209e-05, + "loss": 0.0458, + "step": 26053 + }, + { + "epoch": 71.57692307692308, + "grad_norm": 13.999879837036133, + "learning_rate": 1.4211538461538463e-05, + "loss": 0.3226, + "step": 26054 + }, + { + "epoch": 71.57967032967034, + "grad_norm": 12.609187126159668, + "learning_rate": 1.4210164835164835e-05, + "loss": 0.2583, + "step": 26055 + }, + { + "epoch": 71.58241758241758, + "grad_norm": 12.50551986694336, + "learning_rate": 1.4208791208791208e-05, + "loss": 0.2878, + "step": 26056 + }, + { + "epoch": 71.58516483516483, + "grad_norm": 11.924158096313477, + "learning_rate": 1.4207417582417585e-05, + "loss": 0.1599, + "step": 26057 + }, + { + "epoch": 71.58791208791209, + "grad_norm": 5.000182628631592, + "learning_rate": 1.4206043956043957e-05, + "loss": 0.0776, + "step": 26058 + }, + { + "epoch": 71.59065934065934, + "grad_norm": 14.147698402404785, + "learning_rate": 1.420467032967033e-05, + "loss": 0.0896, + "step": 26059 + }, + { + "epoch": 71.5934065934066, + "grad_norm": 11.242280006408691, + "learning_rate": 1.4203296703296703e-05, + "loss": 0.0913, + "step": 26060 + }, + { + "epoch": 71.59615384615384, + "grad_norm": 3.1941049098968506, + "learning_rate": 1.4201923076923077e-05, + "loss": 0.0625, + "step": 26061 + }, + { + "epoch": 71.5989010989011, + "grad_norm": 10.25326156616211, + "learning_rate": 1.4200549450549452e-05, + "loss": 0.1586, + "step": 26062 + }, + { + "epoch": 71.60164835164835, + "grad_norm": 5.894521236419678, + "learning_rate": 1.4199175824175825e-05, + "loss": 0.1127, + "step": 26063 + }, + { + "epoch": 71.6043956043956, + "grad_norm": 24.377994537353516, + "learning_rate": 1.4197802197802199e-05, + "loss": 0.5062, + "step": 26064 + }, + { + "epoch": 71.60714285714286, + "grad_norm": 22.501365661621094, + "learning_rate": 1.4196428571428572e-05, + "loss": 0.6547, + "step": 26065 + }, + { + "epoch": 71.60989010989012, + "grad_norm": 9.421849250793457, + "learning_rate": 1.4195054945054946e-05, + "loss": 0.2442, + "step": 26066 + }, + { + "epoch": 71.61263736263736, + "grad_norm": 4.721630096435547, + "learning_rate": 1.419368131868132e-05, + "loss": 0.0786, + "step": 26067 + }, + { + "epoch": 71.61538461538461, + "grad_norm": 7.113814353942871, + "learning_rate": 1.4192307692307694e-05, + "loss": 0.1327, + "step": 26068 + }, + { + "epoch": 71.61813186813187, + "grad_norm": 19.007122039794922, + "learning_rate": 1.4190934065934067e-05, + "loss": 0.4292, + "step": 26069 + }, + { + "epoch": 71.62087912087912, + "grad_norm": 12.25094985961914, + "learning_rate": 1.418956043956044e-05, + "loss": 0.1652, + "step": 26070 + }, + { + "epoch": 71.62362637362638, + "grad_norm": 5.812304973602295, + "learning_rate": 1.4188186813186813e-05, + "loss": 0.1083, + "step": 26071 + }, + { + "epoch": 71.62637362637362, + "grad_norm": 5.059784412384033, + "learning_rate": 1.418681318681319e-05, + "loss": 0.0962, + "step": 26072 + }, + { + "epoch": 71.62912087912088, + "grad_norm": 13.569324493408203, + "learning_rate": 1.4185439560439561e-05, + "loss": 0.1725, + "step": 26073 + }, + { + "epoch": 71.63186813186813, + "grad_norm": 6.4561448097229, + "learning_rate": 1.4184065934065934e-05, + "loss": 0.0833, + "step": 26074 + }, + { + "epoch": 71.63461538461539, + "grad_norm": 27.941604614257812, + "learning_rate": 1.4182692307692308e-05, + "loss": 0.3409, + "step": 26075 + }, + { + "epoch": 71.63736263736264, + "grad_norm": 16.16647720336914, + "learning_rate": 1.4181318681318681e-05, + "loss": 0.3389, + "step": 26076 + }, + { + "epoch": 71.64010989010988, + "grad_norm": 14.719359397888184, + "learning_rate": 1.4179945054945056e-05, + "loss": 0.4621, + "step": 26077 + }, + { + "epoch": 71.64285714285714, + "grad_norm": 7.670405387878418, + "learning_rate": 1.417857142857143e-05, + "loss": 0.162, + "step": 26078 + }, + { + "epoch": 71.6456043956044, + "grad_norm": 7.144047260284424, + "learning_rate": 1.4177197802197803e-05, + "loss": 0.1333, + "step": 26079 + }, + { + "epoch": 71.64835164835165, + "grad_norm": 11.391530990600586, + "learning_rate": 1.4175824175824177e-05, + "loss": 0.3778, + "step": 26080 + }, + { + "epoch": 71.6510989010989, + "grad_norm": 19.748645782470703, + "learning_rate": 1.417445054945055e-05, + "loss": 0.619, + "step": 26081 + }, + { + "epoch": 71.65384615384616, + "grad_norm": 11.576912879943848, + "learning_rate": 1.4173076923076925e-05, + "loss": 0.1586, + "step": 26082 + }, + { + "epoch": 71.6565934065934, + "grad_norm": 6.120386600494385, + "learning_rate": 1.4171703296703299e-05, + "loss": 0.0757, + "step": 26083 + }, + { + "epoch": 71.65934065934066, + "grad_norm": 20.620290756225586, + "learning_rate": 1.4170329670329672e-05, + "loss": 0.5068, + "step": 26084 + }, + { + "epoch": 71.66208791208791, + "grad_norm": 11.086591720581055, + "learning_rate": 1.4168956043956044e-05, + "loss": 0.2295, + "step": 26085 + }, + { + "epoch": 71.66483516483517, + "grad_norm": 21.08681297302246, + "learning_rate": 1.4167582417582417e-05, + "loss": 0.395, + "step": 26086 + }, + { + "epoch": 71.66758241758242, + "grad_norm": 2.764204978942871, + "learning_rate": 1.4166208791208794e-05, + "loss": 0.0352, + "step": 26087 + }, + { + "epoch": 71.67032967032966, + "grad_norm": 12.395334243774414, + "learning_rate": 1.4164835164835166e-05, + "loss": 0.3079, + "step": 26088 + }, + { + "epoch": 71.67307692307692, + "grad_norm": 3.7978668212890625, + "learning_rate": 1.4163461538461539e-05, + "loss": 0.0313, + "step": 26089 + }, + { + "epoch": 71.67582417582418, + "grad_norm": 7.1963210105896, + "learning_rate": 1.4162087912087912e-05, + "loss": 0.1672, + "step": 26090 + }, + { + "epoch": 71.67857142857143, + "grad_norm": 10.1357421875, + "learning_rate": 1.4160714285714286e-05, + "loss": 0.2595, + "step": 26091 + }, + { + "epoch": 71.68131868131869, + "grad_norm": 13.399980545043945, + "learning_rate": 1.4159340659340661e-05, + "loss": 0.2813, + "step": 26092 + }, + { + "epoch": 71.68406593406593, + "grad_norm": 10.5325345993042, + "learning_rate": 1.4157967032967034e-05, + "loss": 0.2196, + "step": 26093 + }, + { + "epoch": 71.68681318681318, + "grad_norm": 12.660348892211914, + "learning_rate": 1.4156593406593408e-05, + "loss": 0.4774, + "step": 26094 + }, + { + "epoch": 71.68956043956044, + "grad_norm": 9.882047653198242, + "learning_rate": 1.4155219780219781e-05, + "loss": 0.1103, + "step": 26095 + }, + { + "epoch": 71.6923076923077, + "grad_norm": 19.32573699951172, + "learning_rate": 1.4153846153846153e-05, + "loss": 0.3666, + "step": 26096 + }, + { + "epoch": 71.69505494505495, + "grad_norm": 13.18693733215332, + "learning_rate": 1.415247252747253e-05, + "loss": 0.2309, + "step": 26097 + }, + { + "epoch": 71.6978021978022, + "grad_norm": 7.49739408493042, + "learning_rate": 1.4151098901098903e-05, + "loss": 0.0695, + "step": 26098 + }, + { + "epoch": 71.70054945054945, + "grad_norm": 7.5013556480407715, + "learning_rate": 1.4149725274725276e-05, + "loss": 0.0739, + "step": 26099 + }, + { + "epoch": 71.7032967032967, + "grad_norm": 13.649456977844238, + "learning_rate": 1.4148351648351648e-05, + "loss": 0.5559, + "step": 26100 + }, + { + "epoch": 71.70604395604396, + "grad_norm": 15.794013023376465, + "learning_rate": 1.4146978021978022e-05, + "loss": 0.2255, + "step": 26101 + }, + { + "epoch": 71.70879120879121, + "grad_norm": 8.438089370727539, + "learning_rate": 1.4145604395604398e-05, + "loss": 0.1544, + "step": 26102 + }, + { + "epoch": 71.71153846153847, + "grad_norm": 22.563108444213867, + "learning_rate": 1.414423076923077e-05, + "loss": 0.5994, + "step": 26103 + }, + { + "epoch": 71.71428571428571, + "grad_norm": 12.5651216506958, + "learning_rate": 1.4142857142857143e-05, + "loss": 0.3736, + "step": 26104 + }, + { + "epoch": 71.71703296703296, + "grad_norm": 5.907815456390381, + "learning_rate": 1.4141483516483517e-05, + "loss": 0.1596, + "step": 26105 + }, + { + "epoch": 71.71978021978022, + "grad_norm": 10.000792503356934, + "learning_rate": 1.414010989010989e-05, + "loss": 0.1722, + "step": 26106 + }, + { + "epoch": 71.72252747252747, + "grad_norm": 14.65341854095459, + "learning_rate": 1.4138736263736264e-05, + "loss": 0.2585, + "step": 26107 + }, + { + "epoch": 71.72527472527473, + "grad_norm": 7.162921905517578, + "learning_rate": 1.4137362637362639e-05, + "loss": 0.1179, + "step": 26108 + }, + { + "epoch": 71.72802197802197, + "grad_norm": 20.02005958557129, + "learning_rate": 1.4135989010989012e-05, + "loss": 0.5861, + "step": 26109 + }, + { + "epoch": 71.73076923076923, + "grad_norm": 11.454760551452637, + "learning_rate": 1.4134615384615386e-05, + "loss": 0.2037, + "step": 26110 + }, + { + "epoch": 71.73351648351648, + "grad_norm": 25.27041244506836, + "learning_rate": 1.4133241758241757e-05, + "loss": 0.6917, + "step": 26111 + }, + { + "epoch": 71.73626373626374, + "grad_norm": 11.603829383850098, + "learning_rate": 1.413186813186813e-05, + "loss": 0.1287, + "step": 26112 + }, + { + "epoch": 71.73901098901099, + "grad_norm": 8.746906280517578, + "learning_rate": 1.4130494505494508e-05, + "loss": 0.2083, + "step": 26113 + }, + { + "epoch": 71.74175824175825, + "grad_norm": 3.9306771755218506, + "learning_rate": 1.4129120879120881e-05, + "loss": 0.0626, + "step": 26114 + }, + { + "epoch": 71.74450549450549, + "grad_norm": 3.2656185626983643, + "learning_rate": 1.4127747252747253e-05, + "loss": 0.0415, + "step": 26115 + }, + { + "epoch": 71.74725274725274, + "grad_norm": 3.7034664154052734, + "learning_rate": 1.4126373626373626e-05, + "loss": 0.0867, + "step": 26116 + }, + { + "epoch": 71.75, + "grad_norm": 8.643921852111816, + "learning_rate": 1.4125e-05, + "loss": 0.1904, + "step": 26117 + }, + { + "epoch": 71.75274725274726, + "grad_norm": 27.442859649658203, + "learning_rate": 1.4123626373626375e-05, + "loss": 1.1365, + "step": 26118 + }, + { + "epoch": 71.75549450549451, + "grad_norm": 5.665157794952393, + "learning_rate": 1.4122252747252748e-05, + "loss": 0.1192, + "step": 26119 + }, + { + "epoch": 71.75824175824175, + "grad_norm": 8.84882926940918, + "learning_rate": 1.4120879120879121e-05, + "loss": 0.1455, + "step": 26120 + }, + { + "epoch": 71.76098901098901, + "grad_norm": 17.284496307373047, + "learning_rate": 1.4119505494505495e-05, + "loss": 0.2621, + "step": 26121 + }, + { + "epoch": 71.76373626373626, + "grad_norm": 4.576023578643799, + "learning_rate": 1.4118131868131868e-05, + "loss": 0.0475, + "step": 26122 + }, + { + "epoch": 71.76648351648352, + "grad_norm": 18.074583053588867, + "learning_rate": 1.4116758241758243e-05, + "loss": 0.5746, + "step": 26123 + }, + { + "epoch": 71.76923076923077, + "grad_norm": 5.102516174316406, + "learning_rate": 1.4115384615384617e-05, + "loss": 0.1189, + "step": 26124 + }, + { + "epoch": 71.77197802197803, + "grad_norm": 10.753949165344238, + "learning_rate": 1.411401098901099e-05, + "loss": 0.1565, + "step": 26125 + }, + { + "epoch": 71.77472527472527, + "grad_norm": 15.474641799926758, + "learning_rate": 1.4112637362637362e-05, + "loss": 0.2768, + "step": 26126 + }, + { + "epoch": 71.77747252747253, + "grad_norm": 12.986185073852539, + "learning_rate": 1.4111263736263735e-05, + "loss": 0.2009, + "step": 26127 + }, + { + "epoch": 71.78021978021978, + "grad_norm": 11.172542572021484, + "learning_rate": 1.4109890109890112e-05, + "loss": 0.3031, + "step": 26128 + }, + { + "epoch": 71.78296703296704, + "grad_norm": 9.872291564941406, + "learning_rate": 1.4108516483516485e-05, + "loss": 0.2276, + "step": 26129 + }, + { + "epoch": 71.78571428571429, + "grad_norm": 7.135236740112305, + "learning_rate": 1.4107142857142857e-05, + "loss": 0.2621, + "step": 26130 + }, + { + "epoch": 71.78846153846153, + "grad_norm": 13.326945304870605, + "learning_rate": 1.410576923076923e-05, + "loss": 0.3941, + "step": 26131 + }, + { + "epoch": 71.79120879120879, + "grad_norm": 9.63210391998291, + "learning_rate": 1.4104395604395604e-05, + "loss": 0.193, + "step": 26132 + }, + { + "epoch": 71.79395604395604, + "grad_norm": 28.38395881652832, + "learning_rate": 1.4103021978021979e-05, + "loss": 1.0025, + "step": 26133 + }, + { + "epoch": 71.7967032967033, + "grad_norm": 18.13994598388672, + "learning_rate": 1.4101648351648352e-05, + "loss": 0.4145, + "step": 26134 + }, + { + "epoch": 71.79945054945055, + "grad_norm": 20.264690399169922, + "learning_rate": 1.4100274725274726e-05, + "loss": 0.3992, + "step": 26135 + }, + { + "epoch": 71.8021978021978, + "grad_norm": 13.322522163391113, + "learning_rate": 1.40989010989011e-05, + "loss": 0.2063, + "step": 26136 + }, + { + "epoch": 71.80494505494505, + "grad_norm": 6.46217155456543, + "learning_rate": 1.4097527472527473e-05, + "loss": 0.0955, + "step": 26137 + }, + { + "epoch": 71.8076923076923, + "grad_norm": 4.007540225982666, + "learning_rate": 1.4096153846153848e-05, + "loss": 0.0619, + "step": 26138 + }, + { + "epoch": 71.81043956043956, + "grad_norm": 10.03760051727295, + "learning_rate": 1.4094780219780221e-05, + "loss": 0.251, + "step": 26139 + }, + { + "epoch": 71.81318681318682, + "grad_norm": 7.910526752471924, + "learning_rate": 1.4093406593406595e-05, + "loss": 0.0759, + "step": 26140 + }, + { + "epoch": 71.81593406593407, + "grad_norm": 4.473047733306885, + "learning_rate": 1.4092032967032966e-05, + "loss": 0.0932, + "step": 26141 + }, + { + "epoch": 71.81868131868131, + "grad_norm": 7.753711223602295, + "learning_rate": 1.409065934065934e-05, + "loss": 0.0681, + "step": 26142 + }, + { + "epoch": 71.82142857142857, + "grad_norm": 7.249425411224365, + "learning_rate": 1.4089285714285716e-05, + "loss": 0.0976, + "step": 26143 + }, + { + "epoch": 71.82417582417582, + "grad_norm": 8.16180419921875, + "learning_rate": 1.408791208791209e-05, + "loss": 0.1736, + "step": 26144 + }, + { + "epoch": 71.82692307692308, + "grad_norm": 15.342158317565918, + "learning_rate": 1.4086538461538462e-05, + "loss": 0.3573, + "step": 26145 + }, + { + "epoch": 71.82967032967034, + "grad_norm": 2.3065202236175537, + "learning_rate": 1.4085164835164835e-05, + "loss": 0.0185, + "step": 26146 + }, + { + "epoch": 71.83241758241758, + "grad_norm": 13.320291519165039, + "learning_rate": 1.4083791208791208e-05, + "loss": 0.2977, + "step": 26147 + }, + { + "epoch": 71.83516483516483, + "grad_norm": 10.881182670593262, + "learning_rate": 1.4082417582417584e-05, + "loss": 0.2186, + "step": 26148 + }, + { + "epoch": 71.83791208791209, + "grad_norm": 15.067885398864746, + "learning_rate": 1.4081043956043957e-05, + "loss": 0.3578, + "step": 26149 + }, + { + "epoch": 71.84065934065934, + "grad_norm": 6.346212387084961, + "learning_rate": 1.407967032967033e-05, + "loss": 0.0748, + "step": 26150 + }, + { + "epoch": 71.8434065934066, + "grad_norm": 7.04441499710083, + "learning_rate": 1.4078296703296704e-05, + "loss": 0.0897, + "step": 26151 + }, + { + "epoch": 71.84615384615384, + "grad_norm": 14.867247581481934, + "learning_rate": 1.4076923076923077e-05, + "loss": 0.396, + "step": 26152 + }, + { + "epoch": 71.8489010989011, + "grad_norm": 8.34060001373291, + "learning_rate": 1.4075549450549452e-05, + "loss": 0.1932, + "step": 26153 + }, + { + "epoch": 71.85164835164835, + "grad_norm": 8.903487205505371, + "learning_rate": 1.4074175824175826e-05, + "loss": 0.1735, + "step": 26154 + }, + { + "epoch": 71.8543956043956, + "grad_norm": 13.933308601379395, + "learning_rate": 1.4072802197802199e-05, + "loss": 0.3914, + "step": 26155 + }, + { + "epoch": 71.85714285714286, + "grad_norm": 4.412310600280762, + "learning_rate": 1.407142857142857e-05, + "loss": 0.0501, + "step": 26156 + }, + { + "epoch": 71.85989010989012, + "grad_norm": 20.87296485900879, + "learning_rate": 1.4070054945054944e-05, + "loss": 0.5461, + "step": 26157 + }, + { + "epoch": 71.86263736263736, + "grad_norm": 20.413822174072266, + "learning_rate": 1.4068681318681321e-05, + "loss": 0.2874, + "step": 26158 + }, + { + "epoch": 71.86538461538461, + "grad_norm": 15.143366813659668, + "learning_rate": 1.4067307692307693e-05, + "loss": 0.285, + "step": 26159 + }, + { + "epoch": 71.86813186813187, + "grad_norm": 13.566554069519043, + "learning_rate": 1.4065934065934066e-05, + "loss": 0.1683, + "step": 26160 + }, + { + "epoch": 71.87087912087912, + "grad_norm": 10.324163436889648, + "learning_rate": 1.406456043956044e-05, + "loss": 0.2163, + "step": 26161 + }, + { + "epoch": 71.87362637362638, + "grad_norm": 4.97953987121582, + "learning_rate": 1.4063186813186813e-05, + "loss": 0.1897, + "step": 26162 + }, + { + "epoch": 71.87637362637362, + "grad_norm": 16.59153175354004, + "learning_rate": 1.4061813186813188e-05, + "loss": 0.4415, + "step": 26163 + }, + { + "epoch": 71.87912087912088, + "grad_norm": 5.230602741241455, + "learning_rate": 1.4060439560439561e-05, + "loss": 0.0689, + "step": 26164 + }, + { + "epoch": 71.88186813186813, + "grad_norm": 15.666885375976562, + "learning_rate": 1.4059065934065935e-05, + "loss": 0.3799, + "step": 26165 + }, + { + "epoch": 71.88461538461539, + "grad_norm": 5.581096172332764, + "learning_rate": 1.4057692307692308e-05, + "loss": 0.0914, + "step": 26166 + }, + { + "epoch": 71.88736263736264, + "grad_norm": 5.773550033569336, + "learning_rate": 1.4056318681318682e-05, + "loss": 0.09, + "step": 26167 + }, + { + "epoch": 71.89010989010988, + "grad_norm": 22.291316986083984, + "learning_rate": 1.4054945054945057e-05, + "loss": 0.3823, + "step": 26168 + }, + { + "epoch": 71.89285714285714, + "grad_norm": 6.968430995941162, + "learning_rate": 1.405357142857143e-05, + "loss": 0.156, + "step": 26169 + }, + { + "epoch": 71.8956043956044, + "grad_norm": 15.756063461303711, + "learning_rate": 1.4052197802197804e-05, + "loss": 0.2754, + "step": 26170 + }, + { + "epoch": 71.89835164835165, + "grad_norm": 5.724554538726807, + "learning_rate": 1.4050824175824175e-05, + "loss": 0.0549, + "step": 26171 + }, + { + "epoch": 71.9010989010989, + "grad_norm": 5.276605129241943, + "learning_rate": 1.4049450549450549e-05, + "loss": 0.0902, + "step": 26172 + }, + { + "epoch": 71.90384615384616, + "grad_norm": 5.506741046905518, + "learning_rate": 1.4048076923076925e-05, + "loss": 0.0931, + "step": 26173 + }, + { + "epoch": 71.9065934065934, + "grad_norm": 14.693177223205566, + "learning_rate": 1.4046703296703297e-05, + "loss": 0.4331, + "step": 26174 + }, + { + "epoch": 71.90934065934066, + "grad_norm": 5.499126434326172, + "learning_rate": 1.404532967032967e-05, + "loss": 0.0744, + "step": 26175 + }, + { + "epoch": 71.91208791208791, + "grad_norm": 3.570997953414917, + "learning_rate": 1.4043956043956044e-05, + "loss": 0.055, + "step": 26176 + }, + { + "epoch": 71.91483516483517, + "grad_norm": 13.338444709777832, + "learning_rate": 1.4042582417582417e-05, + "loss": 0.3816, + "step": 26177 + }, + { + "epoch": 71.91758241758242, + "grad_norm": 8.054377555847168, + "learning_rate": 1.4041208791208793e-05, + "loss": 0.1099, + "step": 26178 + }, + { + "epoch": 71.92032967032966, + "grad_norm": 8.755720138549805, + "learning_rate": 1.4039835164835166e-05, + "loss": 0.2498, + "step": 26179 + }, + { + "epoch": 71.92307692307692, + "grad_norm": 10.757136344909668, + "learning_rate": 1.403846153846154e-05, + "loss": 0.0731, + "step": 26180 + }, + { + "epoch": 71.92582417582418, + "grad_norm": 15.823027610778809, + "learning_rate": 1.4037087912087913e-05, + "loss": 0.3939, + "step": 26181 + }, + { + "epoch": 71.92857142857143, + "grad_norm": 32.9695930480957, + "learning_rate": 1.4035714285714286e-05, + "loss": 0.9301, + "step": 26182 + }, + { + "epoch": 71.93131868131869, + "grad_norm": 7.872438907623291, + "learning_rate": 1.4034340659340661e-05, + "loss": 0.1244, + "step": 26183 + }, + { + "epoch": 71.93406593406593, + "grad_norm": 5.323581218719482, + "learning_rate": 1.4032967032967035e-05, + "loss": 0.1666, + "step": 26184 + }, + { + "epoch": 71.93681318681318, + "grad_norm": 4.644234657287598, + "learning_rate": 1.4031593406593408e-05, + "loss": 0.0583, + "step": 26185 + }, + { + "epoch": 71.93956043956044, + "grad_norm": 5.25294303894043, + "learning_rate": 1.403021978021978e-05, + "loss": 0.043, + "step": 26186 + }, + { + "epoch": 71.9423076923077, + "grad_norm": 8.635831832885742, + "learning_rate": 1.4028846153846153e-05, + "loss": 0.1289, + "step": 26187 + }, + { + "epoch": 71.94505494505495, + "grad_norm": 5.3180084228515625, + "learning_rate": 1.402747252747253e-05, + "loss": 0.0668, + "step": 26188 + }, + { + "epoch": 71.9478021978022, + "grad_norm": 9.540075302124023, + "learning_rate": 1.4026098901098902e-05, + "loss": 0.262, + "step": 26189 + }, + { + "epoch": 71.95054945054945, + "grad_norm": 8.474282264709473, + "learning_rate": 1.4024725274725275e-05, + "loss": 0.07, + "step": 26190 + }, + { + "epoch": 71.9532967032967, + "grad_norm": 5.907249450683594, + "learning_rate": 1.4023351648351648e-05, + "loss": 0.0415, + "step": 26191 + }, + { + "epoch": 71.95604395604396, + "grad_norm": 13.049363136291504, + "learning_rate": 1.4021978021978022e-05, + "loss": 0.1713, + "step": 26192 + }, + { + "epoch": 71.95879120879121, + "grad_norm": 10.843137741088867, + "learning_rate": 1.4020604395604397e-05, + "loss": 0.1311, + "step": 26193 + }, + { + "epoch": 71.96153846153847, + "grad_norm": 12.563124656677246, + "learning_rate": 1.401923076923077e-05, + "loss": 0.2085, + "step": 26194 + }, + { + "epoch": 71.96428571428571, + "grad_norm": 13.064399719238281, + "learning_rate": 1.4017857142857144e-05, + "loss": 0.479, + "step": 26195 + }, + { + "epoch": 71.96703296703296, + "grad_norm": 4.556397914886475, + "learning_rate": 1.4016483516483517e-05, + "loss": 0.0491, + "step": 26196 + }, + { + "epoch": 71.96978021978022, + "grad_norm": 5.002555847167969, + "learning_rate": 1.401510989010989e-05, + "loss": 0.0886, + "step": 26197 + }, + { + "epoch": 71.97252747252747, + "grad_norm": 14.908904075622559, + "learning_rate": 1.4013736263736266e-05, + "loss": 0.3695, + "step": 26198 + }, + { + "epoch": 71.97527472527473, + "grad_norm": 2.8915443420410156, + "learning_rate": 1.4012362637362639e-05, + "loss": 0.0585, + "step": 26199 + }, + { + "epoch": 71.97802197802197, + "grad_norm": 1.2635151147842407, + "learning_rate": 1.4010989010989013e-05, + "loss": 0.0176, + "step": 26200 + }, + { + "epoch": 71.98076923076923, + "grad_norm": 23.088714599609375, + "learning_rate": 1.4009615384615384e-05, + "loss": 0.6002, + "step": 26201 + }, + { + "epoch": 71.98351648351648, + "grad_norm": 14.477166175842285, + "learning_rate": 1.4008241758241758e-05, + "loss": 0.3004, + "step": 26202 + }, + { + "epoch": 71.98626373626374, + "grad_norm": 10.137002944946289, + "learning_rate": 1.4006868131868134e-05, + "loss": 0.2412, + "step": 26203 + }, + { + "epoch": 71.98901098901099, + "grad_norm": 12.089583396911621, + "learning_rate": 1.4005494505494506e-05, + "loss": 0.1721, + "step": 26204 + }, + { + "epoch": 71.99175824175825, + "grad_norm": 13.043898582458496, + "learning_rate": 1.400412087912088e-05, + "loss": 0.2694, + "step": 26205 + }, + { + "epoch": 71.99450549450549, + "grad_norm": 18.42681121826172, + "learning_rate": 1.4002747252747253e-05, + "loss": 0.2897, + "step": 26206 + }, + { + "epoch": 71.99725274725274, + "grad_norm": 17.748552322387695, + "learning_rate": 1.4001373626373626e-05, + "loss": 0.5369, + "step": 26207 + }, + { + "epoch": 72.0, + "grad_norm": 84.75647735595703, + "learning_rate": 1.4000000000000001e-05, + "loss": 2.4556, + "step": 26208 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.5619834710743802, + "eval_f1": 0.5393209428918585, + "eval_f1_DuraRiadoRio_64x64": 0.5478260869565217, + "eval_f1_Mole_64x64": 0.7286821705426356, + "eval_f1_Quebrado_64x64": 0.2754491017964072, + "eval_f1_RiadoRio_64x64": 0.525, + "eval_f1_RioFechado_64x64": 0.6196473551637279, + "eval_loss": 3.1868698596954346, + "eval_precision": 0.6925719161988639, + "eval_precision_DuraRiadoRio_64x64": 0.7325581395348837, + "eval_precision_Mole_64x64": 0.8245614035087719, + "eval_precision_Quebrado_64x64": 1.0, + "eval_precision_RiadoRio_64x64": 0.42338709677419356, + "eval_precision_RioFechado_64x64": 0.4823529411764706, + "eval_recall": 0.5613973313565604, + "eval_recall_DuraRiadoRio_64x64": 0.4375, + "eval_recall_Mole_64x64": 0.6527777777777778, + "eval_recall_Quebrado_64x64": 0.1597222222222222, + "eval_recall_RiadoRio_64x64": 0.6907894736842105, + "eval_recall_RioFechado_64x64": 0.8661971830985915, + "eval_runtime": 1.7709, + "eval_samples_per_second": 409.954, + "eval_steps_per_second": 25.975, + "step": 26208 + }, + { + "epoch": 72.00274725274726, + "grad_norm": 27.369800567626953, + "learning_rate": 1.3998626373626375e-05, + "loss": 0.7188, + "step": 26209 + }, + { + "epoch": 72.00549450549451, + "grad_norm": 7.782116889953613, + "learning_rate": 1.3997252747252748e-05, + "loss": 0.2101, + "step": 26210 + }, + { + "epoch": 72.00824175824175, + "grad_norm": 17.61594581604004, + "learning_rate": 1.3995879120879122e-05, + "loss": 0.3535, + "step": 26211 + }, + { + "epoch": 72.01098901098901, + "grad_norm": 11.447171211242676, + "learning_rate": 1.3994505494505495e-05, + "loss": 0.2477, + "step": 26212 + }, + { + "epoch": 72.01373626373626, + "grad_norm": 11.840930938720703, + "learning_rate": 1.399313186813187e-05, + "loss": 0.1516, + "step": 26213 + }, + { + "epoch": 72.01648351648352, + "grad_norm": 9.169878005981445, + "learning_rate": 1.3991758241758244e-05, + "loss": 0.0776, + "step": 26214 + }, + { + "epoch": 72.01923076923077, + "grad_norm": 3.575968027114868, + "learning_rate": 1.3990384615384617e-05, + "loss": 0.0309, + "step": 26215 + }, + { + "epoch": 72.02197802197803, + "grad_norm": 16.400318145751953, + "learning_rate": 1.3989010989010989e-05, + "loss": 0.3778, + "step": 26216 + }, + { + "epoch": 72.02472527472527, + "grad_norm": 11.400177955627441, + "learning_rate": 1.3987637362637362e-05, + "loss": 0.1167, + "step": 26217 + }, + { + "epoch": 72.02747252747253, + "grad_norm": 6.646803855895996, + "learning_rate": 1.3986263736263739e-05, + "loss": 0.0862, + "step": 26218 + }, + { + "epoch": 72.03021978021978, + "grad_norm": 14.605924606323242, + "learning_rate": 1.398489010989011e-05, + "loss": 0.3201, + "step": 26219 + }, + { + "epoch": 72.03296703296704, + "grad_norm": 19.73888397216797, + "learning_rate": 1.3983516483516484e-05, + "loss": 0.7774, + "step": 26220 + }, + { + "epoch": 72.03571428571429, + "grad_norm": 8.103872299194336, + "learning_rate": 1.3982142857142857e-05, + "loss": 0.101, + "step": 26221 + }, + { + "epoch": 72.03846153846153, + "grad_norm": 6.280220031738281, + "learning_rate": 1.3980769230769231e-05, + "loss": 0.0858, + "step": 26222 + }, + { + "epoch": 72.04120879120879, + "grad_norm": 5.083017349243164, + "learning_rate": 1.3979395604395604e-05, + "loss": 0.0805, + "step": 26223 + }, + { + "epoch": 72.04395604395604, + "grad_norm": 17.840423583984375, + "learning_rate": 1.397802197802198e-05, + "loss": 0.4527, + "step": 26224 + }, + { + "epoch": 72.0467032967033, + "grad_norm": 24.73145866394043, + "learning_rate": 1.3976648351648353e-05, + "loss": 1.0057, + "step": 26225 + }, + { + "epoch": 72.04945054945055, + "grad_norm": 11.941259384155273, + "learning_rate": 1.3975274725274726e-05, + "loss": 0.2064, + "step": 26226 + }, + { + "epoch": 72.0521978021978, + "grad_norm": 7.813606262207031, + "learning_rate": 1.39739010989011e-05, + "loss": 0.2166, + "step": 26227 + }, + { + "epoch": 72.05494505494505, + "grad_norm": 3.4872663021087646, + "learning_rate": 1.3972527472527471e-05, + "loss": 0.0305, + "step": 26228 + }, + { + "epoch": 72.0576923076923, + "grad_norm": 14.846251487731934, + "learning_rate": 1.3971153846153848e-05, + "loss": 0.1447, + "step": 26229 + }, + { + "epoch": 72.06043956043956, + "grad_norm": 6.200465202331543, + "learning_rate": 1.3969780219780222e-05, + "loss": 0.072, + "step": 26230 + }, + { + "epoch": 72.06318681318682, + "grad_norm": 12.30642318725586, + "learning_rate": 1.3968406593406593e-05, + "loss": 0.1832, + "step": 26231 + }, + { + "epoch": 72.06593406593407, + "grad_norm": 10.930976867675781, + "learning_rate": 1.3967032967032967e-05, + "loss": 0.2521, + "step": 26232 + }, + { + "epoch": 72.06868131868131, + "grad_norm": 9.01310920715332, + "learning_rate": 1.396565934065934e-05, + "loss": 0.1068, + "step": 26233 + }, + { + "epoch": 72.07142857142857, + "grad_norm": 11.50010871887207, + "learning_rate": 1.3964285714285715e-05, + "loss": 0.3645, + "step": 26234 + }, + { + "epoch": 72.07417582417582, + "grad_norm": 11.892212867736816, + "learning_rate": 1.3962912087912089e-05, + "loss": 0.2258, + "step": 26235 + }, + { + "epoch": 72.07692307692308, + "grad_norm": 11.718399047851562, + "learning_rate": 1.3961538461538462e-05, + "loss": 0.139, + "step": 26236 + }, + { + "epoch": 72.07967032967034, + "grad_norm": 16.0720272064209, + "learning_rate": 1.3960164835164835e-05, + "loss": 0.381, + "step": 26237 + }, + { + "epoch": 72.08241758241758, + "grad_norm": 15.26199722290039, + "learning_rate": 1.3958791208791209e-05, + "loss": 0.3276, + "step": 26238 + }, + { + "epoch": 72.08516483516483, + "grad_norm": 19.956798553466797, + "learning_rate": 1.3957417582417584e-05, + "loss": 0.2985, + "step": 26239 + }, + { + "epoch": 72.08791208791209, + "grad_norm": 5.8684306144714355, + "learning_rate": 1.3956043956043957e-05, + "loss": 0.0622, + "step": 26240 + }, + { + "epoch": 72.09065934065934, + "grad_norm": 1.9825425148010254, + "learning_rate": 1.395467032967033e-05, + "loss": 0.0211, + "step": 26241 + }, + { + "epoch": 72.0934065934066, + "grad_norm": 14.350668907165527, + "learning_rate": 1.3953296703296702e-05, + "loss": 0.3425, + "step": 26242 + }, + { + "epoch": 72.09615384615384, + "grad_norm": 14.649330139160156, + "learning_rate": 1.3951923076923076e-05, + "loss": 0.1602, + "step": 26243 + }, + { + "epoch": 72.0989010989011, + "grad_norm": 19.162296295166016, + "learning_rate": 1.3950549450549453e-05, + "loss": 0.5547, + "step": 26244 + }, + { + "epoch": 72.10164835164835, + "grad_norm": 16.05992889404297, + "learning_rate": 1.3949175824175826e-05, + "loss": 0.3001, + "step": 26245 + }, + { + "epoch": 72.1043956043956, + "grad_norm": 10.996207237243652, + "learning_rate": 1.3947802197802198e-05, + "loss": 0.2039, + "step": 26246 + }, + { + "epoch": 72.10714285714286, + "grad_norm": 2.233227014541626, + "learning_rate": 1.3946428571428571e-05, + "loss": 0.0214, + "step": 26247 + }, + { + "epoch": 72.10989010989012, + "grad_norm": 12.415874481201172, + "learning_rate": 1.3945054945054945e-05, + "loss": 0.1381, + "step": 26248 + }, + { + "epoch": 72.11263736263736, + "grad_norm": 3.801783561706543, + "learning_rate": 1.394368131868132e-05, + "loss": 0.0816, + "step": 26249 + }, + { + "epoch": 72.11538461538461, + "grad_norm": 7.8484039306640625, + "learning_rate": 1.3942307692307693e-05, + "loss": 0.1053, + "step": 26250 + }, + { + "epoch": 72.11813186813187, + "grad_norm": 8.487398147583008, + "learning_rate": 1.3940934065934066e-05, + "loss": 0.1429, + "step": 26251 + }, + { + "epoch": 72.12087912087912, + "grad_norm": 12.265448570251465, + "learning_rate": 1.393956043956044e-05, + "loss": 0.1906, + "step": 26252 + }, + { + "epoch": 72.12362637362638, + "grad_norm": 19.233158111572266, + "learning_rate": 1.3938186813186813e-05, + "loss": 0.5491, + "step": 26253 + }, + { + "epoch": 72.12637362637362, + "grad_norm": 14.04792308807373, + "learning_rate": 1.3936813186813188e-05, + "loss": 0.166, + "step": 26254 + }, + { + "epoch": 72.12912087912088, + "grad_norm": 19.7900390625, + "learning_rate": 1.3935439560439562e-05, + "loss": 0.4893, + "step": 26255 + }, + { + "epoch": 72.13186813186813, + "grad_norm": 16.086763381958008, + "learning_rate": 1.3934065934065935e-05, + "loss": 0.3658, + "step": 26256 + }, + { + "epoch": 72.13461538461539, + "grad_norm": 7.691712379455566, + "learning_rate": 1.3932692307692307e-05, + "loss": 0.1273, + "step": 26257 + }, + { + "epoch": 72.13736263736264, + "grad_norm": 4.177437782287598, + "learning_rate": 1.393131868131868e-05, + "loss": 0.0995, + "step": 26258 + }, + { + "epoch": 72.14010989010988, + "grad_norm": 7.234407424926758, + "learning_rate": 1.3929945054945057e-05, + "loss": 0.1845, + "step": 26259 + }, + { + "epoch": 72.14285714285714, + "grad_norm": 5.347715854644775, + "learning_rate": 1.392857142857143e-05, + "loss": 0.0505, + "step": 26260 + }, + { + "epoch": 72.1456043956044, + "grad_norm": 17.568321228027344, + "learning_rate": 1.3927197802197802e-05, + "loss": 0.3064, + "step": 26261 + }, + { + "epoch": 72.14835164835165, + "grad_norm": 19.186458587646484, + "learning_rate": 1.3925824175824176e-05, + "loss": 0.4884, + "step": 26262 + }, + { + "epoch": 72.1510989010989, + "grad_norm": 21.03517723083496, + "learning_rate": 1.3924450549450549e-05, + "loss": 0.4376, + "step": 26263 + }, + { + "epoch": 72.15384615384616, + "grad_norm": 10.157346725463867, + "learning_rate": 1.3923076923076924e-05, + "loss": 0.3042, + "step": 26264 + }, + { + "epoch": 72.1565934065934, + "grad_norm": 4.828210353851318, + "learning_rate": 1.3921703296703298e-05, + "loss": 0.1375, + "step": 26265 + }, + { + "epoch": 72.15934065934066, + "grad_norm": 5.645579814910889, + "learning_rate": 1.3920329670329671e-05, + "loss": 0.0707, + "step": 26266 + }, + { + "epoch": 72.16208791208791, + "grad_norm": 9.823601722717285, + "learning_rate": 1.3918956043956044e-05, + "loss": 0.1242, + "step": 26267 + }, + { + "epoch": 72.16483516483517, + "grad_norm": 15.974465370178223, + "learning_rate": 1.3917582417582418e-05, + "loss": 0.2307, + "step": 26268 + }, + { + "epoch": 72.16758241758242, + "grad_norm": 20.906408309936523, + "learning_rate": 1.3916208791208793e-05, + "loss": 0.6555, + "step": 26269 + }, + { + "epoch": 72.17032967032966, + "grad_norm": 8.478558540344238, + "learning_rate": 1.3914835164835166e-05, + "loss": 0.127, + "step": 26270 + }, + { + "epoch": 72.17307692307692, + "grad_norm": 5.313493728637695, + "learning_rate": 1.391346153846154e-05, + "loss": 0.1105, + "step": 26271 + }, + { + "epoch": 72.17582417582418, + "grad_norm": 8.852222442626953, + "learning_rate": 1.3912087912087911e-05, + "loss": 0.2293, + "step": 26272 + }, + { + "epoch": 72.17857142857143, + "grad_norm": 4.8930768966674805, + "learning_rate": 1.3910714285714285e-05, + "loss": 0.0743, + "step": 26273 + }, + { + "epoch": 72.18131868131869, + "grad_norm": 9.843362808227539, + "learning_rate": 1.3909340659340662e-05, + "loss": 0.0889, + "step": 26274 + }, + { + "epoch": 72.18406593406593, + "grad_norm": 12.190936088562012, + "learning_rate": 1.3907967032967035e-05, + "loss": 0.1158, + "step": 26275 + }, + { + "epoch": 72.18681318681318, + "grad_norm": 8.014167785644531, + "learning_rate": 1.3906593406593407e-05, + "loss": 0.0487, + "step": 26276 + }, + { + "epoch": 72.18956043956044, + "grad_norm": 17.7054443359375, + "learning_rate": 1.390521978021978e-05, + "loss": 0.3556, + "step": 26277 + }, + { + "epoch": 72.1923076923077, + "grad_norm": 19.67665672302246, + "learning_rate": 1.3903846153846154e-05, + "loss": 0.523, + "step": 26278 + }, + { + "epoch": 72.19505494505495, + "grad_norm": 12.568351745605469, + "learning_rate": 1.3902472527472529e-05, + "loss": 0.2084, + "step": 26279 + }, + { + "epoch": 72.1978021978022, + "grad_norm": 13.11378288269043, + "learning_rate": 1.3901098901098902e-05, + "loss": 0.2759, + "step": 26280 + }, + { + "epoch": 72.20054945054945, + "grad_norm": 8.169569969177246, + "learning_rate": 1.3899725274725275e-05, + "loss": 0.1455, + "step": 26281 + }, + { + "epoch": 72.2032967032967, + "grad_norm": 8.056462287902832, + "learning_rate": 1.3898351648351649e-05, + "loss": 0.1777, + "step": 26282 + }, + { + "epoch": 72.20604395604396, + "grad_norm": 18.669179916381836, + "learning_rate": 1.3896978021978022e-05, + "loss": 0.4337, + "step": 26283 + }, + { + "epoch": 72.20879120879121, + "grad_norm": 19.272615432739258, + "learning_rate": 1.3895604395604397e-05, + "loss": 0.4093, + "step": 26284 + }, + { + "epoch": 72.21153846153847, + "grad_norm": 8.911833763122559, + "learning_rate": 1.389423076923077e-05, + "loss": 0.128, + "step": 26285 + }, + { + "epoch": 72.21428571428571, + "grad_norm": 11.402765274047852, + "learning_rate": 1.3892857142857144e-05, + "loss": 0.1295, + "step": 26286 + }, + { + "epoch": 72.21703296703296, + "grad_norm": 16.241615295410156, + "learning_rate": 1.3891483516483516e-05, + "loss": 0.3699, + "step": 26287 + }, + { + "epoch": 72.21978021978022, + "grad_norm": 21.905227661132812, + "learning_rate": 1.389010989010989e-05, + "loss": 0.3767, + "step": 26288 + }, + { + "epoch": 72.22252747252747, + "grad_norm": 8.512083053588867, + "learning_rate": 1.3888736263736266e-05, + "loss": 0.1261, + "step": 26289 + }, + { + "epoch": 72.22527472527473, + "grad_norm": 21.33867073059082, + "learning_rate": 1.388736263736264e-05, + "loss": 0.3842, + "step": 26290 + }, + { + "epoch": 72.22802197802197, + "grad_norm": 26.895164489746094, + "learning_rate": 1.3885989010989011e-05, + "loss": 0.6955, + "step": 26291 + }, + { + "epoch": 72.23076923076923, + "grad_norm": 18.70869255065918, + "learning_rate": 1.3884615384615385e-05, + "loss": 0.34, + "step": 26292 + }, + { + "epoch": 72.23351648351648, + "grad_norm": 14.348607063293457, + "learning_rate": 1.3883241758241758e-05, + "loss": 0.2747, + "step": 26293 + }, + { + "epoch": 72.23626373626374, + "grad_norm": 12.782196044921875, + "learning_rate": 1.3881868131868133e-05, + "loss": 0.1048, + "step": 26294 + }, + { + "epoch": 72.23901098901099, + "grad_norm": 7.562261581420898, + "learning_rate": 1.3880494505494507e-05, + "loss": 0.1912, + "step": 26295 + }, + { + "epoch": 72.24175824175825, + "grad_norm": 10.283354759216309, + "learning_rate": 1.387912087912088e-05, + "loss": 0.1194, + "step": 26296 + }, + { + "epoch": 72.24450549450549, + "grad_norm": 8.85574722290039, + "learning_rate": 1.3877747252747253e-05, + "loss": 0.2544, + "step": 26297 + }, + { + "epoch": 72.24725274725274, + "grad_norm": 12.143710136413574, + "learning_rate": 1.3876373626373627e-05, + "loss": 0.32, + "step": 26298 + }, + { + "epoch": 72.25, + "grad_norm": 10.455843925476074, + "learning_rate": 1.3875000000000002e-05, + "loss": 0.2006, + "step": 26299 + }, + { + "epoch": 72.25274725274726, + "grad_norm": 9.34461784362793, + "learning_rate": 1.3873626373626375e-05, + "loss": 0.1894, + "step": 26300 + }, + { + "epoch": 72.25549450549451, + "grad_norm": 19.01479721069336, + "learning_rate": 1.3872252747252749e-05, + "loss": 0.3197, + "step": 26301 + }, + { + "epoch": 72.25824175824175, + "grad_norm": 3.599398136138916, + "learning_rate": 1.387087912087912e-05, + "loss": 0.0296, + "step": 26302 + }, + { + "epoch": 72.26098901098901, + "grad_norm": 2.451723337173462, + "learning_rate": 1.3869505494505494e-05, + "loss": 0.0361, + "step": 26303 + }, + { + "epoch": 72.26373626373626, + "grad_norm": 9.506827354431152, + "learning_rate": 1.386813186813187e-05, + "loss": 0.143, + "step": 26304 + }, + { + "epoch": 72.26648351648352, + "grad_norm": 4.241682529449463, + "learning_rate": 1.3866758241758242e-05, + "loss": 0.0592, + "step": 26305 + }, + { + "epoch": 72.26923076923077, + "grad_norm": 9.403732299804688, + "learning_rate": 1.3865384615384616e-05, + "loss": 0.0993, + "step": 26306 + }, + { + "epoch": 72.27197802197803, + "grad_norm": 9.028775215148926, + "learning_rate": 1.3864010989010989e-05, + "loss": 0.1923, + "step": 26307 + }, + { + "epoch": 72.27472527472527, + "grad_norm": 8.231392860412598, + "learning_rate": 1.3862637362637362e-05, + "loss": 0.1037, + "step": 26308 + }, + { + "epoch": 72.27747252747253, + "grad_norm": 15.910497665405273, + "learning_rate": 1.3861263736263738e-05, + "loss": 0.4498, + "step": 26309 + }, + { + "epoch": 72.28021978021978, + "grad_norm": 11.334909439086914, + "learning_rate": 1.3859890109890111e-05, + "loss": 0.181, + "step": 26310 + }, + { + "epoch": 72.28296703296704, + "grad_norm": 20.41998291015625, + "learning_rate": 1.3858516483516484e-05, + "loss": 0.246, + "step": 26311 + }, + { + "epoch": 72.28571428571429, + "grad_norm": 23.66960334777832, + "learning_rate": 1.3857142857142858e-05, + "loss": 0.9139, + "step": 26312 + }, + { + "epoch": 72.28846153846153, + "grad_norm": 23.781564712524414, + "learning_rate": 1.3855769230769231e-05, + "loss": 0.5151, + "step": 26313 + }, + { + "epoch": 72.29120879120879, + "grad_norm": 9.827836036682129, + "learning_rate": 1.3854395604395606e-05, + "loss": 0.1738, + "step": 26314 + }, + { + "epoch": 72.29395604395604, + "grad_norm": 9.26505184173584, + "learning_rate": 1.385302197802198e-05, + "loss": 0.1476, + "step": 26315 + }, + { + "epoch": 72.2967032967033, + "grad_norm": 12.52607250213623, + "learning_rate": 1.3851648351648353e-05, + "loss": 0.24, + "step": 26316 + }, + { + "epoch": 72.29945054945055, + "grad_norm": 10.729942321777344, + "learning_rate": 1.3850274725274725e-05, + "loss": 0.1778, + "step": 26317 + }, + { + "epoch": 72.3021978021978, + "grad_norm": 16.22988510131836, + "learning_rate": 1.3848901098901098e-05, + "loss": 0.1389, + "step": 26318 + }, + { + "epoch": 72.30494505494505, + "grad_norm": 11.608365058898926, + "learning_rate": 1.3847527472527475e-05, + "loss": 0.2727, + "step": 26319 + }, + { + "epoch": 72.3076923076923, + "grad_norm": 19.49224281311035, + "learning_rate": 1.3846153846153847e-05, + "loss": 0.4214, + "step": 26320 + }, + { + "epoch": 72.31043956043956, + "grad_norm": 4.62768030166626, + "learning_rate": 1.384478021978022e-05, + "loss": 0.0678, + "step": 26321 + }, + { + "epoch": 72.31318681318682, + "grad_norm": 2.673961877822876, + "learning_rate": 1.3843406593406594e-05, + "loss": 0.0409, + "step": 26322 + }, + { + "epoch": 72.31593406593407, + "grad_norm": 11.587784767150879, + "learning_rate": 1.3842032967032967e-05, + "loss": 0.1241, + "step": 26323 + }, + { + "epoch": 72.31868131868131, + "grad_norm": 5.96270227432251, + "learning_rate": 1.3840659340659342e-05, + "loss": 0.0562, + "step": 26324 + }, + { + "epoch": 72.32142857142857, + "grad_norm": 6.6588454246521, + "learning_rate": 1.3839285714285715e-05, + "loss": 0.1972, + "step": 26325 + }, + { + "epoch": 72.32417582417582, + "grad_norm": 5.065403938293457, + "learning_rate": 1.3837912087912089e-05, + "loss": 0.0779, + "step": 26326 + }, + { + "epoch": 72.32692307692308, + "grad_norm": 3.6262738704681396, + "learning_rate": 1.3836538461538462e-05, + "loss": 0.0568, + "step": 26327 + }, + { + "epoch": 72.32967032967034, + "grad_norm": 10.700165748596191, + "learning_rate": 1.3835164835164836e-05, + "loss": 0.1654, + "step": 26328 + }, + { + "epoch": 72.33241758241758, + "grad_norm": 28.41349983215332, + "learning_rate": 1.383379120879121e-05, + "loss": 1.092, + "step": 26329 + }, + { + "epoch": 72.33516483516483, + "grad_norm": 17.863990783691406, + "learning_rate": 1.3832417582417584e-05, + "loss": 0.7362, + "step": 26330 + }, + { + "epoch": 72.33791208791209, + "grad_norm": 17.99156379699707, + "learning_rate": 1.3831043956043958e-05, + "loss": 0.4905, + "step": 26331 + }, + { + "epoch": 72.34065934065934, + "grad_norm": 37.239864349365234, + "learning_rate": 1.382967032967033e-05, + "loss": 1.0802, + "step": 26332 + }, + { + "epoch": 72.3434065934066, + "grad_norm": 6.5098724365234375, + "learning_rate": 1.3828296703296703e-05, + "loss": 0.0314, + "step": 26333 + }, + { + "epoch": 72.34615384615384, + "grad_norm": 15.892167091369629, + "learning_rate": 1.3826923076923076e-05, + "loss": 0.2658, + "step": 26334 + }, + { + "epoch": 72.3489010989011, + "grad_norm": 13.206942558288574, + "learning_rate": 1.3825549450549451e-05, + "loss": 0.2655, + "step": 26335 + }, + { + "epoch": 72.35164835164835, + "grad_norm": 10.81757640838623, + "learning_rate": 1.3824175824175825e-05, + "loss": 0.3345, + "step": 26336 + }, + { + "epoch": 72.3543956043956, + "grad_norm": 37.85511779785156, + "learning_rate": 1.3822802197802198e-05, + "loss": 1.8185, + "step": 26337 + }, + { + "epoch": 72.35714285714286, + "grad_norm": 29.619712829589844, + "learning_rate": 1.3821428571428571e-05, + "loss": 0.4242, + "step": 26338 + }, + { + "epoch": 72.35989010989012, + "grad_norm": 2.302960157394409, + "learning_rate": 1.3820054945054945e-05, + "loss": 0.0376, + "step": 26339 + }, + { + "epoch": 72.36263736263736, + "grad_norm": 14.381665229797363, + "learning_rate": 1.381868131868132e-05, + "loss": 0.6831, + "step": 26340 + }, + { + "epoch": 72.36538461538461, + "grad_norm": 11.641556739807129, + "learning_rate": 1.3817307692307693e-05, + "loss": 0.3381, + "step": 26341 + }, + { + "epoch": 72.36813186813187, + "grad_norm": 23.193450927734375, + "learning_rate": 1.3815934065934067e-05, + "loss": 0.6837, + "step": 26342 + }, + { + "epoch": 72.37087912087912, + "grad_norm": 3.7798919677734375, + "learning_rate": 1.381456043956044e-05, + "loss": 0.083, + "step": 26343 + }, + { + "epoch": 72.37362637362638, + "grad_norm": 6.898514747619629, + "learning_rate": 1.3813186813186812e-05, + "loss": 0.0917, + "step": 26344 + }, + { + "epoch": 72.37637362637362, + "grad_norm": 10.699705123901367, + "learning_rate": 1.3811813186813189e-05, + "loss": 0.1969, + "step": 26345 + }, + { + "epoch": 72.37912087912088, + "grad_norm": 14.267685890197754, + "learning_rate": 1.3810439560439562e-05, + "loss": 0.4286, + "step": 26346 + }, + { + "epoch": 72.38186813186813, + "grad_norm": 3.985987901687622, + "learning_rate": 1.3809065934065934e-05, + "loss": 0.0772, + "step": 26347 + }, + { + "epoch": 72.38461538461539, + "grad_norm": 6.931632995605469, + "learning_rate": 1.3807692307692307e-05, + "loss": 0.1123, + "step": 26348 + }, + { + "epoch": 72.38736263736264, + "grad_norm": 8.23910140991211, + "learning_rate": 1.380631868131868e-05, + "loss": 0.1152, + "step": 26349 + }, + { + "epoch": 72.39010989010988, + "grad_norm": 22.140716552734375, + "learning_rate": 1.3804945054945056e-05, + "loss": 0.5972, + "step": 26350 + }, + { + "epoch": 72.39285714285714, + "grad_norm": 6.160500526428223, + "learning_rate": 1.380357142857143e-05, + "loss": 0.125, + "step": 26351 + }, + { + "epoch": 72.3956043956044, + "grad_norm": 8.8617525100708, + "learning_rate": 1.3802197802197803e-05, + "loss": 0.0897, + "step": 26352 + }, + { + "epoch": 72.39835164835165, + "grad_norm": 16.041748046875, + "learning_rate": 1.3800824175824176e-05, + "loss": 0.1435, + "step": 26353 + }, + { + "epoch": 72.4010989010989, + "grad_norm": 8.812172889709473, + "learning_rate": 1.379945054945055e-05, + "loss": 0.1178, + "step": 26354 + }, + { + "epoch": 72.40384615384616, + "grad_norm": 11.102977752685547, + "learning_rate": 1.3798076923076924e-05, + "loss": 0.1276, + "step": 26355 + }, + { + "epoch": 72.4065934065934, + "grad_norm": 6.847700119018555, + "learning_rate": 1.3796703296703298e-05, + "loss": 0.1291, + "step": 26356 + }, + { + "epoch": 72.40934065934066, + "grad_norm": 8.811010360717773, + "learning_rate": 1.3795329670329671e-05, + "loss": 0.1033, + "step": 26357 + }, + { + "epoch": 72.41208791208791, + "grad_norm": 23.332290649414062, + "learning_rate": 1.3793956043956045e-05, + "loss": 0.4368, + "step": 26358 + }, + { + "epoch": 72.41483516483517, + "grad_norm": 4.009213447570801, + "learning_rate": 1.3792582417582416e-05, + "loss": 0.0797, + "step": 26359 + }, + { + "epoch": 72.41758241758242, + "grad_norm": 4.359284400939941, + "learning_rate": 1.3791208791208793e-05, + "loss": 0.0938, + "step": 26360 + }, + { + "epoch": 72.42032967032966, + "grad_norm": 10.745570182800293, + "learning_rate": 1.3789835164835167e-05, + "loss": 0.1712, + "step": 26361 + }, + { + "epoch": 72.42307692307692, + "grad_norm": 15.710097312927246, + "learning_rate": 1.3788461538461538e-05, + "loss": 0.2967, + "step": 26362 + }, + { + "epoch": 72.42582417582418, + "grad_norm": 5.288339614868164, + "learning_rate": 1.3787087912087912e-05, + "loss": 0.052, + "step": 26363 + }, + { + "epoch": 72.42857142857143, + "grad_norm": 14.516264915466309, + "learning_rate": 1.3785714285714285e-05, + "loss": 0.2181, + "step": 26364 + }, + { + "epoch": 72.43131868131869, + "grad_norm": 8.254716873168945, + "learning_rate": 1.378434065934066e-05, + "loss": 0.1177, + "step": 26365 + }, + { + "epoch": 72.43406593406593, + "grad_norm": 22.071382522583008, + "learning_rate": 1.3782967032967034e-05, + "loss": 0.3627, + "step": 26366 + }, + { + "epoch": 72.43681318681318, + "grad_norm": 7.946052551269531, + "learning_rate": 1.3781593406593407e-05, + "loss": 0.1014, + "step": 26367 + }, + { + "epoch": 72.43956043956044, + "grad_norm": 4.195338726043701, + "learning_rate": 1.378021978021978e-05, + "loss": 0.1206, + "step": 26368 + }, + { + "epoch": 72.4423076923077, + "grad_norm": 17.714313507080078, + "learning_rate": 1.3778846153846154e-05, + "loss": 0.2916, + "step": 26369 + }, + { + "epoch": 72.44505494505495, + "grad_norm": 7.345844745635986, + "learning_rate": 1.3777472527472529e-05, + "loss": 0.157, + "step": 26370 + }, + { + "epoch": 72.4478021978022, + "grad_norm": 5.015192985534668, + "learning_rate": 1.3776098901098902e-05, + "loss": 0.0579, + "step": 26371 + }, + { + "epoch": 72.45054945054945, + "grad_norm": 15.063899040222168, + "learning_rate": 1.3774725274725276e-05, + "loss": 0.1801, + "step": 26372 + }, + { + "epoch": 72.4532967032967, + "grad_norm": 15.072693824768066, + "learning_rate": 1.377335164835165e-05, + "loss": 0.4463, + "step": 26373 + }, + { + "epoch": 72.45604395604396, + "grad_norm": 12.14537239074707, + "learning_rate": 1.3771978021978021e-05, + "loss": 0.1519, + "step": 26374 + }, + { + "epoch": 72.45879120879121, + "grad_norm": 27.075298309326172, + "learning_rate": 1.3770604395604398e-05, + "loss": 0.7158, + "step": 26375 + }, + { + "epoch": 72.46153846153847, + "grad_norm": 14.36744213104248, + "learning_rate": 1.3769230769230771e-05, + "loss": 0.2735, + "step": 26376 + }, + { + "epoch": 72.46428571428571, + "grad_norm": 24.079532623291016, + "learning_rate": 1.3767857142857143e-05, + "loss": 0.4106, + "step": 26377 + }, + { + "epoch": 72.46703296703296, + "grad_norm": 5.146381855010986, + "learning_rate": 1.3766483516483516e-05, + "loss": 0.0821, + "step": 26378 + }, + { + "epoch": 72.46978021978022, + "grad_norm": 19.802013397216797, + "learning_rate": 1.376510989010989e-05, + "loss": 0.6279, + "step": 26379 + }, + { + "epoch": 72.47252747252747, + "grad_norm": 15.218788146972656, + "learning_rate": 1.3763736263736265e-05, + "loss": 0.2935, + "step": 26380 + }, + { + "epoch": 72.47527472527473, + "grad_norm": 6.4284539222717285, + "learning_rate": 1.3762362637362638e-05, + "loss": 0.1892, + "step": 26381 + }, + { + "epoch": 72.47802197802197, + "grad_norm": 5.884804725646973, + "learning_rate": 1.3760989010989012e-05, + "loss": 0.1088, + "step": 26382 + }, + { + "epoch": 72.48076923076923, + "grad_norm": 11.95290470123291, + "learning_rate": 1.3759615384615385e-05, + "loss": 0.188, + "step": 26383 + }, + { + "epoch": 72.48351648351648, + "grad_norm": 14.062610626220703, + "learning_rate": 1.3758241758241758e-05, + "loss": 0.1934, + "step": 26384 + }, + { + "epoch": 72.48626373626374, + "grad_norm": 12.686075210571289, + "learning_rate": 1.3756868131868133e-05, + "loss": 0.2079, + "step": 26385 + }, + { + "epoch": 72.48901098901099, + "grad_norm": 8.442546844482422, + "learning_rate": 1.3755494505494507e-05, + "loss": 0.1574, + "step": 26386 + }, + { + "epoch": 72.49175824175825, + "grad_norm": 12.69166374206543, + "learning_rate": 1.375412087912088e-05, + "loss": 0.236, + "step": 26387 + }, + { + "epoch": 72.49450549450549, + "grad_norm": 11.425643920898438, + "learning_rate": 1.3752747252747254e-05, + "loss": 0.0801, + "step": 26388 + }, + { + "epoch": 72.49725274725274, + "grad_norm": 24.202659606933594, + "learning_rate": 1.3751373626373625e-05, + "loss": 0.6962, + "step": 26389 + }, + { + "epoch": 72.5, + "grad_norm": 18.604299545288086, + "learning_rate": 1.3750000000000002e-05, + "loss": 0.418, + "step": 26390 + }, + { + "epoch": 72.50274725274726, + "grad_norm": 14.525215148925781, + "learning_rate": 1.3748626373626376e-05, + "loss": 0.129, + "step": 26391 + }, + { + "epoch": 72.50549450549451, + "grad_norm": 7.050580978393555, + "learning_rate": 1.3747252747252747e-05, + "loss": 0.1277, + "step": 26392 + }, + { + "epoch": 72.50824175824175, + "grad_norm": 7.827872276306152, + "learning_rate": 1.374587912087912e-05, + "loss": 0.075, + "step": 26393 + }, + { + "epoch": 72.51098901098901, + "grad_norm": 14.298338890075684, + "learning_rate": 1.3744505494505494e-05, + "loss": 0.4298, + "step": 26394 + }, + { + "epoch": 72.51373626373626, + "grad_norm": 13.722784996032715, + "learning_rate": 1.374313186813187e-05, + "loss": 0.5583, + "step": 26395 + }, + { + "epoch": 72.51648351648352, + "grad_norm": 7.081055164337158, + "learning_rate": 1.3741758241758243e-05, + "loss": 0.0926, + "step": 26396 + }, + { + "epoch": 72.51923076923077, + "grad_norm": 2.5807065963745117, + "learning_rate": 1.3740384615384616e-05, + "loss": 0.027, + "step": 26397 + }, + { + "epoch": 72.52197802197803, + "grad_norm": 5.877041816711426, + "learning_rate": 1.373901098901099e-05, + "loss": 0.0634, + "step": 26398 + }, + { + "epoch": 72.52472527472527, + "grad_norm": 5.562431335449219, + "learning_rate": 1.3737637362637363e-05, + "loss": 0.1006, + "step": 26399 + }, + { + "epoch": 72.52747252747253, + "grad_norm": 15.359366416931152, + "learning_rate": 1.3736263736263738e-05, + "loss": 0.1674, + "step": 26400 + }, + { + "epoch": 72.53021978021978, + "grad_norm": 21.40983009338379, + "learning_rate": 1.3734890109890111e-05, + "loss": 0.6395, + "step": 26401 + }, + { + "epoch": 72.53296703296704, + "grad_norm": 7.686334133148193, + "learning_rate": 1.3733516483516485e-05, + "loss": 0.0753, + "step": 26402 + }, + { + "epoch": 72.53571428571429, + "grad_norm": 8.03429889678955, + "learning_rate": 1.3732142857142856e-05, + "loss": 0.091, + "step": 26403 + }, + { + "epoch": 72.53846153846153, + "grad_norm": 18.060523986816406, + "learning_rate": 1.373076923076923e-05, + "loss": 0.47, + "step": 26404 + }, + { + "epoch": 72.54120879120879, + "grad_norm": 4.143049716949463, + "learning_rate": 1.3729395604395607e-05, + "loss": 0.0716, + "step": 26405 + }, + { + "epoch": 72.54395604395604, + "grad_norm": 9.310741424560547, + "learning_rate": 1.372802197802198e-05, + "loss": 0.2114, + "step": 26406 + }, + { + "epoch": 72.5467032967033, + "grad_norm": 18.425247192382812, + "learning_rate": 1.3726648351648352e-05, + "loss": 0.4926, + "step": 26407 + }, + { + "epoch": 72.54945054945055, + "grad_norm": 24.480297088623047, + "learning_rate": 1.3725274725274725e-05, + "loss": 0.3459, + "step": 26408 + }, + { + "epoch": 72.5521978021978, + "grad_norm": 11.467131614685059, + "learning_rate": 1.3723901098901099e-05, + "loss": 0.3508, + "step": 26409 + }, + { + "epoch": 72.55494505494505, + "grad_norm": 4.493481636047363, + "learning_rate": 1.3722527472527474e-05, + "loss": 0.0796, + "step": 26410 + }, + { + "epoch": 72.5576923076923, + "grad_norm": 12.193549156188965, + "learning_rate": 1.3721153846153847e-05, + "loss": 0.2183, + "step": 26411 + }, + { + "epoch": 72.56043956043956, + "grad_norm": 15.036979675292969, + "learning_rate": 1.371978021978022e-05, + "loss": 0.2131, + "step": 26412 + }, + { + "epoch": 72.56318681318682, + "grad_norm": 8.311392784118652, + "learning_rate": 1.3718406593406594e-05, + "loss": 0.0719, + "step": 26413 + }, + { + "epoch": 72.56593406593407, + "grad_norm": 10.892815589904785, + "learning_rate": 1.3717032967032967e-05, + "loss": 0.2496, + "step": 26414 + }, + { + "epoch": 72.56868131868131, + "grad_norm": 1.611096739768982, + "learning_rate": 1.3715659340659342e-05, + "loss": 0.0189, + "step": 26415 + }, + { + "epoch": 72.57142857142857, + "grad_norm": 11.574764251708984, + "learning_rate": 1.3714285714285716e-05, + "loss": 0.3234, + "step": 26416 + }, + { + "epoch": 72.57417582417582, + "grad_norm": 11.92527961730957, + "learning_rate": 1.371291208791209e-05, + "loss": 0.2987, + "step": 26417 + }, + { + "epoch": 72.57692307692308, + "grad_norm": 12.949957847595215, + "learning_rate": 1.3711538461538461e-05, + "loss": 0.1847, + "step": 26418 + }, + { + "epoch": 72.57967032967034, + "grad_norm": 12.223491668701172, + "learning_rate": 1.3710164835164834e-05, + "loss": 0.0961, + "step": 26419 + }, + { + "epoch": 72.58241758241758, + "grad_norm": 7.058562755584717, + "learning_rate": 1.3708791208791211e-05, + "loss": 0.0946, + "step": 26420 + }, + { + "epoch": 72.58516483516483, + "grad_norm": 9.966436386108398, + "learning_rate": 1.3707417582417585e-05, + "loss": 0.2971, + "step": 26421 + }, + { + "epoch": 72.58791208791209, + "grad_norm": 4.5432305335998535, + "learning_rate": 1.3706043956043956e-05, + "loss": 0.0417, + "step": 26422 + }, + { + "epoch": 72.59065934065934, + "grad_norm": 5.406007766723633, + "learning_rate": 1.370467032967033e-05, + "loss": 0.0853, + "step": 26423 + }, + { + "epoch": 72.5934065934066, + "grad_norm": 9.152199745178223, + "learning_rate": 1.3703296703296703e-05, + "loss": 0.1518, + "step": 26424 + }, + { + "epoch": 72.59615384615384, + "grad_norm": 8.447487831115723, + "learning_rate": 1.3701923076923078e-05, + "loss": 0.1517, + "step": 26425 + }, + { + "epoch": 72.5989010989011, + "grad_norm": 8.898799896240234, + "learning_rate": 1.3700549450549452e-05, + "loss": 0.0666, + "step": 26426 + }, + { + "epoch": 72.60164835164835, + "grad_norm": 9.154295921325684, + "learning_rate": 1.3699175824175825e-05, + "loss": 0.1053, + "step": 26427 + }, + { + "epoch": 72.6043956043956, + "grad_norm": 8.993924140930176, + "learning_rate": 1.3697802197802198e-05, + "loss": 0.1982, + "step": 26428 + }, + { + "epoch": 72.60714285714286, + "grad_norm": 8.063493728637695, + "learning_rate": 1.3696428571428572e-05, + "loss": 0.2866, + "step": 26429 + }, + { + "epoch": 72.60989010989012, + "grad_norm": 13.246353149414062, + "learning_rate": 1.3695054945054947e-05, + "loss": 0.1769, + "step": 26430 + }, + { + "epoch": 72.61263736263736, + "grad_norm": 13.916952133178711, + "learning_rate": 1.369368131868132e-05, + "loss": 0.2733, + "step": 26431 + }, + { + "epoch": 72.61538461538461, + "grad_norm": 13.616376876831055, + "learning_rate": 1.3692307692307694e-05, + "loss": 0.2175, + "step": 26432 + }, + { + "epoch": 72.61813186813187, + "grad_norm": 19.835268020629883, + "learning_rate": 1.3690934065934065e-05, + "loss": 0.5127, + "step": 26433 + }, + { + "epoch": 72.62087912087912, + "grad_norm": 3.265864372253418, + "learning_rate": 1.3689560439560439e-05, + "loss": 0.0464, + "step": 26434 + }, + { + "epoch": 72.62362637362638, + "grad_norm": 9.69545841217041, + "learning_rate": 1.3688186813186816e-05, + "loss": 0.1454, + "step": 26435 + }, + { + "epoch": 72.62637362637362, + "grad_norm": 29.493078231811523, + "learning_rate": 1.3686813186813189e-05, + "loss": 0.8166, + "step": 26436 + }, + { + "epoch": 72.62912087912088, + "grad_norm": 13.552494049072266, + "learning_rate": 1.368543956043956e-05, + "loss": 0.3912, + "step": 26437 + }, + { + "epoch": 72.63186813186813, + "grad_norm": 10.93863582611084, + "learning_rate": 1.3684065934065934e-05, + "loss": 0.0998, + "step": 26438 + }, + { + "epoch": 72.63461538461539, + "grad_norm": 16.09253692626953, + "learning_rate": 1.3682692307692308e-05, + "loss": 0.3018, + "step": 26439 + }, + { + "epoch": 72.63736263736264, + "grad_norm": 16.010669708251953, + "learning_rate": 1.3681318681318683e-05, + "loss": 0.5861, + "step": 26440 + }, + { + "epoch": 72.64010989010988, + "grad_norm": 5.762391567230225, + "learning_rate": 1.3679945054945056e-05, + "loss": 0.1566, + "step": 26441 + }, + { + "epoch": 72.64285714285714, + "grad_norm": 14.165331840515137, + "learning_rate": 1.367857142857143e-05, + "loss": 0.249, + "step": 26442 + }, + { + "epoch": 72.6456043956044, + "grad_norm": 17.533233642578125, + "learning_rate": 1.3677197802197803e-05, + "loss": 0.199, + "step": 26443 + }, + { + "epoch": 72.64835164835165, + "grad_norm": 34.188682556152344, + "learning_rate": 1.3675824175824176e-05, + "loss": 0.2566, + "step": 26444 + }, + { + "epoch": 72.6510989010989, + "grad_norm": 9.765212059020996, + "learning_rate": 1.3674450549450551e-05, + "loss": 0.1668, + "step": 26445 + }, + { + "epoch": 72.65384615384616, + "grad_norm": 12.394530296325684, + "learning_rate": 1.3673076923076925e-05, + "loss": 0.238, + "step": 26446 + }, + { + "epoch": 72.6565934065934, + "grad_norm": 8.24441909790039, + "learning_rate": 1.3671703296703298e-05, + "loss": 0.0824, + "step": 26447 + }, + { + "epoch": 72.65934065934066, + "grad_norm": 15.691512107849121, + "learning_rate": 1.367032967032967e-05, + "loss": 0.2582, + "step": 26448 + }, + { + "epoch": 72.66208791208791, + "grad_norm": 5.063154697418213, + "learning_rate": 1.3668956043956043e-05, + "loss": 0.1078, + "step": 26449 + }, + { + "epoch": 72.66483516483517, + "grad_norm": 18.701860427856445, + "learning_rate": 1.3667582417582417e-05, + "loss": 0.7708, + "step": 26450 + }, + { + "epoch": 72.66758241758242, + "grad_norm": 5.973544120788574, + "learning_rate": 1.3666208791208794e-05, + "loss": 0.095, + "step": 26451 + }, + { + "epoch": 72.67032967032966, + "grad_norm": 18.745365142822266, + "learning_rate": 1.3664835164835165e-05, + "loss": 0.3195, + "step": 26452 + }, + { + "epoch": 72.67307692307692, + "grad_norm": 9.858652114868164, + "learning_rate": 1.3663461538461539e-05, + "loss": 0.1191, + "step": 26453 + }, + { + "epoch": 72.67582417582418, + "grad_norm": 26.680219650268555, + "learning_rate": 1.3662087912087912e-05, + "loss": 0.7187, + "step": 26454 + }, + { + "epoch": 72.67857142857143, + "grad_norm": 12.167693138122559, + "learning_rate": 1.3660714285714285e-05, + "loss": 0.4187, + "step": 26455 + }, + { + "epoch": 72.68131868131869, + "grad_norm": 4.602164268493652, + "learning_rate": 1.365934065934066e-05, + "loss": 0.0667, + "step": 26456 + }, + { + "epoch": 72.68406593406593, + "grad_norm": 12.495168685913086, + "learning_rate": 1.3657967032967034e-05, + "loss": 0.1869, + "step": 26457 + }, + { + "epoch": 72.68681318681318, + "grad_norm": 14.63923454284668, + "learning_rate": 1.3656593406593407e-05, + "loss": 0.4272, + "step": 26458 + }, + { + "epoch": 72.68956043956044, + "grad_norm": 16.06204605102539, + "learning_rate": 1.365521978021978e-05, + "loss": 0.4262, + "step": 26459 + }, + { + "epoch": 72.6923076923077, + "grad_norm": 10.61237621307373, + "learning_rate": 1.3653846153846153e-05, + "loss": 0.2939, + "step": 26460 + }, + { + "epoch": 72.69505494505495, + "grad_norm": 7.596477031707764, + "learning_rate": 1.365247252747253e-05, + "loss": 0.1252, + "step": 26461 + }, + { + "epoch": 72.6978021978022, + "grad_norm": 18.102813720703125, + "learning_rate": 1.3651098901098903e-05, + "loss": 0.3295, + "step": 26462 + }, + { + "epoch": 72.70054945054945, + "grad_norm": 11.936507225036621, + "learning_rate": 1.3649725274725274e-05, + "loss": 0.2272, + "step": 26463 + }, + { + "epoch": 72.7032967032967, + "grad_norm": 11.41791820526123, + "learning_rate": 1.3648351648351648e-05, + "loss": 0.1968, + "step": 26464 + }, + { + "epoch": 72.70604395604396, + "grad_norm": 24.137325286865234, + "learning_rate": 1.3646978021978021e-05, + "loss": 0.632, + "step": 26465 + }, + { + "epoch": 72.70879120879121, + "grad_norm": 15.701067924499512, + "learning_rate": 1.3645604395604396e-05, + "loss": 0.1965, + "step": 26466 + }, + { + "epoch": 72.71153846153847, + "grad_norm": 23.2952938079834, + "learning_rate": 1.364423076923077e-05, + "loss": 0.6339, + "step": 26467 + }, + { + "epoch": 72.71428571428571, + "grad_norm": 14.712876319885254, + "learning_rate": 1.3642857142857143e-05, + "loss": 0.2166, + "step": 26468 + }, + { + "epoch": 72.71703296703296, + "grad_norm": 10.763773918151855, + "learning_rate": 1.3641483516483517e-05, + "loss": 0.1613, + "step": 26469 + }, + { + "epoch": 72.71978021978022, + "grad_norm": 12.692340850830078, + "learning_rate": 1.364010989010989e-05, + "loss": 0.2636, + "step": 26470 + }, + { + "epoch": 72.72252747252747, + "grad_norm": 7.876214504241943, + "learning_rate": 1.3638736263736265e-05, + "loss": 0.1809, + "step": 26471 + }, + { + "epoch": 72.72527472527473, + "grad_norm": 17.855146408081055, + "learning_rate": 1.3637362637362638e-05, + "loss": 0.2319, + "step": 26472 + }, + { + "epoch": 72.72802197802197, + "grad_norm": 7.903290748596191, + "learning_rate": 1.3635989010989012e-05, + "loss": 0.1288, + "step": 26473 + }, + { + "epoch": 72.73076923076923, + "grad_norm": 8.80440902709961, + "learning_rate": 1.3634615384615385e-05, + "loss": 0.1759, + "step": 26474 + }, + { + "epoch": 72.73351648351648, + "grad_norm": 7.7237091064453125, + "learning_rate": 1.3633241758241757e-05, + "loss": 0.104, + "step": 26475 + }, + { + "epoch": 72.73626373626374, + "grad_norm": 8.8262939453125, + "learning_rate": 1.3631868131868134e-05, + "loss": 0.2028, + "step": 26476 + }, + { + "epoch": 72.73901098901099, + "grad_norm": 12.219195365905762, + "learning_rate": 1.3630494505494507e-05, + "loss": 0.129, + "step": 26477 + }, + { + "epoch": 72.74175824175825, + "grad_norm": 10.746471405029297, + "learning_rate": 1.3629120879120879e-05, + "loss": 0.0857, + "step": 26478 + }, + { + "epoch": 72.74450549450549, + "grad_norm": 18.865318298339844, + "learning_rate": 1.3627747252747252e-05, + "loss": 0.5986, + "step": 26479 + }, + { + "epoch": 72.74725274725274, + "grad_norm": 13.592278480529785, + "learning_rate": 1.3626373626373626e-05, + "loss": 0.3551, + "step": 26480 + }, + { + "epoch": 72.75, + "grad_norm": 8.69018268585205, + "learning_rate": 1.3625e-05, + "loss": 0.1815, + "step": 26481 + }, + { + "epoch": 72.75274725274726, + "grad_norm": 9.186334609985352, + "learning_rate": 1.3623626373626374e-05, + "loss": 0.2366, + "step": 26482 + }, + { + "epoch": 72.75549450549451, + "grad_norm": 13.432450294494629, + "learning_rate": 1.3622252747252748e-05, + "loss": 0.2387, + "step": 26483 + }, + { + "epoch": 72.75824175824175, + "grad_norm": 17.71280288696289, + "learning_rate": 1.3620879120879121e-05, + "loss": 0.3015, + "step": 26484 + }, + { + "epoch": 72.76098901098901, + "grad_norm": 11.608221054077148, + "learning_rate": 1.3619505494505494e-05, + "loss": 0.1483, + "step": 26485 + }, + { + "epoch": 72.76373626373626, + "grad_norm": 9.52371883392334, + "learning_rate": 1.361813186813187e-05, + "loss": 0.2014, + "step": 26486 + }, + { + "epoch": 72.76648351648352, + "grad_norm": 14.652971267700195, + "learning_rate": 1.3616758241758243e-05, + "loss": 0.3281, + "step": 26487 + }, + { + "epoch": 72.76923076923077, + "grad_norm": 3.17602276802063, + "learning_rate": 1.3615384615384616e-05, + "loss": 0.0419, + "step": 26488 + }, + { + "epoch": 72.77197802197803, + "grad_norm": 20.052820205688477, + "learning_rate": 1.361401098901099e-05, + "loss": 0.5644, + "step": 26489 + }, + { + "epoch": 72.77472527472527, + "grad_norm": 16.441322326660156, + "learning_rate": 1.3612637362637362e-05, + "loss": 0.438, + "step": 26490 + }, + { + "epoch": 72.77747252747253, + "grad_norm": 22.913541793823242, + "learning_rate": 1.3611263736263738e-05, + "loss": 0.675, + "step": 26491 + }, + { + "epoch": 72.78021978021978, + "grad_norm": 18.06494903564453, + "learning_rate": 1.3609890109890112e-05, + "loss": 0.2595, + "step": 26492 + }, + { + "epoch": 72.78296703296704, + "grad_norm": 15.915793418884277, + "learning_rate": 1.3608516483516483e-05, + "loss": 0.2743, + "step": 26493 + }, + { + "epoch": 72.78571428571429, + "grad_norm": 2.7047955989837646, + "learning_rate": 1.3607142857142857e-05, + "loss": 0.0358, + "step": 26494 + }, + { + "epoch": 72.78846153846153, + "grad_norm": 14.824642181396484, + "learning_rate": 1.360576923076923e-05, + "loss": 0.32, + "step": 26495 + }, + { + "epoch": 72.79120879120879, + "grad_norm": 5.92544412612915, + "learning_rate": 1.3604395604395605e-05, + "loss": 0.1294, + "step": 26496 + }, + { + "epoch": 72.79395604395604, + "grad_norm": 18.054826736450195, + "learning_rate": 1.3603021978021979e-05, + "loss": 0.5076, + "step": 26497 + }, + { + "epoch": 72.7967032967033, + "grad_norm": 14.372440338134766, + "learning_rate": 1.3601648351648352e-05, + "loss": 0.5179, + "step": 26498 + }, + { + "epoch": 72.79945054945055, + "grad_norm": 17.30122947692871, + "learning_rate": 1.3600274725274726e-05, + "loss": 0.1796, + "step": 26499 + }, + { + "epoch": 72.8021978021978, + "grad_norm": 8.15158748626709, + "learning_rate": 1.3598901098901099e-05, + "loss": 0.1493, + "step": 26500 + }, + { + "epoch": 72.80494505494505, + "grad_norm": 13.45211124420166, + "learning_rate": 1.3597527472527474e-05, + "loss": 0.2737, + "step": 26501 + }, + { + "epoch": 72.8076923076923, + "grad_norm": 17.614187240600586, + "learning_rate": 1.3596153846153847e-05, + "loss": 0.2156, + "step": 26502 + }, + { + "epoch": 72.81043956043956, + "grad_norm": 9.451102256774902, + "learning_rate": 1.3594780219780221e-05, + "loss": 0.1931, + "step": 26503 + }, + { + "epoch": 72.81318681318682, + "grad_norm": 3.9527885913848877, + "learning_rate": 1.3593406593406594e-05, + "loss": 0.0318, + "step": 26504 + }, + { + "epoch": 72.81593406593407, + "grad_norm": 7.016637325286865, + "learning_rate": 1.3592032967032966e-05, + "loss": 0.1821, + "step": 26505 + }, + { + "epoch": 72.81868131868131, + "grad_norm": 12.173036575317383, + "learning_rate": 1.3590659340659343e-05, + "loss": 0.361, + "step": 26506 + }, + { + "epoch": 72.82142857142857, + "grad_norm": 9.539801597595215, + "learning_rate": 1.3589285714285716e-05, + "loss": 0.0846, + "step": 26507 + }, + { + "epoch": 72.82417582417582, + "grad_norm": 12.414554595947266, + "learning_rate": 1.3587912087912088e-05, + "loss": 0.2962, + "step": 26508 + }, + { + "epoch": 72.82692307692308, + "grad_norm": 11.244047164916992, + "learning_rate": 1.3586538461538461e-05, + "loss": 0.3164, + "step": 26509 + }, + { + "epoch": 72.82967032967034, + "grad_norm": 15.77322006225586, + "learning_rate": 1.3585164835164835e-05, + "loss": 0.1366, + "step": 26510 + }, + { + "epoch": 72.83241758241758, + "grad_norm": 0.903502881526947, + "learning_rate": 1.358379120879121e-05, + "loss": 0.0131, + "step": 26511 + }, + { + "epoch": 72.83516483516483, + "grad_norm": 13.985549926757812, + "learning_rate": 1.3582417582417583e-05, + "loss": 0.1533, + "step": 26512 + }, + { + "epoch": 72.83791208791209, + "grad_norm": 4.2010908126831055, + "learning_rate": 1.3581043956043957e-05, + "loss": 0.0932, + "step": 26513 + }, + { + "epoch": 72.84065934065934, + "grad_norm": 11.820155143737793, + "learning_rate": 1.357967032967033e-05, + "loss": 0.2308, + "step": 26514 + }, + { + "epoch": 72.8434065934066, + "grad_norm": 9.78639030456543, + "learning_rate": 1.3578296703296703e-05, + "loss": 0.233, + "step": 26515 + }, + { + "epoch": 72.84615384615384, + "grad_norm": 2.97296404838562, + "learning_rate": 1.3576923076923079e-05, + "loss": 0.0362, + "step": 26516 + }, + { + "epoch": 72.8489010989011, + "grad_norm": 9.793732643127441, + "learning_rate": 1.3575549450549452e-05, + "loss": 0.1948, + "step": 26517 + }, + { + "epoch": 72.85164835164835, + "grad_norm": 7.455440044403076, + "learning_rate": 1.3574175824175825e-05, + "loss": 0.0867, + "step": 26518 + }, + { + "epoch": 72.8543956043956, + "grad_norm": 6.9958319664001465, + "learning_rate": 1.3572802197802199e-05, + "loss": 0.0712, + "step": 26519 + }, + { + "epoch": 72.85714285714286, + "grad_norm": 19.262094497680664, + "learning_rate": 1.357142857142857e-05, + "loss": 0.3651, + "step": 26520 + }, + { + "epoch": 72.85989010989012, + "grad_norm": 8.313477516174316, + "learning_rate": 1.3570054945054947e-05, + "loss": 0.1128, + "step": 26521 + }, + { + "epoch": 72.86263736263736, + "grad_norm": 12.730690002441406, + "learning_rate": 1.356868131868132e-05, + "loss": 0.2087, + "step": 26522 + }, + { + "epoch": 72.86538461538461, + "grad_norm": 6.356939315795898, + "learning_rate": 1.3567307692307692e-05, + "loss": 0.0769, + "step": 26523 + }, + { + "epoch": 72.86813186813187, + "grad_norm": 10.833159446716309, + "learning_rate": 1.3565934065934066e-05, + "loss": 0.2162, + "step": 26524 + }, + { + "epoch": 72.87087912087912, + "grad_norm": 3.0954999923706055, + "learning_rate": 1.356456043956044e-05, + "loss": 0.054, + "step": 26525 + }, + { + "epoch": 72.87362637362638, + "grad_norm": 6.194115161895752, + "learning_rate": 1.3563186813186814e-05, + "loss": 0.1115, + "step": 26526 + }, + { + "epoch": 72.87637362637362, + "grad_norm": 11.849124908447266, + "learning_rate": 1.3561813186813188e-05, + "loss": 0.1788, + "step": 26527 + }, + { + "epoch": 72.87912087912088, + "grad_norm": 16.72234344482422, + "learning_rate": 1.3560439560439561e-05, + "loss": 0.3179, + "step": 26528 + }, + { + "epoch": 72.88186813186813, + "grad_norm": 8.23642635345459, + "learning_rate": 1.3559065934065935e-05, + "loss": 0.1406, + "step": 26529 + }, + { + "epoch": 72.88461538461539, + "grad_norm": 12.376559257507324, + "learning_rate": 1.3557692307692308e-05, + "loss": 0.4414, + "step": 26530 + }, + { + "epoch": 72.88736263736264, + "grad_norm": 5.91459321975708, + "learning_rate": 1.3556318681318683e-05, + "loss": 0.2237, + "step": 26531 + }, + { + "epoch": 72.89010989010988, + "grad_norm": 11.22154426574707, + "learning_rate": 1.3554945054945056e-05, + "loss": 0.1269, + "step": 26532 + }, + { + "epoch": 72.89285714285714, + "grad_norm": 9.578376770019531, + "learning_rate": 1.355357142857143e-05, + "loss": 0.1301, + "step": 26533 + }, + { + "epoch": 72.8956043956044, + "grad_norm": 6.085239887237549, + "learning_rate": 1.3552197802197803e-05, + "loss": 0.0822, + "step": 26534 + }, + { + "epoch": 72.89835164835165, + "grad_norm": 3.326885938644409, + "learning_rate": 1.3550824175824175e-05, + "loss": 0.07, + "step": 26535 + }, + { + "epoch": 72.9010989010989, + "grad_norm": 19.3720645904541, + "learning_rate": 1.3549450549450552e-05, + "loss": 0.3247, + "step": 26536 + }, + { + "epoch": 72.90384615384616, + "grad_norm": 16.231903076171875, + "learning_rate": 1.3548076923076925e-05, + "loss": 0.4427, + "step": 26537 + }, + { + "epoch": 72.9065934065934, + "grad_norm": 7.768431186676025, + "learning_rate": 1.3546703296703297e-05, + "loss": 0.0877, + "step": 26538 + }, + { + "epoch": 72.90934065934066, + "grad_norm": 20.29560661315918, + "learning_rate": 1.354532967032967e-05, + "loss": 0.3727, + "step": 26539 + }, + { + "epoch": 72.91208791208791, + "grad_norm": 3.51997447013855, + "learning_rate": 1.3543956043956044e-05, + "loss": 0.0478, + "step": 26540 + }, + { + "epoch": 72.91483516483517, + "grad_norm": 6.27700662612915, + "learning_rate": 1.3542582417582419e-05, + "loss": 0.0846, + "step": 26541 + }, + { + "epoch": 72.91758241758242, + "grad_norm": 8.839458465576172, + "learning_rate": 1.3541208791208792e-05, + "loss": 0.21, + "step": 26542 + }, + { + "epoch": 72.92032967032966, + "grad_norm": 12.60758113861084, + "learning_rate": 1.3539835164835166e-05, + "loss": 0.3598, + "step": 26543 + }, + { + "epoch": 72.92307692307692, + "grad_norm": 0.786287784576416, + "learning_rate": 1.3538461538461539e-05, + "loss": 0.0087, + "step": 26544 + }, + { + "epoch": 72.92582417582418, + "grad_norm": 9.59533405303955, + "learning_rate": 1.3537087912087912e-05, + "loss": 0.2009, + "step": 26545 + }, + { + "epoch": 72.92857142857143, + "grad_norm": 14.193589210510254, + "learning_rate": 1.3535714285714288e-05, + "loss": 0.2909, + "step": 26546 + }, + { + "epoch": 72.93131868131869, + "grad_norm": 7.907932281494141, + "learning_rate": 1.3534340659340661e-05, + "loss": 0.1618, + "step": 26547 + }, + { + "epoch": 72.93406593406593, + "grad_norm": 17.053733825683594, + "learning_rate": 1.3532967032967034e-05, + "loss": 0.1608, + "step": 26548 + }, + { + "epoch": 72.93681318681318, + "grad_norm": 10.024861335754395, + "learning_rate": 1.3531593406593408e-05, + "loss": 0.1194, + "step": 26549 + }, + { + "epoch": 72.93956043956044, + "grad_norm": 13.727804183959961, + "learning_rate": 1.353021978021978e-05, + "loss": 0.1506, + "step": 26550 + }, + { + "epoch": 72.9423076923077, + "grad_norm": 18.78824234008789, + "learning_rate": 1.3528846153846156e-05, + "loss": 0.3158, + "step": 26551 + }, + { + "epoch": 72.94505494505495, + "grad_norm": 8.415903091430664, + "learning_rate": 1.352747252747253e-05, + "loss": 0.1703, + "step": 26552 + }, + { + "epoch": 72.9478021978022, + "grad_norm": 18.42949867248535, + "learning_rate": 1.3526098901098901e-05, + "loss": 0.2823, + "step": 26553 + }, + { + "epoch": 72.95054945054945, + "grad_norm": 13.366105079650879, + "learning_rate": 1.3524725274725275e-05, + "loss": 0.2297, + "step": 26554 + }, + { + "epoch": 72.9532967032967, + "grad_norm": 25.645978927612305, + "learning_rate": 1.3523351648351648e-05, + "loss": 0.5577, + "step": 26555 + }, + { + "epoch": 72.95604395604396, + "grad_norm": 11.81497573852539, + "learning_rate": 1.3521978021978023e-05, + "loss": 0.1966, + "step": 26556 + }, + { + "epoch": 72.95879120879121, + "grad_norm": 15.982245445251465, + "learning_rate": 1.3520604395604397e-05, + "loss": 0.1273, + "step": 26557 + }, + { + "epoch": 72.96153846153847, + "grad_norm": 10.47661018371582, + "learning_rate": 1.351923076923077e-05, + "loss": 0.2994, + "step": 26558 + }, + { + "epoch": 72.96428571428571, + "grad_norm": 30.507606506347656, + "learning_rate": 1.3517857142857144e-05, + "loss": 0.339, + "step": 26559 + }, + { + "epoch": 72.96703296703296, + "grad_norm": 6.800404071807861, + "learning_rate": 1.3516483516483517e-05, + "loss": 0.0976, + "step": 26560 + }, + { + "epoch": 72.96978021978022, + "grad_norm": 10.787641525268555, + "learning_rate": 1.3515109890109889e-05, + "loss": 0.3456, + "step": 26561 + }, + { + "epoch": 72.97252747252747, + "grad_norm": 10.01508903503418, + "learning_rate": 1.3513736263736265e-05, + "loss": 0.2506, + "step": 26562 + }, + { + "epoch": 72.97527472527473, + "grad_norm": 14.173327445983887, + "learning_rate": 1.3512362637362639e-05, + "loss": 0.1413, + "step": 26563 + }, + { + "epoch": 72.97802197802197, + "grad_norm": 16.1949405670166, + "learning_rate": 1.351098901098901e-05, + "loss": 0.2776, + "step": 26564 + }, + { + "epoch": 72.98076923076923, + "grad_norm": 13.804040908813477, + "learning_rate": 1.3509615384615384e-05, + "loss": 0.2476, + "step": 26565 + }, + { + "epoch": 72.98351648351648, + "grad_norm": 8.847646713256836, + "learning_rate": 1.3508241758241757e-05, + "loss": 0.292, + "step": 26566 + }, + { + "epoch": 72.98626373626374, + "grad_norm": 7.6365742683410645, + "learning_rate": 1.3506868131868134e-05, + "loss": 0.1342, + "step": 26567 + }, + { + "epoch": 72.98901098901099, + "grad_norm": 20.865976333618164, + "learning_rate": 1.3505494505494506e-05, + "loss": 0.3595, + "step": 26568 + }, + { + "epoch": 72.99175824175825, + "grad_norm": 15.964031219482422, + "learning_rate": 1.350412087912088e-05, + "loss": 0.3746, + "step": 26569 + }, + { + "epoch": 72.99450549450549, + "grad_norm": 15.249202728271484, + "learning_rate": 1.3502747252747253e-05, + "loss": 0.2865, + "step": 26570 + }, + { + "epoch": 72.99725274725274, + "grad_norm": 2.518369197845459, + "learning_rate": 1.3501373626373626e-05, + "loss": 0.0375, + "step": 26571 + }, + { + "epoch": 73.0, + "grad_norm": 57.68020248413086, + "learning_rate": 1.3500000000000001e-05, + "loss": 1.1308, + "step": 26572 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.7451790633608816, + "eval_f1": 0.7365779060455175, + "eval_f1_DuraRiadoRio_64x64": 0.7372549019607844, + "eval_f1_Mole_64x64": 0.5306122448979592, + "eval_f1_Quebrado_64x64": 0.8821548821548821, + "eval_f1_RiadoRio_64x64": 0.6483790523690773, + "eval_f1_RioFechado_64x64": 0.8844884488448845, + "eval_loss": 1.5192630290985107, + "eval_precision": 0.8114884974466523, + "eval_precision_DuraRiadoRio_64x64": 0.8468468468468469, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.8562091503267973, + "eval_precision_RiadoRio_64x64": 0.5220883534136547, + "eval_precision_RioFechado_64x64": 0.8322981366459627, + "eval_recall": 0.7445072481673668, + "eval_recall_DuraRiadoRio_64x64": 0.6527777777777778, + "eval_recall_Mole_64x64": 0.3611111111111111, + "eval_recall_Quebrado_64x64": 0.9097222222222222, + "eval_recall_RiadoRio_64x64": 0.8552631578947368, + "eval_recall_RioFechado_64x64": 0.9436619718309859, + "eval_runtime": 1.8068, + "eval_samples_per_second": 401.823, + "eval_steps_per_second": 25.46, + "step": 26572 + }, + { + "epoch": 73.00274725274726, + "grad_norm": 7.537444114685059, + "learning_rate": 1.3498626373626375e-05, + "loss": 0.1083, + "step": 26573 + }, + { + "epoch": 73.00549450549451, + "grad_norm": 25.496484756469727, + "learning_rate": 1.3497252747252748e-05, + "loss": 0.5427, + "step": 26574 + }, + { + "epoch": 73.00824175824175, + "grad_norm": 7.490769386291504, + "learning_rate": 1.3495879120879121e-05, + "loss": 0.1093, + "step": 26575 + }, + { + "epoch": 73.01098901098901, + "grad_norm": 7.063632965087891, + "learning_rate": 1.3494505494505493e-05, + "loss": 0.119, + "step": 26576 + }, + { + "epoch": 73.01373626373626, + "grad_norm": 10.319074630737305, + "learning_rate": 1.349313186813187e-05, + "loss": 0.2709, + "step": 26577 + }, + { + "epoch": 73.01648351648352, + "grad_norm": 14.61413860321045, + "learning_rate": 1.3491758241758243e-05, + "loss": 0.1937, + "step": 26578 + }, + { + "epoch": 73.01923076923077, + "grad_norm": 19.90784454345703, + "learning_rate": 1.3490384615384615e-05, + "loss": 0.5008, + "step": 26579 + }, + { + "epoch": 73.02197802197803, + "grad_norm": 7.629651069641113, + "learning_rate": 1.3489010989010988e-05, + "loss": 0.0824, + "step": 26580 + }, + { + "epoch": 73.02472527472527, + "grad_norm": 14.605975151062012, + "learning_rate": 1.3487637362637362e-05, + "loss": 0.3236, + "step": 26581 + }, + { + "epoch": 73.02747252747253, + "grad_norm": 5.068426132202148, + "learning_rate": 1.3486263736263739e-05, + "loss": 0.1149, + "step": 26582 + }, + { + "epoch": 73.03021978021978, + "grad_norm": 10.135360717773438, + "learning_rate": 1.348489010989011e-05, + "loss": 0.1725, + "step": 26583 + }, + { + "epoch": 73.03296703296704, + "grad_norm": 8.243062019348145, + "learning_rate": 1.3483516483516484e-05, + "loss": 0.1383, + "step": 26584 + }, + { + "epoch": 73.03571428571429, + "grad_norm": 12.65672779083252, + "learning_rate": 1.3482142857142857e-05, + "loss": 0.2892, + "step": 26585 + }, + { + "epoch": 73.03846153846153, + "grad_norm": 11.954773902893066, + "learning_rate": 1.348076923076923e-05, + "loss": 0.2975, + "step": 26586 + }, + { + "epoch": 73.04120879120879, + "grad_norm": 11.706192016601562, + "learning_rate": 1.3479395604395606e-05, + "loss": 0.2589, + "step": 26587 + }, + { + "epoch": 73.04395604395604, + "grad_norm": 16.755212783813477, + "learning_rate": 1.3478021978021979e-05, + "loss": 0.3285, + "step": 26588 + }, + { + "epoch": 73.0467032967033, + "grad_norm": 25.727657318115234, + "learning_rate": 1.3476648351648352e-05, + "loss": 0.7912, + "step": 26589 + }, + { + "epoch": 73.04945054945055, + "grad_norm": 11.651824951171875, + "learning_rate": 1.3475274725274726e-05, + "loss": 0.247, + "step": 26590 + }, + { + "epoch": 73.0521978021978, + "grad_norm": 18.10664939880371, + "learning_rate": 1.3473901098901098e-05, + "loss": 0.4582, + "step": 26591 + }, + { + "epoch": 73.05494505494505, + "grad_norm": 5.701013088226318, + "learning_rate": 1.3472527472527474e-05, + "loss": 0.0938, + "step": 26592 + }, + { + "epoch": 73.0576923076923, + "grad_norm": 11.952940940856934, + "learning_rate": 1.3471153846153848e-05, + "loss": 0.2428, + "step": 26593 + }, + { + "epoch": 73.06043956043956, + "grad_norm": 14.26761531829834, + "learning_rate": 1.346978021978022e-05, + "loss": 0.2238, + "step": 26594 + }, + { + "epoch": 73.06318681318682, + "grad_norm": 10.47697639465332, + "learning_rate": 1.3468406593406593e-05, + "loss": 0.1727, + "step": 26595 + }, + { + "epoch": 73.06593406593407, + "grad_norm": 7.90871524810791, + "learning_rate": 1.3467032967032966e-05, + "loss": 0.1487, + "step": 26596 + }, + { + "epoch": 73.06868131868131, + "grad_norm": 14.14211654663086, + "learning_rate": 1.3465659340659343e-05, + "loss": 0.4832, + "step": 26597 + }, + { + "epoch": 73.07142857142857, + "grad_norm": 14.613585472106934, + "learning_rate": 1.3464285714285715e-05, + "loss": 0.2928, + "step": 26598 + }, + { + "epoch": 73.07417582417582, + "grad_norm": 24.044775009155273, + "learning_rate": 1.3462912087912088e-05, + "loss": 0.3134, + "step": 26599 + }, + { + "epoch": 73.07692307692308, + "grad_norm": 8.223804473876953, + "learning_rate": 1.3461538461538462e-05, + "loss": 0.1325, + "step": 26600 + }, + { + "epoch": 73.07967032967034, + "grad_norm": 15.526328086853027, + "learning_rate": 1.3460164835164835e-05, + "loss": 0.3679, + "step": 26601 + }, + { + "epoch": 73.08241758241758, + "grad_norm": 12.191631317138672, + "learning_rate": 1.345879120879121e-05, + "loss": 0.2349, + "step": 26602 + }, + { + "epoch": 73.08516483516483, + "grad_norm": 13.206949234008789, + "learning_rate": 1.3457417582417584e-05, + "loss": 0.2544, + "step": 26603 + }, + { + "epoch": 73.08791208791209, + "grad_norm": 14.343181610107422, + "learning_rate": 1.3456043956043957e-05, + "loss": 0.3163, + "step": 26604 + }, + { + "epoch": 73.09065934065934, + "grad_norm": 8.76024055480957, + "learning_rate": 1.345467032967033e-05, + "loss": 0.1955, + "step": 26605 + }, + { + "epoch": 73.0934065934066, + "grad_norm": 9.107111930847168, + "learning_rate": 1.3453296703296702e-05, + "loss": 0.1319, + "step": 26606 + }, + { + "epoch": 73.09615384615384, + "grad_norm": 15.915285110473633, + "learning_rate": 1.3451923076923079e-05, + "loss": 0.3909, + "step": 26607 + }, + { + "epoch": 73.0989010989011, + "grad_norm": 14.060839653015137, + "learning_rate": 1.3450549450549452e-05, + "loss": 0.3704, + "step": 26608 + }, + { + "epoch": 73.10164835164835, + "grad_norm": 13.364143371582031, + "learning_rate": 1.3449175824175824e-05, + "loss": 0.4568, + "step": 26609 + }, + { + "epoch": 73.1043956043956, + "grad_norm": 15.187488555908203, + "learning_rate": 1.3447802197802197e-05, + "loss": 0.21, + "step": 26610 + }, + { + "epoch": 73.10714285714286, + "grad_norm": 5.0524582862854, + "learning_rate": 1.344642857142857e-05, + "loss": 0.0784, + "step": 26611 + }, + { + "epoch": 73.10989010989012, + "grad_norm": 9.158964157104492, + "learning_rate": 1.3445054945054948e-05, + "loss": 0.2196, + "step": 26612 + }, + { + "epoch": 73.11263736263736, + "grad_norm": 8.11115550994873, + "learning_rate": 1.344368131868132e-05, + "loss": 0.1809, + "step": 26613 + }, + { + "epoch": 73.11538461538461, + "grad_norm": 11.59794807434082, + "learning_rate": 1.3442307692307693e-05, + "loss": 0.1663, + "step": 26614 + }, + { + "epoch": 73.11813186813187, + "grad_norm": 15.732407569885254, + "learning_rate": 1.3440934065934066e-05, + "loss": 0.2995, + "step": 26615 + }, + { + "epoch": 73.12087912087912, + "grad_norm": 16.91683578491211, + "learning_rate": 1.343956043956044e-05, + "loss": 0.5746, + "step": 26616 + }, + { + "epoch": 73.12362637362638, + "grad_norm": 27.82094383239746, + "learning_rate": 1.3438186813186815e-05, + "loss": 0.5733, + "step": 26617 + }, + { + "epoch": 73.12637362637362, + "grad_norm": 4.304121971130371, + "learning_rate": 1.3436813186813188e-05, + "loss": 0.1148, + "step": 26618 + }, + { + "epoch": 73.12912087912088, + "grad_norm": 8.510003089904785, + "learning_rate": 1.3435439560439561e-05, + "loss": 0.1176, + "step": 26619 + }, + { + "epoch": 73.13186813186813, + "grad_norm": 24.389251708984375, + "learning_rate": 1.3434065934065935e-05, + "loss": 0.5684, + "step": 26620 + }, + { + "epoch": 73.13461538461539, + "grad_norm": 9.091466903686523, + "learning_rate": 1.3432692307692307e-05, + "loss": 0.1709, + "step": 26621 + }, + { + "epoch": 73.13736263736264, + "grad_norm": 16.741756439208984, + "learning_rate": 1.3431318681318683e-05, + "loss": 0.2644, + "step": 26622 + }, + { + "epoch": 73.14010989010988, + "grad_norm": 9.682109832763672, + "learning_rate": 1.3429945054945057e-05, + "loss": 0.2334, + "step": 26623 + }, + { + "epoch": 73.14285714285714, + "grad_norm": 18.70569610595703, + "learning_rate": 1.3428571428571429e-05, + "loss": 0.2756, + "step": 26624 + }, + { + "epoch": 73.1456043956044, + "grad_norm": 3.464681386947632, + "learning_rate": 1.3427197802197802e-05, + "loss": 0.0513, + "step": 26625 + }, + { + "epoch": 73.14835164835165, + "grad_norm": 22.146638870239258, + "learning_rate": 1.3425824175824175e-05, + "loss": 0.5221, + "step": 26626 + }, + { + "epoch": 73.1510989010989, + "grad_norm": 5.6839280128479, + "learning_rate": 1.342445054945055e-05, + "loss": 0.1629, + "step": 26627 + }, + { + "epoch": 73.15384615384616, + "grad_norm": 16.32371711730957, + "learning_rate": 1.3423076923076924e-05, + "loss": 0.1654, + "step": 26628 + }, + { + "epoch": 73.1565934065934, + "grad_norm": 18.82193374633789, + "learning_rate": 1.3421703296703297e-05, + "loss": 0.2957, + "step": 26629 + }, + { + "epoch": 73.15934065934066, + "grad_norm": 19.33048439025879, + "learning_rate": 1.342032967032967e-05, + "loss": 0.4656, + "step": 26630 + }, + { + "epoch": 73.16208791208791, + "grad_norm": 8.617422103881836, + "learning_rate": 1.3418956043956044e-05, + "loss": 0.2761, + "step": 26631 + }, + { + "epoch": 73.16483516483517, + "grad_norm": 9.256646156311035, + "learning_rate": 1.3417582417582419e-05, + "loss": 0.1259, + "step": 26632 + }, + { + "epoch": 73.16758241758242, + "grad_norm": 10.114912033081055, + "learning_rate": 1.3416208791208793e-05, + "loss": 0.1533, + "step": 26633 + }, + { + "epoch": 73.17032967032966, + "grad_norm": 9.9551362991333, + "learning_rate": 1.3414835164835166e-05, + "loss": 0.1497, + "step": 26634 + }, + { + "epoch": 73.17307692307692, + "grad_norm": 1.6660689115524292, + "learning_rate": 1.341346153846154e-05, + "loss": 0.0212, + "step": 26635 + }, + { + "epoch": 73.17582417582418, + "grad_norm": 22.169113159179688, + "learning_rate": 1.3412087912087911e-05, + "loss": 0.819, + "step": 26636 + }, + { + "epoch": 73.17857142857143, + "grad_norm": 16.040870666503906, + "learning_rate": 1.3410714285714288e-05, + "loss": 0.2965, + "step": 26637 + }, + { + "epoch": 73.18131868131869, + "grad_norm": 21.28167724609375, + "learning_rate": 1.3409340659340661e-05, + "loss": 1.2257, + "step": 26638 + }, + { + "epoch": 73.18406593406593, + "grad_norm": 10.819836616516113, + "learning_rate": 1.3407967032967033e-05, + "loss": 0.14, + "step": 26639 + }, + { + "epoch": 73.18681318681318, + "grad_norm": 6.1125407218933105, + "learning_rate": 1.3406593406593406e-05, + "loss": 0.1158, + "step": 26640 + }, + { + "epoch": 73.18956043956044, + "grad_norm": 5.304344177246094, + "learning_rate": 1.340521978021978e-05, + "loss": 0.0786, + "step": 26641 + }, + { + "epoch": 73.1923076923077, + "grad_norm": 12.123502731323242, + "learning_rate": 1.3403846153846155e-05, + "loss": 0.2499, + "step": 26642 + }, + { + "epoch": 73.19505494505495, + "grad_norm": 5.307981967926025, + "learning_rate": 1.3402472527472528e-05, + "loss": 0.1613, + "step": 26643 + }, + { + "epoch": 73.1978021978022, + "grad_norm": 16.45693588256836, + "learning_rate": 1.3401098901098902e-05, + "loss": 0.3733, + "step": 26644 + }, + { + "epoch": 73.20054945054945, + "grad_norm": 14.602303504943848, + "learning_rate": 1.3399725274725275e-05, + "loss": 0.4202, + "step": 26645 + }, + { + "epoch": 73.2032967032967, + "grad_norm": 15.157124519348145, + "learning_rate": 1.3398351648351649e-05, + "loss": 0.2408, + "step": 26646 + }, + { + "epoch": 73.20604395604396, + "grad_norm": 7.054081916809082, + "learning_rate": 1.3396978021978024e-05, + "loss": 0.1177, + "step": 26647 + }, + { + "epoch": 73.20879120879121, + "grad_norm": 17.36872673034668, + "learning_rate": 1.3395604395604397e-05, + "loss": 0.1709, + "step": 26648 + }, + { + "epoch": 73.21153846153847, + "grad_norm": 18.65452766418457, + "learning_rate": 1.339423076923077e-05, + "loss": 0.5896, + "step": 26649 + }, + { + "epoch": 73.21428571428571, + "grad_norm": 13.628825187683105, + "learning_rate": 1.3392857142857144e-05, + "loss": 0.2218, + "step": 26650 + }, + { + "epoch": 73.21703296703296, + "grad_norm": 11.24288558959961, + "learning_rate": 1.3391483516483516e-05, + "loss": 0.2055, + "step": 26651 + }, + { + "epoch": 73.21978021978022, + "grad_norm": 4.392850875854492, + "learning_rate": 1.3390109890109892e-05, + "loss": 0.0539, + "step": 26652 + }, + { + "epoch": 73.22252747252747, + "grad_norm": 25.736433029174805, + "learning_rate": 1.3388736263736266e-05, + "loss": 0.6534, + "step": 26653 + }, + { + "epoch": 73.22527472527473, + "grad_norm": 5.129903793334961, + "learning_rate": 1.3387362637362637e-05, + "loss": 0.0633, + "step": 26654 + }, + { + "epoch": 73.22802197802197, + "grad_norm": 19.225648880004883, + "learning_rate": 1.3385989010989011e-05, + "loss": 0.5309, + "step": 26655 + }, + { + "epoch": 73.23076923076923, + "grad_norm": 18.206144332885742, + "learning_rate": 1.3384615384615384e-05, + "loss": 0.3894, + "step": 26656 + }, + { + "epoch": 73.23351648351648, + "grad_norm": 9.042303085327148, + "learning_rate": 1.338324175824176e-05, + "loss": 0.0875, + "step": 26657 + }, + { + "epoch": 73.23626373626374, + "grad_norm": 10.028727531433105, + "learning_rate": 1.3381868131868133e-05, + "loss": 0.1549, + "step": 26658 + }, + { + "epoch": 73.23901098901099, + "grad_norm": 16.390653610229492, + "learning_rate": 1.3380494505494506e-05, + "loss": 0.3767, + "step": 26659 + }, + { + "epoch": 73.24175824175825, + "grad_norm": 14.674065589904785, + "learning_rate": 1.337912087912088e-05, + "loss": 0.3958, + "step": 26660 + }, + { + "epoch": 73.24450549450549, + "grad_norm": 10.044313430786133, + "learning_rate": 1.3377747252747253e-05, + "loss": 0.2129, + "step": 26661 + }, + { + "epoch": 73.24725274725274, + "grad_norm": 11.484203338623047, + "learning_rate": 1.3376373626373628e-05, + "loss": 0.1371, + "step": 26662 + }, + { + "epoch": 73.25, + "grad_norm": 15.24172592163086, + "learning_rate": 1.3375000000000002e-05, + "loss": 0.1703, + "step": 26663 + }, + { + "epoch": 73.25274725274726, + "grad_norm": 11.006672859191895, + "learning_rate": 1.3373626373626375e-05, + "loss": 0.2603, + "step": 26664 + }, + { + "epoch": 73.25549450549451, + "grad_norm": 3.9094295501708984, + "learning_rate": 1.3372252747252748e-05, + "loss": 0.0361, + "step": 26665 + }, + { + "epoch": 73.25824175824175, + "grad_norm": 12.888849258422852, + "learning_rate": 1.337087912087912e-05, + "loss": 0.169, + "step": 26666 + }, + { + "epoch": 73.26098901098901, + "grad_norm": 12.055874824523926, + "learning_rate": 1.3369505494505497e-05, + "loss": 0.188, + "step": 26667 + }, + { + "epoch": 73.26373626373626, + "grad_norm": 4.843054294586182, + "learning_rate": 1.336813186813187e-05, + "loss": 0.0611, + "step": 26668 + }, + { + "epoch": 73.26648351648352, + "grad_norm": 11.08601188659668, + "learning_rate": 1.3366758241758242e-05, + "loss": 0.2397, + "step": 26669 + }, + { + "epoch": 73.26923076923077, + "grad_norm": 4.712769031524658, + "learning_rate": 1.3365384615384615e-05, + "loss": 0.11, + "step": 26670 + }, + { + "epoch": 73.27197802197803, + "grad_norm": 16.170427322387695, + "learning_rate": 1.3364010989010989e-05, + "loss": 0.2553, + "step": 26671 + }, + { + "epoch": 73.27472527472527, + "grad_norm": 5.898680686950684, + "learning_rate": 1.3362637362637364e-05, + "loss": 0.0548, + "step": 26672 + }, + { + "epoch": 73.27747252747253, + "grad_norm": 5.692196369171143, + "learning_rate": 1.3361263736263737e-05, + "loss": 0.0461, + "step": 26673 + }, + { + "epoch": 73.28021978021978, + "grad_norm": 9.964814186096191, + "learning_rate": 1.335989010989011e-05, + "loss": 0.2541, + "step": 26674 + }, + { + "epoch": 73.28296703296704, + "grad_norm": 15.722517967224121, + "learning_rate": 1.3358516483516484e-05, + "loss": 0.2201, + "step": 26675 + }, + { + "epoch": 73.28571428571429, + "grad_norm": 5.135801315307617, + "learning_rate": 1.3357142857142858e-05, + "loss": 0.0876, + "step": 26676 + }, + { + "epoch": 73.28846153846153, + "grad_norm": 16.148805618286133, + "learning_rate": 1.335576923076923e-05, + "loss": 0.3891, + "step": 26677 + }, + { + "epoch": 73.29120879120879, + "grad_norm": 3.957451105117798, + "learning_rate": 1.3354395604395606e-05, + "loss": 0.0663, + "step": 26678 + }, + { + "epoch": 73.29395604395604, + "grad_norm": 7.397090435028076, + "learning_rate": 1.335302197802198e-05, + "loss": 0.1582, + "step": 26679 + }, + { + "epoch": 73.2967032967033, + "grad_norm": 11.161941528320312, + "learning_rate": 1.3351648351648353e-05, + "loss": 0.1961, + "step": 26680 + }, + { + "epoch": 73.29945054945055, + "grad_norm": 6.650224208831787, + "learning_rate": 1.3350274725274725e-05, + "loss": 0.0811, + "step": 26681 + }, + { + "epoch": 73.3021978021978, + "grad_norm": 18.557762145996094, + "learning_rate": 1.3348901098901098e-05, + "loss": 0.297, + "step": 26682 + }, + { + "epoch": 73.30494505494505, + "grad_norm": 13.774397850036621, + "learning_rate": 1.3347527472527475e-05, + "loss": 0.177, + "step": 26683 + }, + { + "epoch": 73.3076923076923, + "grad_norm": 4.399734973907471, + "learning_rate": 1.3346153846153846e-05, + "loss": 0.0851, + "step": 26684 + }, + { + "epoch": 73.31043956043956, + "grad_norm": 21.835161209106445, + "learning_rate": 1.334478021978022e-05, + "loss": 0.4758, + "step": 26685 + }, + { + "epoch": 73.31318681318682, + "grad_norm": 17.494272232055664, + "learning_rate": 1.3343406593406593e-05, + "loss": 0.2333, + "step": 26686 + }, + { + "epoch": 73.31593406593407, + "grad_norm": 13.845132827758789, + "learning_rate": 1.3342032967032967e-05, + "loss": 0.2576, + "step": 26687 + }, + { + "epoch": 73.31868131868131, + "grad_norm": 9.33419132232666, + "learning_rate": 1.3340659340659342e-05, + "loss": 0.2273, + "step": 26688 + }, + { + "epoch": 73.32142857142857, + "grad_norm": 3.6615726947784424, + "learning_rate": 1.3339285714285715e-05, + "loss": 0.0418, + "step": 26689 + }, + { + "epoch": 73.32417582417582, + "grad_norm": 13.529254913330078, + "learning_rate": 1.3337912087912089e-05, + "loss": 0.264, + "step": 26690 + }, + { + "epoch": 73.32692307692308, + "grad_norm": 8.02171516418457, + "learning_rate": 1.3336538461538462e-05, + "loss": 0.101, + "step": 26691 + }, + { + "epoch": 73.32967032967034, + "grad_norm": 15.195967674255371, + "learning_rate": 1.3335164835164834e-05, + "loss": 0.2574, + "step": 26692 + }, + { + "epoch": 73.33241758241758, + "grad_norm": 1.4149432182312012, + "learning_rate": 1.333379120879121e-05, + "loss": 0.0171, + "step": 26693 + }, + { + "epoch": 73.33516483516483, + "grad_norm": 23.1816463470459, + "learning_rate": 1.3332417582417584e-05, + "loss": 0.333, + "step": 26694 + }, + { + "epoch": 73.33791208791209, + "grad_norm": 19.37868881225586, + "learning_rate": 1.3331043956043957e-05, + "loss": 0.2768, + "step": 26695 + }, + { + "epoch": 73.34065934065934, + "grad_norm": 16.595308303833008, + "learning_rate": 1.3329670329670329e-05, + "loss": 0.5723, + "step": 26696 + }, + { + "epoch": 73.3434065934066, + "grad_norm": 8.552626609802246, + "learning_rate": 1.3328296703296702e-05, + "loss": 0.1297, + "step": 26697 + }, + { + "epoch": 73.34615384615384, + "grad_norm": 12.83399772644043, + "learning_rate": 1.332692307692308e-05, + "loss": 0.1398, + "step": 26698 + }, + { + "epoch": 73.3489010989011, + "grad_norm": 4.087052345275879, + "learning_rate": 1.3325549450549451e-05, + "loss": 0.0378, + "step": 26699 + }, + { + "epoch": 73.35164835164835, + "grad_norm": 4.5451555252075195, + "learning_rate": 1.3324175824175824e-05, + "loss": 0.044, + "step": 26700 + }, + { + "epoch": 73.3543956043956, + "grad_norm": 8.48904800415039, + "learning_rate": 1.3322802197802198e-05, + "loss": 0.1307, + "step": 26701 + }, + { + "epoch": 73.35714285714286, + "grad_norm": 7.96438455581665, + "learning_rate": 1.3321428571428571e-05, + "loss": 0.1387, + "step": 26702 + }, + { + "epoch": 73.35989010989012, + "grad_norm": 6.61557149887085, + "learning_rate": 1.3320054945054946e-05, + "loss": 0.1174, + "step": 26703 + }, + { + "epoch": 73.36263736263736, + "grad_norm": 8.760366439819336, + "learning_rate": 1.331868131868132e-05, + "loss": 0.2143, + "step": 26704 + }, + { + "epoch": 73.36538461538461, + "grad_norm": 24.392847061157227, + "learning_rate": 1.3317307692307693e-05, + "loss": 0.8531, + "step": 26705 + }, + { + "epoch": 73.36813186813187, + "grad_norm": 7.708493709564209, + "learning_rate": 1.3315934065934067e-05, + "loss": 0.1985, + "step": 26706 + }, + { + "epoch": 73.37087912087912, + "grad_norm": 13.615143775939941, + "learning_rate": 1.3314560439560438e-05, + "loss": 0.2029, + "step": 26707 + }, + { + "epoch": 73.37362637362638, + "grad_norm": 21.532123565673828, + "learning_rate": 1.3313186813186815e-05, + "loss": 0.5574, + "step": 26708 + }, + { + "epoch": 73.37637362637362, + "grad_norm": 12.664031982421875, + "learning_rate": 1.3311813186813188e-05, + "loss": 0.2622, + "step": 26709 + }, + { + "epoch": 73.37912087912088, + "grad_norm": 12.362563133239746, + "learning_rate": 1.331043956043956e-05, + "loss": 0.3239, + "step": 26710 + }, + { + "epoch": 73.38186813186813, + "grad_norm": 8.325758934020996, + "learning_rate": 1.3309065934065934e-05, + "loss": 0.129, + "step": 26711 + }, + { + "epoch": 73.38461538461539, + "grad_norm": 22.866207122802734, + "learning_rate": 1.3307692307692307e-05, + "loss": 0.3746, + "step": 26712 + }, + { + "epoch": 73.38736263736264, + "grad_norm": 9.159686088562012, + "learning_rate": 1.3306318681318684e-05, + "loss": 0.1122, + "step": 26713 + }, + { + "epoch": 73.39010989010988, + "grad_norm": 21.58542823791504, + "learning_rate": 1.3304945054945055e-05, + "loss": 0.4952, + "step": 26714 + }, + { + "epoch": 73.39285714285714, + "grad_norm": 10.697595596313477, + "learning_rate": 1.3303571428571429e-05, + "loss": 0.3257, + "step": 26715 + }, + { + "epoch": 73.3956043956044, + "grad_norm": 20.579410552978516, + "learning_rate": 1.3302197802197802e-05, + "loss": 0.4606, + "step": 26716 + }, + { + "epoch": 73.39835164835165, + "grad_norm": 11.898741722106934, + "learning_rate": 1.3300824175824176e-05, + "loss": 0.1639, + "step": 26717 + }, + { + "epoch": 73.4010989010989, + "grad_norm": 8.070961952209473, + "learning_rate": 1.329945054945055e-05, + "loss": 0.0922, + "step": 26718 + }, + { + "epoch": 73.40384615384616, + "grad_norm": 12.074240684509277, + "learning_rate": 1.3298076923076924e-05, + "loss": 0.2869, + "step": 26719 + }, + { + "epoch": 73.4065934065934, + "grad_norm": 4.8436279296875, + "learning_rate": 1.3296703296703298e-05, + "loss": 0.0765, + "step": 26720 + }, + { + "epoch": 73.40934065934066, + "grad_norm": 7.508991241455078, + "learning_rate": 1.3295329670329671e-05, + "loss": 0.0937, + "step": 26721 + }, + { + "epoch": 73.41208791208791, + "grad_norm": 18.125276565551758, + "learning_rate": 1.3293956043956043e-05, + "loss": 0.2392, + "step": 26722 + }, + { + "epoch": 73.41483516483517, + "grad_norm": 10.632814407348633, + "learning_rate": 1.329258241758242e-05, + "loss": 0.1977, + "step": 26723 + }, + { + "epoch": 73.41758241758242, + "grad_norm": 8.283778190612793, + "learning_rate": 1.3291208791208793e-05, + "loss": 0.2463, + "step": 26724 + }, + { + "epoch": 73.42032967032966, + "grad_norm": 9.205687522888184, + "learning_rate": 1.3289835164835165e-05, + "loss": 0.1971, + "step": 26725 + }, + { + "epoch": 73.42307692307692, + "grad_norm": 3.877598285675049, + "learning_rate": 1.3288461538461538e-05, + "loss": 0.0282, + "step": 26726 + }, + { + "epoch": 73.42582417582418, + "grad_norm": 10.318341255187988, + "learning_rate": 1.3287087912087911e-05, + "loss": 0.2647, + "step": 26727 + }, + { + "epoch": 73.42857142857143, + "grad_norm": 15.586542129516602, + "learning_rate": 1.3285714285714288e-05, + "loss": 0.2849, + "step": 26728 + }, + { + "epoch": 73.43131868131869, + "grad_norm": 7.261331558227539, + "learning_rate": 1.328434065934066e-05, + "loss": 0.1006, + "step": 26729 + }, + { + "epoch": 73.43406593406593, + "grad_norm": 13.097450256347656, + "learning_rate": 1.3282967032967033e-05, + "loss": 0.2491, + "step": 26730 + }, + { + "epoch": 73.43681318681318, + "grad_norm": 5.098851203918457, + "learning_rate": 1.3281593406593407e-05, + "loss": 0.1473, + "step": 26731 + }, + { + "epoch": 73.43956043956044, + "grad_norm": 11.559659957885742, + "learning_rate": 1.328021978021978e-05, + "loss": 0.1139, + "step": 26732 + }, + { + "epoch": 73.4423076923077, + "grad_norm": 3.5607199668884277, + "learning_rate": 1.3278846153846155e-05, + "loss": 0.0623, + "step": 26733 + }, + { + "epoch": 73.44505494505495, + "grad_norm": 4.144280433654785, + "learning_rate": 1.3277472527472529e-05, + "loss": 0.0757, + "step": 26734 + }, + { + "epoch": 73.4478021978022, + "grad_norm": 7.781425476074219, + "learning_rate": 1.3276098901098902e-05, + "loss": 0.1292, + "step": 26735 + }, + { + "epoch": 73.45054945054945, + "grad_norm": 12.675217628479004, + "learning_rate": 1.3274725274725275e-05, + "loss": 0.2, + "step": 26736 + }, + { + "epoch": 73.4532967032967, + "grad_norm": 2.323362112045288, + "learning_rate": 1.3273351648351647e-05, + "loss": 0.039, + "step": 26737 + }, + { + "epoch": 73.45604395604396, + "grad_norm": 1.0076972246170044, + "learning_rate": 1.3271978021978024e-05, + "loss": 0.0193, + "step": 26738 + }, + { + "epoch": 73.45879120879121, + "grad_norm": 32.83815383911133, + "learning_rate": 1.3270604395604397e-05, + "loss": 0.9309, + "step": 26739 + }, + { + "epoch": 73.46153846153847, + "grad_norm": 14.175570487976074, + "learning_rate": 1.3269230769230769e-05, + "loss": 0.4419, + "step": 26740 + }, + { + "epoch": 73.46428571428571, + "grad_norm": 14.93660831451416, + "learning_rate": 1.3267857142857143e-05, + "loss": 0.2938, + "step": 26741 + }, + { + "epoch": 73.46703296703296, + "grad_norm": 17.365114212036133, + "learning_rate": 1.3266483516483516e-05, + "loss": 0.4259, + "step": 26742 + }, + { + "epoch": 73.46978021978022, + "grad_norm": 9.866400718688965, + "learning_rate": 1.3265109890109893e-05, + "loss": 0.1381, + "step": 26743 + }, + { + "epoch": 73.47252747252747, + "grad_norm": 9.52258014678955, + "learning_rate": 1.3263736263736264e-05, + "loss": 0.0624, + "step": 26744 + }, + { + "epoch": 73.47527472527473, + "grad_norm": 4.009231090545654, + "learning_rate": 1.3262362637362638e-05, + "loss": 0.0736, + "step": 26745 + }, + { + "epoch": 73.47802197802197, + "grad_norm": 15.250435829162598, + "learning_rate": 1.3260989010989011e-05, + "loss": 0.339, + "step": 26746 + }, + { + "epoch": 73.48076923076923, + "grad_norm": 28.898223876953125, + "learning_rate": 1.3259615384615385e-05, + "loss": 0.7228, + "step": 26747 + }, + { + "epoch": 73.48351648351648, + "grad_norm": 11.70057487487793, + "learning_rate": 1.325824175824176e-05, + "loss": 0.1991, + "step": 26748 + }, + { + "epoch": 73.48626373626374, + "grad_norm": 13.45861530303955, + "learning_rate": 1.3256868131868133e-05, + "loss": 0.1752, + "step": 26749 + }, + { + "epoch": 73.48901098901099, + "grad_norm": 7.145178318023682, + "learning_rate": 1.3255494505494507e-05, + "loss": 0.0763, + "step": 26750 + }, + { + "epoch": 73.49175824175825, + "grad_norm": 17.50420379638672, + "learning_rate": 1.325412087912088e-05, + "loss": 0.1913, + "step": 26751 + }, + { + "epoch": 73.49450549450549, + "grad_norm": 1.9859532117843628, + "learning_rate": 1.3252747252747252e-05, + "loss": 0.0181, + "step": 26752 + }, + { + "epoch": 73.49725274725274, + "grad_norm": 9.324111938476562, + "learning_rate": 1.3251373626373628e-05, + "loss": 0.1379, + "step": 26753 + }, + { + "epoch": 73.5, + "grad_norm": 7.421457290649414, + "learning_rate": 1.3250000000000002e-05, + "loss": 0.0985, + "step": 26754 + }, + { + "epoch": 73.50274725274726, + "grad_norm": 4.71510648727417, + "learning_rate": 1.3248626373626374e-05, + "loss": 0.0794, + "step": 26755 + }, + { + "epoch": 73.50549450549451, + "grad_norm": 5.936847686767578, + "learning_rate": 1.3247252747252747e-05, + "loss": 0.1472, + "step": 26756 + }, + { + "epoch": 73.50824175824175, + "grad_norm": 15.922802925109863, + "learning_rate": 1.324587912087912e-05, + "loss": 0.329, + "step": 26757 + }, + { + "epoch": 73.51098901098901, + "grad_norm": 21.723215103149414, + "learning_rate": 1.3244505494505497e-05, + "loss": 0.4711, + "step": 26758 + }, + { + "epoch": 73.51373626373626, + "grad_norm": 1.4643445014953613, + "learning_rate": 1.3243131868131869e-05, + "loss": 0.0185, + "step": 26759 + }, + { + "epoch": 73.51648351648352, + "grad_norm": 16.131502151489258, + "learning_rate": 1.3241758241758242e-05, + "loss": 0.1708, + "step": 26760 + }, + { + "epoch": 73.51923076923077, + "grad_norm": 5.025143146514893, + "learning_rate": 1.3240384615384616e-05, + "loss": 0.1144, + "step": 26761 + }, + { + "epoch": 73.52197802197803, + "grad_norm": 4.743318557739258, + "learning_rate": 1.3239010989010989e-05, + "loss": 0.037, + "step": 26762 + }, + { + "epoch": 73.52472527472527, + "grad_norm": 6.065115928649902, + "learning_rate": 1.3237637362637364e-05, + "loss": 0.1027, + "step": 26763 + }, + { + "epoch": 73.52747252747253, + "grad_norm": 4.382446765899658, + "learning_rate": 1.3236263736263738e-05, + "loss": 0.079, + "step": 26764 + }, + { + "epoch": 73.53021978021978, + "grad_norm": 16.476293563842773, + "learning_rate": 1.3234890109890111e-05, + "loss": 0.4088, + "step": 26765 + }, + { + "epoch": 73.53296703296704, + "grad_norm": 6.228325366973877, + "learning_rate": 1.3233516483516484e-05, + "loss": 0.1348, + "step": 26766 + }, + { + "epoch": 73.53571428571429, + "grad_norm": 5.08494758605957, + "learning_rate": 1.3232142857142856e-05, + "loss": 0.0993, + "step": 26767 + }, + { + "epoch": 73.53846153846153, + "grad_norm": 4.743393421173096, + "learning_rate": 1.3230769230769233e-05, + "loss": 0.0482, + "step": 26768 + }, + { + "epoch": 73.54120879120879, + "grad_norm": 6.132725238800049, + "learning_rate": 1.3229395604395606e-05, + "loss": 0.093, + "step": 26769 + }, + { + "epoch": 73.54395604395604, + "grad_norm": 9.093589782714844, + "learning_rate": 1.3228021978021978e-05, + "loss": 0.1935, + "step": 26770 + }, + { + "epoch": 73.5467032967033, + "grad_norm": 12.90045166015625, + "learning_rate": 1.3226648351648351e-05, + "loss": 0.2783, + "step": 26771 + }, + { + "epoch": 73.54945054945055, + "grad_norm": 13.461080551147461, + "learning_rate": 1.3225274725274725e-05, + "loss": 0.3931, + "step": 26772 + }, + { + "epoch": 73.5521978021978, + "grad_norm": 7.95640230178833, + "learning_rate": 1.32239010989011e-05, + "loss": 0.1193, + "step": 26773 + }, + { + "epoch": 73.55494505494505, + "grad_norm": 10.795037269592285, + "learning_rate": 1.3222527472527473e-05, + "loss": 0.2367, + "step": 26774 + }, + { + "epoch": 73.5576923076923, + "grad_norm": 16.898611068725586, + "learning_rate": 1.3221153846153847e-05, + "loss": 0.3824, + "step": 26775 + }, + { + "epoch": 73.56043956043956, + "grad_norm": 7.2120137214660645, + "learning_rate": 1.321978021978022e-05, + "loss": 0.0892, + "step": 26776 + }, + { + "epoch": 73.56318681318682, + "grad_norm": 7.387983798980713, + "learning_rate": 1.3218406593406594e-05, + "loss": 0.0801, + "step": 26777 + }, + { + "epoch": 73.56593406593407, + "grad_norm": 1.9703770875930786, + "learning_rate": 1.3217032967032969e-05, + "loss": 0.0249, + "step": 26778 + }, + { + "epoch": 73.56868131868131, + "grad_norm": 11.08289623260498, + "learning_rate": 1.3215659340659342e-05, + "loss": 0.3258, + "step": 26779 + }, + { + "epoch": 73.57142857142857, + "grad_norm": 6.792928218841553, + "learning_rate": 1.3214285714285716e-05, + "loss": 0.0774, + "step": 26780 + }, + { + "epoch": 73.57417582417582, + "grad_norm": 8.046815872192383, + "learning_rate": 1.3212912087912089e-05, + "loss": 0.1239, + "step": 26781 + }, + { + "epoch": 73.57692307692308, + "grad_norm": 6.391409397125244, + "learning_rate": 1.321153846153846e-05, + "loss": 0.0637, + "step": 26782 + }, + { + "epoch": 73.57967032967034, + "grad_norm": 13.551127433776855, + "learning_rate": 1.3210164835164837e-05, + "loss": 0.2749, + "step": 26783 + }, + { + "epoch": 73.58241758241758, + "grad_norm": 7.945613861083984, + "learning_rate": 1.3208791208791211e-05, + "loss": 0.1327, + "step": 26784 + }, + { + "epoch": 73.58516483516483, + "grad_norm": 8.867193222045898, + "learning_rate": 1.3207417582417583e-05, + "loss": 0.1532, + "step": 26785 + }, + { + "epoch": 73.58791208791209, + "grad_norm": 13.683483123779297, + "learning_rate": 1.3206043956043956e-05, + "loss": 0.3977, + "step": 26786 + }, + { + "epoch": 73.59065934065934, + "grad_norm": 5.232954025268555, + "learning_rate": 1.320467032967033e-05, + "loss": 0.0763, + "step": 26787 + }, + { + "epoch": 73.5934065934066, + "grad_norm": 7.194807529449463, + "learning_rate": 1.3203296703296704e-05, + "loss": 0.1091, + "step": 26788 + }, + { + "epoch": 73.59615384615384, + "grad_norm": 4.121859073638916, + "learning_rate": 1.3201923076923078e-05, + "loss": 0.0255, + "step": 26789 + }, + { + "epoch": 73.5989010989011, + "grad_norm": 14.681200981140137, + "learning_rate": 1.3200549450549451e-05, + "loss": 0.1777, + "step": 26790 + }, + { + "epoch": 73.60164835164835, + "grad_norm": 6.497929573059082, + "learning_rate": 1.3199175824175825e-05, + "loss": 0.1561, + "step": 26791 + }, + { + "epoch": 73.6043956043956, + "grad_norm": 3.464138984680176, + "learning_rate": 1.3197802197802198e-05, + "loss": 0.0557, + "step": 26792 + }, + { + "epoch": 73.60714285714286, + "grad_norm": 6.79396915435791, + "learning_rate": 1.3196428571428572e-05, + "loss": 0.0694, + "step": 26793 + }, + { + "epoch": 73.60989010989012, + "grad_norm": 6.649949073791504, + "learning_rate": 1.3195054945054947e-05, + "loss": 0.1081, + "step": 26794 + }, + { + "epoch": 73.61263736263736, + "grad_norm": 10.39663028717041, + "learning_rate": 1.319368131868132e-05, + "loss": 0.2285, + "step": 26795 + }, + { + "epoch": 73.61538461538461, + "grad_norm": 12.161063194274902, + "learning_rate": 1.3192307692307693e-05, + "loss": 0.2146, + "step": 26796 + }, + { + "epoch": 73.61813186813187, + "grad_norm": 12.041139602661133, + "learning_rate": 1.3190934065934065e-05, + "loss": 0.2679, + "step": 26797 + }, + { + "epoch": 73.62087912087912, + "grad_norm": 5.598179817199707, + "learning_rate": 1.3189560439560439e-05, + "loss": 0.0523, + "step": 26798 + }, + { + "epoch": 73.62362637362638, + "grad_norm": 20.4456844329834, + "learning_rate": 1.3188186813186815e-05, + "loss": 0.3948, + "step": 26799 + }, + { + "epoch": 73.62637362637362, + "grad_norm": 4.936982154846191, + "learning_rate": 1.3186813186813187e-05, + "loss": 0.0607, + "step": 26800 + }, + { + "epoch": 73.62912087912088, + "grad_norm": 14.72513484954834, + "learning_rate": 1.318543956043956e-05, + "loss": 0.3285, + "step": 26801 + }, + { + "epoch": 73.63186813186813, + "grad_norm": 13.387591361999512, + "learning_rate": 1.3184065934065934e-05, + "loss": 0.2677, + "step": 26802 + }, + { + "epoch": 73.63461538461539, + "grad_norm": 18.532461166381836, + "learning_rate": 1.3182692307692307e-05, + "loss": 0.3446, + "step": 26803 + }, + { + "epoch": 73.63736263736264, + "grad_norm": 23.75092315673828, + "learning_rate": 1.3181318681318682e-05, + "loss": 0.506, + "step": 26804 + }, + { + "epoch": 73.64010989010988, + "grad_norm": 13.361700057983398, + "learning_rate": 1.3179945054945056e-05, + "loss": 0.2579, + "step": 26805 + }, + { + "epoch": 73.64285714285714, + "grad_norm": 9.41148853302002, + "learning_rate": 1.317857142857143e-05, + "loss": 0.2569, + "step": 26806 + }, + { + "epoch": 73.6456043956044, + "grad_norm": 9.404986381530762, + "learning_rate": 1.3177197802197803e-05, + "loss": 0.1476, + "step": 26807 + }, + { + "epoch": 73.64835164835165, + "grad_norm": 7.584328651428223, + "learning_rate": 1.3175824175824174e-05, + "loss": 0.1094, + "step": 26808 + }, + { + "epoch": 73.6510989010989, + "grad_norm": 13.693329811096191, + "learning_rate": 1.3174450549450551e-05, + "loss": 0.5472, + "step": 26809 + }, + { + "epoch": 73.65384615384616, + "grad_norm": 19.458175659179688, + "learning_rate": 1.3173076923076925e-05, + "loss": 0.277, + "step": 26810 + }, + { + "epoch": 73.6565934065934, + "grad_norm": 4.591289520263672, + "learning_rate": 1.3171703296703298e-05, + "loss": 0.0496, + "step": 26811 + }, + { + "epoch": 73.65934065934066, + "grad_norm": 16.4655818939209, + "learning_rate": 1.317032967032967e-05, + "loss": 0.5578, + "step": 26812 + }, + { + "epoch": 73.66208791208791, + "grad_norm": 10.209979057312012, + "learning_rate": 1.3168956043956043e-05, + "loss": 0.1568, + "step": 26813 + }, + { + "epoch": 73.66483516483517, + "grad_norm": 13.406072616577148, + "learning_rate": 1.316758241758242e-05, + "loss": 0.2005, + "step": 26814 + }, + { + "epoch": 73.66758241758242, + "grad_norm": 8.310009956359863, + "learning_rate": 1.3166208791208792e-05, + "loss": 0.0931, + "step": 26815 + }, + { + "epoch": 73.67032967032966, + "grad_norm": 4.805669784545898, + "learning_rate": 1.3164835164835165e-05, + "loss": 0.0659, + "step": 26816 + }, + { + "epoch": 73.67307692307692, + "grad_norm": 28.421232223510742, + "learning_rate": 1.3163461538461538e-05, + "loss": 0.8667, + "step": 26817 + }, + { + "epoch": 73.67582417582418, + "grad_norm": 10.938532829284668, + "learning_rate": 1.3162087912087912e-05, + "loss": 0.1717, + "step": 26818 + }, + { + "epoch": 73.67857142857143, + "grad_norm": 16.645387649536133, + "learning_rate": 1.3160714285714287e-05, + "loss": 0.6249, + "step": 26819 + }, + { + "epoch": 73.68131868131869, + "grad_norm": 6.932664394378662, + "learning_rate": 1.315934065934066e-05, + "loss": 0.0511, + "step": 26820 + }, + { + "epoch": 73.68406593406593, + "grad_norm": 5.252934455871582, + "learning_rate": 1.3157967032967034e-05, + "loss": 0.0859, + "step": 26821 + }, + { + "epoch": 73.68681318681318, + "grad_norm": 16.765005111694336, + "learning_rate": 1.3156593406593407e-05, + "loss": 0.3327, + "step": 26822 + }, + { + "epoch": 73.68956043956044, + "grad_norm": 12.531245231628418, + "learning_rate": 1.3155219780219779e-05, + "loss": 0.2252, + "step": 26823 + }, + { + "epoch": 73.6923076923077, + "grad_norm": 13.749197006225586, + "learning_rate": 1.3153846153846156e-05, + "loss": 0.2449, + "step": 26824 + }, + { + "epoch": 73.69505494505495, + "grad_norm": 5.416677474975586, + "learning_rate": 1.3152472527472529e-05, + "loss": 0.1126, + "step": 26825 + }, + { + "epoch": 73.6978021978022, + "grad_norm": 16.35347557067871, + "learning_rate": 1.3151098901098902e-05, + "loss": 0.317, + "step": 26826 + }, + { + "epoch": 73.70054945054945, + "grad_norm": 8.518972396850586, + "learning_rate": 1.3149725274725274e-05, + "loss": 0.129, + "step": 26827 + }, + { + "epoch": 73.7032967032967, + "grad_norm": 13.973409652709961, + "learning_rate": 1.3148351648351648e-05, + "loss": 0.1738, + "step": 26828 + }, + { + "epoch": 73.70604395604396, + "grad_norm": 10.624689102172852, + "learning_rate": 1.3146978021978024e-05, + "loss": 0.1134, + "step": 26829 + }, + { + "epoch": 73.70879120879121, + "grad_norm": 14.340422630310059, + "learning_rate": 1.3145604395604396e-05, + "loss": 0.3059, + "step": 26830 + }, + { + "epoch": 73.71153846153847, + "grad_norm": 18.975521087646484, + "learning_rate": 1.314423076923077e-05, + "loss": 0.2428, + "step": 26831 + }, + { + "epoch": 73.71428571428571, + "grad_norm": 3.853431463241577, + "learning_rate": 1.3142857142857143e-05, + "loss": 0.0319, + "step": 26832 + }, + { + "epoch": 73.71703296703296, + "grad_norm": 19.941261291503906, + "learning_rate": 1.3141483516483516e-05, + "loss": 0.3134, + "step": 26833 + }, + { + "epoch": 73.71978021978022, + "grad_norm": 16.679279327392578, + "learning_rate": 1.3140109890109891e-05, + "loss": 0.2469, + "step": 26834 + }, + { + "epoch": 73.72252747252747, + "grad_norm": 19.33586883544922, + "learning_rate": 1.3138736263736265e-05, + "loss": 0.5847, + "step": 26835 + }, + { + "epoch": 73.72527472527473, + "grad_norm": 20.40851402282715, + "learning_rate": 1.3137362637362638e-05, + "loss": 0.2472, + "step": 26836 + }, + { + "epoch": 73.72802197802197, + "grad_norm": 4.726855278015137, + "learning_rate": 1.3135989010989012e-05, + "loss": 0.0705, + "step": 26837 + }, + { + "epoch": 73.73076923076923, + "grad_norm": 11.850082397460938, + "learning_rate": 1.3134615384615383e-05, + "loss": 0.2591, + "step": 26838 + }, + { + "epoch": 73.73351648351648, + "grad_norm": 12.955033302307129, + "learning_rate": 1.313324175824176e-05, + "loss": 0.1277, + "step": 26839 + }, + { + "epoch": 73.73626373626374, + "grad_norm": 8.711495399475098, + "learning_rate": 1.3131868131868134e-05, + "loss": 0.1835, + "step": 26840 + }, + { + "epoch": 73.73901098901099, + "grad_norm": 8.875141143798828, + "learning_rate": 1.3130494505494507e-05, + "loss": 0.1481, + "step": 26841 + }, + { + "epoch": 73.74175824175825, + "grad_norm": 7.402405261993408, + "learning_rate": 1.3129120879120879e-05, + "loss": 0.173, + "step": 26842 + }, + { + "epoch": 73.74450549450549, + "grad_norm": 15.817248344421387, + "learning_rate": 1.3127747252747252e-05, + "loss": 0.3572, + "step": 26843 + }, + { + "epoch": 73.74725274725274, + "grad_norm": 13.286613464355469, + "learning_rate": 1.3126373626373629e-05, + "loss": 0.5201, + "step": 26844 + }, + { + "epoch": 73.75, + "grad_norm": 11.634352684020996, + "learning_rate": 1.3125e-05, + "loss": 0.209, + "step": 26845 + }, + { + "epoch": 73.75274725274726, + "grad_norm": 16.970399856567383, + "learning_rate": 1.3123626373626374e-05, + "loss": 0.4583, + "step": 26846 + }, + { + "epoch": 73.75549450549451, + "grad_norm": 15.688167572021484, + "learning_rate": 1.3122252747252747e-05, + "loss": 0.1896, + "step": 26847 + }, + { + "epoch": 73.75824175824175, + "grad_norm": 14.95698356628418, + "learning_rate": 1.312087912087912e-05, + "loss": 0.5459, + "step": 26848 + }, + { + "epoch": 73.76098901098901, + "grad_norm": 6.812950134277344, + "learning_rate": 1.3119505494505496e-05, + "loss": 0.0712, + "step": 26849 + }, + { + "epoch": 73.76373626373626, + "grad_norm": 6.045804500579834, + "learning_rate": 1.311813186813187e-05, + "loss": 0.1638, + "step": 26850 + }, + { + "epoch": 73.76648351648352, + "grad_norm": 8.218973159790039, + "learning_rate": 1.3116758241758243e-05, + "loss": 0.1972, + "step": 26851 + }, + { + "epoch": 73.76923076923077, + "grad_norm": 20.34625244140625, + "learning_rate": 1.3115384615384616e-05, + "loss": 0.4557, + "step": 26852 + }, + { + "epoch": 73.77197802197803, + "grad_norm": 15.947900772094727, + "learning_rate": 1.3114010989010988e-05, + "loss": 0.6901, + "step": 26853 + }, + { + "epoch": 73.77472527472527, + "grad_norm": 17.61705780029297, + "learning_rate": 1.3112637362637365e-05, + "loss": 0.4152, + "step": 26854 + }, + { + "epoch": 73.77747252747253, + "grad_norm": 5.996251106262207, + "learning_rate": 1.3111263736263738e-05, + "loss": 0.1017, + "step": 26855 + }, + { + "epoch": 73.78021978021978, + "grad_norm": 10.718725204467773, + "learning_rate": 1.3109890109890111e-05, + "loss": 0.2584, + "step": 26856 + }, + { + "epoch": 73.78296703296704, + "grad_norm": 6.381021499633789, + "learning_rate": 1.3108516483516483e-05, + "loss": 0.0893, + "step": 26857 + }, + { + "epoch": 73.78571428571429, + "grad_norm": 9.176562309265137, + "learning_rate": 1.3107142857142857e-05, + "loss": 0.168, + "step": 26858 + }, + { + "epoch": 73.78846153846153, + "grad_norm": 4.473179340362549, + "learning_rate": 1.3105769230769233e-05, + "loss": 0.0673, + "step": 26859 + }, + { + "epoch": 73.79120879120879, + "grad_norm": 6.146421909332275, + "learning_rate": 1.3104395604395605e-05, + "loss": 0.1387, + "step": 26860 + }, + { + "epoch": 73.79395604395604, + "grad_norm": 9.717850685119629, + "learning_rate": 1.3103021978021978e-05, + "loss": 0.3803, + "step": 26861 + }, + { + "epoch": 73.7967032967033, + "grad_norm": 13.719439506530762, + "learning_rate": 1.3101648351648352e-05, + "loss": 0.2104, + "step": 26862 + }, + { + "epoch": 73.79945054945055, + "grad_norm": 4.366701126098633, + "learning_rate": 1.3100274725274725e-05, + "loss": 0.0803, + "step": 26863 + }, + { + "epoch": 73.8021978021978, + "grad_norm": 6.829573154449463, + "learning_rate": 1.30989010989011e-05, + "loss": 0.1729, + "step": 26864 + }, + { + "epoch": 73.80494505494505, + "grad_norm": 10.757152557373047, + "learning_rate": 1.3097527472527474e-05, + "loss": 0.166, + "step": 26865 + }, + { + "epoch": 73.8076923076923, + "grad_norm": 9.64146900177002, + "learning_rate": 1.3096153846153847e-05, + "loss": 0.2156, + "step": 26866 + }, + { + "epoch": 73.81043956043956, + "grad_norm": 4.954480171203613, + "learning_rate": 1.309478021978022e-05, + "loss": 0.0602, + "step": 26867 + }, + { + "epoch": 73.81318681318682, + "grad_norm": 11.340093612670898, + "learning_rate": 1.3093406593406592e-05, + "loss": 0.3345, + "step": 26868 + }, + { + "epoch": 73.81593406593407, + "grad_norm": 10.503631591796875, + "learning_rate": 1.3092032967032969e-05, + "loss": 0.1513, + "step": 26869 + }, + { + "epoch": 73.81868131868131, + "grad_norm": 18.438791275024414, + "learning_rate": 1.3090659340659342e-05, + "loss": 0.6256, + "step": 26870 + }, + { + "epoch": 73.82142857142857, + "grad_norm": 4.9487996101379395, + "learning_rate": 1.3089285714285714e-05, + "loss": 0.0601, + "step": 26871 + }, + { + "epoch": 73.82417582417582, + "grad_norm": 15.170044898986816, + "learning_rate": 1.3087912087912088e-05, + "loss": 0.3204, + "step": 26872 + }, + { + "epoch": 73.82692307692308, + "grad_norm": 5.961891174316406, + "learning_rate": 1.3086538461538461e-05, + "loss": 0.1552, + "step": 26873 + }, + { + "epoch": 73.82967032967034, + "grad_norm": 12.568122863769531, + "learning_rate": 1.3085164835164838e-05, + "loss": 0.2273, + "step": 26874 + }, + { + "epoch": 73.83241758241758, + "grad_norm": 19.0966796875, + "learning_rate": 1.308379120879121e-05, + "loss": 0.3572, + "step": 26875 + }, + { + "epoch": 73.83516483516483, + "grad_norm": 27.315532684326172, + "learning_rate": 1.3082417582417583e-05, + "loss": 0.7734, + "step": 26876 + }, + { + "epoch": 73.83791208791209, + "grad_norm": 7.3731770515441895, + "learning_rate": 1.3081043956043956e-05, + "loss": 0.2357, + "step": 26877 + }, + { + "epoch": 73.84065934065934, + "grad_norm": 13.733386039733887, + "learning_rate": 1.307967032967033e-05, + "loss": 0.1309, + "step": 26878 + }, + { + "epoch": 73.8434065934066, + "grad_norm": 9.02624225616455, + "learning_rate": 1.3078296703296705e-05, + "loss": 0.1298, + "step": 26879 + }, + { + "epoch": 73.84615384615384, + "grad_norm": 4.75895357131958, + "learning_rate": 1.3076923076923078e-05, + "loss": 0.0511, + "step": 26880 + }, + { + "epoch": 73.8489010989011, + "grad_norm": 14.578067779541016, + "learning_rate": 1.3075549450549452e-05, + "loss": 0.2498, + "step": 26881 + }, + { + "epoch": 73.85164835164835, + "grad_norm": 10.39892292022705, + "learning_rate": 1.3074175824175825e-05, + "loss": 0.2633, + "step": 26882 + }, + { + "epoch": 73.8543956043956, + "grad_norm": 9.558799743652344, + "learning_rate": 1.3072802197802197e-05, + "loss": 0.1913, + "step": 26883 + }, + { + "epoch": 73.85714285714286, + "grad_norm": 16.35960578918457, + "learning_rate": 1.3071428571428574e-05, + "loss": 0.2043, + "step": 26884 + }, + { + "epoch": 73.85989010989012, + "grad_norm": 17.503765106201172, + "learning_rate": 1.3070054945054947e-05, + "loss": 0.3902, + "step": 26885 + }, + { + "epoch": 73.86263736263736, + "grad_norm": 12.109246253967285, + "learning_rate": 1.3068681318681319e-05, + "loss": 0.2496, + "step": 26886 + }, + { + "epoch": 73.86538461538461, + "grad_norm": 1.5718225240707397, + "learning_rate": 1.3067307692307692e-05, + "loss": 0.0177, + "step": 26887 + }, + { + "epoch": 73.86813186813187, + "grad_norm": 9.912413597106934, + "learning_rate": 1.3065934065934066e-05, + "loss": 0.1787, + "step": 26888 + }, + { + "epoch": 73.87087912087912, + "grad_norm": 11.094871520996094, + "learning_rate": 1.3064560439560442e-05, + "loss": 0.0917, + "step": 26889 + }, + { + "epoch": 73.87362637362638, + "grad_norm": 12.051077842712402, + "learning_rate": 1.3063186813186814e-05, + "loss": 0.1559, + "step": 26890 + }, + { + "epoch": 73.87637362637362, + "grad_norm": 12.628198623657227, + "learning_rate": 1.3061813186813187e-05, + "loss": 0.368, + "step": 26891 + }, + { + "epoch": 73.87912087912088, + "grad_norm": 13.24932861328125, + "learning_rate": 1.306043956043956e-05, + "loss": 0.3625, + "step": 26892 + }, + { + "epoch": 73.88186813186813, + "grad_norm": 7.662827014923096, + "learning_rate": 1.3059065934065934e-05, + "loss": 0.2334, + "step": 26893 + }, + { + "epoch": 73.88461538461539, + "grad_norm": 12.472732543945312, + "learning_rate": 1.305769230769231e-05, + "loss": 0.1781, + "step": 26894 + }, + { + "epoch": 73.88736263736264, + "grad_norm": 5.738754749298096, + "learning_rate": 1.3056318681318683e-05, + "loss": 0.1078, + "step": 26895 + }, + { + "epoch": 73.89010989010988, + "grad_norm": 11.42823600769043, + "learning_rate": 1.3054945054945056e-05, + "loss": 0.2587, + "step": 26896 + }, + { + "epoch": 73.89285714285714, + "grad_norm": 12.926511764526367, + "learning_rate": 1.305357142857143e-05, + "loss": 0.278, + "step": 26897 + }, + { + "epoch": 73.8956043956044, + "grad_norm": 18.925800323486328, + "learning_rate": 1.3052197802197801e-05, + "loss": 0.3916, + "step": 26898 + }, + { + "epoch": 73.89835164835165, + "grad_norm": 5.0407209396362305, + "learning_rate": 1.3050824175824178e-05, + "loss": 0.0619, + "step": 26899 + }, + { + "epoch": 73.9010989010989, + "grad_norm": 1.0927327871322632, + "learning_rate": 1.3049450549450551e-05, + "loss": 0.0173, + "step": 26900 + }, + { + "epoch": 73.90384615384616, + "grad_norm": 21.662233352661133, + "learning_rate": 1.3048076923076923e-05, + "loss": 0.9401, + "step": 26901 + }, + { + "epoch": 73.9065934065934, + "grad_norm": 3.8661949634552, + "learning_rate": 1.3046703296703297e-05, + "loss": 0.0506, + "step": 26902 + }, + { + "epoch": 73.90934065934066, + "grad_norm": 7.310268402099609, + "learning_rate": 1.304532967032967e-05, + "loss": 0.0908, + "step": 26903 + }, + { + "epoch": 73.91208791208791, + "grad_norm": 10.584931373596191, + "learning_rate": 1.3043956043956043e-05, + "loss": 0.2279, + "step": 26904 + }, + { + "epoch": 73.91483516483517, + "grad_norm": 4.808948040008545, + "learning_rate": 1.3042582417582419e-05, + "loss": 0.0616, + "step": 26905 + }, + { + "epoch": 73.91758241758242, + "grad_norm": 3.982623815536499, + "learning_rate": 1.3041208791208792e-05, + "loss": 0.0479, + "step": 26906 + }, + { + "epoch": 73.92032967032966, + "grad_norm": 6.1322550773620605, + "learning_rate": 1.3039835164835165e-05, + "loss": 0.0681, + "step": 26907 + }, + { + "epoch": 73.92307692307692, + "grad_norm": 22.365947723388672, + "learning_rate": 1.3038461538461539e-05, + "loss": 0.5513, + "step": 26908 + }, + { + "epoch": 73.92582417582418, + "grad_norm": 13.129735946655273, + "learning_rate": 1.3037087912087912e-05, + "loss": 0.3581, + "step": 26909 + }, + { + "epoch": 73.92857142857143, + "grad_norm": 12.863114356994629, + "learning_rate": 1.3035714285714287e-05, + "loss": 0.3391, + "step": 26910 + }, + { + "epoch": 73.93131868131869, + "grad_norm": 15.562909126281738, + "learning_rate": 1.303434065934066e-05, + "loss": 0.2099, + "step": 26911 + }, + { + "epoch": 73.93406593406593, + "grad_norm": 10.988698959350586, + "learning_rate": 1.3032967032967034e-05, + "loss": 0.2102, + "step": 26912 + }, + { + "epoch": 73.93681318681318, + "grad_norm": 9.986248016357422, + "learning_rate": 1.3031593406593406e-05, + "loss": 0.307, + "step": 26913 + }, + { + "epoch": 73.93956043956044, + "grad_norm": 15.354360580444336, + "learning_rate": 1.303021978021978e-05, + "loss": 0.2166, + "step": 26914 + }, + { + "epoch": 73.9423076923077, + "grad_norm": 16.304758071899414, + "learning_rate": 1.3028846153846156e-05, + "loss": 0.2454, + "step": 26915 + }, + { + "epoch": 73.94505494505495, + "grad_norm": 7.23568058013916, + "learning_rate": 1.3027472527472528e-05, + "loss": 0.0656, + "step": 26916 + }, + { + "epoch": 73.9478021978022, + "grad_norm": 10.26171588897705, + "learning_rate": 1.3026098901098901e-05, + "loss": 0.1341, + "step": 26917 + }, + { + "epoch": 73.95054945054945, + "grad_norm": 7.565300941467285, + "learning_rate": 1.3024725274725274e-05, + "loss": 0.1014, + "step": 26918 + }, + { + "epoch": 73.9532967032967, + "grad_norm": 12.198569297790527, + "learning_rate": 1.3023351648351648e-05, + "loss": 0.2081, + "step": 26919 + }, + { + "epoch": 73.95604395604396, + "grad_norm": 13.087594032287598, + "learning_rate": 1.3021978021978023e-05, + "loss": 0.3065, + "step": 26920 + }, + { + "epoch": 73.95879120879121, + "grad_norm": 9.238692283630371, + "learning_rate": 1.3020604395604396e-05, + "loss": 0.1693, + "step": 26921 + }, + { + "epoch": 73.96153846153847, + "grad_norm": 6.534994602203369, + "learning_rate": 1.301923076923077e-05, + "loss": 0.0806, + "step": 26922 + }, + { + "epoch": 73.96428571428571, + "grad_norm": 10.361344337463379, + "learning_rate": 1.3017857142857143e-05, + "loss": 0.1998, + "step": 26923 + }, + { + "epoch": 73.96703296703296, + "grad_norm": 18.680089950561523, + "learning_rate": 1.3016483516483517e-05, + "loss": 0.3799, + "step": 26924 + }, + { + "epoch": 73.96978021978022, + "grad_norm": 3.6743695735931396, + "learning_rate": 1.3015109890109892e-05, + "loss": 0.0478, + "step": 26925 + }, + { + "epoch": 73.97252747252747, + "grad_norm": 0.8530394434928894, + "learning_rate": 1.3013736263736265e-05, + "loss": 0.0101, + "step": 26926 + }, + { + "epoch": 73.97527472527473, + "grad_norm": 14.632684707641602, + "learning_rate": 1.3012362637362639e-05, + "loss": 0.219, + "step": 26927 + }, + { + "epoch": 73.97802197802197, + "grad_norm": 7.48318338394165, + "learning_rate": 1.301098901098901e-05, + "loss": 0.1682, + "step": 26928 + }, + { + "epoch": 73.98076923076923, + "grad_norm": 11.102924346923828, + "learning_rate": 1.3009615384615384e-05, + "loss": 0.1006, + "step": 26929 + }, + { + "epoch": 73.98351648351648, + "grad_norm": 23.334978103637695, + "learning_rate": 1.300824175824176e-05, + "loss": 0.341, + "step": 26930 + }, + { + "epoch": 73.98626373626374, + "grad_norm": 4.10074520111084, + "learning_rate": 1.3006868131868132e-05, + "loss": 0.0399, + "step": 26931 + }, + { + "epoch": 73.98901098901099, + "grad_norm": 6.767799377441406, + "learning_rate": 1.3005494505494506e-05, + "loss": 0.2065, + "step": 26932 + }, + { + "epoch": 73.99175824175825, + "grad_norm": 25.66475486755371, + "learning_rate": 1.3004120879120879e-05, + "loss": 0.737, + "step": 26933 + }, + { + "epoch": 73.99450549450549, + "grad_norm": 2.5170884132385254, + "learning_rate": 1.3002747252747252e-05, + "loss": 0.0319, + "step": 26934 + }, + { + "epoch": 73.99725274725274, + "grad_norm": 9.434632301330566, + "learning_rate": 1.3001373626373627e-05, + "loss": 0.1432, + "step": 26935 + }, + { + "epoch": 74.0, + "grad_norm": 20.18451499938965, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.1167, + "step": 26936 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.709366391184573, + "eval_f1": 0.7240786293794097, + "eval_f1_DuraRiadoRio_64x64": 0.6640926640926641, + "eval_f1_Mole_64x64": 0.8484848484848485, + "eval_f1_Quebrado_64x64": 0.8102189781021898, + "eval_f1_RiadoRio_64x64": 0.5885057471264368, + "eval_f1_RioFechado_64x64": 0.7090909090909091, + "eval_loss": 1.2730739116668701, + "eval_precision": 0.797460478784799, + "eval_precision_DuraRiadoRio_64x64": 0.7478260869565218, + "eval_precision_Mole_64x64": 0.9333333333333333, + "eval_precision_Quebrado_64x64": 0.8538461538461538, + "eval_precision_RiadoRio_64x64": 0.45229681978798586, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.7074468742278232, + "eval_recall_DuraRiadoRio_64x64": 0.5972222222222222, + "eval_recall_Mole_64x64": 0.7777777777777778, + "eval_recall_Quebrado_64x64": 0.7708333333333334, + "eval_recall_RiadoRio_64x64": 0.8421052631578947, + "eval_recall_RioFechado_64x64": 0.5492957746478874, + "eval_runtime": 1.7317, + "eval_samples_per_second": 419.241, + "eval_steps_per_second": 26.563, + "step": 26936 + }, + { + "epoch": 74.00274725274726, + "grad_norm": 16.834619522094727, + "learning_rate": 1.2998626373626374e-05, + "loss": 0.282, + "step": 26937 + }, + { + "epoch": 74.00549450549451, + "grad_norm": 21.337690353393555, + "learning_rate": 1.2997252747252748e-05, + "loss": 0.5834, + "step": 26938 + }, + { + "epoch": 74.00824175824175, + "grad_norm": 11.010101318359375, + "learning_rate": 1.2995879120879121e-05, + "loss": 0.2428, + "step": 26939 + }, + { + "epoch": 74.01098901098901, + "grad_norm": 19.345409393310547, + "learning_rate": 1.2994505494505496e-05, + "loss": 0.1619, + "step": 26940 + }, + { + "epoch": 74.01373626373626, + "grad_norm": 15.326079368591309, + "learning_rate": 1.299313186813187e-05, + "loss": 0.3568, + "step": 26941 + }, + { + "epoch": 74.01648351648352, + "grad_norm": 12.399802207946777, + "learning_rate": 1.2991758241758243e-05, + "loss": 0.2206, + "step": 26942 + }, + { + "epoch": 74.01923076923077, + "grad_norm": 11.842814445495605, + "learning_rate": 1.2990384615384615e-05, + "loss": 0.2151, + "step": 26943 + }, + { + "epoch": 74.02197802197803, + "grad_norm": 8.174837112426758, + "learning_rate": 1.2989010989010988e-05, + "loss": 0.1718, + "step": 26944 + }, + { + "epoch": 74.02472527472527, + "grad_norm": 15.05453872680664, + "learning_rate": 1.2987637362637365e-05, + "loss": 0.2979, + "step": 26945 + }, + { + "epoch": 74.02747252747253, + "grad_norm": 14.607208251953125, + "learning_rate": 1.2986263736263737e-05, + "loss": 0.2605, + "step": 26946 + }, + { + "epoch": 74.03021978021978, + "grad_norm": 21.354446411132812, + "learning_rate": 1.298489010989011e-05, + "loss": 0.413, + "step": 26947 + }, + { + "epoch": 74.03296703296704, + "grad_norm": 8.584423065185547, + "learning_rate": 1.2983516483516483e-05, + "loss": 0.24, + "step": 26948 + }, + { + "epoch": 74.03571428571429, + "grad_norm": 14.958739280700684, + "learning_rate": 1.2982142857142857e-05, + "loss": 0.5674, + "step": 26949 + }, + { + "epoch": 74.03846153846153, + "grad_norm": 11.699200630187988, + "learning_rate": 1.2980769230769232e-05, + "loss": 0.1542, + "step": 26950 + }, + { + "epoch": 74.04120879120879, + "grad_norm": 21.0535831451416, + "learning_rate": 1.2979395604395605e-05, + "loss": 0.567, + "step": 26951 + }, + { + "epoch": 74.04395604395604, + "grad_norm": 15.46959114074707, + "learning_rate": 1.2978021978021979e-05, + "loss": 0.5279, + "step": 26952 + }, + { + "epoch": 74.0467032967033, + "grad_norm": 2.533017158508301, + "learning_rate": 1.2976648351648352e-05, + "loss": 0.0203, + "step": 26953 + }, + { + "epoch": 74.04945054945055, + "grad_norm": 8.542745590209961, + "learning_rate": 1.2975274725274726e-05, + "loss": 0.1707, + "step": 26954 + }, + { + "epoch": 74.0521978021978, + "grad_norm": 12.149784088134766, + "learning_rate": 1.29739010989011e-05, + "loss": 0.1511, + "step": 26955 + }, + { + "epoch": 74.05494505494505, + "grad_norm": 12.262264251708984, + "learning_rate": 1.2972527472527474e-05, + "loss": 0.1943, + "step": 26956 + }, + { + "epoch": 74.0576923076923, + "grad_norm": 14.702372550964355, + "learning_rate": 1.2971153846153848e-05, + "loss": 0.3643, + "step": 26957 + }, + { + "epoch": 74.06043956043956, + "grad_norm": 8.003067970275879, + "learning_rate": 1.296978021978022e-05, + "loss": 0.1046, + "step": 26958 + }, + { + "epoch": 74.06318681318682, + "grad_norm": 10.146388053894043, + "learning_rate": 1.2968406593406593e-05, + "loss": 0.2102, + "step": 26959 + }, + { + "epoch": 74.06593406593407, + "grad_norm": 18.623126983642578, + "learning_rate": 1.296703296703297e-05, + "loss": 0.7588, + "step": 26960 + }, + { + "epoch": 74.06868131868131, + "grad_norm": 20.663850784301758, + "learning_rate": 1.2965659340659341e-05, + "loss": 0.3468, + "step": 26961 + }, + { + "epoch": 74.07142857142857, + "grad_norm": 7.840654373168945, + "learning_rate": 1.2964285714285715e-05, + "loss": 0.1693, + "step": 26962 + }, + { + "epoch": 74.07417582417582, + "grad_norm": 13.656889915466309, + "learning_rate": 1.2962912087912088e-05, + "loss": 0.3478, + "step": 26963 + }, + { + "epoch": 74.07692307692308, + "grad_norm": 10.51229476928711, + "learning_rate": 1.2961538461538461e-05, + "loss": 0.177, + "step": 26964 + }, + { + "epoch": 74.07967032967034, + "grad_norm": 9.29702377319336, + "learning_rate": 1.2960164835164836e-05, + "loss": 0.17, + "step": 26965 + }, + { + "epoch": 74.08241758241758, + "grad_norm": 18.80357551574707, + "learning_rate": 1.295879120879121e-05, + "loss": 0.377, + "step": 26966 + }, + { + "epoch": 74.08516483516483, + "grad_norm": 20.264144897460938, + "learning_rate": 1.2957417582417583e-05, + "loss": 0.2575, + "step": 26967 + }, + { + "epoch": 74.08791208791209, + "grad_norm": 13.149049758911133, + "learning_rate": 1.2956043956043957e-05, + "loss": 0.2241, + "step": 26968 + }, + { + "epoch": 74.09065934065934, + "grad_norm": 4.605707168579102, + "learning_rate": 1.2954670329670328e-05, + "loss": 0.1294, + "step": 26969 + }, + { + "epoch": 74.0934065934066, + "grad_norm": 13.673466682434082, + "learning_rate": 1.2953296703296705e-05, + "loss": 0.2861, + "step": 26970 + }, + { + "epoch": 74.09615384615384, + "grad_norm": 19.822383880615234, + "learning_rate": 1.2951923076923079e-05, + "loss": 0.3855, + "step": 26971 + }, + { + "epoch": 74.0989010989011, + "grad_norm": 8.524632453918457, + "learning_rate": 1.2950549450549452e-05, + "loss": 0.1006, + "step": 26972 + }, + { + "epoch": 74.10164835164835, + "grad_norm": 7.03666877746582, + "learning_rate": 1.2949175824175824e-05, + "loss": 0.1442, + "step": 26973 + }, + { + "epoch": 74.1043956043956, + "grad_norm": 8.522283554077148, + "learning_rate": 1.2947802197802197e-05, + "loss": 0.0959, + "step": 26974 + }, + { + "epoch": 74.10714285714286, + "grad_norm": 8.612188339233398, + "learning_rate": 1.2946428571428574e-05, + "loss": 0.1888, + "step": 26975 + }, + { + "epoch": 74.10989010989012, + "grad_norm": 17.300390243530273, + "learning_rate": 1.2945054945054946e-05, + "loss": 0.4639, + "step": 26976 + }, + { + "epoch": 74.11263736263736, + "grad_norm": 9.806564331054688, + "learning_rate": 1.2943681318681319e-05, + "loss": 0.3874, + "step": 26977 + }, + { + "epoch": 74.11538461538461, + "grad_norm": 17.606170654296875, + "learning_rate": 1.2942307692307692e-05, + "loss": 0.3689, + "step": 26978 + }, + { + "epoch": 74.11813186813187, + "grad_norm": 16.51718521118164, + "learning_rate": 1.2940934065934066e-05, + "loss": 0.3073, + "step": 26979 + }, + { + "epoch": 74.12087912087912, + "grad_norm": 4.504008769989014, + "learning_rate": 1.2939560439560441e-05, + "loss": 0.1059, + "step": 26980 + }, + { + "epoch": 74.12362637362638, + "grad_norm": 5.574343681335449, + "learning_rate": 1.2938186813186814e-05, + "loss": 0.0828, + "step": 26981 + }, + { + "epoch": 74.12637362637362, + "grad_norm": 6.922209739685059, + "learning_rate": 1.2936813186813188e-05, + "loss": 0.1301, + "step": 26982 + }, + { + "epoch": 74.12912087912088, + "grad_norm": 10.571684837341309, + "learning_rate": 1.2935439560439561e-05, + "loss": 0.326, + "step": 26983 + }, + { + "epoch": 74.13186813186813, + "grad_norm": 8.230698585510254, + "learning_rate": 1.2934065934065933e-05, + "loss": 0.1771, + "step": 26984 + }, + { + "epoch": 74.13461538461539, + "grad_norm": 11.43344497680664, + "learning_rate": 1.293269230769231e-05, + "loss": 0.2755, + "step": 26985 + }, + { + "epoch": 74.13736263736264, + "grad_norm": 16.93153953552246, + "learning_rate": 1.2931318681318683e-05, + "loss": 0.2892, + "step": 26986 + }, + { + "epoch": 74.14010989010988, + "grad_norm": 6.229710578918457, + "learning_rate": 1.2929945054945057e-05, + "loss": 0.0759, + "step": 26987 + }, + { + "epoch": 74.14285714285714, + "grad_norm": 11.448265075683594, + "learning_rate": 1.2928571428571428e-05, + "loss": 0.1832, + "step": 26988 + }, + { + "epoch": 74.1456043956044, + "grad_norm": 19.42547607421875, + "learning_rate": 1.2927197802197802e-05, + "loss": 0.5408, + "step": 26989 + }, + { + "epoch": 74.14835164835165, + "grad_norm": 20.588855743408203, + "learning_rate": 1.2925824175824178e-05, + "loss": 0.212, + "step": 26990 + }, + { + "epoch": 74.1510989010989, + "grad_norm": 6.271788120269775, + "learning_rate": 1.292445054945055e-05, + "loss": 0.0926, + "step": 26991 + }, + { + "epoch": 74.15384615384616, + "grad_norm": 3.0490012168884277, + "learning_rate": 1.2923076923076924e-05, + "loss": 0.0389, + "step": 26992 + }, + { + "epoch": 74.1565934065934, + "grad_norm": 12.247469902038574, + "learning_rate": 1.2921703296703297e-05, + "loss": 0.19, + "step": 26993 + }, + { + "epoch": 74.15934065934066, + "grad_norm": 9.243400573730469, + "learning_rate": 1.292032967032967e-05, + "loss": 0.2935, + "step": 26994 + }, + { + "epoch": 74.16208791208791, + "grad_norm": 8.917819023132324, + "learning_rate": 1.2918956043956045e-05, + "loss": 0.1958, + "step": 26995 + }, + { + "epoch": 74.16483516483517, + "grad_norm": 3.1020288467407227, + "learning_rate": 1.2917582417582419e-05, + "loss": 0.0421, + "step": 26996 + }, + { + "epoch": 74.16758241758242, + "grad_norm": 7.09203577041626, + "learning_rate": 1.2916208791208792e-05, + "loss": 0.1459, + "step": 26997 + }, + { + "epoch": 74.17032967032966, + "grad_norm": 13.058327674865723, + "learning_rate": 1.2914835164835166e-05, + "loss": 0.252, + "step": 26998 + }, + { + "epoch": 74.17307692307692, + "grad_norm": 9.66506290435791, + "learning_rate": 1.2913461538461537e-05, + "loss": 0.2027, + "step": 26999 + }, + { + "epoch": 74.17582417582418, + "grad_norm": 3.611273765563965, + "learning_rate": 1.2912087912087914e-05, + "loss": 0.0525, + "step": 27000 + }, + { + "epoch": 74.17857142857143, + "grad_norm": 11.928380012512207, + "learning_rate": 1.2910714285714288e-05, + "loss": 0.3903, + "step": 27001 + }, + { + "epoch": 74.18131868131869, + "grad_norm": 1.562010407447815, + "learning_rate": 1.2909340659340661e-05, + "loss": 0.0184, + "step": 27002 + }, + { + "epoch": 74.18406593406593, + "grad_norm": 10.373099327087402, + "learning_rate": 1.2907967032967033e-05, + "loss": 0.1592, + "step": 27003 + }, + { + "epoch": 74.18681318681318, + "grad_norm": 21.250696182250977, + "learning_rate": 1.2906593406593406e-05, + "loss": 0.3491, + "step": 27004 + }, + { + "epoch": 74.18956043956044, + "grad_norm": 20.581920623779297, + "learning_rate": 1.2905219780219783e-05, + "loss": 0.4494, + "step": 27005 + }, + { + "epoch": 74.1923076923077, + "grad_norm": 4.09444522857666, + "learning_rate": 1.2903846153846155e-05, + "loss": 0.0553, + "step": 27006 + }, + { + "epoch": 74.19505494505495, + "grad_norm": 8.768293380737305, + "learning_rate": 1.2902472527472528e-05, + "loss": 0.1324, + "step": 27007 + }, + { + "epoch": 74.1978021978022, + "grad_norm": 18.293922424316406, + "learning_rate": 1.2901098901098901e-05, + "loss": 0.4006, + "step": 27008 + }, + { + "epoch": 74.20054945054945, + "grad_norm": 11.18000602722168, + "learning_rate": 1.2899725274725275e-05, + "loss": 0.3002, + "step": 27009 + }, + { + "epoch": 74.2032967032967, + "grad_norm": 12.837058067321777, + "learning_rate": 1.289835164835165e-05, + "loss": 0.1041, + "step": 27010 + }, + { + "epoch": 74.20604395604396, + "grad_norm": 8.611340522766113, + "learning_rate": 1.2896978021978023e-05, + "loss": 0.1161, + "step": 27011 + }, + { + "epoch": 74.20879120879121, + "grad_norm": 8.437586784362793, + "learning_rate": 1.2895604395604397e-05, + "loss": 0.2064, + "step": 27012 + }, + { + "epoch": 74.21153846153847, + "grad_norm": 22.60978889465332, + "learning_rate": 1.289423076923077e-05, + "loss": 0.331, + "step": 27013 + }, + { + "epoch": 74.21428571428571, + "grad_norm": 12.044450759887695, + "learning_rate": 1.2892857142857142e-05, + "loss": 0.1313, + "step": 27014 + }, + { + "epoch": 74.21703296703296, + "grad_norm": 17.136600494384766, + "learning_rate": 1.2891483516483519e-05, + "loss": 0.3314, + "step": 27015 + }, + { + "epoch": 74.21978021978022, + "grad_norm": 16.86797332763672, + "learning_rate": 1.2890109890109892e-05, + "loss": 0.4572, + "step": 27016 + }, + { + "epoch": 74.22252747252747, + "grad_norm": 7.328530788421631, + "learning_rate": 1.2888736263736265e-05, + "loss": 0.1749, + "step": 27017 + }, + { + "epoch": 74.22527472527473, + "grad_norm": 21.87738609313965, + "learning_rate": 1.2887362637362637e-05, + "loss": 0.7831, + "step": 27018 + }, + { + "epoch": 74.22802197802197, + "grad_norm": 10.86314868927002, + "learning_rate": 1.288598901098901e-05, + "loss": 0.1763, + "step": 27019 + }, + { + "epoch": 74.23076923076923, + "grad_norm": 10.673304557800293, + "learning_rate": 1.2884615384615384e-05, + "loss": 0.2678, + "step": 27020 + }, + { + "epoch": 74.23351648351648, + "grad_norm": 25.702438354492188, + "learning_rate": 1.2883241758241759e-05, + "loss": 0.7168, + "step": 27021 + }, + { + "epoch": 74.23626373626374, + "grad_norm": 10.541759490966797, + "learning_rate": 1.2881868131868133e-05, + "loss": 0.3023, + "step": 27022 + }, + { + "epoch": 74.23901098901099, + "grad_norm": 5.528807640075684, + "learning_rate": 1.2880494505494506e-05, + "loss": 0.0525, + "step": 27023 + }, + { + "epoch": 74.24175824175825, + "grad_norm": 27.78387451171875, + "learning_rate": 1.287912087912088e-05, + "loss": 0.6042, + "step": 27024 + }, + { + "epoch": 74.24450549450549, + "grad_norm": 5.642455577850342, + "learning_rate": 1.2877747252747253e-05, + "loss": 0.083, + "step": 27025 + }, + { + "epoch": 74.24725274725274, + "grad_norm": 5.172135353088379, + "learning_rate": 1.2876373626373628e-05, + "loss": 0.0955, + "step": 27026 + }, + { + "epoch": 74.25, + "grad_norm": 8.732175827026367, + "learning_rate": 1.2875000000000001e-05, + "loss": 0.0888, + "step": 27027 + }, + { + "epoch": 74.25274725274726, + "grad_norm": 10.370847702026367, + "learning_rate": 1.2873626373626375e-05, + "loss": 0.2577, + "step": 27028 + }, + { + "epoch": 74.25549450549451, + "grad_norm": 4.702439785003662, + "learning_rate": 1.2872252747252746e-05, + "loss": 0.0572, + "step": 27029 + }, + { + "epoch": 74.25824175824175, + "grad_norm": 11.562346458435059, + "learning_rate": 1.287087912087912e-05, + "loss": 0.226, + "step": 27030 + }, + { + "epoch": 74.26098901098901, + "grad_norm": 22.659854888916016, + "learning_rate": 1.2869505494505497e-05, + "loss": 0.2301, + "step": 27031 + }, + { + "epoch": 74.26373626373626, + "grad_norm": 10.617612838745117, + "learning_rate": 1.2868131868131868e-05, + "loss": 0.2559, + "step": 27032 + }, + { + "epoch": 74.26648351648352, + "grad_norm": 23.140323638916016, + "learning_rate": 1.2866758241758242e-05, + "loss": 0.4254, + "step": 27033 + }, + { + "epoch": 74.26923076923077, + "grad_norm": 21.096012115478516, + "learning_rate": 1.2865384615384615e-05, + "loss": 0.5655, + "step": 27034 + }, + { + "epoch": 74.27197802197803, + "grad_norm": 19.98227310180664, + "learning_rate": 1.2864010989010988e-05, + "loss": 0.4397, + "step": 27035 + }, + { + "epoch": 74.27472527472527, + "grad_norm": 3.881260871887207, + "learning_rate": 1.2862637362637364e-05, + "loss": 0.07, + "step": 27036 + }, + { + "epoch": 74.27747252747253, + "grad_norm": 15.475446701049805, + "learning_rate": 1.2861263736263737e-05, + "loss": 0.3378, + "step": 27037 + }, + { + "epoch": 74.28021978021978, + "grad_norm": 7.791610240936279, + "learning_rate": 1.285989010989011e-05, + "loss": 0.1137, + "step": 27038 + }, + { + "epoch": 74.28296703296704, + "grad_norm": 6.356031894683838, + "learning_rate": 1.2858516483516484e-05, + "loss": 0.0916, + "step": 27039 + }, + { + "epoch": 74.28571428571429, + "grad_norm": 10.747661590576172, + "learning_rate": 1.2857142857142857e-05, + "loss": 0.1209, + "step": 27040 + }, + { + "epoch": 74.28846153846153, + "grad_norm": 7.230030536651611, + "learning_rate": 1.2855769230769232e-05, + "loss": 0.1574, + "step": 27041 + }, + { + "epoch": 74.29120879120879, + "grad_norm": 11.280457496643066, + "learning_rate": 1.2854395604395606e-05, + "loss": 0.3154, + "step": 27042 + }, + { + "epoch": 74.29395604395604, + "grad_norm": 6.909682750701904, + "learning_rate": 1.2853021978021979e-05, + "loss": 0.1024, + "step": 27043 + }, + { + "epoch": 74.2967032967033, + "grad_norm": 23.144481658935547, + "learning_rate": 1.2851648351648351e-05, + "loss": 0.4346, + "step": 27044 + }, + { + "epoch": 74.29945054945055, + "grad_norm": 19.64244270324707, + "learning_rate": 1.2850274725274724e-05, + "loss": 0.4761, + "step": 27045 + }, + { + "epoch": 74.3021978021978, + "grad_norm": 4.3159074783325195, + "learning_rate": 1.2848901098901101e-05, + "loss": 0.062, + "step": 27046 + }, + { + "epoch": 74.30494505494505, + "grad_norm": 12.070416450500488, + "learning_rate": 1.2847527472527473e-05, + "loss": 0.3158, + "step": 27047 + }, + { + "epoch": 74.3076923076923, + "grad_norm": 6.357911586761475, + "learning_rate": 1.2846153846153846e-05, + "loss": 0.1142, + "step": 27048 + }, + { + "epoch": 74.31043956043956, + "grad_norm": 8.454780578613281, + "learning_rate": 1.284478021978022e-05, + "loss": 0.1228, + "step": 27049 + }, + { + "epoch": 74.31318681318682, + "grad_norm": 6.57576322555542, + "learning_rate": 1.2843406593406593e-05, + "loss": 0.1439, + "step": 27050 + }, + { + "epoch": 74.31593406593407, + "grad_norm": 11.445616722106934, + "learning_rate": 1.2842032967032968e-05, + "loss": 0.2247, + "step": 27051 + }, + { + "epoch": 74.31868131868131, + "grad_norm": 7.486170768737793, + "learning_rate": 1.2840659340659341e-05, + "loss": 0.1109, + "step": 27052 + }, + { + "epoch": 74.32142857142857, + "grad_norm": 3.9381861686706543, + "learning_rate": 1.2839285714285715e-05, + "loss": 0.0752, + "step": 27053 + }, + { + "epoch": 74.32417582417582, + "grad_norm": 7.8390326499938965, + "learning_rate": 1.2837912087912088e-05, + "loss": 0.1491, + "step": 27054 + }, + { + "epoch": 74.32692307692308, + "grad_norm": 2.342569589614868, + "learning_rate": 1.2836538461538462e-05, + "loss": 0.0312, + "step": 27055 + }, + { + "epoch": 74.32967032967034, + "grad_norm": 16.50901222229004, + "learning_rate": 1.2835164835164837e-05, + "loss": 0.3723, + "step": 27056 + }, + { + "epoch": 74.33241758241758, + "grad_norm": 13.148672103881836, + "learning_rate": 1.283379120879121e-05, + "loss": 0.3452, + "step": 27057 + }, + { + "epoch": 74.33516483516483, + "grad_norm": 10.367867469787598, + "learning_rate": 1.2832417582417584e-05, + "loss": 0.2402, + "step": 27058 + }, + { + "epoch": 74.33791208791209, + "grad_norm": 7.222080230712891, + "learning_rate": 1.2831043956043955e-05, + "loss": 0.1366, + "step": 27059 + }, + { + "epoch": 74.34065934065934, + "grad_norm": 11.911694526672363, + "learning_rate": 1.2829670329670329e-05, + "loss": 0.1244, + "step": 27060 + }, + { + "epoch": 74.3434065934066, + "grad_norm": 17.470264434814453, + "learning_rate": 1.2828296703296706e-05, + "loss": 0.2873, + "step": 27061 + }, + { + "epoch": 74.34615384615384, + "grad_norm": 3.02834415435791, + "learning_rate": 1.2826923076923077e-05, + "loss": 0.03, + "step": 27062 + }, + { + "epoch": 74.3489010989011, + "grad_norm": 7.680436611175537, + "learning_rate": 1.282554945054945e-05, + "loss": 0.1384, + "step": 27063 + }, + { + "epoch": 74.35164835164835, + "grad_norm": 27.344533920288086, + "learning_rate": 1.2824175824175824e-05, + "loss": 0.8885, + "step": 27064 + }, + { + "epoch": 74.3543956043956, + "grad_norm": 8.822941780090332, + "learning_rate": 1.2822802197802197e-05, + "loss": 0.0936, + "step": 27065 + }, + { + "epoch": 74.35714285714286, + "grad_norm": 5.781514644622803, + "learning_rate": 1.2821428571428573e-05, + "loss": 0.1247, + "step": 27066 + }, + { + "epoch": 74.35989010989012, + "grad_norm": 8.497410774230957, + "learning_rate": 1.2820054945054946e-05, + "loss": 0.2029, + "step": 27067 + }, + { + "epoch": 74.36263736263736, + "grad_norm": 35.82851028442383, + "learning_rate": 1.281868131868132e-05, + "loss": 0.7795, + "step": 27068 + }, + { + "epoch": 74.36538461538461, + "grad_norm": 14.414477348327637, + "learning_rate": 1.2817307692307693e-05, + "loss": 0.2367, + "step": 27069 + }, + { + "epoch": 74.36813186813187, + "grad_norm": 2.816668748855591, + "learning_rate": 1.2815934065934066e-05, + "loss": 0.0984, + "step": 27070 + }, + { + "epoch": 74.37087912087912, + "grad_norm": 9.06263542175293, + "learning_rate": 1.2814560439560441e-05, + "loss": 0.1973, + "step": 27071 + }, + { + "epoch": 74.37362637362638, + "grad_norm": 7.018558979034424, + "learning_rate": 1.2813186813186815e-05, + "loss": 0.1246, + "step": 27072 + }, + { + "epoch": 74.37637362637362, + "grad_norm": 12.969719886779785, + "learning_rate": 1.2811813186813188e-05, + "loss": 0.1567, + "step": 27073 + }, + { + "epoch": 74.37912087912088, + "grad_norm": 22.45856475830078, + "learning_rate": 1.281043956043956e-05, + "loss": 0.4567, + "step": 27074 + }, + { + "epoch": 74.38186813186813, + "grad_norm": 4.646131992340088, + "learning_rate": 1.2809065934065933e-05, + "loss": 0.0564, + "step": 27075 + }, + { + "epoch": 74.38461538461539, + "grad_norm": 15.527499198913574, + "learning_rate": 1.280769230769231e-05, + "loss": 0.562, + "step": 27076 + }, + { + "epoch": 74.38736263736264, + "grad_norm": 14.16325569152832, + "learning_rate": 1.2806318681318682e-05, + "loss": 0.2064, + "step": 27077 + }, + { + "epoch": 74.39010989010988, + "grad_norm": 10.096956253051758, + "learning_rate": 1.2804945054945055e-05, + "loss": 0.2153, + "step": 27078 + }, + { + "epoch": 74.39285714285714, + "grad_norm": 14.794939041137695, + "learning_rate": 1.2803571428571429e-05, + "loss": 0.2218, + "step": 27079 + }, + { + "epoch": 74.3956043956044, + "grad_norm": 8.643996238708496, + "learning_rate": 1.2802197802197802e-05, + "loss": 0.1197, + "step": 27080 + }, + { + "epoch": 74.39835164835165, + "grad_norm": 13.615147590637207, + "learning_rate": 1.2800824175824177e-05, + "loss": 0.3142, + "step": 27081 + }, + { + "epoch": 74.4010989010989, + "grad_norm": 17.20721435546875, + "learning_rate": 1.279945054945055e-05, + "loss": 0.4117, + "step": 27082 + }, + { + "epoch": 74.40384615384616, + "grad_norm": 6.707899570465088, + "learning_rate": 1.2798076923076924e-05, + "loss": 0.0612, + "step": 27083 + }, + { + "epoch": 74.4065934065934, + "grad_norm": 10.948758125305176, + "learning_rate": 1.2796703296703297e-05, + "loss": 0.2157, + "step": 27084 + }, + { + "epoch": 74.40934065934066, + "grad_norm": 7.594178676605225, + "learning_rate": 1.279532967032967e-05, + "loss": 0.1216, + "step": 27085 + }, + { + "epoch": 74.41208791208791, + "grad_norm": 17.25921630859375, + "learning_rate": 1.2793956043956046e-05, + "loss": 0.2473, + "step": 27086 + }, + { + "epoch": 74.41483516483517, + "grad_norm": 24.06490135192871, + "learning_rate": 1.279258241758242e-05, + "loss": 0.5943, + "step": 27087 + }, + { + "epoch": 74.41758241758242, + "grad_norm": 11.813213348388672, + "learning_rate": 1.2791208791208793e-05, + "loss": 0.1521, + "step": 27088 + }, + { + "epoch": 74.42032967032966, + "grad_norm": 11.569937705993652, + "learning_rate": 1.2789835164835164e-05, + "loss": 0.1539, + "step": 27089 + }, + { + "epoch": 74.42307692307692, + "grad_norm": 17.12110137939453, + "learning_rate": 1.2788461538461538e-05, + "loss": 0.5081, + "step": 27090 + }, + { + "epoch": 74.42582417582418, + "grad_norm": 14.667858123779297, + "learning_rate": 1.2787087912087915e-05, + "loss": 0.2255, + "step": 27091 + }, + { + "epoch": 74.42857142857143, + "grad_norm": 13.947698593139648, + "learning_rate": 1.2785714285714286e-05, + "loss": 0.2882, + "step": 27092 + }, + { + "epoch": 74.43131868131869, + "grad_norm": 18.82179832458496, + "learning_rate": 1.278434065934066e-05, + "loss": 0.2783, + "step": 27093 + }, + { + "epoch": 74.43406593406593, + "grad_norm": 3.194714069366455, + "learning_rate": 1.2782967032967033e-05, + "loss": 0.037, + "step": 27094 + }, + { + "epoch": 74.43681318681318, + "grad_norm": 8.191781997680664, + "learning_rate": 1.2781593406593406e-05, + "loss": 0.1955, + "step": 27095 + }, + { + "epoch": 74.43956043956044, + "grad_norm": 5.8713812828063965, + "learning_rate": 1.2780219780219782e-05, + "loss": 0.1156, + "step": 27096 + }, + { + "epoch": 74.4423076923077, + "grad_norm": 8.404396057128906, + "learning_rate": 1.2778846153846155e-05, + "loss": 0.1509, + "step": 27097 + }, + { + "epoch": 74.44505494505495, + "grad_norm": 4.700338840484619, + "learning_rate": 1.2777472527472528e-05, + "loss": 0.0554, + "step": 27098 + }, + { + "epoch": 74.4478021978022, + "grad_norm": 22.288171768188477, + "learning_rate": 1.2776098901098902e-05, + "loss": 0.7541, + "step": 27099 + }, + { + "epoch": 74.45054945054945, + "grad_norm": 8.397317886352539, + "learning_rate": 1.2774725274725275e-05, + "loss": 0.1499, + "step": 27100 + }, + { + "epoch": 74.4532967032967, + "grad_norm": 6.742576599121094, + "learning_rate": 1.277335164835165e-05, + "loss": 0.174, + "step": 27101 + }, + { + "epoch": 74.45604395604396, + "grad_norm": 2.710557222366333, + "learning_rate": 1.2771978021978024e-05, + "loss": 0.0344, + "step": 27102 + }, + { + "epoch": 74.45879120879121, + "grad_norm": 15.608317375183105, + "learning_rate": 1.2770604395604397e-05, + "loss": 0.2933, + "step": 27103 + }, + { + "epoch": 74.46153846153847, + "grad_norm": 10.263130187988281, + "learning_rate": 1.2769230769230769e-05, + "loss": 0.2916, + "step": 27104 + }, + { + "epoch": 74.46428571428571, + "grad_norm": 4.360768795013428, + "learning_rate": 1.2767857142857142e-05, + "loss": 0.0779, + "step": 27105 + }, + { + "epoch": 74.46703296703296, + "grad_norm": 17.31979751586914, + "learning_rate": 1.2766483516483519e-05, + "loss": 0.3329, + "step": 27106 + }, + { + "epoch": 74.46978021978022, + "grad_norm": 16.156253814697266, + "learning_rate": 1.276510989010989e-05, + "loss": 0.3195, + "step": 27107 + }, + { + "epoch": 74.47252747252747, + "grad_norm": 7.21532678604126, + "learning_rate": 1.2763736263736264e-05, + "loss": 0.1391, + "step": 27108 + }, + { + "epoch": 74.47527472527473, + "grad_norm": 3.0017402172088623, + "learning_rate": 1.2762362637362638e-05, + "loss": 0.0535, + "step": 27109 + }, + { + "epoch": 74.47802197802197, + "grad_norm": 17.16452980041504, + "learning_rate": 1.2760989010989011e-05, + "loss": 0.4554, + "step": 27110 + }, + { + "epoch": 74.48076923076923, + "grad_norm": 11.940411567687988, + "learning_rate": 1.2759615384615386e-05, + "loss": 0.1484, + "step": 27111 + }, + { + "epoch": 74.48351648351648, + "grad_norm": 28.742887496948242, + "learning_rate": 1.275824175824176e-05, + "loss": 1.325, + "step": 27112 + }, + { + "epoch": 74.48626373626374, + "grad_norm": 8.646441459655762, + "learning_rate": 1.2756868131868133e-05, + "loss": 0.1622, + "step": 27113 + }, + { + "epoch": 74.48901098901099, + "grad_norm": 10.98112964630127, + "learning_rate": 1.2755494505494506e-05, + "loss": 0.1933, + "step": 27114 + }, + { + "epoch": 74.49175824175825, + "grad_norm": 12.265727996826172, + "learning_rate": 1.2754120879120878e-05, + "loss": 0.3873, + "step": 27115 + }, + { + "epoch": 74.49450549450549, + "grad_norm": 20.307859420776367, + "learning_rate": 1.2752747252747255e-05, + "loss": 0.3305, + "step": 27116 + }, + { + "epoch": 74.49725274725274, + "grad_norm": 6.558286190032959, + "learning_rate": 1.2751373626373628e-05, + "loss": 0.0724, + "step": 27117 + }, + { + "epoch": 74.5, + "grad_norm": 9.32463550567627, + "learning_rate": 1.2750000000000002e-05, + "loss": 0.1272, + "step": 27118 + }, + { + "epoch": 74.50274725274726, + "grad_norm": 4.3901047706604, + "learning_rate": 1.2748626373626373e-05, + "loss": 0.0664, + "step": 27119 + }, + { + "epoch": 74.50549450549451, + "grad_norm": 5.960153579711914, + "learning_rate": 1.2747252747252747e-05, + "loss": 0.097, + "step": 27120 + }, + { + "epoch": 74.50824175824175, + "grad_norm": 8.051739692687988, + "learning_rate": 1.2745879120879124e-05, + "loss": 0.2601, + "step": 27121 + }, + { + "epoch": 74.51098901098901, + "grad_norm": 9.466255187988281, + "learning_rate": 1.2744505494505495e-05, + "loss": 0.3101, + "step": 27122 + }, + { + "epoch": 74.51373626373626, + "grad_norm": 14.51209831237793, + "learning_rate": 1.2743131868131869e-05, + "loss": 0.3941, + "step": 27123 + }, + { + "epoch": 74.51648351648352, + "grad_norm": 32.536434173583984, + "learning_rate": 1.2741758241758242e-05, + "loss": 0.9028, + "step": 27124 + }, + { + "epoch": 74.51923076923077, + "grad_norm": 5.337064743041992, + "learning_rate": 1.2740384615384615e-05, + "loss": 0.0805, + "step": 27125 + }, + { + "epoch": 74.52197802197803, + "grad_norm": 2.447519063949585, + "learning_rate": 1.273901098901099e-05, + "loss": 0.0284, + "step": 27126 + }, + { + "epoch": 74.52472527472527, + "grad_norm": 20.701614379882812, + "learning_rate": 1.2737637362637364e-05, + "loss": 0.4429, + "step": 27127 + }, + { + "epoch": 74.52747252747253, + "grad_norm": 9.458806037902832, + "learning_rate": 1.2736263736263737e-05, + "loss": 0.2655, + "step": 27128 + }, + { + "epoch": 74.53021978021978, + "grad_norm": 12.049345016479492, + "learning_rate": 1.273489010989011e-05, + "loss": 0.1868, + "step": 27129 + }, + { + "epoch": 74.53296703296704, + "grad_norm": 4.093326568603516, + "learning_rate": 1.2733516483516482e-05, + "loss": 0.078, + "step": 27130 + }, + { + "epoch": 74.53571428571429, + "grad_norm": 5.1079277992248535, + "learning_rate": 1.2732142857142856e-05, + "loss": 0.0795, + "step": 27131 + }, + { + "epoch": 74.53846153846153, + "grad_norm": 2.1327900886535645, + "learning_rate": 1.2730769230769233e-05, + "loss": 0.0281, + "step": 27132 + }, + { + "epoch": 74.54120879120879, + "grad_norm": 4.0273213386535645, + "learning_rate": 1.2729395604395606e-05, + "loss": 0.0308, + "step": 27133 + }, + { + "epoch": 74.54395604395604, + "grad_norm": 8.086467742919922, + "learning_rate": 1.2728021978021978e-05, + "loss": 0.1887, + "step": 27134 + }, + { + "epoch": 74.5467032967033, + "grad_norm": 15.848339080810547, + "learning_rate": 1.2726648351648351e-05, + "loss": 0.2956, + "step": 27135 + }, + { + "epoch": 74.54945054945055, + "grad_norm": 3.8885602951049805, + "learning_rate": 1.2725274725274725e-05, + "loss": 0.0586, + "step": 27136 + }, + { + "epoch": 74.5521978021978, + "grad_norm": 1.6010844707489014, + "learning_rate": 1.27239010989011e-05, + "loss": 0.0156, + "step": 27137 + }, + { + "epoch": 74.55494505494505, + "grad_norm": 24.758337020874023, + "learning_rate": 1.2722527472527473e-05, + "loss": 0.6271, + "step": 27138 + }, + { + "epoch": 74.5576923076923, + "grad_norm": 14.913829803466797, + "learning_rate": 1.2721153846153847e-05, + "loss": 0.2565, + "step": 27139 + }, + { + "epoch": 74.56043956043956, + "grad_norm": 18.37235450744629, + "learning_rate": 1.271978021978022e-05, + "loss": 0.5036, + "step": 27140 + }, + { + "epoch": 74.56318681318682, + "grad_norm": 11.822732925415039, + "learning_rate": 1.2718406593406593e-05, + "loss": 0.147, + "step": 27141 + }, + { + "epoch": 74.56593406593407, + "grad_norm": 23.67130470275879, + "learning_rate": 1.2717032967032968e-05, + "loss": 0.2488, + "step": 27142 + }, + { + "epoch": 74.56868131868131, + "grad_norm": 4.3219404220581055, + "learning_rate": 1.2715659340659342e-05, + "loss": 0.0908, + "step": 27143 + }, + { + "epoch": 74.57142857142857, + "grad_norm": 8.31629753112793, + "learning_rate": 1.2714285714285715e-05, + "loss": 0.1835, + "step": 27144 + }, + { + "epoch": 74.57417582417582, + "grad_norm": 8.196370124816895, + "learning_rate": 1.2712912087912087e-05, + "loss": 0.1001, + "step": 27145 + }, + { + "epoch": 74.57692307692308, + "grad_norm": 11.779995918273926, + "learning_rate": 1.271153846153846e-05, + "loss": 0.2444, + "step": 27146 + }, + { + "epoch": 74.57967032967034, + "grad_norm": 19.87506866455078, + "learning_rate": 1.2710164835164837e-05, + "loss": 0.4248, + "step": 27147 + }, + { + "epoch": 74.58241758241758, + "grad_norm": 11.141175270080566, + "learning_rate": 1.270879120879121e-05, + "loss": 0.1614, + "step": 27148 + }, + { + "epoch": 74.58516483516483, + "grad_norm": 19.803773880004883, + "learning_rate": 1.2707417582417582e-05, + "loss": 0.4336, + "step": 27149 + }, + { + "epoch": 74.58791208791209, + "grad_norm": 11.129742622375488, + "learning_rate": 1.2706043956043956e-05, + "loss": 0.1993, + "step": 27150 + }, + { + "epoch": 74.59065934065934, + "grad_norm": 2.8677961826324463, + "learning_rate": 1.2704670329670329e-05, + "loss": 0.0335, + "step": 27151 + }, + { + "epoch": 74.5934065934066, + "grad_norm": 17.123516082763672, + "learning_rate": 1.2703296703296704e-05, + "loss": 0.3362, + "step": 27152 + }, + { + "epoch": 74.59615384615384, + "grad_norm": 22.727333068847656, + "learning_rate": 1.2701923076923078e-05, + "loss": 0.4701, + "step": 27153 + }, + { + "epoch": 74.5989010989011, + "grad_norm": 9.77191162109375, + "learning_rate": 1.2700549450549451e-05, + "loss": 0.1064, + "step": 27154 + }, + { + "epoch": 74.60164835164835, + "grad_norm": 15.413570404052734, + "learning_rate": 1.2699175824175824e-05, + "loss": 0.1749, + "step": 27155 + }, + { + "epoch": 74.6043956043956, + "grad_norm": 14.023677825927734, + "learning_rate": 1.2697802197802198e-05, + "loss": 0.454, + "step": 27156 + }, + { + "epoch": 74.60714285714286, + "grad_norm": 0.8889340758323669, + "learning_rate": 1.2696428571428573e-05, + "loss": 0.0134, + "step": 27157 + }, + { + "epoch": 74.60989010989012, + "grad_norm": 4.826784133911133, + "learning_rate": 1.2695054945054946e-05, + "loss": 0.0741, + "step": 27158 + }, + { + "epoch": 74.61263736263736, + "grad_norm": 11.136322021484375, + "learning_rate": 1.269368131868132e-05, + "loss": 0.161, + "step": 27159 + }, + { + "epoch": 74.61538461538461, + "grad_norm": 23.15200424194336, + "learning_rate": 1.2692307692307691e-05, + "loss": 0.5668, + "step": 27160 + }, + { + "epoch": 74.61813186813187, + "grad_norm": 20.937768936157227, + "learning_rate": 1.2690934065934065e-05, + "loss": 0.5983, + "step": 27161 + }, + { + "epoch": 74.62087912087912, + "grad_norm": 21.034387588500977, + "learning_rate": 1.2689560439560442e-05, + "loss": 0.419, + "step": 27162 + }, + { + "epoch": 74.62362637362638, + "grad_norm": 16.107786178588867, + "learning_rate": 1.2688186813186815e-05, + "loss": 0.2137, + "step": 27163 + }, + { + "epoch": 74.62637362637362, + "grad_norm": 19.71430015563965, + "learning_rate": 1.2686813186813187e-05, + "loss": 0.3928, + "step": 27164 + }, + { + "epoch": 74.62912087912088, + "grad_norm": 12.653603553771973, + "learning_rate": 1.268543956043956e-05, + "loss": 0.3367, + "step": 27165 + }, + { + "epoch": 74.63186813186813, + "grad_norm": 19.9008731842041, + "learning_rate": 1.2684065934065934e-05, + "loss": 0.3777, + "step": 27166 + }, + { + "epoch": 74.63461538461539, + "grad_norm": 8.759988784790039, + "learning_rate": 1.2682692307692309e-05, + "loss": 0.1435, + "step": 27167 + }, + { + "epoch": 74.63736263736264, + "grad_norm": 11.496275901794434, + "learning_rate": 1.2681318681318682e-05, + "loss": 0.1542, + "step": 27168 + }, + { + "epoch": 74.64010989010988, + "grad_norm": 15.78519058227539, + "learning_rate": 1.2679945054945056e-05, + "loss": 0.3613, + "step": 27169 + }, + { + "epoch": 74.64285714285714, + "grad_norm": 10.58432388305664, + "learning_rate": 1.2678571428571429e-05, + "loss": 0.166, + "step": 27170 + }, + { + "epoch": 74.6456043956044, + "grad_norm": 10.419865608215332, + "learning_rate": 1.2677197802197802e-05, + "loss": 0.1909, + "step": 27171 + }, + { + "epoch": 74.64835164835165, + "grad_norm": 7.089097499847412, + "learning_rate": 1.2675824175824177e-05, + "loss": 0.1085, + "step": 27172 + }, + { + "epoch": 74.6510989010989, + "grad_norm": 7.372049331665039, + "learning_rate": 1.267445054945055e-05, + "loss": 0.1067, + "step": 27173 + }, + { + "epoch": 74.65384615384616, + "grad_norm": 14.203023910522461, + "learning_rate": 1.2673076923076924e-05, + "loss": 0.3235, + "step": 27174 + }, + { + "epoch": 74.6565934065934, + "grad_norm": 10.366228103637695, + "learning_rate": 1.2671703296703296e-05, + "loss": 0.2598, + "step": 27175 + }, + { + "epoch": 74.65934065934066, + "grad_norm": 14.833015441894531, + "learning_rate": 1.267032967032967e-05, + "loss": 0.4572, + "step": 27176 + }, + { + "epoch": 74.66208791208791, + "grad_norm": 18.661209106445312, + "learning_rate": 1.2668956043956046e-05, + "loss": 0.5304, + "step": 27177 + }, + { + "epoch": 74.66483516483517, + "grad_norm": 5.465134620666504, + "learning_rate": 1.2667582417582418e-05, + "loss": 0.0728, + "step": 27178 + }, + { + "epoch": 74.66758241758242, + "grad_norm": 2.004377603530884, + "learning_rate": 1.2666208791208791e-05, + "loss": 0.0315, + "step": 27179 + }, + { + "epoch": 74.67032967032966, + "grad_norm": 16.894929885864258, + "learning_rate": 1.2664835164835165e-05, + "loss": 0.4128, + "step": 27180 + }, + { + "epoch": 74.67307692307692, + "grad_norm": 12.590428352355957, + "learning_rate": 1.2663461538461538e-05, + "loss": 0.1744, + "step": 27181 + }, + { + "epoch": 74.67582417582418, + "grad_norm": 7.983169078826904, + "learning_rate": 1.2662087912087913e-05, + "loss": 0.1212, + "step": 27182 + }, + { + "epoch": 74.67857142857143, + "grad_norm": 22.45982551574707, + "learning_rate": 1.2660714285714287e-05, + "loss": 0.3153, + "step": 27183 + }, + { + "epoch": 74.68131868131869, + "grad_norm": 4.995793342590332, + "learning_rate": 1.265934065934066e-05, + "loss": 0.0564, + "step": 27184 + }, + { + "epoch": 74.68406593406593, + "grad_norm": 17.271839141845703, + "learning_rate": 1.2657967032967033e-05, + "loss": 0.3979, + "step": 27185 + }, + { + "epoch": 74.68681318681318, + "grad_norm": 13.920937538146973, + "learning_rate": 1.2656593406593407e-05, + "loss": 0.2153, + "step": 27186 + }, + { + "epoch": 74.68956043956044, + "grad_norm": 10.066065788269043, + "learning_rate": 1.2655219780219782e-05, + "loss": 0.2172, + "step": 27187 + }, + { + "epoch": 74.6923076923077, + "grad_norm": 19.710622787475586, + "learning_rate": 1.2653846153846155e-05, + "loss": 0.5237, + "step": 27188 + }, + { + "epoch": 74.69505494505495, + "grad_norm": 14.910490036010742, + "learning_rate": 1.2652472527472529e-05, + "loss": 0.3058, + "step": 27189 + }, + { + "epoch": 74.6978021978022, + "grad_norm": 19.10516929626465, + "learning_rate": 1.26510989010989e-05, + "loss": 0.3517, + "step": 27190 + }, + { + "epoch": 74.70054945054945, + "grad_norm": 12.517474174499512, + "learning_rate": 1.2649725274725274e-05, + "loss": 0.2708, + "step": 27191 + }, + { + "epoch": 74.7032967032967, + "grad_norm": 6.815239906311035, + "learning_rate": 1.264835164835165e-05, + "loss": 0.1552, + "step": 27192 + }, + { + "epoch": 74.70604395604396, + "grad_norm": 3.6413002014160156, + "learning_rate": 1.2646978021978022e-05, + "loss": 0.0867, + "step": 27193 + }, + { + "epoch": 74.70879120879121, + "grad_norm": 9.420208930969238, + "learning_rate": 1.2645604395604396e-05, + "loss": 0.2009, + "step": 27194 + }, + { + "epoch": 74.71153846153847, + "grad_norm": 14.641044616699219, + "learning_rate": 1.264423076923077e-05, + "loss": 0.2292, + "step": 27195 + }, + { + "epoch": 74.71428571428571, + "grad_norm": 10.837531089782715, + "learning_rate": 1.2642857142857143e-05, + "loss": 0.1484, + "step": 27196 + }, + { + "epoch": 74.71703296703296, + "grad_norm": 9.953350067138672, + "learning_rate": 1.2641483516483518e-05, + "loss": 0.133, + "step": 27197 + }, + { + "epoch": 74.71978021978022, + "grad_norm": 6.766304016113281, + "learning_rate": 1.2640109890109891e-05, + "loss": 0.1888, + "step": 27198 + }, + { + "epoch": 74.72252747252747, + "grad_norm": 2.3066229820251465, + "learning_rate": 1.2638736263736264e-05, + "loss": 0.0161, + "step": 27199 + }, + { + "epoch": 74.72527472527473, + "grad_norm": 2.428619384765625, + "learning_rate": 1.2637362637362638e-05, + "loss": 0.034, + "step": 27200 + }, + { + "epoch": 74.72802197802197, + "grad_norm": 15.883686065673828, + "learning_rate": 1.2635989010989011e-05, + "loss": 0.2721, + "step": 27201 + }, + { + "epoch": 74.73076923076923, + "grad_norm": 16.31646728515625, + "learning_rate": 1.2634615384615386e-05, + "loss": 0.3139, + "step": 27202 + }, + { + "epoch": 74.73351648351648, + "grad_norm": 16.877836227416992, + "learning_rate": 1.263324175824176e-05, + "loss": 0.3759, + "step": 27203 + }, + { + "epoch": 74.73626373626374, + "grad_norm": 5.109379291534424, + "learning_rate": 1.2631868131868133e-05, + "loss": 0.0696, + "step": 27204 + }, + { + "epoch": 74.73901098901099, + "grad_norm": 11.804295539855957, + "learning_rate": 1.2630494505494505e-05, + "loss": 0.3208, + "step": 27205 + }, + { + "epoch": 74.74175824175825, + "grad_norm": 21.732698440551758, + "learning_rate": 1.2629120879120878e-05, + "loss": 0.3575, + "step": 27206 + }, + { + "epoch": 74.74450549450549, + "grad_norm": 14.261664390563965, + "learning_rate": 1.2627747252747255e-05, + "loss": 0.2113, + "step": 27207 + }, + { + "epoch": 74.74725274725274, + "grad_norm": 14.762015342712402, + "learning_rate": 1.2626373626373627e-05, + "loss": 0.1954, + "step": 27208 + }, + { + "epoch": 74.75, + "grad_norm": 9.381664276123047, + "learning_rate": 1.2625e-05, + "loss": 0.2574, + "step": 27209 + }, + { + "epoch": 74.75274725274726, + "grad_norm": 17.71335792541504, + "learning_rate": 1.2623626373626374e-05, + "loss": 0.3552, + "step": 27210 + }, + { + "epoch": 74.75549450549451, + "grad_norm": 19.228378295898438, + "learning_rate": 1.2622252747252747e-05, + "loss": 0.6022, + "step": 27211 + }, + { + "epoch": 74.75824175824175, + "grad_norm": 21.78240394592285, + "learning_rate": 1.2620879120879122e-05, + "loss": 0.5915, + "step": 27212 + }, + { + "epoch": 74.76098901098901, + "grad_norm": 13.876906394958496, + "learning_rate": 1.2619505494505496e-05, + "loss": 0.3223, + "step": 27213 + }, + { + "epoch": 74.76373626373626, + "grad_norm": 12.484861373901367, + "learning_rate": 1.2618131868131869e-05, + "loss": 0.2476, + "step": 27214 + }, + { + "epoch": 74.76648351648352, + "grad_norm": 15.031582832336426, + "learning_rate": 1.2616758241758242e-05, + "loss": 0.5249, + "step": 27215 + }, + { + "epoch": 74.76923076923077, + "grad_norm": 4.792462348937988, + "learning_rate": 1.2615384615384616e-05, + "loss": 0.0366, + "step": 27216 + }, + { + "epoch": 74.77197802197803, + "grad_norm": 4.580409049987793, + "learning_rate": 1.2614010989010991e-05, + "loss": 0.0567, + "step": 27217 + }, + { + "epoch": 74.77472527472527, + "grad_norm": 3.1416244506835938, + "learning_rate": 1.2612637362637364e-05, + "loss": 0.0449, + "step": 27218 + }, + { + "epoch": 74.77747252747253, + "grad_norm": 15.078824996948242, + "learning_rate": 1.2611263736263738e-05, + "loss": 0.2856, + "step": 27219 + }, + { + "epoch": 74.78021978021978, + "grad_norm": 16.193593978881836, + "learning_rate": 1.260989010989011e-05, + "loss": 0.3532, + "step": 27220 + }, + { + "epoch": 74.78296703296704, + "grad_norm": 11.957283020019531, + "learning_rate": 1.2608516483516483e-05, + "loss": 0.3327, + "step": 27221 + }, + { + "epoch": 74.78571428571429, + "grad_norm": 11.497233390808105, + "learning_rate": 1.260714285714286e-05, + "loss": 0.2471, + "step": 27222 + }, + { + "epoch": 74.78846153846153, + "grad_norm": 16.89211654663086, + "learning_rate": 1.2605769230769231e-05, + "loss": 0.1099, + "step": 27223 + }, + { + "epoch": 74.79120879120879, + "grad_norm": 18.46693992614746, + "learning_rate": 1.2604395604395605e-05, + "loss": 0.273, + "step": 27224 + }, + { + "epoch": 74.79395604395604, + "grad_norm": 2.565565824508667, + "learning_rate": 1.2603021978021978e-05, + "loss": 0.0327, + "step": 27225 + }, + { + "epoch": 74.7967032967033, + "grad_norm": 3.662400484085083, + "learning_rate": 1.2601648351648352e-05, + "loss": 0.0418, + "step": 27226 + }, + { + "epoch": 74.79945054945055, + "grad_norm": 11.904308319091797, + "learning_rate": 1.2600274725274727e-05, + "loss": 0.2768, + "step": 27227 + }, + { + "epoch": 74.8021978021978, + "grad_norm": 12.657533645629883, + "learning_rate": 1.25989010989011e-05, + "loss": 0.1313, + "step": 27228 + }, + { + "epoch": 74.80494505494505, + "grad_norm": 20.227128982543945, + "learning_rate": 1.2597527472527473e-05, + "loss": 0.5943, + "step": 27229 + }, + { + "epoch": 74.8076923076923, + "grad_norm": 7.829949855804443, + "learning_rate": 1.2596153846153847e-05, + "loss": 0.2653, + "step": 27230 + }, + { + "epoch": 74.81043956043956, + "grad_norm": 8.16805362701416, + "learning_rate": 1.259478021978022e-05, + "loss": 0.1661, + "step": 27231 + }, + { + "epoch": 74.81318681318682, + "grad_norm": 15.268986701965332, + "learning_rate": 1.2593406593406595e-05, + "loss": 0.2261, + "step": 27232 + }, + { + "epoch": 74.81593406593407, + "grad_norm": 20.407594680786133, + "learning_rate": 1.2592032967032969e-05, + "loss": 0.4, + "step": 27233 + }, + { + "epoch": 74.81868131868131, + "grad_norm": 11.781258583068848, + "learning_rate": 1.2590659340659342e-05, + "loss": 0.1955, + "step": 27234 + }, + { + "epoch": 74.82142857142857, + "grad_norm": 11.852485656738281, + "learning_rate": 1.2589285714285714e-05, + "loss": 0.1299, + "step": 27235 + }, + { + "epoch": 74.82417582417582, + "grad_norm": 21.959545135498047, + "learning_rate": 1.2587912087912087e-05, + "loss": 0.3206, + "step": 27236 + }, + { + "epoch": 74.82692307692308, + "grad_norm": 13.348803520202637, + "learning_rate": 1.2586538461538464e-05, + "loss": 0.1626, + "step": 27237 + }, + { + "epoch": 74.82967032967034, + "grad_norm": 10.985189437866211, + "learning_rate": 1.2585164835164836e-05, + "loss": 0.2256, + "step": 27238 + }, + { + "epoch": 74.83241758241758, + "grad_norm": 27.03997039794922, + "learning_rate": 1.258379120879121e-05, + "loss": 0.5045, + "step": 27239 + }, + { + "epoch": 74.83516483516483, + "grad_norm": 11.315829277038574, + "learning_rate": 1.2582417582417583e-05, + "loss": 0.3056, + "step": 27240 + }, + { + "epoch": 74.83791208791209, + "grad_norm": 19.596832275390625, + "learning_rate": 1.2581043956043956e-05, + "loss": 0.2476, + "step": 27241 + }, + { + "epoch": 74.84065934065934, + "grad_norm": 18.71244239807129, + "learning_rate": 1.2579670329670331e-05, + "loss": 0.2389, + "step": 27242 + }, + { + "epoch": 74.8434065934066, + "grad_norm": 6.3568434715271, + "learning_rate": 1.2578296703296705e-05, + "loss": 0.129, + "step": 27243 + }, + { + "epoch": 74.84615384615384, + "grad_norm": 11.849226951599121, + "learning_rate": 1.2576923076923078e-05, + "loss": 0.1632, + "step": 27244 + }, + { + "epoch": 74.8489010989011, + "grad_norm": 24.451696395874023, + "learning_rate": 1.2575549450549451e-05, + "loss": 0.6511, + "step": 27245 + }, + { + "epoch": 74.85164835164835, + "grad_norm": 10.629502296447754, + "learning_rate": 1.2574175824175825e-05, + "loss": 0.1152, + "step": 27246 + }, + { + "epoch": 74.8543956043956, + "grad_norm": 7.491814136505127, + "learning_rate": 1.2572802197802196e-05, + "loss": 0.1376, + "step": 27247 + }, + { + "epoch": 74.85714285714286, + "grad_norm": 9.231966018676758, + "learning_rate": 1.2571428571428573e-05, + "loss": 0.2587, + "step": 27248 + }, + { + "epoch": 74.85989010989012, + "grad_norm": 11.799270629882812, + "learning_rate": 1.2570054945054947e-05, + "loss": 0.2687, + "step": 27249 + }, + { + "epoch": 74.86263736263736, + "grad_norm": 5.404326915740967, + "learning_rate": 1.2568681318681318e-05, + "loss": 0.0758, + "step": 27250 + }, + { + "epoch": 74.86538461538461, + "grad_norm": 9.96123218536377, + "learning_rate": 1.2567307692307692e-05, + "loss": 0.3019, + "step": 27251 + }, + { + "epoch": 74.86813186813187, + "grad_norm": 15.145159721374512, + "learning_rate": 1.2565934065934065e-05, + "loss": 0.3389, + "step": 27252 + }, + { + "epoch": 74.87087912087912, + "grad_norm": 11.866479873657227, + "learning_rate": 1.256456043956044e-05, + "loss": 0.3656, + "step": 27253 + }, + { + "epoch": 74.87362637362638, + "grad_norm": 6.21685791015625, + "learning_rate": 1.2563186813186814e-05, + "loss": 0.1185, + "step": 27254 + }, + { + "epoch": 74.87637362637362, + "grad_norm": 2.0392348766326904, + "learning_rate": 1.2561813186813187e-05, + "loss": 0.0189, + "step": 27255 + }, + { + "epoch": 74.87912087912088, + "grad_norm": 12.970152854919434, + "learning_rate": 1.256043956043956e-05, + "loss": 0.1454, + "step": 27256 + }, + { + "epoch": 74.88186813186813, + "grad_norm": 9.307204246520996, + "learning_rate": 1.2559065934065934e-05, + "loss": 0.139, + "step": 27257 + }, + { + "epoch": 74.88461538461539, + "grad_norm": 9.958048820495605, + "learning_rate": 1.2557692307692309e-05, + "loss": 0.1738, + "step": 27258 + }, + { + "epoch": 74.88736263736264, + "grad_norm": 18.13167381286621, + "learning_rate": 1.2556318681318682e-05, + "loss": 0.5021, + "step": 27259 + }, + { + "epoch": 74.89010989010988, + "grad_norm": 19.496610641479492, + "learning_rate": 1.2554945054945056e-05, + "loss": 0.4139, + "step": 27260 + }, + { + "epoch": 74.89285714285714, + "grad_norm": 7.720763683319092, + "learning_rate": 1.255357142857143e-05, + "loss": 0.1354, + "step": 27261 + }, + { + "epoch": 74.8956043956044, + "grad_norm": 9.551679611206055, + "learning_rate": 1.2552197802197801e-05, + "loss": 0.1601, + "step": 27262 + }, + { + "epoch": 74.89835164835165, + "grad_norm": 9.645049095153809, + "learning_rate": 1.2550824175824178e-05, + "loss": 0.2421, + "step": 27263 + }, + { + "epoch": 74.9010989010989, + "grad_norm": 4.8948893547058105, + "learning_rate": 1.2549450549450551e-05, + "loss": 0.0393, + "step": 27264 + }, + { + "epoch": 74.90384615384616, + "grad_norm": 21.204113006591797, + "learning_rate": 1.2548076923076923e-05, + "loss": 0.5633, + "step": 27265 + }, + { + "epoch": 74.9065934065934, + "grad_norm": 12.401138305664062, + "learning_rate": 1.2546703296703296e-05, + "loss": 0.2349, + "step": 27266 + }, + { + "epoch": 74.90934065934066, + "grad_norm": 9.574780464172363, + "learning_rate": 1.254532967032967e-05, + "loss": 0.109, + "step": 27267 + }, + { + "epoch": 74.91208791208791, + "grad_norm": 21.995323181152344, + "learning_rate": 1.2543956043956045e-05, + "loss": 0.5706, + "step": 27268 + }, + { + "epoch": 74.91483516483517, + "grad_norm": 7.575774669647217, + "learning_rate": 1.2542582417582418e-05, + "loss": 0.2444, + "step": 27269 + }, + { + "epoch": 74.91758241758242, + "grad_norm": 12.869527816772461, + "learning_rate": 1.2541208791208792e-05, + "loss": 0.3075, + "step": 27270 + }, + { + "epoch": 74.92032967032966, + "grad_norm": 11.375131607055664, + "learning_rate": 1.2539835164835165e-05, + "loss": 0.3195, + "step": 27271 + }, + { + "epoch": 74.92307692307692, + "grad_norm": 8.268877029418945, + "learning_rate": 1.2538461538461538e-05, + "loss": 0.0938, + "step": 27272 + }, + { + "epoch": 74.92582417582418, + "grad_norm": 16.61274528503418, + "learning_rate": 1.2537087912087914e-05, + "loss": 0.5078, + "step": 27273 + }, + { + "epoch": 74.92857142857143, + "grad_norm": 1.9286118745803833, + "learning_rate": 1.2535714285714287e-05, + "loss": 0.0263, + "step": 27274 + }, + { + "epoch": 74.93131868131869, + "grad_norm": 12.371006965637207, + "learning_rate": 1.253434065934066e-05, + "loss": 0.3625, + "step": 27275 + }, + { + "epoch": 74.93406593406593, + "grad_norm": 2.370441198348999, + "learning_rate": 1.2532967032967032e-05, + "loss": 0.0214, + "step": 27276 + }, + { + "epoch": 74.93681318681318, + "grad_norm": 11.387139320373535, + "learning_rate": 1.2531593406593405e-05, + "loss": 0.3162, + "step": 27277 + }, + { + "epoch": 74.93956043956044, + "grad_norm": 8.201159477233887, + "learning_rate": 1.2530219780219782e-05, + "loss": 0.1413, + "step": 27278 + }, + { + "epoch": 74.9423076923077, + "grad_norm": 16.305959701538086, + "learning_rate": 1.2528846153846156e-05, + "loss": 0.2851, + "step": 27279 + }, + { + "epoch": 74.94505494505495, + "grad_norm": 20.44723129272461, + "learning_rate": 1.2527472527472527e-05, + "loss": 0.524, + "step": 27280 + }, + { + "epoch": 74.9478021978022, + "grad_norm": 12.522512435913086, + "learning_rate": 1.25260989010989e-05, + "loss": 0.1662, + "step": 27281 + }, + { + "epoch": 74.95054945054945, + "grad_norm": 7.099032878875732, + "learning_rate": 1.2524725274725274e-05, + "loss": 0.1059, + "step": 27282 + }, + { + "epoch": 74.9532967032967, + "grad_norm": 11.132225036621094, + "learning_rate": 1.252335164835165e-05, + "loss": 0.1232, + "step": 27283 + }, + { + "epoch": 74.95604395604396, + "grad_norm": 8.856346130371094, + "learning_rate": 1.2521978021978023e-05, + "loss": 0.1885, + "step": 27284 + }, + { + "epoch": 74.95879120879121, + "grad_norm": 11.025413513183594, + "learning_rate": 1.2520604395604396e-05, + "loss": 0.2288, + "step": 27285 + }, + { + "epoch": 74.96153846153847, + "grad_norm": 15.834807395935059, + "learning_rate": 1.251923076923077e-05, + "loss": 0.2235, + "step": 27286 + }, + { + "epoch": 74.96428571428571, + "grad_norm": 5.958070278167725, + "learning_rate": 1.2517857142857143e-05, + "loss": 0.0654, + "step": 27287 + }, + { + "epoch": 74.96703296703296, + "grad_norm": 18.349782943725586, + "learning_rate": 1.2516483516483518e-05, + "loss": 0.268, + "step": 27288 + }, + { + "epoch": 74.96978021978022, + "grad_norm": 25.644859313964844, + "learning_rate": 1.2515109890109891e-05, + "loss": 0.577, + "step": 27289 + }, + { + "epoch": 74.97252747252747, + "grad_norm": 19.766815185546875, + "learning_rate": 1.2513736263736265e-05, + "loss": 0.4615, + "step": 27290 + }, + { + "epoch": 74.97527472527473, + "grad_norm": 2.7834181785583496, + "learning_rate": 1.2512362637362637e-05, + "loss": 0.0267, + "step": 27291 + }, + { + "epoch": 74.97802197802197, + "grad_norm": 5.0267510414123535, + "learning_rate": 1.251098901098901e-05, + "loss": 0.0701, + "step": 27292 + }, + { + "epoch": 74.98076923076923, + "grad_norm": 1.3173439502716064, + "learning_rate": 1.2509615384615387e-05, + "loss": 0.0187, + "step": 27293 + }, + { + "epoch": 74.98351648351648, + "grad_norm": 17.56844711303711, + "learning_rate": 1.250824175824176e-05, + "loss": 0.4127, + "step": 27294 + }, + { + "epoch": 74.98626373626374, + "grad_norm": 15.47386646270752, + "learning_rate": 1.2506868131868132e-05, + "loss": 0.3049, + "step": 27295 + }, + { + "epoch": 74.98901098901099, + "grad_norm": 4.671375274658203, + "learning_rate": 1.2505494505494505e-05, + "loss": 0.0833, + "step": 27296 + }, + { + "epoch": 74.99175824175825, + "grad_norm": 6.615015029907227, + "learning_rate": 1.2504120879120879e-05, + "loss": 0.1774, + "step": 27297 + }, + { + "epoch": 74.99450549450549, + "grad_norm": 16.3325252532959, + "learning_rate": 1.2502747252747254e-05, + "loss": 0.466, + "step": 27298 + }, + { + "epoch": 74.99725274725274, + "grad_norm": 7.1879401206970215, + "learning_rate": 1.2501373626373627e-05, + "loss": 0.0968, + "step": 27299 + }, + { + "epoch": 75.0, + "grad_norm": 35.9955940246582, + "learning_rate": 1.25e-05, + "loss": 0.2747, + "step": 27300 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.8650137741046832, + "eval_f1": 0.862272420327599, + "eval_f1_DuraRiadoRio_64x64": 0.8444444444444444, + "eval_f1_Mole_64x64": 0.9066666666666666, + "eval_f1_Quebrado_64x64": 0.8910256410256411, + "eval_f1_RiadoRio_64x64": 0.7715355805243446, + "eval_f1_RioFechado_64x64": 0.8976897689768977, + "eval_loss": 0.5770056843757629, + "eval_precision": 0.8688620799490365, + "eval_precision_DuraRiadoRio_64x64": 0.9047619047619048, + "eval_precision_Mole_64x64": 0.8717948717948718, + "eval_precision_Quebrado_64x64": 0.8273809523809523, + "eval_precision_RiadoRio_64x64": 0.8956521739130435, + "eval_precision_RioFechado_64x64": 0.84472049689441, + "eval_recall": 0.8673533893418993, + "eval_recall_DuraRiadoRio_64x64": 0.7916666666666666, + "eval_recall_Mole_64x64": 0.9444444444444444, + "eval_recall_Quebrado_64x64": 0.9652777777777778, + "eval_recall_RiadoRio_64x64": 0.6776315789473685, + "eval_recall_RioFechado_64x64": 0.9577464788732394, + "eval_runtime": 1.7499, + "eval_samples_per_second": 414.892, + "eval_steps_per_second": 26.288, + "step": 27300 + }, + { + "epoch": 75.00274725274726, + "grad_norm": 11.441132545471191, + "learning_rate": 1.2498626373626374e-05, + "loss": 0.1943, + "step": 27301 + }, + { + "epoch": 75.00549450549451, + "grad_norm": 6.590500354766846, + "learning_rate": 1.2497252747252747e-05, + "loss": 0.1139, + "step": 27302 + }, + { + "epoch": 75.00824175824175, + "grad_norm": 5.3613667488098145, + "learning_rate": 1.249587912087912e-05, + "loss": 0.2142, + "step": 27303 + }, + { + "epoch": 75.01098901098901, + "grad_norm": 7.210455894470215, + "learning_rate": 1.2494505494505496e-05, + "loss": 0.1006, + "step": 27304 + }, + { + "epoch": 75.01373626373626, + "grad_norm": 13.201647758483887, + "learning_rate": 1.249313186813187e-05, + "loss": 0.2678, + "step": 27305 + }, + { + "epoch": 75.01648351648352, + "grad_norm": 10.053418159484863, + "learning_rate": 1.2491758241758241e-05, + "loss": 0.2166, + "step": 27306 + }, + { + "epoch": 75.01923076923077, + "grad_norm": 5.433042526245117, + "learning_rate": 1.2490384615384616e-05, + "loss": 0.0963, + "step": 27307 + }, + { + "epoch": 75.02197802197803, + "grad_norm": 9.416598320007324, + "learning_rate": 1.248901098901099e-05, + "loss": 0.1414, + "step": 27308 + }, + { + "epoch": 75.02472527472527, + "grad_norm": 10.693221092224121, + "learning_rate": 1.2487637362637365e-05, + "loss": 0.187, + "step": 27309 + }, + { + "epoch": 75.02747252747253, + "grad_norm": 21.775842666625977, + "learning_rate": 1.2486263736263736e-05, + "loss": 0.8341, + "step": 27310 + }, + { + "epoch": 75.03021978021978, + "grad_norm": 6.028726577758789, + "learning_rate": 1.248489010989011e-05, + "loss": 0.1282, + "step": 27311 + }, + { + "epoch": 75.03296703296704, + "grad_norm": 13.028194427490234, + "learning_rate": 1.2483516483516485e-05, + "loss": 0.1279, + "step": 27312 + }, + { + "epoch": 75.03571428571429, + "grad_norm": 16.408143997192383, + "learning_rate": 1.2482142857142858e-05, + "loss": 0.1642, + "step": 27313 + }, + { + "epoch": 75.03846153846153, + "grad_norm": 11.328259468078613, + "learning_rate": 1.2480769230769232e-05, + "loss": 0.2177, + "step": 27314 + }, + { + "epoch": 75.04120879120879, + "grad_norm": 7.207928657531738, + "learning_rate": 1.2479395604395605e-05, + "loss": 0.228, + "step": 27315 + }, + { + "epoch": 75.04395604395604, + "grad_norm": 9.096437454223633, + "learning_rate": 1.2478021978021978e-05, + "loss": 0.1285, + "step": 27316 + }, + { + "epoch": 75.0467032967033, + "grad_norm": 10.169901847839355, + "learning_rate": 1.2476648351648352e-05, + "loss": 0.1636, + "step": 27317 + }, + { + "epoch": 75.04945054945055, + "grad_norm": 4.05679178237915, + "learning_rate": 1.2475274725274725e-05, + "loss": 0.0426, + "step": 27318 + }, + { + "epoch": 75.0521978021978, + "grad_norm": 11.705020904541016, + "learning_rate": 1.24739010989011e-05, + "loss": 0.3158, + "step": 27319 + }, + { + "epoch": 75.05494505494505, + "grad_norm": 8.641937255859375, + "learning_rate": 1.2472527472527474e-05, + "loss": 0.1768, + "step": 27320 + }, + { + "epoch": 75.0576923076923, + "grad_norm": 14.183966636657715, + "learning_rate": 1.2471153846153846e-05, + "loss": 0.294, + "step": 27321 + }, + { + "epoch": 75.06043956043956, + "grad_norm": 13.201966285705566, + "learning_rate": 1.246978021978022e-05, + "loss": 0.2178, + "step": 27322 + }, + { + "epoch": 75.06318681318682, + "grad_norm": 21.693510055541992, + "learning_rate": 1.2468406593406594e-05, + "loss": 0.9233, + "step": 27323 + }, + { + "epoch": 75.06593406593407, + "grad_norm": 13.913244247436523, + "learning_rate": 1.2467032967032969e-05, + "loss": 0.4217, + "step": 27324 + }, + { + "epoch": 75.06868131868131, + "grad_norm": 21.314455032348633, + "learning_rate": 1.2465659340659341e-05, + "loss": 0.542, + "step": 27325 + }, + { + "epoch": 75.07142857142857, + "grad_norm": 24.826351165771484, + "learning_rate": 1.2464285714285714e-05, + "loss": 0.7457, + "step": 27326 + }, + { + "epoch": 75.07417582417582, + "grad_norm": 23.435138702392578, + "learning_rate": 1.246291208791209e-05, + "loss": 0.4919, + "step": 27327 + }, + { + "epoch": 75.07692307692308, + "grad_norm": 10.67355728149414, + "learning_rate": 1.2461538461538463e-05, + "loss": 0.1033, + "step": 27328 + }, + { + "epoch": 75.07967032967034, + "grad_norm": 13.416218757629395, + "learning_rate": 1.2460164835164834e-05, + "loss": 0.4021, + "step": 27329 + }, + { + "epoch": 75.08241758241758, + "grad_norm": 18.795528411865234, + "learning_rate": 1.245879120879121e-05, + "loss": 0.6793, + "step": 27330 + }, + { + "epoch": 75.08516483516483, + "grad_norm": 21.87018585205078, + "learning_rate": 1.2457417582417583e-05, + "loss": 0.5208, + "step": 27331 + }, + { + "epoch": 75.08791208791209, + "grad_norm": 5.637786865234375, + "learning_rate": 1.2456043956043956e-05, + "loss": 0.0752, + "step": 27332 + }, + { + "epoch": 75.09065934065934, + "grad_norm": 2.8055379390716553, + "learning_rate": 1.245467032967033e-05, + "loss": 0.0397, + "step": 27333 + }, + { + "epoch": 75.0934065934066, + "grad_norm": 10.363086700439453, + "learning_rate": 1.2453296703296703e-05, + "loss": 0.1655, + "step": 27334 + }, + { + "epoch": 75.09615384615384, + "grad_norm": 10.274720191955566, + "learning_rate": 1.2451923076923078e-05, + "loss": 0.132, + "step": 27335 + }, + { + "epoch": 75.0989010989011, + "grad_norm": 6.084174633026123, + "learning_rate": 1.245054945054945e-05, + "loss": 0.0671, + "step": 27336 + }, + { + "epoch": 75.10164835164835, + "grad_norm": 10.983904838562012, + "learning_rate": 1.2449175824175825e-05, + "loss": 0.1877, + "step": 27337 + }, + { + "epoch": 75.1043956043956, + "grad_norm": 7.522622108459473, + "learning_rate": 1.2447802197802199e-05, + "loss": 0.1008, + "step": 27338 + }, + { + "epoch": 75.10714285714286, + "grad_norm": 4.237302780151367, + "learning_rate": 1.2446428571428572e-05, + "loss": 0.0996, + "step": 27339 + }, + { + "epoch": 75.10989010989012, + "grad_norm": 16.705020904541016, + "learning_rate": 1.2445054945054945e-05, + "loss": 0.2846, + "step": 27340 + }, + { + "epoch": 75.11263736263736, + "grad_norm": 10.60028076171875, + "learning_rate": 1.2443681318681319e-05, + "loss": 0.1718, + "step": 27341 + }, + { + "epoch": 75.11538461538461, + "grad_norm": 18.637351989746094, + "learning_rate": 1.2442307692307694e-05, + "loss": 0.3471, + "step": 27342 + }, + { + "epoch": 75.11813186813187, + "grad_norm": 14.380702018737793, + "learning_rate": 1.2440934065934067e-05, + "loss": 0.304, + "step": 27343 + }, + { + "epoch": 75.12087912087912, + "grad_norm": 12.971646308898926, + "learning_rate": 1.2439560439560439e-05, + "loss": 0.2534, + "step": 27344 + }, + { + "epoch": 75.12362637362638, + "grad_norm": 12.556371688842773, + "learning_rate": 1.2438186813186814e-05, + "loss": 0.3901, + "step": 27345 + }, + { + "epoch": 75.12637362637362, + "grad_norm": 12.436174392700195, + "learning_rate": 1.2436813186813187e-05, + "loss": 0.2947, + "step": 27346 + }, + { + "epoch": 75.12912087912088, + "grad_norm": 4.629306316375732, + "learning_rate": 1.2435439560439561e-05, + "loss": 0.0809, + "step": 27347 + }, + { + "epoch": 75.13186813186813, + "grad_norm": 19.967754364013672, + "learning_rate": 1.2434065934065934e-05, + "loss": 0.4028, + "step": 27348 + }, + { + "epoch": 75.13461538461539, + "grad_norm": 11.070752143859863, + "learning_rate": 1.2432692307692308e-05, + "loss": 0.2048, + "step": 27349 + }, + { + "epoch": 75.13736263736264, + "grad_norm": 13.309982299804688, + "learning_rate": 1.2431318681318683e-05, + "loss": 0.1843, + "step": 27350 + }, + { + "epoch": 75.14010989010988, + "grad_norm": 6.623362064361572, + "learning_rate": 1.2429945054945055e-05, + "loss": 0.1078, + "step": 27351 + }, + { + "epoch": 75.14285714285714, + "grad_norm": 8.6572847366333, + "learning_rate": 1.242857142857143e-05, + "loss": 0.1719, + "step": 27352 + }, + { + "epoch": 75.1456043956044, + "grad_norm": 12.350107192993164, + "learning_rate": 1.2427197802197803e-05, + "loss": 0.2245, + "step": 27353 + }, + { + "epoch": 75.14835164835165, + "grad_norm": 15.829170227050781, + "learning_rate": 1.2425824175824176e-05, + "loss": 0.2601, + "step": 27354 + }, + { + "epoch": 75.1510989010989, + "grad_norm": 17.55632781982422, + "learning_rate": 1.242445054945055e-05, + "loss": 0.4818, + "step": 27355 + }, + { + "epoch": 75.15384615384616, + "grad_norm": 7.516386032104492, + "learning_rate": 1.2423076923076923e-05, + "loss": 0.186, + "step": 27356 + }, + { + "epoch": 75.1565934065934, + "grad_norm": 9.37278938293457, + "learning_rate": 1.2421703296703298e-05, + "loss": 0.2056, + "step": 27357 + }, + { + "epoch": 75.15934065934066, + "grad_norm": 13.857226371765137, + "learning_rate": 1.2420329670329672e-05, + "loss": 0.3001, + "step": 27358 + }, + { + "epoch": 75.16208791208791, + "grad_norm": 10.006790161132812, + "learning_rate": 1.2418956043956043e-05, + "loss": 0.1508, + "step": 27359 + }, + { + "epoch": 75.16483516483517, + "grad_norm": 8.767348289489746, + "learning_rate": 1.2417582417582419e-05, + "loss": 0.1036, + "step": 27360 + }, + { + "epoch": 75.16758241758242, + "grad_norm": 9.62791919708252, + "learning_rate": 1.2416208791208792e-05, + "loss": 0.1183, + "step": 27361 + }, + { + "epoch": 75.17032967032966, + "grad_norm": 18.853364944458008, + "learning_rate": 1.2414835164835165e-05, + "loss": 0.41, + "step": 27362 + }, + { + "epoch": 75.17307692307692, + "grad_norm": 16.85100746154785, + "learning_rate": 1.2413461538461539e-05, + "loss": 0.2021, + "step": 27363 + }, + { + "epoch": 75.17582417582418, + "grad_norm": 6.761762619018555, + "learning_rate": 1.2412087912087912e-05, + "loss": 0.0868, + "step": 27364 + }, + { + "epoch": 75.17857142857143, + "grad_norm": 11.225866317749023, + "learning_rate": 1.2410714285714287e-05, + "loss": 0.214, + "step": 27365 + }, + { + "epoch": 75.18131868131869, + "grad_norm": 8.732733726501465, + "learning_rate": 1.2409340659340659e-05, + "loss": 0.2105, + "step": 27366 + }, + { + "epoch": 75.18406593406593, + "grad_norm": 21.896259307861328, + "learning_rate": 1.2407967032967034e-05, + "loss": 0.4893, + "step": 27367 + }, + { + "epoch": 75.18681318681318, + "grad_norm": 16.04679298400879, + "learning_rate": 1.2406593406593408e-05, + "loss": 0.2917, + "step": 27368 + }, + { + "epoch": 75.18956043956044, + "grad_norm": 11.350552558898926, + "learning_rate": 1.2405219780219781e-05, + "loss": 0.2317, + "step": 27369 + }, + { + "epoch": 75.1923076923077, + "grad_norm": 9.250882148742676, + "learning_rate": 1.2403846153846154e-05, + "loss": 0.1071, + "step": 27370 + }, + { + "epoch": 75.19505494505495, + "grad_norm": 9.577728271484375, + "learning_rate": 1.2402472527472528e-05, + "loss": 0.0523, + "step": 27371 + }, + { + "epoch": 75.1978021978022, + "grad_norm": 11.472311973571777, + "learning_rate": 1.2401098901098903e-05, + "loss": 0.3035, + "step": 27372 + }, + { + "epoch": 75.20054945054945, + "grad_norm": 10.244608879089355, + "learning_rate": 1.2399725274725276e-05, + "loss": 0.2889, + "step": 27373 + }, + { + "epoch": 75.2032967032967, + "grad_norm": 16.869739532470703, + "learning_rate": 1.2398351648351648e-05, + "loss": 0.4816, + "step": 27374 + }, + { + "epoch": 75.20604395604396, + "grad_norm": 6.130902290344238, + "learning_rate": 1.2396978021978023e-05, + "loss": 0.1097, + "step": 27375 + }, + { + "epoch": 75.20879120879121, + "grad_norm": 3.8584609031677246, + "learning_rate": 1.2395604395604396e-05, + "loss": 0.058, + "step": 27376 + }, + { + "epoch": 75.21153846153847, + "grad_norm": 19.968116760253906, + "learning_rate": 1.239423076923077e-05, + "loss": 0.3366, + "step": 27377 + }, + { + "epoch": 75.21428571428571, + "grad_norm": 3.6001198291778564, + "learning_rate": 1.2392857142857143e-05, + "loss": 0.0385, + "step": 27378 + }, + { + "epoch": 75.21703296703296, + "grad_norm": 10.393524169921875, + "learning_rate": 1.2391483516483517e-05, + "loss": 0.1721, + "step": 27379 + }, + { + "epoch": 75.21978021978022, + "grad_norm": 22.04418182373047, + "learning_rate": 1.2390109890109892e-05, + "loss": 0.3414, + "step": 27380 + }, + { + "epoch": 75.22252747252747, + "grad_norm": 20.041955947875977, + "learning_rate": 1.2388736263736263e-05, + "loss": 0.5449, + "step": 27381 + }, + { + "epoch": 75.22527472527473, + "grad_norm": 6.6801533699035645, + "learning_rate": 1.2387362637362639e-05, + "loss": 0.1006, + "step": 27382 + }, + { + "epoch": 75.22802197802197, + "grad_norm": 5.748857021331787, + "learning_rate": 1.2385989010989012e-05, + "loss": 0.0966, + "step": 27383 + }, + { + "epoch": 75.23076923076923, + "grad_norm": 4.036013603210449, + "learning_rate": 1.2384615384615385e-05, + "loss": 0.0434, + "step": 27384 + }, + { + "epoch": 75.23351648351648, + "grad_norm": 2.199042797088623, + "learning_rate": 1.2383241758241759e-05, + "loss": 0.0206, + "step": 27385 + }, + { + "epoch": 75.23626373626374, + "grad_norm": 7.692093372344971, + "learning_rate": 1.2381868131868132e-05, + "loss": 0.2436, + "step": 27386 + }, + { + "epoch": 75.23901098901099, + "grad_norm": 13.806302070617676, + "learning_rate": 1.2380494505494506e-05, + "loss": 0.2958, + "step": 27387 + }, + { + "epoch": 75.24175824175825, + "grad_norm": 8.914026260375977, + "learning_rate": 1.2379120879120879e-05, + "loss": 0.1694, + "step": 27388 + }, + { + "epoch": 75.24450549450549, + "grad_norm": 5.627710342407227, + "learning_rate": 1.2377747252747252e-05, + "loss": 0.0554, + "step": 27389 + }, + { + "epoch": 75.24725274725274, + "grad_norm": 7.70175838470459, + "learning_rate": 1.2376373626373628e-05, + "loss": 0.0735, + "step": 27390 + }, + { + "epoch": 75.25, + "grad_norm": 10.992919921875, + "learning_rate": 1.2375000000000001e-05, + "loss": 0.2318, + "step": 27391 + }, + { + "epoch": 75.25274725274726, + "grad_norm": 14.815378189086914, + "learning_rate": 1.2373626373626374e-05, + "loss": 0.319, + "step": 27392 + }, + { + "epoch": 75.25549450549451, + "grad_norm": 19.372407913208008, + "learning_rate": 1.2372252747252748e-05, + "loss": 0.4169, + "step": 27393 + }, + { + "epoch": 75.25824175824175, + "grad_norm": 10.446721076965332, + "learning_rate": 1.2370879120879121e-05, + "loss": 0.1516, + "step": 27394 + }, + { + "epoch": 75.26098901098901, + "grad_norm": 10.93835163116455, + "learning_rate": 1.2369505494505496e-05, + "loss": 0.1362, + "step": 27395 + }, + { + "epoch": 75.26373626373626, + "grad_norm": 7.111746311187744, + "learning_rate": 1.2368131868131868e-05, + "loss": 0.1202, + "step": 27396 + }, + { + "epoch": 75.26648351648352, + "grad_norm": 7.665085792541504, + "learning_rate": 1.2366758241758241e-05, + "loss": 0.0994, + "step": 27397 + }, + { + "epoch": 75.26923076923077, + "grad_norm": 16.209196090698242, + "learning_rate": 1.2365384615384616e-05, + "loss": 0.3626, + "step": 27398 + }, + { + "epoch": 75.27197802197803, + "grad_norm": 8.28107738494873, + "learning_rate": 1.236401098901099e-05, + "loss": 0.1897, + "step": 27399 + }, + { + "epoch": 75.27472527472527, + "grad_norm": 4.882693767547607, + "learning_rate": 1.2362637362637363e-05, + "loss": 0.0681, + "step": 27400 + }, + { + "epoch": 75.27747252747253, + "grad_norm": 13.870636940002441, + "learning_rate": 1.2361263736263737e-05, + "loss": 0.2392, + "step": 27401 + }, + { + "epoch": 75.28021978021978, + "grad_norm": 13.287424087524414, + "learning_rate": 1.235989010989011e-05, + "loss": 0.287, + "step": 27402 + }, + { + "epoch": 75.28296703296704, + "grad_norm": 1.974908471107483, + "learning_rate": 1.2358516483516484e-05, + "loss": 0.0314, + "step": 27403 + }, + { + "epoch": 75.28571428571429, + "grad_norm": 7.077321529388428, + "learning_rate": 1.2357142857142857e-05, + "loss": 0.1369, + "step": 27404 + }, + { + "epoch": 75.28846153846153, + "grad_norm": 22.100250244140625, + "learning_rate": 1.2355769230769232e-05, + "loss": 0.4627, + "step": 27405 + }, + { + "epoch": 75.29120879120879, + "grad_norm": 13.276433944702148, + "learning_rate": 1.2354395604395605e-05, + "loss": 0.4795, + "step": 27406 + }, + { + "epoch": 75.29395604395604, + "grad_norm": 5.934657096862793, + "learning_rate": 1.2353021978021979e-05, + "loss": 0.0342, + "step": 27407 + }, + { + "epoch": 75.2967032967033, + "grad_norm": 7.377045154571533, + "learning_rate": 1.2351648351648352e-05, + "loss": 0.1184, + "step": 27408 + }, + { + "epoch": 75.29945054945055, + "grad_norm": 18.795955657958984, + "learning_rate": 1.2350274725274726e-05, + "loss": 0.2924, + "step": 27409 + }, + { + "epoch": 75.3021978021978, + "grad_norm": 8.448567390441895, + "learning_rate": 1.23489010989011e-05, + "loss": 0.2024, + "step": 27410 + }, + { + "epoch": 75.30494505494505, + "grad_norm": 2.1323764324188232, + "learning_rate": 1.2347527472527472e-05, + "loss": 0.0305, + "step": 27411 + }, + { + "epoch": 75.3076923076923, + "grad_norm": 14.452959060668945, + "learning_rate": 1.2346153846153846e-05, + "loss": 0.2296, + "step": 27412 + }, + { + "epoch": 75.31043956043956, + "grad_norm": 7.384202480316162, + "learning_rate": 1.2344780219780221e-05, + "loss": 0.1449, + "step": 27413 + }, + { + "epoch": 75.31318681318682, + "grad_norm": 19.027362823486328, + "learning_rate": 1.2343406593406594e-05, + "loss": 0.6304, + "step": 27414 + }, + { + "epoch": 75.31593406593407, + "grad_norm": 11.903603553771973, + "learning_rate": 1.2342032967032968e-05, + "loss": 0.3125, + "step": 27415 + }, + { + "epoch": 75.31868131868131, + "grad_norm": 8.04925537109375, + "learning_rate": 1.2340659340659341e-05, + "loss": 0.1343, + "step": 27416 + }, + { + "epoch": 75.32142857142857, + "grad_norm": 4.247564792633057, + "learning_rate": 1.2339285714285715e-05, + "loss": 0.0477, + "step": 27417 + }, + { + "epoch": 75.32417582417582, + "grad_norm": 10.083939552307129, + "learning_rate": 1.2337912087912088e-05, + "loss": 0.1175, + "step": 27418 + }, + { + "epoch": 75.32692307692308, + "grad_norm": 14.228023529052734, + "learning_rate": 1.2336538461538461e-05, + "loss": 0.2109, + "step": 27419 + }, + { + "epoch": 75.32967032967034, + "grad_norm": 4.382889270782471, + "learning_rate": 1.2335164835164837e-05, + "loss": 0.0907, + "step": 27420 + }, + { + "epoch": 75.33241758241758, + "grad_norm": 1.6859887838363647, + "learning_rate": 1.233379120879121e-05, + "loss": 0.051, + "step": 27421 + }, + { + "epoch": 75.33516483516483, + "grad_norm": 3.6233856678009033, + "learning_rate": 1.2332417582417583e-05, + "loss": 0.0561, + "step": 27422 + }, + { + "epoch": 75.33791208791209, + "grad_norm": 12.66296100616455, + "learning_rate": 1.2331043956043957e-05, + "loss": 0.2101, + "step": 27423 + }, + { + "epoch": 75.34065934065934, + "grad_norm": 14.596616744995117, + "learning_rate": 1.232967032967033e-05, + "loss": 0.2178, + "step": 27424 + }, + { + "epoch": 75.3434065934066, + "grad_norm": 27.912952423095703, + "learning_rate": 1.2328296703296705e-05, + "loss": 0.9035, + "step": 27425 + }, + { + "epoch": 75.34615384615384, + "grad_norm": 3.7508420944213867, + "learning_rate": 1.2326923076923077e-05, + "loss": 0.0482, + "step": 27426 + }, + { + "epoch": 75.3489010989011, + "grad_norm": 6.777843952178955, + "learning_rate": 1.232554945054945e-05, + "loss": 0.122, + "step": 27427 + }, + { + "epoch": 75.35164835164835, + "grad_norm": 16.450939178466797, + "learning_rate": 1.2324175824175825e-05, + "loss": 0.3639, + "step": 27428 + }, + { + "epoch": 75.3543956043956, + "grad_norm": 5.283799171447754, + "learning_rate": 1.2322802197802199e-05, + "loss": 0.1269, + "step": 27429 + }, + { + "epoch": 75.35714285714286, + "grad_norm": 8.071602821350098, + "learning_rate": 1.2321428571428572e-05, + "loss": 0.2418, + "step": 27430 + }, + { + "epoch": 75.35989010989012, + "grad_norm": 11.84433650970459, + "learning_rate": 1.2320054945054946e-05, + "loss": 0.2078, + "step": 27431 + }, + { + "epoch": 75.36263736263736, + "grad_norm": 3.1773550510406494, + "learning_rate": 1.2318681318681319e-05, + "loss": 0.0506, + "step": 27432 + }, + { + "epoch": 75.36538461538461, + "grad_norm": 13.317130088806152, + "learning_rate": 1.2317307692307693e-05, + "loss": 0.2601, + "step": 27433 + }, + { + "epoch": 75.36813186813187, + "grad_norm": 8.962251663208008, + "learning_rate": 1.2315934065934066e-05, + "loss": 0.1959, + "step": 27434 + }, + { + "epoch": 75.37087912087912, + "grad_norm": 11.307281494140625, + "learning_rate": 1.2314560439560441e-05, + "loss": 0.1748, + "step": 27435 + }, + { + "epoch": 75.37362637362638, + "grad_norm": 13.50085163116455, + "learning_rate": 1.2313186813186814e-05, + "loss": 0.3411, + "step": 27436 + }, + { + "epoch": 75.37637362637362, + "grad_norm": 7.7373833656311035, + "learning_rate": 1.2311813186813186e-05, + "loss": 0.1096, + "step": 27437 + }, + { + "epoch": 75.37912087912088, + "grad_norm": 30.06419563293457, + "learning_rate": 1.2310439560439561e-05, + "loss": 1.182, + "step": 27438 + }, + { + "epoch": 75.38186813186813, + "grad_norm": 5.35816764831543, + "learning_rate": 1.2309065934065935e-05, + "loss": 0.0638, + "step": 27439 + }, + { + "epoch": 75.38461538461539, + "grad_norm": 5.073784351348877, + "learning_rate": 1.230769230769231e-05, + "loss": 0.0717, + "step": 27440 + }, + { + "epoch": 75.38736263736264, + "grad_norm": 11.87089729309082, + "learning_rate": 1.2306318681318681e-05, + "loss": 0.1491, + "step": 27441 + }, + { + "epoch": 75.39010989010988, + "grad_norm": 14.683597564697266, + "learning_rate": 1.2304945054945055e-05, + "loss": 0.1821, + "step": 27442 + }, + { + "epoch": 75.39285714285714, + "grad_norm": 14.311050415039062, + "learning_rate": 1.230357142857143e-05, + "loss": 0.2564, + "step": 27443 + }, + { + "epoch": 75.3956043956044, + "grad_norm": 14.7754487991333, + "learning_rate": 1.2302197802197803e-05, + "loss": 0.5191, + "step": 27444 + }, + { + "epoch": 75.39835164835165, + "grad_norm": 9.468777656555176, + "learning_rate": 1.2300824175824175e-05, + "loss": 0.1983, + "step": 27445 + }, + { + "epoch": 75.4010989010989, + "grad_norm": 15.895929336547852, + "learning_rate": 1.229945054945055e-05, + "loss": 0.2628, + "step": 27446 + }, + { + "epoch": 75.40384615384616, + "grad_norm": 12.896028518676758, + "learning_rate": 1.2298076923076924e-05, + "loss": 0.1232, + "step": 27447 + }, + { + "epoch": 75.4065934065934, + "grad_norm": 11.011663436889648, + "learning_rate": 1.2296703296703297e-05, + "loss": 0.1787, + "step": 27448 + }, + { + "epoch": 75.40934065934066, + "grad_norm": 17.342525482177734, + "learning_rate": 1.229532967032967e-05, + "loss": 0.2044, + "step": 27449 + }, + { + "epoch": 75.41208791208791, + "grad_norm": 14.162893295288086, + "learning_rate": 1.2293956043956044e-05, + "loss": 0.2205, + "step": 27450 + }, + { + "epoch": 75.41483516483517, + "grad_norm": 9.929271697998047, + "learning_rate": 1.2292582417582419e-05, + "loss": 0.1073, + "step": 27451 + }, + { + "epoch": 75.41758241758242, + "grad_norm": 15.382216453552246, + "learning_rate": 1.229120879120879e-05, + "loss": 0.1845, + "step": 27452 + }, + { + "epoch": 75.42032967032966, + "grad_norm": 19.933284759521484, + "learning_rate": 1.2289835164835166e-05, + "loss": 0.3062, + "step": 27453 + }, + { + "epoch": 75.42307692307692, + "grad_norm": 2.984795570373535, + "learning_rate": 1.2288461538461539e-05, + "loss": 0.0281, + "step": 27454 + }, + { + "epoch": 75.42582417582418, + "grad_norm": 11.96419620513916, + "learning_rate": 1.2287087912087913e-05, + "loss": 0.2442, + "step": 27455 + }, + { + "epoch": 75.42857142857143, + "grad_norm": 16.104164123535156, + "learning_rate": 1.2285714285714286e-05, + "loss": 0.3707, + "step": 27456 + }, + { + "epoch": 75.43131868131869, + "grad_norm": 3.2697579860687256, + "learning_rate": 1.228434065934066e-05, + "loss": 0.0357, + "step": 27457 + }, + { + "epoch": 75.43406593406593, + "grad_norm": 5.067954063415527, + "learning_rate": 1.2282967032967034e-05, + "loss": 0.062, + "step": 27458 + }, + { + "epoch": 75.43681318681318, + "grad_norm": 15.75764274597168, + "learning_rate": 1.2281593406593408e-05, + "loss": 0.3148, + "step": 27459 + }, + { + "epoch": 75.43956043956044, + "grad_norm": 9.211718559265137, + "learning_rate": 1.228021978021978e-05, + "loss": 0.1355, + "step": 27460 + }, + { + "epoch": 75.4423076923077, + "grad_norm": 13.849599838256836, + "learning_rate": 1.2278846153846155e-05, + "loss": 0.2948, + "step": 27461 + }, + { + "epoch": 75.44505494505495, + "grad_norm": 14.2716064453125, + "learning_rate": 1.2277472527472528e-05, + "loss": 0.1753, + "step": 27462 + }, + { + "epoch": 75.4478021978022, + "grad_norm": 11.966805458068848, + "learning_rate": 1.2276098901098901e-05, + "loss": 0.143, + "step": 27463 + }, + { + "epoch": 75.45054945054945, + "grad_norm": 4.547956943511963, + "learning_rate": 1.2274725274725275e-05, + "loss": 0.0529, + "step": 27464 + }, + { + "epoch": 75.4532967032967, + "grad_norm": 4.61391019821167, + "learning_rate": 1.2273351648351648e-05, + "loss": 0.1132, + "step": 27465 + }, + { + "epoch": 75.45604395604396, + "grad_norm": 16.643163681030273, + "learning_rate": 1.2271978021978023e-05, + "loss": 0.2851, + "step": 27466 + }, + { + "epoch": 75.45879120879121, + "grad_norm": 14.709086418151855, + "learning_rate": 1.2270604395604395e-05, + "loss": 0.2177, + "step": 27467 + }, + { + "epoch": 75.46153846153847, + "grad_norm": 10.889663696289062, + "learning_rate": 1.226923076923077e-05, + "loss": 0.2703, + "step": 27468 + }, + { + "epoch": 75.46428571428571, + "grad_norm": 6.666116237640381, + "learning_rate": 1.2267857142857144e-05, + "loss": 0.0722, + "step": 27469 + }, + { + "epoch": 75.46703296703296, + "grad_norm": 8.107809066772461, + "learning_rate": 1.2266483516483517e-05, + "loss": 0.081, + "step": 27470 + }, + { + "epoch": 75.46978021978022, + "grad_norm": 9.081259727478027, + "learning_rate": 1.226510989010989e-05, + "loss": 0.1016, + "step": 27471 + }, + { + "epoch": 75.47252747252747, + "grad_norm": 18.616714477539062, + "learning_rate": 1.2263736263736264e-05, + "loss": 0.2863, + "step": 27472 + }, + { + "epoch": 75.47527472527473, + "grad_norm": 11.077890396118164, + "learning_rate": 1.2262362637362639e-05, + "loss": 0.2081, + "step": 27473 + }, + { + "epoch": 75.47802197802197, + "grad_norm": 14.290764808654785, + "learning_rate": 1.2260989010989012e-05, + "loss": 0.2122, + "step": 27474 + }, + { + "epoch": 75.48076923076923, + "grad_norm": 33.73109436035156, + "learning_rate": 1.2259615384615384e-05, + "loss": 0.561, + "step": 27475 + }, + { + "epoch": 75.48351648351648, + "grad_norm": 9.778402328491211, + "learning_rate": 1.225824175824176e-05, + "loss": 0.1394, + "step": 27476 + }, + { + "epoch": 75.48626373626374, + "grad_norm": 5.451862335205078, + "learning_rate": 1.2256868131868133e-05, + "loss": 0.0884, + "step": 27477 + }, + { + "epoch": 75.48901098901099, + "grad_norm": 8.67290210723877, + "learning_rate": 1.2255494505494506e-05, + "loss": 0.1194, + "step": 27478 + }, + { + "epoch": 75.49175824175825, + "grad_norm": 6.633672714233398, + "learning_rate": 1.225412087912088e-05, + "loss": 0.0525, + "step": 27479 + }, + { + "epoch": 75.49450549450549, + "grad_norm": 7.117577075958252, + "learning_rate": 1.2252747252747253e-05, + "loss": 0.049, + "step": 27480 + }, + { + "epoch": 75.49725274725274, + "grad_norm": 8.503335952758789, + "learning_rate": 1.2251373626373628e-05, + "loss": 0.2035, + "step": 27481 + }, + { + "epoch": 75.5, + "grad_norm": 7.996793746948242, + "learning_rate": 1.225e-05, + "loss": 0.0875, + "step": 27482 + }, + { + "epoch": 75.50274725274726, + "grad_norm": 22.742856979370117, + "learning_rate": 1.2248626373626375e-05, + "loss": 0.8753, + "step": 27483 + }, + { + "epoch": 75.50549450549451, + "grad_norm": 5.652416706085205, + "learning_rate": 1.2247252747252748e-05, + "loss": 0.0709, + "step": 27484 + }, + { + "epoch": 75.50824175824175, + "grad_norm": 18.81350326538086, + "learning_rate": 1.2245879120879122e-05, + "loss": 0.2, + "step": 27485 + }, + { + "epoch": 75.51098901098901, + "grad_norm": 13.84464168548584, + "learning_rate": 1.2244505494505495e-05, + "loss": 0.2905, + "step": 27486 + }, + { + "epoch": 75.51373626373626, + "grad_norm": 14.655855178833008, + "learning_rate": 1.2243131868131868e-05, + "loss": 0.3456, + "step": 27487 + }, + { + "epoch": 75.51648351648352, + "grad_norm": 7.106719970703125, + "learning_rate": 1.2241758241758243e-05, + "loss": 0.0868, + "step": 27488 + }, + { + "epoch": 75.51923076923077, + "grad_norm": 25.692855834960938, + "learning_rate": 1.2240384615384617e-05, + "loss": 0.7433, + "step": 27489 + }, + { + "epoch": 75.52197802197803, + "grad_norm": 9.056978225708008, + "learning_rate": 1.2239010989010989e-05, + "loss": 0.0812, + "step": 27490 + }, + { + "epoch": 75.52472527472527, + "grad_norm": 14.348170280456543, + "learning_rate": 1.2237637362637364e-05, + "loss": 0.2788, + "step": 27491 + }, + { + "epoch": 75.52747252747253, + "grad_norm": 5.815730571746826, + "learning_rate": 1.2236263736263737e-05, + "loss": 0.068, + "step": 27492 + }, + { + "epoch": 75.53021978021978, + "grad_norm": 12.662735939025879, + "learning_rate": 1.223489010989011e-05, + "loss": 0.1881, + "step": 27493 + }, + { + "epoch": 75.53296703296704, + "grad_norm": 8.504219055175781, + "learning_rate": 1.2233516483516484e-05, + "loss": 0.1009, + "step": 27494 + }, + { + "epoch": 75.53571428571429, + "grad_norm": 14.710792541503906, + "learning_rate": 1.2232142857142857e-05, + "loss": 0.3202, + "step": 27495 + }, + { + "epoch": 75.53846153846153, + "grad_norm": 11.368789672851562, + "learning_rate": 1.2230769230769232e-05, + "loss": 0.3043, + "step": 27496 + }, + { + "epoch": 75.54120879120879, + "grad_norm": 17.744306564331055, + "learning_rate": 1.2229395604395604e-05, + "loss": 0.5266, + "step": 27497 + }, + { + "epoch": 75.54395604395604, + "grad_norm": 2.0094358921051025, + "learning_rate": 1.222802197802198e-05, + "loss": 0.0241, + "step": 27498 + }, + { + "epoch": 75.5467032967033, + "grad_norm": 15.848191261291504, + "learning_rate": 1.2226648351648353e-05, + "loss": 0.4009, + "step": 27499 + }, + { + "epoch": 75.54945054945055, + "grad_norm": 8.950555801391602, + "learning_rate": 1.2225274725274726e-05, + "loss": 0.1081, + "step": 27500 + }, + { + "epoch": 75.5521978021978, + "grad_norm": 11.523908615112305, + "learning_rate": 1.22239010989011e-05, + "loss": 0.2091, + "step": 27501 + }, + { + "epoch": 75.55494505494505, + "grad_norm": 10.611847877502441, + "learning_rate": 1.2222527472527473e-05, + "loss": 0.2551, + "step": 27502 + }, + { + "epoch": 75.5576923076923, + "grad_norm": 12.366554260253906, + "learning_rate": 1.2221153846153846e-05, + "loss": 0.1843, + "step": 27503 + }, + { + "epoch": 75.56043956043956, + "grad_norm": 7.859602451324463, + "learning_rate": 1.2219780219780221e-05, + "loss": 0.2182, + "step": 27504 + }, + { + "epoch": 75.56318681318682, + "grad_norm": 13.551202774047852, + "learning_rate": 1.2218406593406593e-05, + "loss": 0.1976, + "step": 27505 + }, + { + "epoch": 75.56593406593407, + "grad_norm": 13.733407974243164, + "learning_rate": 1.2217032967032968e-05, + "loss": 0.367, + "step": 27506 + }, + { + "epoch": 75.56868131868131, + "grad_norm": 12.828643798828125, + "learning_rate": 1.2215659340659342e-05, + "loss": 0.2236, + "step": 27507 + }, + { + "epoch": 75.57142857142857, + "grad_norm": 18.24440574645996, + "learning_rate": 1.2214285714285715e-05, + "loss": 0.4098, + "step": 27508 + }, + { + "epoch": 75.57417582417582, + "grad_norm": 15.279516220092773, + "learning_rate": 1.2212912087912088e-05, + "loss": 0.4448, + "step": 27509 + }, + { + "epoch": 75.57692307692308, + "grad_norm": 12.120972633361816, + "learning_rate": 1.2211538461538462e-05, + "loss": 0.2263, + "step": 27510 + }, + { + "epoch": 75.57967032967034, + "grad_norm": 2.813680410385132, + "learning_rate": 1.2210164835164837e-05, + "loss": 0.0267, + "step": 27511 + }, + { + "epoch": 75.58241758241758, + "grad_norm": 10.054640769958496, + "learning_rate": 1.2208791208791209e-05, + "loss": 0.1131, + "step": 27512 + }, + { + "epoch": 75.58516483516483, + "grad_norm": 13.128769874572754, + "learning_rate": 1.2207417582417582e-05, + "loss": 0.4944, + "step": 27513 + }, + { + "epoch": 75.58791208791209, + "grad_norm": 10.659079551696777, + "learning_rate": 1.2206043956043957e-05, + "loss": 0.2686, + "step": 27514 + }, + { + "epoch": 75.59065934065934, + "grad_norm": 14.403082847595215, + "learning_rate": 1.220467032967033e-05, + "loss": 0.1807, + "step": 27515 + }, + { + "epoch": 75.5934065934066, + "grad_norm": 15.436649322509766, + "learning_rate": 1.2203296703296704e-05, + "loss": 0.284, + "step": 27516 + }, + { + "epoch": 75.59615384615384, + "grad_norm": 9.345736503601074, + "learning_rate": 1.2201923076923077e-05, + "loss": 0.09, + "step": 27517 + }, + { + "epoch": 75.5989010989011, + "grad_norm": 16.451658248901367, + "learning_rate": 1.220054945054945e-05, + "loss": 0.2909, + "step": 27518 + }, + { + "epoch": 75.60164835164835, + "grad_norm": 6.843769073486328, + "learning_rate": 1.2199175824175826e-05, + "loss": 0.2238, + "step": 27519 + }, + { + "epoch": 75.6043956043956, + "grad_norm": 11.528849601745605, + "learning_rate": 1.2197802197802198e-05, + "loss": 0.3187, + "step": 27520 + }, + { + "epoch": 75.60714285714286, + "grad_norm": 10.061616897583008, + "learning_rate": 1.2196428571428573e-05, + "loss": 0.1215, + "step": 27521 + }, + { + "epoch": 75.60989010989012, + "grad_norm": 8.168706893920898, + "learning_rate": 1.2195054945054946e-05, + "loss": 0.1163, + "step": 27522 + }, + { + "epoch": 75.61263736263736, + "grad_norm": 17.468177795410156, + "learning_rate": 1.219368131868132e-05, + "loss": 0.4969, + "step": 27523 + }, + { + "epoch": 75.61538461538461, + "grad_norm": 15.315349578857422, + "learning_rate": 1.2192307692307693e-05, + "loss": 0.3944, + "step": 27524 + }, + { + "epoch": 75.61813186813187, + "grad_norm": 17.764556884765625, + "learning_rate": 1.2190934065934066e-05, + "loss": 0.2931, + "step": 27525 + }, + { + "epoch": 75.62087912087912, + "grad_norm": 12.040314674377441, + "learning_rate": 1.2189560439560441e-05, + "loss": 0.2713, + "step": 27526 + }, + { + "epoch": 75.62362637362638, + "grad_norm": 22.005420684814453, + "learning_rate": 1.2188186813186813e-05, + "loss": 0.4144, + "step": 27527 + }, + { + "epoch": 75.62637362637362, + "grad_norm": 5.1786603927612305, + "learning_rate": 1.2186813186813186e-05, + "loss": 0.0496, + "step": 27528 + }, + { + "epoch": 75.62912087912088, + "grad_norm": 6.503366470336914, + "learning_rate": 1.2185439560439562e-05, + "loss": 0.0623, + "step": 27529 + }, + { + "epoch": 75.63186813186813, + "grad_norm": 21.484556198120117, + "learning_rate": 1.2184065934065935e-05, + "loss": 0.6124, + "step": 27530 + }, + { + "epoch": 75.63461538461539, + "grad_norm": 13.945859909057617, + "learning_rate": 1.2182692307692308e-05, + "loss": 0.1771, + "step": 27531 + }, + { + "epoch": 75.63736263736264, + "grad_norm": 4.910858631134033, + "learning_rate": 1.2181318681318682e-05, + "loss": 0.12, + "step": 27532 + }, + { + "epoch": 75.64010989010988, + "grad_norm": 13.242982864379883, + "learning_rate": 1.2179945054945055e-05, + "loss": 0.2523, + "step": 27533 + }, + { + "epoch": 75.64285714285714, + "grad_norm": 11.198542594909668, + "learning_rate": 1.2178571428571429e-05, + "loss": 0.0937, + "step": 27534 + }, + { + "epoch": 75.6456043956044, + "grad_norm": 11.183652877807617, + "learning_rate": 1.2177197802197802e-05, + "loss": 0.1096, + "step": 27535 + }, + { + "epoch": 75.64835164835165, + "grad_norm": 13.575704574584961, + "learning_rate": 1.2175824175824177e-05, + "loss": 0.1889, + "step": 27536 + }, + { + "epoch": 75.6510989010989, + "grad_norm": 5.423712730407715, + "learning_rate": 1.217445054945055e-05, + "loss": 0.0826, + "step": 27537 + }, + { + "epoch": 75.65384615384616, + "grad_norm": 17.36686897277832, + "learning_rate": 1.2173076923076924e-05, + "loss": 0.2798, + "step": 27538 + }, + { + "epoch": 75.6565934065934, + "grad_norm": 10.6174955368042, + "learning_rate": 1.2171703296703297e-05, + "loss": 0.1328, + "step": 27539 + }, + { + "epoch": 75.65934065934066, + "grad_norm": 10.295669555664062, + "learning_rate": 1.217032967032967e-05, + "loss": 0.1244, + "step": 27540 + }, + { + "epoch": 75.66208791208791, + "grad_norm": 15.900151252746582, + "learning_rate": 1.2168956043956046e-05, + "loss": 0.3334, + "step": 27541 + }, + { + "epoch": 75.66483516483517, + "grad_norm": 7.620306491851807, + "learning_rate": 1.2167582417582418e-05, + "loss": 0.1393, + "step": 27542 + }, + { + "epoch": 75.66758241758242, + "grad_norm": 9.978128433227539, + "learning_rate": 1.2166208791208791e-05, + "loss": 0.1569, + "step": 27543 + }, + { + "epoch": 75.67032967032966, + "grad_norm": 12.266275405883789, + "learning_rate": 1.2164835164835166e-05, + "loss": 0.1916, + "step": 27544 + }, + { + "epoch": 75.67307692307692, + "grad_norm": 10.958905220031738, + "learning_rate": 1.216346153846154e-05, + "loss": 0.2055, + "step": 27545 + }, + { + "epoch": 75.67582417582418, + "grad_norm": 4.176995754241943, + "learning_rate": 1.2162087912087913e-05, + "loss": 0.0623, + "step": 27546 + }, + { + "epoch": 75.67857142857143, + "grad_norm": 14.189431190490723, + "learning_rate": 1.2160714285714286e-05, + "loss": 0.2322, + "step": 27547 + }, + { + "epoch": 75.68131868131869, + "grad_norm": 6.06174373626709, + "learning_rate": 1.215934065934066e-05, + "loss": 0.0872, + "step": 27548 + }, + { + "epoch": 75.68406593406593, + "grad_norm": 7.378353595733643, + "learning_rate": 1.2157967032967033e-05, + "loss": 0.0873, + "step": 27549 + }, + { + "epoch": 75.68681318681318, + "grad_norm": 2.5026512145996094, + "learning_rate": 1.2156593406593407e-05, + "loss": 0.027, + "step": 27550 + }, + { + "epoch": 75.68956043956044, + "grad_norm": 23.14519500732422, + "learning_rate": 1.2155219780219782e-05, + "loss": 0.5712, + "step": 27551 + }, + { + "epoch": 75.6923076923077, + "grad_norm": 6.687864303588867, + "learning_rate": 1.2153846153846155e-05, + "loss": 0.1221, + "step": 27552 + }, + { + "epoch": 75.69505494505495, + "grad_norm": 9.76840877532959, + "learning_rate": 1.2152472527472528e-05, + "loss": 0.1623, + "step": 27553 + }, + { + "epoch": 75.6978021978022, + "grad_norm": 17.354978561401367, + "learning_rate": 1.2151098901098902e-05, + "loss": 0.3772, + "step": 27554 + }, + { + "epoch": 75.70054945054945, + "grad_norm": 14.765639305114746, + "learning_rate": 1.2149725274725275e-05, + "loss": 0.2871, + "step": 27555 + }, + { + "epoch": 75.7032967032967, + "grad_norm": 17.270938873291016, + "learning_rate": 1.2148351648351649e-05, + "loss": 0.4282, + "step": 27556 + }, + { + "epoch": 75.70604395604396, + "grad_norm": 17.656402587890625, + "learning_rate": 1.2146978021978022e-05, + "loss": 0.3467, + "step": 27557 + }, + { + "epoch": 75.70879120879121, + "grad_norm": 27.910877227783203, + "learning_rate": 1.2145604395604395e-05, + "loss": 0.9957, + "step": 27558 + }, + { + "epoch": 75.71153846153847, + "grad_norm": 18.060762405395508, + "learning_rate": 1.214423076923077e-05, + "loss": 0.3226, + "step": 27559 + }, + { + "epoch": 75.71428571428571, + "grad_norm": 9.845367431640625, + "learning_rate": 1.2142857142857144e-05, + "loss": 0.1019, + "step": 27560 + }, + { + "epoch": 75.71703296703296, + "grad_norm": 1.1192376613616943, + "learning_rate": 1.2141483516483516e-05, + "loss": 0.0209, + "step": 27561 + }, + { + "epoch": 75.71978021978022, + "grad_norm": 8.484121322631836, + "learning_rate": 1.214010989010989e-05, + "loss": 0.1555, + "step": 27562 + }, + { + "epoch": 75.72252747252747, + "grad_norm": 7.8108344078063965, + "learning_rate": 1.2138736263736264e-05, + "loss": 0.081, + "step": 27563 + }, + { + "epoch": 75.72527472527473, + "grad_norm": 17.52950668334961, + "learning_rate": 1.2137362637362638e-05, + "loss": 0.2712, + "step": 27564 + }, + { + "epoch": 75.72802197802197, + "grad_norm": 13.390210151672363, + "learning_rate": 1.2135989010989011e-05, + "loss": 0.2353, + "step": 27565 + }, + { + "epoch": 75.73076923076923, + "grad_norm": 18.56077766418457, + "learning_rate": 1.2134615384615384e-05, + "loss": 0.2416, + "step": 27566 + }, + { + "epoch": 75.73351648351648, + "grad_norm": 22.9656982421875, + "learning_rate": 1.213324175824176e-05, + "loss": 0.5638, + "step": 27567 + }, + { + "epoch": 75.73626373626374, + "grad_norm": 3.6011674404144287, + "learning_rate": 1.2131868131868133e-05, + "loss": 0.0579, + "step": 27568 + }, + { + "epoch": 75.73901098901099, + "grad_norm": 4.803902626037598, + "learning_rate": 1.2130494505494506e-05, + "loss": 0.0624, + "step": 27569 + }, + { + "epoch": 75.74175824175825, + "grad_norm": 4.95663595199585, + "learning_rate": 1.212912087912088e-05, + "loss": 0.1031, + "step": 27570 + }, + { + "epoch": 75.74450549450549, + "grad_norm": 22.869401931762695, + "learning_rate": 1.2127747252747253e-05, + "loss": 0.3673, + "step": 27571 + }, + { + "epoch": 75.74725274725274, + "grad_norm": 6.216737747192383, + "learning_rate": 1.2126373626373627e-05, + "loss": 0.163, + "step": 27572 + }, + { + "epoch": 75.75, + "grad_norm": 2.3168065547943115, + "learning_rate": 1.2125e-05, + "loss": 0.0371, + "step": 27573 + }, + { + "epoch": 75.75274725274726, + "grad_norm": 16.096845626831055, + "learning_rate": 1.2123626373626375e-05, + "loss": 0.2541, + "step": 27574 + }, + { + "epoch": 75.75549450549451, + "grad_norm": 23.83152961730957, + "learning_rate": 1.2122252747252748e-05, + "loss": 0.5013, + "step": 27575 + }, + { + "epoch": 75.75824175824175, + "grad_norm": 17.067121505737305, + "learning_rate": 1.212087912087912e-05, + "loss": 0.415, + "step": 27576 + }, + { + "epoch": 75.76098901098901, + "grad_norm": 14.972450256347656, + "learning_rate": 1.2119505494505495e-05, + "loss": 0.2793, + "step": 27577 + }, + { + "epoch": 75.76373626373626, + "grad_norm": 5.564982891082764, + "learning_rate": 1.2118131868131869e-05, + "loss": 0.055, + "step": 27578 + }, + { + "epoch": 75.76648351648352, + "grad_norm": 24.011825561523438, + "learning_rate": 1.2116758241758242e-05, + "loss": 0.7704, + "step": 27579 + }, + { + "epoch": 75.76923076923077, + "grad_norm": 3.405468225479126, + "learning_rate": 1.2115384615384615e-05, + "loss": 0.0455, + "step": 27580 + }, + { + "epoch": 75.77197802197803, + "grad_norm": 8.24835205078125, + "learning_rate": 1.2114010989010989e-05, + "loss": 0.1443, + "step": 27581 + }, + { + "epoch": 75.77472527472527, + "grad_norm": 15.883355140686035, + "learning_rate": 1.2112637362637364e-05, + "loss": 0.4808, + "step": 27582 + }, + { + "epoch": 75.77747252747253, + "grad_norm": 14.62438678741455, + "learning_rate": 1.2111263736263736e-05, + "loss": 0.2611, + "step": 27583 + }, + { + "epoch": 75.78021978021978, + "grad_norm": 14.893927574157715, + "learning_rate": 1.210989010989011e-05, + "loss": 0.2789, + "step": 27584 + }, + { + "epoch": 75.78296703296704, + "grad_norm": 10.054070472717285, + "learning_rate": 1.2108516483516484e-05, + "loss": 0.1219, + "step": 27585 + }, + { + "epoch": 75.78571428571429, + "grad_norm": 5.641658782958984, + "learning_rate": 1.2107142857142858e-05, + "loss": 0.1418, + "step": 27586 + }, + { + "epoch": 75.78846153846153, + "grad_norm": 10.514274597167969, + "learning_rate": 1.2105769230769231e-05, + "loss": 0.1305, + "step": 27587 + }, + { + "epoch": 75.79120879120879, + "grad_norm": 20.28901481628418, + "learning_rate": 1.2104395604395604e-05, + "loss": 0.4592, + "step": 27588 + }, + { + "epoch": 75.79395604395604, + "grad_norm": 6.799310684204102, + "learning_rate": 1.210302197802198e-05, + "loss": 0.1195, + "step": 27589 + }, + { + "epoch": 75.7967032967033, + "grad_norm": 7.77886962890625, + "learning_rate": 1.2101648351648353e-05, + "loss": 0.1268, + "step": 27590 + }, + { + "epoch": 75.79945054945055, + "grad_norm": 5.517573833465576, + "learning_rate": 1.2100274725274725e-05, + "loss": 0.1862, + "step": 27591 + }, + { + "epoch": 75.8021978021978, + "grad_norm": 16.172868728637695, + "learning_rate": 1.20989010989011e-05, + "loss": 0.3737, + "step": 27592 + }, + { + "epoch": 75.80494505494505, + "grad_norm": 8.232943534851074, + "learning_rate": 1.2097527472527473e-05, + "loss": 0.1595, + "step": 27593 + }, + { + "epoch": 75.8076923076923, + "grad_norm": 6.667172431945801, + "learning_rate": 1.2096153846153847e-05, + "loss": 0.0704, + "step": 27594 + }, + { + "epoch": 75.81043956043956, + "grad_norm": 4.478461742401123, + "learning_rate": 1.209478021978022e-05, + "loss": 0.0439, + "step": 27595 + }, + { + "epoch": 75.81318681318682, + "grad_norm": 30.907865524291992, + "learning_rate": 1.2093406593406593e-05, + "loss": 1.0393, + "step": 27596 + }, + { + "epoch": 75.81593406593407, + "grad_norm": 12.992950439453125, + "learning_rate": 1.2092032967032968e-05, + "loss": 0.355, + "step": 27597 + }, + { + "epoch": 75.81868131868131, + "grad_norm": 14.630023956298828, + "learning_rate": 1.209065934065934e-05, + "loss": 0.3458, + "step": 27598 + }, + { + "epoch": 75.82142857142857, + "grad_norm": 15.167337417602539, + "learning_rate": 1.2089285714285715e-05, + "loss": 0.6144, + "step": 27599 + }, + { + "epoch": 75.82417582417582, + "grad_norm": 5.363810062408447, + "learning_rate": 1.2087912087912089e-05, + "loss": 0.1095, + "step": 27600 + }, + { + "epoch": 75.82692307692308, + "grad_norm": 7.037774085998535, + "learning_rate": 1.2086538461538462e-05, + "loss": 0.2354, + "step": 27601 + }, + { + "epoch": 75.82967032967034, + "grad_norm": 10.920676231384277, + "learning_rate": 1.2085164835164836e-05, + "loss": 0.1793, + "step": 27602 + }, + { + "epoch": 75.83241758241758, + "grad_norm": 2.7599165439605713, + "learning_rate": 1.2083791208791209e-05, + "loss": 0.0235, + "step": 27603 + }, + { + "epoch": 75.83516483516483, + "grad_norm": 14.159618377685547, + "learning_rate": 1.2082417582417584e-05, + "loss": 0.2928, + "step": 27604 + }, + { + "epoch": 75.83791208791209, + "grad_norm": 9.449769973754883, + "learning_rate": 1.2081043956043957e-05, + "loss": 0.2801, + "step": 27605 + }, + { + "epoch": 75.84065934065934, + "grad_norm": 21.7182674407959, + "learning_rate": 1.207967032967033e-05, + "loss": 0.3445, + "step": 27606 + }, + { + "epoch": 75.8434065934066, + "grad_norm": 9.766193389892578, + "learning_rate": 1.2078296703296704e-05, + "loss": 0.2325, + "step": 27607 + }, + { + "epoch": 75.84615384615384, + "grad_norm": 19.183605194091797, + "learning_rate": 1.2076923076923078e-05, + "loss": 0.2618, + "step": 27608 + }, + { + "epoch": 75.8489010989011, + "grad_norm": 4.286009311676025, + "learning_rate": 1.2075549450549451e-05, + "loss": 0.0653, + "step": 27609 + }, + { + "epoch": 75.85164835164835, + "grad_norm": 13.999478340148926, + "learning_rate": 1.2074175824175824e-05, + "loss": 0.3859, + "step": 27610 + }, + { + "epoch": 75.8543956043956, + "grad_norm": 11.622443199157715, + "learning_rate": 1.2072802197802198e-05, + "loss": 0.3035, + "step": 27611 + }, + { + "epoch": 75.85714285714286, + "grad_norm": 15.339598655700684, + "learning_rate": 1.2071428571428573e-05, + "loss": 0.15, + "step": 27612 + }, + { + "epoch": 75.85989010989012, + "grad_norm": 24.280338287353516, + "learning_rate": 1.2070054945054945e-05, + "loss": 0.5664, + "step": 27613 + }, + { + "epoch": 75.86263736263736, + "grad_norm": 1.2761605978012085, + "learning_rate": 1.2068681318681318e-05, + "loss": 0.0167, + "step": 27614 + }, + { + "epoch": 75.86538461538461, + "grad_norm": 6.268144607543945, + "learning_rate": 1.2067307692307693e-05, + "loss": 0.0902, + "step": 27615 + }, + { + "epoch": 75.86813186813187, + "grad_norm": 6.658174991607666, + "learning_rate": 1.2065934065934067e-05, + "loss": 0.0811, + "step": 27616 + }, + { + "epoch": 75.87087912087912, + "grad_norm": 11.093315124511719, + "learning_rate": 1.206456043956044e-05, + "loss": 0.1151, + "step": 27617 + }, + { + "epoch": 75.87362637362638, + "grad_norm": 9.924513816833496, + "learning_rate": 1.2063186813186813e-05, + "loss": 0.2402, + "step": 27618 + }, + { + "epoch": 75.87637362637362, + "grad_norm": 7.167882919311523, + "learning_rate": 1.2061813186813187e-05, + "loss": 0.0957, + "step": 27619 + }, + { + "epoch": 75.87912087912088, + "grad_norm": 13.662606239318848, + "learning_rate": 1.2060439560439562e-05, + "loss": 0.2857, + "step": 27620 + }, + { + "epoch": 75.88186813186813, + "grad_norm": 14.039726257324219, + "learning_rate": 1.2059065934065934e-05, + "loss": 0.3134, + "step": 27621 + }, + { + "epoch": 75.88461538461539, + "grad_norm": 2.4522640705108643, + "learning_rate": 1.2057692307692309e-05, + "loss": 0.0216, + "step": 27622 + }, + { + "epoch": 75.88736263736264, + "grad_norm": 10.989213943481445, + "learning_rate": 1.2056318681318682e-05, + "loss": 0.1667, + "step": 27623 + }, + { + "epoch": 75.89010989010988, + "grad_norm": 2.2760753631591797, + "learning_rate": 1.2054945054945056e-05, + "loss": 0.0367, + "step": 27624 + }, + { + "epoch": 75.89285714285714, + "grad_norm": 13.543061256408691, + "learning_rate": 1.2053571428571429e-05, + "loss": 0.1158, + "step": 27625 + }, + { + "epoch": 75.8956043956044, + "grad_norm": 19.49812889099121, + "learning_rate": 1.2052197802197802e-05, + "loss": 0.4411, + "step": 27626 + }, + { + "epoch": 75.89835164835165, + "grad_norm": 3.8603694438934326, + "learning_rate": 1.2050824175824177e-05, + "loss": 0.0742, + "step": 27627 + }, + { + "epoch": 75.9010989010989, + "grad_norm": 17.41162872314453, + "learning_rate": 1.204945054945055e-05, + "loss": 0.3847, + "step": 27628 + }, + { + "epoch": 75.90384615384616, + "grad_norm": 3.4386045932769775, + "learning_rate": 1.2048076923076923e-05, + "loss": 0.0734, + "step": 27629 + }, + { + "epoch": 75.9065934065934, + "grad_norm": 16.486392974853516, + "learning_rate": 1.2046703296703298e-05, + "loss": 0.3057, + "step": 27630 + }, + { + "epoch": 75.90934065934066, + "grad_norm": 6.066860198974609, + "learning_rate": 1.2045329670329671e-05, + "loss": 0.119, + "step": 27631 + }, + { + "epoch": 75.91208791208791, + "grad_norm": 14.17351245880127, + "learning_rate": 1.2043956043956045e-05, + "loss": 0.2602, + "step": 27632 + }, + { + "epoch": 75.91483516483517, + "grad_norm": 16.484853744506836, + "learning_rate": 1.2042582417582418e-05, + "loss": 0.2226, + "step": 27633 + }, + { + "epoch": 75.91758241758242, + "grad_norm": 1.9548250436782837, + "learning_rate": 1.2041208791208791e-05, + "loss": 0.0188, + "step": 27634 + }, + { + "epoch": 75.92032967032966, + "grad_norm": 16.295276641845703, + "learning_rate": 1.2039835164835166e-05, + "loss": 0.2493, + "step": 27635 + }, + { + "epoch": 75.92307692307692, + "grad_norm": 12.814349174499512, + "learning_rate": 1.2038461538461538e-05, + "loss": 0.2368, + "step": 27636 + }, + { + "epoch": 75.92582417582418, + "grad_norm": 25.50990104675293, + "learning_rate": 1.2037087912087913e-05, + "loss": 0.3826, + "step": 27637 + }, + { + "epoch": 75.92857142857143, + "grad_norm": 4.590110778808594, + "learning_rate": 1.2035714285714287e-05, + "loss": 0.0774, + "step": 27638 + }, + { + "epoch": 75.93131868131869, + "grad_norm": 9.579456329345703, + "learning_rate": 1.203434065934066e-05, + "loss": 0.1437, + "step": 27639 + }, + { + "epoch": 75.93406593406593, + "grad_norm": 6.863268852233887, + "learning_rate": 1.2032967032967033e-05, + "loss": 0.0862, + "step": 27640 + }, + { + "epoch": 75.93681318681318, + "grad_norm": 17.781158447265625, + "learning_rate": 1.2031593406593407e-05, + "loss": 0.2268, + "step": 27641 + }, + { + "epoch": 75.93956043956044, + "grad_norm": 20.16910171508789, + "learning_rate": 1.2030219780219782e-05, + "loss": 0.1957, + "step": 27642 + }, + { + "epoch": 75.9423076923077, + "grad_norm": 10.631265640258789, + "learning_rate": 1.2028846153846154e-05, + "loss": 0.2624, + "step": 27643 + }, + { + "epoch": 75.94505494505495, + "grad_norm": 12.8806734085083, + "learning_rate": 1.2027472527472527e-05, + "loss": 0.0984, + "step": 27644 + }, + { + "epoch": 75.9478021978022, + "grad_norm": 10.463781356811523, + "learning_rate": 1.2026098901098902e-05, + "loss": 0.083, + "step": 27645 + }, + { + "epoch": 75.95054945054945, + "grad_norm": 2.1721999645233154, + "learning_rate": 1.2024725274725276e-05, + "loss": 0.0282, + "step": 27646 + }, + { + "epoch": 75.9532967032967, + "grad_norm": 10.25808334350586, + "learning_rate": 1.2023351648351649e-05, + "loss": 0.1364, + "step": 27647 + }, + { + "epoch": 75.95604395604396, + "grad_norm": 21.03931999206543, + "learning_rate": 1.2021978021978022e-05, + "loss": 0.5356, + "step": 27648 + }, + { + "epoch": 75.95879120879121, + "grad_norm": 14.392040252685547, + "learning_rate": 1.2020604395604396e-05, + "loss": 0.3208, + "step": 27649 + }, + { + "epoch": 75.96153846153847, + "grad_norm": 22.748981475830078, + "learning_rate": 1.2019230769230771e-05, + "loss": 0.5695, + "step": 27650 + }, + { + "epoch": 75.96428571428571, + "grad_norm": 17.700429916381836, + "learning_rate": 1.2017857142857143e-05, + "loss": 0.4536, + "step": 27651 + }, + { + "epoch": 75.96703296703296, + "grad_norm": 22.062152862548828, + "learning_rate": 1.2016483516483518e-05, + "loss": 0.6779, + "step": 27652 + }, + { + "epoch": 75.96978021978022, + "grad_norm": 17.144641876220703, + "learning_rate": 1.2015109890109891e-05, + "loss": 0.729, + "step": 27653 + }, + { + "epoch": 75.97252747252747, + "grad_norm": 13.862194061279297, + "learning_rate": 1.2013736263736265e-05, + "loss": 0.2353, + "step": 27654 + }, + { + "epoch": 75.97527472527473, + "grad_norm": 21.385854721069336, + "learning_rate": 1.2012362637362638e-05, + "loss": 0.6119, + "step": 27655 + }, + { + "epoch": 75.97802197802197, + "grad_norm": 17.137020111083984, + "learning_rate": 1.2010989010989011e-05, + "loss": 0.3831, + "step": 27656 + }, + { + "epoch": 75.98076923076923, + "grad_norm": 5.435888767242432, + "learning_rate": 1.2009615384615386e-05, + "loss": 0.1118, + "step": 27657 + }, + { + "epoch": 75.98351648351648, + "grad_norm": 8.68952751159668, + "learning_rate": 1.2008241758241758e-05, + "loss": 0.1826, + "step": 27658 + }, + { + "epoch": 75.98626373626374, + "grad_norm": 5.091212272644043, + "learning_rate": 1.2006868131868132e-05, + "loss": 0.1221, + "step": 27659 + }, + { + "epoch": 75.98901098901099, + "grad_norm": 31.68926239013672, + "learning_rate": 1.2005494505494507e-05, + "loss": 0.2167, + "step": 27660 + }, + { + "epoch": 75.99175824175825, + "grad_norm": 7.038106918334961, + "learning_rate": 1.200412087912088e-05, + "loss": 0.1354, + "step": 27661 + }, + { + "epoch": 75.99450549450549, + "grad_norm": 3.692441701889038, + "learning_rate": 1.2002747252747253e-05, + "loss": 0.052, + "step": 27662 + }, + { + "epoch": 75.99725274725274, + "grad_norm": 14.466296195983887, + "learning_rate": 1.2001373626373627e-05, + "loss": 0.245, + "step": 27663 + }, + { + "epoch": 76.0, + "grad_norm": 77.49791717529297, + "learning_rate": 1.2e-05, + "loss": 0.7676, + "step": 27664 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.803030303030303, + "eval_f1": 0.8041934674657515, + "eval_f1_DuraRiadoRio_64x64": 0.796875, + "eval_f1_Mole_64x64": 0.7848101265822784, + "eval_f1_Quebrado_64x64": 0.845679012345679, + "eval_f1_RiadoRio_64x64": 0.7246376811594203, + "eval_f1_RioFechado_64x64": 0.8689655172413793, + "eval_loss": 0.9432891607284546, + "eval_precision": 0.8341690283918263, + "eval_precision_DuraRiadoRio_64x64": 0.9107142857142857, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.7611111111111111, + "eval_precision_RiadoRio_64x64": 0.6476683937823834, + "eval_precision_RioFechado_64x64": 0.8513513513513513, + "eval_recall": 0.8030495840540317, + "eval_recall_DuraRiadoRio_64x64": 0.7083333333333334, + "eval_recall_Mole_64x64": 0.6458333333333334, + "eval_recall_Quebrado_64x64": 0.9513888888888888, + "eval_recall_RiadoRio_64x64": 0.8223684210526315, + "eval_recall_RioFechado_64x64": 0.8873239436619719, + "eval_runtime": 1.7773, + "eval_samples_per_second": 408.482, + "eval_steps_per_second": 25.882, + "step": 27664 + }, + { + "epoch": 76.00274725274726, + "grad_norm": 6.007843017578125, + "learning_rate": 1.1998626373626375e-05, + "loss": 0.0892, + "step": 27665 + }, + { + "epoch": 76.00549450549451, + "grad_norm": 3.439479351043701, + "learning_rate": 1.1997252747252747e-05, + "loss": 0.0285, + "step": 27666 + }, + { + "epoch": 76.00824175824175, + "grad_norm": 16.796615600585938, + "learning_rate": 1.1995879120879122e-05, + "loss": 0.2698, + "step": 27667 + }, + { + "epoch": 76.01098901098901, + "grad_norm": 9.473564147949219, + "learning_rate": 1.1994505494505496e-05, + "loss": 0.1311, + "step": 27668 + }, + { + "epoch": 76.01373626373626, + "grad_norm": 12.52135181427002, + "learning_rate": 1.1993131868131869e-05, + "loss": 0.3236, + "step": 27669 + }, + { + "epoch": 76.01648351648352, + "grad_norm": 12.571338653564453, + "learning_rate": 1.1991758241758242e-05, + "loss": 0.292, + "step": 27670 + }, + { + "epoch": 76.01923076923077, + "grad_norm": 20.287927627563477, + "learning_rate": 1.1990384615384616e-05, + "loss": 0.3508, + "step": 27671 + }, + { + "epoch": 76.02197802197803, + "grad_norm": 12.546686172485352, + "learning_rate": 1.198901098901099e-05, + "loss": 0.5239, + "step": 27672 + }, + { + "epoch": 76.02472527472527, + "grad_norm": 6.72767448425293, + "learning_rate": 1.1987637362637363e-05, + "loss": 0.1438, + "step": 27673 + }, + { + "epoch": 76.02747252747253, + "grad_norm": 24.056671142578125, + "learning_rate": 1.1986263736263736e-05, + "loss": 0.5601, + "step": 27674 + }, + { + "epoch": 76.03021978021978, + "grad_norm": 10.85731315612793, + "learning_rate": 1.1984890109890111e-05, + "loss": 0.1576, + "step": 27675 + }, + { + "epoch": 76.03296703296704, + "grad_norm": 10.763745307922363, + "learning_rate": 1.1983516483516485e-05, + "loss": 0.2405, + "step": 27676 + }, + { + "epoch": 76.03571428571429, + "grad_norm": 22.76114273071289, + "learning_rate": 1.1982142857142856e-05, + "loss": 0.4867, + "step": 27677 + }, + { + "epoch": 76.03846153846153, + "grad_norm": 10.666071891784668, + "learning_rate": 1.1980769230769231e-05, + "loss": 0.2798, + "step": 27678 + }, + { + "epoch": 76.04120879120879, + "grad_norm": 11.015368461608887, + "learning_rate": 1.1979395604395605e-05, + "loss": 0.3421, + "step": 27679 + }, + { + "epoch": 76.04395604395604, + "grad_norm": 7.51235294342041, + "learning_rate": 1.197802197802198e-05, + "loss": 0.1921, + "step": 27680 + }, + { + "epoch": 76.0467032967033, + "grad_norm": 6.517968654632568, + "learning_rate": 1.1976648351648352e-05, + "loss": 0.0794, + "step": 27681 + }, + { + "epoch": 76.04945054945055, + "grad_norm": 12.314605712890625, + "learning_rate": 1.1975274725274725e-05, + "loss": 0.296, + "step": 27682 + }, + { + "epoch": 76.0521978021978, + "grad_norm": 7.899534225463867, + "learning_rate": 1.19739010989011e-05, + "loss": 0.0772, + "step": 27683 + }, + { + "epoch": 76.05494505494505, + "grad_norm": 20.53667449951172, + "learning_rate": 1.1972527472527474e-05, + "loss": 0.3814, + "step": 27684 + }, + { + "epoch": 76.0576923076923, + "grad_norm": 7.575638294219971, + "learning_rate": 1.1971153846153847e-05, + "loss": 0.1362, + "step": 27685 + }, + { + "epoch": 76.06043956043956, + "grad_norm": 13.318650245666504, + "learning_rate": 1.196978021978022e-05, + "loss": 0.1228, + "step": 27686 + }, + { + "epoch": 76.06318681318682, + "grad_norm": 9.475054740905762, + "learning_rate": 1.1968406593406594e-05, + "loss": 0.092, + "step": 27687 + }, + { + "epoch": 76.06593406593407, + "grad_norm": 21.68805694580078, + "learning_rate": 1.1967032967032967e-05, + "loss": 0.4485, + "step": 27688 + }, + { + "epoch": 76.06868131868131, + "grad_norm": 18.713977813720703, + "learning_rate": 1.196565934065934e-05, + "loss": 0.3051, + "step": 27689 + }, + { + "epoch": 76.07142857142857, + "grad_norm": 8.547182083129883, + "learning_rate": 1.1964285714285716e-05, + "loss": 0.1525, + "step": 27690 + }, + { + "epoch": 76.07417582417582, + "grad_norm": 16.62696075439453, + "learning_rate": 1.1962912087912089e-05, + "loss": 0.2717, + "step": 27691 + }, + { + "epoch": 76.07692307692308, + "grad_norm": 15.115550994873047, + "learning_rate": 1.196153846153846e-05, + "loss": 0.2244, + "step": 27692 + }, + { + "epoch": 76.07967032967034, + "grad_norm": 11.660889625549316, + "learning_rate": 1.1960164835164836e-05, + "loss": 0.1562, + "step": 27693 + }, + { + "epoch": 76.08241758241758, + "grad_norm": 10.486328125, + "learning_rate": 1.195879120879121e-05, + "loss": 0.1765, + "step": 27694 + }, + { + "epoch": 76.08516483516483, + "grad_norm": 16.082944869995117, + "learning_rate": 1.1957417582417583e-05, + "loss": 0.3883, + "step": 27695 + }, + { + "epoch": 76.08791208791209, + "grad_norm": 3.1973578929901123, + "learning_rate": 1.1956043956043956e-05, + "loss": 0.0369, + "step": 27696 + }, + { + "epoch": 76.09065934065934, + "grad_norm": 17.515235900878906, + "learning_rate": 1.195467032967033e-05, + "loss": 0.3584, + "step": 27697 + }, + { + "epoch": 76.0934065934066, + "grad_norm": 7.933355331420898, + "learning_rate": 1.1953296703296705e-05, + "loss": 0.1107, + "step": 27698 + }, + { + "epoch": 76.09615384615384, + "grad_norm": 17.098936080932617, + "learning_rate": 1.1951923076923078e-05, + "loss": 0.3619, + "step": 27699 + }, + { + "epoch": 76.0989010989011, + "grad_norm": 1.853571891784668, + "learning_rate": 1.1950549450549451e-05, + "loss": 0.0276, + "step": 27700 + }, + { + "epoch": 76.10164835164835, + "grad_norm": 14.971683502197266, + "learning_rate": 1.1949175824175825e-05, + "loss": 0.4782, + "step": 27701 + }, + { + "epoch": 76.1043956043956, + "grad_norm": 10.679974555969238, + "learning_rate": 1.1947802197802198e-05, + "loss": 0.1361, + "step": 27702 + }, + { + "epoch": 76.10714285714286, + "grad_norm": 17.562236785888672, + "learning_rate": 1.1946428571428572e-05, + "loss": 0.5077, + "step": 27703 + }, + { + "epoch": 76.10989010989012, + "grad_norm": 10.601365089416504, + "learning_rate": 1.1945054945054945e-05, + "loss": 0.2827, + "step": 27704 + }, + { + "epoch": 76.11263736263736, + "grad_norm": 14.10165023803711, + "learning_rate": 1.194368131868132e-05, + "loss": 0.2675, + "step": 27705 + }, + { + "epoch": 76.11538461538461, + "grad_norm": 10.047730445861816, + "learning_rate": 1.1942307692307694e-05, + "loss": 0.1041, + "step": 27706 + }, + { + "epoch": 76.11813186813187, + "grad_norm": 11.209776878356934, + "learning_rate": 1.1940934065934065e-05, + "loss": 0.184, + "step": 27707 + }, + { + "epoch": 76.12087912087912, + "grad_norm": 11.284123420715332, + "learning_rate": 1.193956043956044e-05, + "loss": 0.2595, + "step": 27708 + }, + { + "epoch": 76.12362637362638, + "grad_norm": 9.321794509887695, + "learning_rate": 1.1938186813186814e-05, + "loss": 0.1493, + "step": 27709 + }, + { + "epoch": 76.12637362637362, + "grad_norm": 20.203092575073242, + "learning_rate": 1.1936813186813187e-05, + "loss": 0.4641, + "step": 27710 + }, + { + "epoch": 76.12912087912088, + "grad_norm": 10.727778434753418, + "learning_rate": 1.193543956043956e-05, + "loss": 0.219, + "step": 27711 + }, + { + "epoch": 76.13186813186813, + "grad_norm": 3.1033201217651367, + "learning_rate": 1.1934065934065934e-05, + "loss": 0.0273, + "step": 27712 + }, + { + "epoch": 76.13461538461539, + "grad_norm": 3.8083760738372803, + "learning_rate": 1.1932692307692309e-05, + "loss": 0.0395, + "step": 27713 + }, + { + "epoch": 76.13736263736264, + "grad_norm": 13.30547046661377, + "learning_rate": 1.1931318681318683e-05, + "loss": 0.1005, + "step": 27714 + }, + { + "epoch": 76.14010989010988, + "grad_norm": 7.189538478851318, + "learning_rate": 1.1929945054945056e-05, + "loss": 0.0929, + "step": 27715 + }, + { + "epoch": 76.14285714285714, + "grad_norm": 4.827399730682373, + "learning_rate": 1.192857142857143e-05, + "loss": 0.0491, + "step": 27716 + }, + { + "epoch": 76.1456043956044, + "grad_norm": 23.471464157104492, + "learning_rate": 1.1927197802197803e-05, + "loss": 0.8819, + "step": 27717 + }, + { + "epoch": 76.14835164835165, + "grad_norm": 7.938848972320557, + "learning_rate": 1.1925824175824176e-05, + "loss": 0.104, + "step": 27718 + }, + { + "epoch": 76.1510989010989, + "grad_norm": 18.75218391418457, + "learning_rate": 1.192445054945055e-05, + "loss": 0.5136, + "step": 27719 + }, + { + "epoch": 76.15384615384616, + "grad_norm": 6.868486404418945, + "learning_rate": 1.1923076923076925e-05, + "loss": 0.1304, + "step": 27720 + }, + { + "epoch": 76.1565934065934, + "grad_norm": 14.3102388381958, + "learning_rate": 1.1921703296703298e-05, + "loss": 0.3299, + "step": 27721 + }, + { + "epoch": 76.15934065934066, + "grad_norm": 12.704338073730469, + "learning_rate": 1.192032967032967e-05, + "loss": 0.2982, + "step": 27722 + }, + { + "epoch": 76.16208791208791, + "grad_norm": 16.519380569458008, + "learning_rate": 1.1918956043956045e-05, + "loss": 0.3064, + "step": 27723 + }, + { + "epoch": 76.16483516483517, + "grad_norm": 11.90650749206543, + "learning_rate": 1.1917582417582418e-05, + "loss": 0.1119, + "step": 27724 + }, + { + "epoch": 76.16758241758242, + "grad_norm": 13.709725379943848, + "learning_rate": 1.1916208791208792e-05, + "loss": 0.2233, + "step": 27725 + }, + { + "epoch": 76.17032967032966, + "grad_norm": 2.810703992843628, + "learning_rate": 1.1914835164835165e-05, + "loss": 0.0526, + "step": 27726 + }, + { + "epoch": 76.17307692307692, + "grad_norm": 21.180904388427734, + "learning_rate": 1.1913461538461538e-05, + "loss": 0.8323, + "step": 27727 + }, + { + "epoch": 76.17582417582418, + "grad_norm": 8.042830467224121, + "learning_rate": 1.1912087912087914e-05, + "loss": 0.0582, + "step": 27728 + }, + { + "epoch": 76.17857142857143, + "grad_norm": 15.237545013427734, + "learning_rate": 1.1910714285714287e-05, + "loss": 0.2487, + "step": 27729 + }, + { + "epoch": 76.18131868131869, + "grad_norm": 5.8518877029418945, + "learning_rate": 1.1909340659340659e-05, + "loss": 0.0975, + "step": 27730 + }, + { + "epoch": 76.18406593406593, + "grad_norm": 9.19636058807373, + "learning_rate": 1.1907967032967034e-05, + "loss": 0.1237, + "step": 27731 + }, + { + "epoch": 76.18681318681318, + "grad_norm": 21.183765411376953, + "learning_rate": 1.1906593406593407e-05, + "loss": 0.3868, + "step": 27732 + }, + { + "epoch": 76.18956043956044, + "grad_norm": 12.663225173950195, + "learning_rate": 1.190521978021978e-05, + "loss": 0.1606, + "step": 27733 + }, + { + "epoch": 76.1923076923077, + "grad_norm": 10.073728561401367, + "learning_rate": 1.1903846153846154e-05, + "loss": 0.1429, + "step": 27734 + }, + { + "epoch": 76.19505494505495, + "grad_norm": 3.793501853942871, + "learning_rate": 1.1902472527472527e-05, + "loss": 0.0522, + "step": 27735 + }, + { + "epoch": 76.1978021978022, + "grad_norm": 7.170633316040039, + "learning_rate": 1.1901098901098903e-05, + "loss": 0.0954, + "step": 27736 + }, + { + "epoch": 76.20054945054945, + "grad_norm": 8.984384536743164, + "learning_rate": 1.1899725274725274e-05, + "loss": 0.1934, + "step": 27737 + }, + { + "epoch": 76.2032967032967, + "grad_norm": 7.851601600646973, + "learning_rate": 1.189835164835165e-05, + "loss": 0.1186, + "step": 27738 + }, + { + "epoch": 76.20604395604396, + "grad_norm": 2.48773455619812, + "learning_rate": 1.1896978021978023e-05, + "loss": 0.0299, + "step": 27739 + }, + { + "epoch": 76.20879120879121, + "grad_norm": 11.6572904586792, + "learning_rate": 1.1895604395604396e-05, + "loss": 0.3364, + "step": 27740 + }, + { + "epoch": 76.21153846153847, + "grad_norm": 3.898864984512329, + "learning_rate": 1.189423076923077e-05, + "loss": 0.0401, + "step": 27741 + }, + { + "epoch": 76.21428571428571, + "grad_norm": 12.422436714172363, + "learning_rate": 1.1892857142857143e-05, + "loss": 0.2013, + "step": 27742 + }, + { + "epoch": 76.21703296703296, + "grad_norm": 12.533319473266602, + "learning_rate": 1.1891483516483518e-05, + "loss": 0.3147, + "step": 27743 + }, + { + "epoch": 76.21978021978022, + "grad_norm": 15.683150291442871, + "learning_rate": 1.189010989010989e-05, + "loss": 0.4166, + "step": 27744 + }, + { + "epoch": 76.22252747252747, + "grad_norm": 18.371688842773438, + "learning_rate": 1.1888736263736263e-05, + "loss": 0.3073, + "step": 27745 + }, + { + "epoch": 76.22527472527473, + "grad_norm": 5.594099998474121, + "learning_rate": 1.1887362637362638e-05, + "loss": 0.0829, + "step": 27746 + }, + { + "epoch": 76.22802197802197, + "grad_norm": 5.389345645904541, + "learning_rate": 1.1885989010989012e-05, + "loss": 0.1053, + "step": 27747 + }, + { + "epoch": 76.23076923076923, + "grad_norm": 16.0105037689209, + "learning_rate": 1.1884615384615385e-05, + "loss": 0.2837, + "step": 27748 + }, + { + "epoch": 76.23351648351648, + "grad_norm": 16.666547775268555, + "learning_rate": 1.1883241758241759e-05, + "loss": 0.2154, + "step": 27749 + }, + { + "epoch": 76.23626373626374, + "grad_norm": 13.15195369720459, + "learning_rate": 1.1881868131868132e-05, + "loss": 0.4889, + "step": 27750 + }, + { + "epoch": 76.23901098901099, + "grad_norm": 8.18545913696289, + "learning_rate": 1.1880494505494507e-05, + "loss": 0.0643, + "step": 27751 + }, + { + "epoch": 76.24175824175825, + "grad_norm": 9.265267372131348, + "learning_rate": 1.1879120879120879e-05, + "loss": 0.1516, + "step": 27752 + }, + { + "epoch": 76.24450549450549, + "grad_norm": 4.591742515563965, + "learning_rate": 1.1877747252747254e-05, + "loss": 0.0882, + "step": 27753 + }, + { + "epoch": 76.24725274725274, + "grad_norm": 12.439947128295898, + "learning_rate": 1.1876373626373627e-05, + "loss": 0.1158, + "step": 27754 + }, + { + "epoch": 76.25, + "grad_norm": 9.005906105041504, + "learning_rate": 1.1875e-05, + "loss": 0.2017, + "step": 27755 + }, + { + "epoch": 76.25274725274726, + "grad_norm": 14.144438743591309, + "learning_rate": 1.1873626373626374e-05, + "loss": 0.3011, + "step": 27756 + }, + { + "epoch": 76.25549450549451, + "grad_norm": 11.124992370605469, + "learning_rate": 1.1872252747252747e-05, + "loss": 0.1584, + "step": 27757 + }, + { + "epoch": 76.25824175824175, + "grad_norm": 7.658742427825928, + "learning_rate": 1.1870879120879123e-05, + "loss": 0.1361, + "step": 27758 + }, + { + "epoch": 76.26098901098901, + "grad_norm": 7.409960746765137, + "learning_rate": 1.1869505494505494e-05, + "loss": 0.1678, + "step": 27759 + }, + { + "epoch": 76.26373626373626, + "grad_norm": 11.483315467834473, + "learning_rate": 1.1868131868131868e-05, + "loss": 0.1693, + "step": 27760 + }, + { + "epoch": 76.26648351648352, + "grad_norm": 5.509198188781738, + "learning_rate": 1.1866758241758243e-05, + "loss": 0.0882, + "step": 27761 + }, + { + "epoch": 76.26923076923077, + "grad_norm": 8.160770416259766, + "learning_rate": 1.1865384615384616e-05, + "loss": 0.1114, + "step": 27762 + }, + { + "epoch": 76.27197802197803, + "grad_norm": 2.61444354057312, + "learning_rate": 1.186401098901099e-05, + "loss": 0.0415, + "step": 27763 + }, + { + "epoch": 76.27472527472527, + "grad_norm": 6.281050205230713, + "learning_rate": 1.1862637362637363e-05, + "loss": 0.0906, + "step": 27764 + }, + { + "epoch": 76.27747252747253, + "grad_norm": 6.369194984436035, + "learning_rate": 1.1861263736263736e-05, + "loss": 0.1105, + "step": 27765 + }, + { + "epoch": 76.28021978021978, + "grad_norm": 14.325128555297852, + "learning_rate": 1.1859890109890112e-05, + "loss": 0.2927, + "step": 27766 + }, + { + "epoch": 76.28296703296704, + "grad_norm": 13.991629600524902, + "learning_rate": 1.1858516483516483e-05, + "loss": 0.161, + "step": 27767 + }, + { + "epoch": 76.28571428571429, + "grad_norm": 7.504533290863037, + "learning_rate": 1.1857142857142858e-05, + "loss": 0.1389, + "step": 27768 + }, + { + "epoch": 76.28846153846153, + "grad_norm": 10.373007774353027, + "learning_rate": 1.1855769230769232e-05, + "loss": 0.2309, + "step": 27769 + }, + { + "epoch": 76.29120879120879, + "grad_norm": 15.288335800170898, + "learning_rate": 1.1854395604395605e-05, + "loss": 0.3466, + "step": 27770 + }, + { + "epoch": 76.29395604395604, + "grad_norm": 8.447917938232422, + "learning_rate": 1.1853021978021979e-05, + "loss": 0.1032, + "step": 27771 + }, + { + "epoch": 76.2967032967033, + "grad_norm": 9.953069686889648, + "learning_rate": 1.1851648351648352e-05, + "loss": 0.2093, + "step": 27772 + }, + { + "epoch": 76.29945054945055, + "grad_norm": 9.301650047302246, + "learning_rate": 1.1850274725274727e-05, + "loss": 0.1005, + "step": 27773 + }, + { + "epoch": 76.3021978021978, + "grad_norm": 13.138608932495117, + "learning_rate": 1.1848901098901099e-05, + "loss": 0.1997, + "step": 27774 + }, + { + "epoch": 76.30494505494505, + "grad_norm": 15.499693870544434, + "learning_rate": 1.1847527472527472e-05, + "loss": 0.4109, + "step": 27775 + }, + { + "epoch": 76.3076923076923, + "grad_norm": 19.647293090820312, + "learning_rate": 1.1846153846153847e-05, + "loss": 0.4942, + "step": 27776 + }, + { + "epoch": 76.31043956043956, + "grad_norm": 16.200578689575195, + "learning_rate": 1.184478021978022e-05, + "loss": 0.5786, + "step": 27777 + }, + { + "epoch": 76.31318681318682, + "grad_norm": 2.485560417175293, + "learning_rate": 1.1843406593406594e-05, + "loss": 0.0361, + "step": 27778 + }, + { + "epoch": 76.31593406593407, + "grad_norm": 12.113260269165039, + "learning_rate": 1.1842032967032967e-05, + "loss": 0.1549, + "step": 27779 + }, + { + "epoch": 76.31868131868131, + "grad_norm": 7.6711530685424805, + "learning_rate": 1.1840659340659341e-05, + "loss": 0.0737, + "step": 27780 + }, + { + "epoch": 76.32142857142857, + "grad_norm": 5.088940143585205, + "learning_rate": 1.1839285714285716e-05, + "loss": 0.0629, + "step": 27781 + }, + { + "epoch": 76.32417582417582, + "grad_norm": 9.354793548583984, + "learning_rate": 1.1837912087912088e-05, + "loss": 0.1572, + "step": 27782 + }, + { + "epoch": 76.32692307692308, + "grad_norm": 15.973027229309082, + "learning_rate": 1.1836538461538463e-05, + "loss": 0.4004, + "step": 27783 + }, + { + "epoch": 76.32967032967034, + "grad_norm": 3.3723981380462646, + "learning_rate": 1.1835164835164836e-05, + "loss": 0.0686, + "step": 27784 + }, + { + "epoch": 76.33241758241758, + "grad_norm": 8.512296676635742, + "learning_rate": 1.183379120879121e-05, + "loss": 0.1138, + "step": 27785 + }, + { + "epoch": 76.33516483516483, + "grad_norm": 21.872962951660156, + "learning_rate": 1.1832417582417583e-05, + "loss": 0.3199, + "step": 27786 + }, + { + "epoch": 76.33791208791209, + "grad_norm": 16.22331428527832, + "learning_rate": 1.1831043956043956e-05, + "loss": 0.5706, + "step": 27787 + }, + { + "epoch": 76.34065934065934, + "grad_norm": 1.266819953918457, + "learning_rate": 1.182967032967033e-05, + "loss": 0.0108, + "step": 27788 + }, + { + "epoch": 76.3434065934066, + "grad_norm": 4.223174095153809, + "learning_rate": 1.1828296703296703e-05, + "loss": 0.043, + "step": 27789 + }, + { + "epoch": 76.34615384615384, + "grad_norm": 19.012170791625977, + "learning_rate": 1.1826923076923077e-05, + "loss": 0.5459, + "step": 27790 + }, + { + "epoch": 76.3489010989011, + "grad_norm": 6.010416030883789, + "learning_rate": 1.1825549450549452e-05, + "loss": 0.1185, + "step": 27791 + }, + { + "epoch": 76.35164835164835, + "grad_norm": 19.6817626953125, + "learning_rate": 1.1824175824175825e-05, + "loss": 0.7764, + "step": 27792 + }, + { + "epoch": 76.3543956043956, + "grad_norm": 13.884453773498535, + "learning_rate": 1.1822802197802197e-05, + "loss": 0.1973, + "step": 27793 + }, + { + "epoch": 76.35714285714286, + "grad_norm": 10.54957103729248, + "learning_rate": 1.1821428571428572e-05, + "loss": 0.1103, + "step": 27794 + }, + { + "epoch": 76.35989010989012, + "grad_norm": 14.359758377075195, + "learning_rate": 1.1820054945054945e-05, + "loss": 0.3104, + "step": 27795 + }, + { + "epoch": 76.36263736263736, + "grad_norm": 10.84544849395752, + "learning_rate": 1.181868131868132e-05, + "loss": 0.1972, + "step": 27796 + }, + { + "epoch": 76.36538461538461, + "grad_norm": 9.134839057922363, + "learning_rate": 1.1817307692307692e-05, + "loss": 0.1613, + "step": 27797 + }, + { + "epoch": 76.36813186813187, + "grad_norm": 7.5096588134765625, + "learning_rate": 1.1815934065934066e-05, + "loss": 0.097, + "step": 27798 + }, + { + "epoch": 76.37087912087912, + "grad_norm": 1.2565767765045166, + "learning_rate": 1.181456043956044e-05, + "loss": 0.0203, + "step": 27799 + }, + { + "epoch": 76.37362637362638, + "grad_norm": 7.275271892547607, + "learning_rate": 1.1813186813186814e-05, + "loss": 0.1996, + "step": 27800 + }, + { + "epoch": 76.37637362637362, + "grad_norm": 10.14287281036377, + "learning_rate": 1.1811813186813188e-05, + "loss": 0.1493, + "step": 27801 + }, + { + "epoch": 76.37912087912088, + "grad_norm": 15.195306777954102, + "learning_rate": 1.1810439560439561e-05, + "loss": 0.6176, + "step": 27802 + }, + { + "epoch": 76.38186813186813, + "grad_norm": 5.402720928192139, + "learning_rate": 1.1809065934065934e-05, + "loss": 0.0831, + "step": 27803 + }, + { + "epoch": 76.38461538461539, + "grad_norm": 15.806994438171387, + "learning_rate": 1.1807692307692308e-05, + "loss": 0.2135, + "step": 27804 + }, + { + "epoch": 76.38736263736264, + "grad_norm": 6.3467912673950195, + "learning_rate": 1.1806318681318681e-05, + "loss": 0.1129, + "step": 27805 + }, + { + "epoch": 76.39010989010988, + "grad_norm": 11.220458984375, + "learning_rate": 1.1804945054945056e-05, + "loss": 0.1364, + "step": 27806 + }, + { + "epoch": 76.39285714285714, + "grad_norm": 2.516345262527466, + "learning_rate": 1.180357142857143e-05, + "loss": 0.0313, + "step": 27807 + }, + { + "epoch": 76.3956043956044, + "grad_norm": 7.476109981536865, + "learning_rate": 1.1802197802197801e-05, + "loss": 0.1344, + "step": 27808 + }, + { + "epoch": 76.39835164835165, + "grad_norm": 17.92192840576172, + "learning_rate": 1.1800824175824176e-05, + "loss": 0.5755, + "step": 27809 + }, + { + "epoch": 76.4010989010989, + "grad_norm": 12.480817794799805, + "learning_rate": 1.179945054945055e-05, + "loss": 0.32, + "step": 27810 + }, + { + "epoch": 76.40384615384616, + "grad_norm": 29.659982681274414, + "learning_rate": 1.1798076923076925e-05, + "loss": 0.5461, + "step": 27811 + }, + { + "epoch": 76.4065934065934, + "grad_norm": 3.126723527908325, + "learning_rate": 1.1796703296703297e-05, + "loss": 0.0302, + "step": 27812 + }, + { + "epoch": 76.40934065934066, + "grad_norm": 23.3330020904541, + "learning_rate": 1.179532967032967e-05, + "loss": 0.397, + "step": 27813 + }, + { + "epoch": 76.41208791208791, + "grad_norm": 13.623754501342773, + "learning_rate": 1.1793956043956045e-05, + "loss": 0.156, + "step": 27814 + }, + { + "epoch": 76.41483516483517, + "grad_norm": 27.563182830810547, + "learning_rate": 1.1792582417582419e-05, + "loss": 0.7165, + "step": 27815 + }, + { + "epoch": 76.41758241758242, + "grad_norm": 27.98810577392578, + "learning_rate": 1.1791208791208792e-05, + "loss": 0.7179, + "step": 27816 + }, + { + "epoch": 76.42032967032966, + "grad_norm": 8.782541275024414, + "learning_rate": 1.1789835164835165e-05, + "loss": 0.0987, + "step": 27817 + }, + { + "epoch": 76.42307692307692, + "grad_norm": 8.265637397766113, + "learning_rate": 1.1788461538461539e-05, + "loss": 0.2055, + "step": 27818 + }, + { + "epoch": 76.42582417582418, + "grad_norm": 9.417220115661621, + "learning_rate": 1.1787087912087912e-05, + "loss": 0.2319, + "step": 27819 + }, + { + "epoch": 76.42857142857143, + "grad_norm": 9.49638557434082, + "learning_rate": 1.1785714285714286e-05, + "loss": 0.2798, + "step": 27820 + }, + { + "epoch": 76.43131868131869, + "grad_norm": 5.4958062171936035, + "learning_rate": 1.178434065934066e-05, + "loss": 0.115, + "step": 27821 + }, + { + "epoch": 76.43406593406593, + "grad_norm": 6.09626579284668, + "learning_rate": 1.1782967032967034e-05, + "loss": 0.0631, + "step": 27822 + }, + { + "epoch": 76.43681318681318, + "grad_norm": 7.429718494415283, + "learning_rate": 1.1781593406593406e-05, + "loss": 0.1201, + "step": 27823 + }, + { + "epoch": 76.43956043956044, + "grad_norm": 13.241510391235352, + "learning_rate": 1.1780219780219781e-05, + "loss": 0.4327, + "step": 27824 + }, + { + "epoch": 76.4423076923077, + "grad_norm": 2.6996049880981445, + "learning_rate": 1.1778846153846154e-05, + "loss": 0.024, + "step": 27825 + }, + { + "epoch": 76.44505494505495, + "grad_norm": 6.182109832763672, + "learning_rate": 1.177747252747253e-05, + "loss": 0.0515, + "step": 27826 + }, + { + "epoch": 76.4478021978022, + "grad_norm": 8.76811695098877, + "learning_rate": 1.1776098901098901e-05, + "loss": 0.125, + "step": 27827 + }, + { + "epoch": 76.45054945054945, + "grad_norm": 14.587324142456055, + "learning_rate": 1.1774725274725275e-05, + "loss": 0.2367, + "step": 27828 + }, + { + "epoch": 76.4532967032967, + "grad_norm": 16.047412872314453, + "learning_rate": 1.177335164835165e-05, + "loss": 0.4219, + "step": 27829 + }, + { + "epoch": 76.45604395604396, + "grad_norm": 4.910309791564941, + "learning_rate": 1.1771978021978023e-05, + "loss": 0.0487, + "step": 27830 + }, + { + "epoch": 76.45879120879121, + "grad_norm": 14.04832649230957, + "learning_rate": 1.1770604395604397e-05, + "loss": 0.4046, + "step": 27831 + }, + { + "epoch": 76.46153846153847, + "grad_norm": 23.37450408935547, + "learning_rate": 1.176923076923077e-05, + "loss": 0.7045, + "step": 27832 + }, + { + "epoch": 76.46428571428571, + "grad_norm": 4.417473316192627, + "learning_rate": 1.1767857142857143e-05, + "loss": 0.078, + "step": 27833 + }, + { + "epoch": 76.46703296703296, + "grad_norm": 11.487613677978516, + "learning_rate": 1.1766483516483517e-05, + "loss": 0.0985, + "step": 27834 + }, + { + "epoch": 76.46978021978022, + "grad_norm": 15.316009521484375, + "learning_rate": 1.176510989010989e-05, + "loss": 0.2656, + "step": 27835 + }, + { + "epoch": 76.47252747252747, + "grad_norm": 7.930287837982178, + "learning_rate": 1.1763736263736265e-05, + "loss": 0.1107, + "step": 27836 + }, + { + "epoch": 76.47527472527473, + "grad_norm": 11.305974960327148, + "learning_rate": 1.1762362637362639e-05, + "loss": 0.2468, + "step": 27837 + }, + { + "epoch": 76.47802197802197, + "grad_norm": 10.345403671264648, + "learning_rate": 1.176098901098901e-05, + "loss": 0.2499, + "step": 27838 + }, + { + "epoch": 76.48076923076923, + "grad_norm": 17.02684783935547, + "learning_rate": 1.1759615384615385e-05, + "loss": 0.3631, + "step": 27839 + }, + { + "epoch": 76.48351648351648, + "grad_norm": 4.77756404876709, + "learning_rate": 1.1758241758241759e-05, + "loss": 0.0474, + "step": 27840 + }, + { + "epoch": 76.48626373626374, + "grad_norm": 14.18052864074707, + "learning_rate": 1.1756868131868132e-05, + "loss": 0.1605, + "step": 27841 + }, + { + "epoch": 76.48901098901099, + "grad_norm": 14.002896308898926, + "learning_rate": 1.1755494505494506e-05, + "loss": 0.1759, + "step": 27842 + }, + { + "epoch": 76.49175824175825, + "grad_norm": 11.05107307434082, + "learning_rate": 1.1754120879120879e-05, + "loss": 0.1391, + "step": 27843 + }, + { + "epoch": 76.49450549450549, + "grad_norm": 13.267518997192383, + "learning_rate": 1.1752747252747254e-05, + "loss": 0.1773, + "step": 27844 + }, + { + "epoch": 76.49725274725274, + "grad_norm": 20.011754989624023, + "learning_rate": 1.1751373626373628e-05, + "loss": 0.4048, + "step": 27845 + }, + { + "epoch": 76.5, + "grad_norm": 12.220183372497559, + "learning_rate": 1.175e-05, + "loss": 0.2623, + "step": 27846 + }, + { + "epoch": 76.50274725274726, + "grad_norm": 8.239253997802734, + "learning_rate": 1.1748626373626374e-05, + "loss": 0.1766, + "step": 27847 + }, + { + "epoch": 76.50549450549451, + "grad_norm": 13.577303886413574, + "learning_rate": 1.1747252747252748e-05, + "loss": 0.2435, + "step": 27848 + }, + { + "epoch": 76.50824175824175, + "grad_norm": 13.596025466918945, + "learning_rate": 1.1745879120879121e-05, + "loss": 0.2203, + "step": 27849 + }, + { + "epoch": 76.51098901098901, + "grad_norm": 7.876188278198242, + "learning_rate": 1.1744505494505495e-05, + "loss": 0.0862, + "step": 27850 + }, + { + "epoch": 76.51373626373626, + "grad_norm": 3.8701159954071045, + "learning_rate": 1.1743131868131868e-05, + "loss": 0.0556, + "step": 27851 + }, + { + "epoch": 76.51648351648352, + "grad_norm": 8.150602340698242, + "learning_rate": 1.1741758241758243e-05, + "loss": 0.1204, + "step": 27852 + }, + { + "epoch": 76.51923076923077, + "grad_norm": 7.179851055145264, + "learning_rate": 1.1740384615384615e-05, + "loss": 0.0822, + "step": 27853 + }, + { + "epoch": 76.52197802197803, + "grad_norm": 2.4083328247070312, + "learning_rate": 1.173901098901099e-05, + "loss": 0.0334, + "step": 27854 + }, + { + "epoch": 76.52472527472527, + "grad_norm": 9.962590217590332, + "learning_rate": 1.1737637362637363e-05, + "loss": 0.0903, + "step": 27855 + }, + { + "epoch": 76.52747252747253, + "grad_norm": 5.574549198150635, + "learning_rate": 1.1736263736263737e-05, + "loss": 0.0742, + "step": 27856 + }, + { + "epoch": 76.53021978021978, + "grad_norm": 17.131559371948242, + "learning_rate": 1.173489010989011e-05, + "loss": 0.3521, + "step": 27857 + }, + { + "epoch": 76.53296703296704, + "grad_norm": 7.864840030670166, + "learning_rate": 1.1733516483516484e-05, + "loss": 0.1288, + "step": 27858 + }, + { + "epoch": 76.53571428571429, + "grad_norm": 13.5128173828125, + "learning_rate": 1.1732142857142859e-05, + "loss": 0.1726, + "step": 27859 + }, + { + "epoch": 76.53846153846153, + "grad_norm": 8.99718189239502, + "learning_rate": 1.1730769230769232e-05, + "loss": 0.1813, + "step": 27860 + }, + { + "epoch": 76.54120879120879, + "grad_norm": 6.039210796356201, + "learning_rate": 1.1729395604395604e-05, + "loss": 0.0691, + "step": 27861 + }, + { + "epoch": 76.54395604395604, + "grad_norm": 2.682889461517334, + "learning_rate": 1.1728021978021979e-05, + "loss": 0.0285, + "step": 27862 + }, + { + "epoch": 76.5467032967033, + "grad_norm": 14.99288272857666, + "learning_rate": 1.1726648351648352e-05, + "loss": 0.2031, + "step": 27863 + }, + { + "epoch": 76.54945054945055, + "grad_norm": 2.945359706878662, + "learning_rate": 1.1725274725274726e-05, + "loss": 0.0545, + "step": 27864 + }, + { + "epoch": 76.5521978021978, + "grad_norm": 16.07857322692871, + "learning_rate": 1.1723901098901099e-05, + "loss": 0.1227, + "step": 27865 + }, + { + "epoch": 76.55494505494505, + "grad_norm": 12.732887268066406, + "learning_rate": 1.1722527472527473e-05, + "loss": 0.2142, + "step": 27866 + }, + { + "epoch": 76.5576923076923, + "grad_norm": 3.4243650436401367, + "learning_rate": 1.1721153846153848e-05, + "loss": 0.057, + "step": 27867 + }, + { + "epoch": 76.56043956043956, + "grad_norm": 2.0544536113739014, + "learning_rate": 1.171978021978022e-05, + "loss": 0.0176, + "step": 27868 + }, + { + "epoch": 76.56318681318682, + "grad_norm": 18.135128021240234, + "learning_rate": 1.1718406593406594e-05, + "loss": 0.2551, + "step": 27869 + }, + { + "epoch": 76.56593406593407, + "grad_norm": 6.652249336242676, + "learning_rate": 1.1717032967032968e-05, + "loss": 0.0993, + "step": 27870 + }, + { + "epoch": 76.56868131868131, + "grad_norm": 8.001787185668945, + "learning_rate": 1.1715659340659341e-05, + "loss": 0.1613, + "step": 27871 + }, + { + "epoch": 76.57142857142857, + "grad_norm": 21.820417404174805, + "learning_rate": 1.1714285714285715e-05, + "loss": 0.3453, + "step": 27872 + }, + { + "epoch": 76.57417582417582, + "grad_norm": 10.14720630645752, + "learning_rate": 1.1712912087912088e-05, + "loss": 0.1821, + "step": 27873 + }, + { + "epoch": 76.57692307692308, + "grad_norm": 15.08439826965332, + "learning_rate": 1.1711538461538463e-05, + "loss": 0.2326, + "step": 27874 + }, + { + "epoch": 76.57967032967034, + "grad_norm": 4.422044277191162, + "learning_rate": 1.1710164835164837e-05, + "loss": 0.0466, + "step": 27875 + }, + { + "epoch": 76.58241758241758, + "grad_norm": 16.149139404296875, + "learning_rate": 1.1708791208791208e-05, + "loss": 0.3295, + "step": 27876 + }, + { + "epoch": 76.58516483516483, + "grad_norm": 12.167886734008789, + "learning_rate": 1.1707417582417583e-05, + "loss": 0.2262, + "step": 27877 + }, + { + "epoch": 76.58791208791209, + "grad_norm": 16.569087982177734, + "learning_rate": 1.1706043956043957e-05, + "loss": 0.2817, + "step": 27878 + }, + { + "epoch": 76.59065934065934, + "grad_norm": 10.063419342041016, + "learning_rate": 1.170467032967033e-05, + "loss": 0.1989, + "step": 27879 + }, + { + "epoch": 76.5934065934066, + "grad_norm": 12.672690391540527, + "learning_rate": 1.1703296703296704e-05, + "loss": 0.2204, + "step": 27880 + }, + { + "epoch": 76.59615384615384, + "grad_norm": 3.9579217433929443, + "learning_rate": 1.1701923076923077e-05, + "loss": 0.0488, + "step": 27881 + }, + { + "epoch": 76.5989010989011, + "grad_norm": 11.712638854980469, + "learning_rate": 1.1700549450549452e-05, + "loss": 0.2691, + "step": 27882 + }, + { + "epoch": 76.60164835164835, + "grad_norm": 2.8293309211730957, + "learning_rate": 1.1699175824175824e-05, + "loss": 0.0201, + "step": 27883 + }, + { + "epoch": 76.6043956043956, + "grad_norm": 14.09218692779541, + "learning_rate": 1.1697802197802199e-05, + "loss": 0.3764, + "step": 27884 + }, + { + "epoch": 76.60714285714286, + "grad_norm": 11.831448554992676, + "learning_rate": 1.1696428571428572e-05, + "loss": 0.276, + "step": 27885 + }, + { + "epoch": 76.60989010989012, + "grad_norm": 23.346635818481445, + "learning_rate": 1.1695054945054946e-05, + "loss": 0.3571, + "step": 27886 + }, + { + "epoch": 76.61263736263736, + "grad_norm": 9.872133255004883, + "learning_rate": 1.169368131868132e-05, + "loss": 0.1605, + "step": 27887 + }, + { + "epoch": 76.61538461538461, + "grad_norm": 7.397054195404053, + "learning_rate": 1.1692307692307693e-05, + "loss": 0.1001, + "step": 27888 + }, + { + "epoch": 76.61813186813187, + "grad_norm": 1.6793872117996216, + "learning_rate": 1.1690934065934068e-05, + "loss": 0.0224, + "step": 27889 + }, + { + "epoch": 76.62087912087912, + "grad_norm": 10.03901195526123, + "learning_rate": 1.1689560439560441e-05, + "loss": 0.1439, + "step": 27890 + }, + { + "epoch": 76.62362637362638, + "grad_norm": 5.465203285217285, + "learning_rate": 1.1688186813186813e-05, + "loss": 0.1059, + "step": 27891 + }, + { + "epoch": 76.62637362637362, + "grad_norm": 12.773757934570312, + "learning_rate": 1.1686813186813188e-05, + "loss": 0.2667, + "step": 27892 + }, + { + "epoch": 76.62912087912088, + "grad_norm": 16.04904556274414, + "learning_rate": 1.1685439560439561e-05, + "loss": 0.2907, + "step": 27893 + }, + { + "epoch": 76.63186813186813, + "grad_norm": 10.803702354431152, + "learning_rate": 1.1684065934065935e-05, + "loss": 0.2615, + "step": 27894 + }, + { + "epoch": 76.63461538461539, + "grad_norm": 6.556073188781738, + "learning_rate": 1.1682692307692308e-05, + "loss": 0.0571, + "step": 27895 + }, + { + "epoch": 76.63736263736264, + "grad_norm": 28.258983612060547, + "learning_rate": 1.1681318681318682e-05, + "loss": 0.6184, + "step": 27896 + }, + { + "epoch": 76.64010989010988, + "grad_norm": 18.27755355834961, + "learning_rate": 1.1679945054945057e-05, + "loss": 0.4796, + "step": 27897 + }, + { + "epoch": 76.64285714285714, + "grad_norm": 21.05260467529297, + "learning_rate": 1.1678571428571428e-05, + "loss": 0.5692, + "step": 27898 + }, + { + "epoch": 76.6456043956044, + "grad_norm": 12.876070976257324, + "learning_rate": 1.1677197802197802e-05, + "loss": 0.226, + "step": 27899 + }, + { + "epoch": 76.64835164835165, + "grad_norm": 13.293006896972656, + "learning_rate": 1.1675824175824177e-05, + "loss": 0.1759, + "step": 27900 + }, + { + "epoch": 76.6510989010989, + "grad_norm": 22.342851638793945, + "learning_rate": 1.167445054945055e-05, + "loss": 0.6724, + "step": 27901 + }, + { + "epoch": 76.65384615384616, + "grad_norm": 9.835467338562012, + "learning_rate": 1.1673076923076924e-05, + "loss": 0.1947, + "step": 27902 + }, + { + "epoch": 76.6565934065934, + "grad_norm": 9.546014785766602, + "learning_rate": 1.1671703296703297e-05, + "loss": 0.2252, + "step": 27903 + }, + { + "epoch": 76.65934065934066, + "grad_norm": 7.907456874847412, + "learning_rate": 1.167032967032967e-05, + "loss": 0.1719, + "step": 27904 + }, + { + "epoch": 76.66208791208791, + "grad_norm": 14.23420238494873, + "learning_rate": 1.1668956043956044e-05, + "loss": 0.3011, + "step": 27905 + }, + { + "epoch": 76.66483516483517, + "grad_norm": 16.998435974121094, + "learning_rate": 1.1667582417582417e-05, + "loss": 0.28, + "step": 27906 + }, + { + "epoch": 76.66758241758242, + "grad_norm": 11.634721755981445, + "learning_rate": 1.1666208791208792e-05, + "loss": 0.2553, + "step": 27907 + }, + { + "epoch": 76.67032967032966, + "grad_norm": 7.385143280029297, + "learning_rate": 1.1664835164835166e-05, + "loss": 0.116, + "step": 27908 + }, + { + "epoch": 76.67307692307692, + "grad_norm": 7.497754096984863, + "learning_rate": 1.166346153846154e-05, + "loss": 0.2174, + "step": 27909 + }, + { + "epoch": 76.67582417582418, + "grad_norm": 8.572558403015137, + "learning_rate": 1.1662087912087913e-05, + "loss": 0.1845, + "step": 27910 + }, + { + "epoch": 76.67857142857143, + "grad_norm": 10.193471908569336, + "learning_rate": 1.1660714285714286e-05, + "loss": 0.1579, + "step": 27911 + }, + { + "epoch": 76.68131868131869, + "grad_norm": 12.474516868591309, + "learning_rate": 1.1659340659340661e-05, + "loss": 0.1704, + "step": 27912 + }, + { + "epoch": 76.68406593406593, + "grad_norm": 14.309621810913086, + "learning_rate": 1.1657967032967033e-05, + "loss": 0.2188, + "step": 27913 + }, + { + "epoch": 76.68681318681318, + "grad_norm": 14.168431282043457, + "learning_rate": 1.1656593406593406e-05, + "loss": 0.2017, + "step": 27914 + }, + { + "epoch": 76.68956043956044, + "grad_norm": 1.9248319864273071, + "learning_rate": 1.1655219780219781e-05, + "loss": 0.0309, + "step": 27915 + }, + { + "epoch": 76.6923076923077, + "grad_norm": 2.7924673557281494, + "learning_rate": 1.1653846153846155e-05, + "loss": 0.0394, + "step": 27916 + }, + { + "epoch": 76.69505494505495, + "grad_norm": 11.702428817749023, + "learning_rate": 1.1652472527472528e-05, + "loss": 0.1821, + "step": 27917 + }, + { + "epoch": 76.6978021978022, + "grad_norm": 8.480328559875488, + "learning_rate": 1.1651098901098902e-05, + "loss": 0.0959, + "step": 27918 + }, + { + "epoch": 76.70054945054945, + "grad_norm": 10.37976360321045, + "learning_rate": 1.1649725274725275e-05, + "loss": 0.1877, + "step": 27919 + }, + { + "epoch": 76.7032967032967, + "grad_norm": 19.101276397705078, + "learning_rate": 1.1648351648351648e-05, + "loss": 0.3592, + "step": 27920 + }, + { + "epoch": 76.70604395604396, + "grad_norm": 20.46923065185547, + "learning_rate": 1.1646978021978022e-05, + "loss": 0.276, + "step": 27921 + }, + { + "epoch": 76.70879120879121, + "grad_norm": 32.22541809082031, + "learning_rate": 1.1645604395604397e-05, + "loss": 0.6386, + "step": 27922 + }, + { + "epoch": 76.71153846153847, + "grad_norm": 9.137787818908691, + "learning_rate": 1.164423076923077e-05, + "loss": 0.1699, + "step": 27923 + }, + { + "epoch": 76.71428571428571, + "grad_norm": 29.729639053344727, + "learning_rate": 1.1642857142857144e-05, + "loss": 0.8094, + "step": 27924 + }, + { + "epoch": 76.71703296703296, + "grad_norm": 13.426807403564453, + "learning_rate": 1.1641483516483517e-05, + "loss": 0.3291, + "step": 27925 + }, + { + "epoch": 76.71978021978022, + "grad_norm": 17.490863800048828, + "learning_rate": 1.164010989010989e-05, + "loss": 0.2388, + "step": 27926 + }, + { + "epoch": 76.72252747252747, + "grad_norm": 10.893769264221191, + "learning_rate": 1.1638736263736266e-05, + "loss": 0.1352, + "step": 27927 + }, + { + "epoch": 76.72527472527473, + "grad_norm": 10.305525779724121, + "learning_rate": 1.1637362637362637e-05, + "loss": 0.2412, + "step": 27928 + }, + { + "epoch": 76.72802197802197, + "grad_norm": 22.27292823791504, + "learning_rate": 1.163598901098901e-05, + "loss": 0.4445, + "step": 27929 + }, + { + "epoch": 76.73076923076923, + "grad_norm": 3.6150777339935303, + "learning_rate": 1.1634615384615386e-05, + "loss": 0.0454, + "step": 27930 + }, + { + "epoch": 76.73351648351648, + "grad_norm": 12.900614738464355, + "learning_rate": 1.163324175824176e-05, + "loss": 0.1775, + "step": 27931 + }, + { + "epoch": 76.73626373626374, + "grad_norm": 22.853515625, + "learning_rate": 1.1631868131868133e-05, + "loss": 0.4426, + "step": 27932 + }, + { + "epoch": 76.73901098901099, + "grad_norm": 7.624098300933838, + "learning_rate": 1.1630494505494506e-05, + "loss": 0.1196, + "step": 27933 + }, + { + "epoch": 76.74175824175825, + "grad_norm": 22.121889114379883, + "learning_rate": 1.162912087912088e-05, + "loss": 0.351, + "step": 27934 + }, + { + "epoch": 76.74450549450549, + "grad_norm": 11.168099403381348, + "learning_rate": 1.1627747252747253e-05, + "loss": 0.3234, + "step": 27935 + }, + { + "epoch": 76.74725274725274, + "grad_norm": 10.533201217651367, + "learning_rate": 1.1626373626373626e-05, + "loss": 0.1885, + "step": 27936 + }, + { + "epoch": 76.75, + "grad_norm": 10.009531021118164, + "learning_rate": 1.1625000000000001e-05, + "loss": 0.2367, + "step": 27937 + }, + { + "epoch": 76.75274725274726, + "grad_norm": 23.36956024169922, + "learning_rate": 1.1623626373626375e-05, + "loss": 0.4805, + "step": 27938 + }, + { + "epoch": 76.75549450549451, + "grad_norm": 2.594062089920044, + "learning_rate": 1.1622252747252746e-05, + "loss": 0.0347, + "step": 27939 + }, + { + "epoch": 76.75824175824175, + "grad_norm": 8.319979667663574, + "learning_rate": 1.1620879120879122e-05, + "loss": 0.1539, + "step": 27940 + }, + { + "epoch": 76.76098901098901, + "grad_norm": 27.361377716064453, + "learning_rate": 1.1619505494505495e-05, + "loss": 0.8451, + "step": 27941 + }, + { + "epoch": 76.76373626373626, + "grad_norm": 16.22928237915039, + "learning_rate": 1.161813186813187e-05, + "loss": 0.2986, + "step": 27942 + }, + { + "epoch": 76.76648351648352, + "grad_norm": 2.7009263038635254, + "learning_rate": 1.1616758241758242e-05, + "loss": 0.0415, + "step": 27943 + }, + { + "epoch": 76.76923076923077, + "grad_norm": 7.732542037963867, + "learning_rate": 1.1615384615384615e-05, + "loss": 0.1314, + "step": 27944 + }, + { + "epoch": 76.77197802197803, + "grad_norm": 11.736353874206543, + "learning_rate": 1.161401098901099e-05, + "loss": 0.1294, + "step": 27945 + }, + { + "epoch": 76.77472527472527, + "grad_norm": 7.7294135093688965, + "learning_rate": 1.1612637362637364e-05, + "loss": 0.0559, + "step": 27946 + }, + { + "epoch": 76.77747252747253, + "grad_norm": 7.3100504875183105, + "learning_rate": 1.1611263736263737e-05, + "loss": 0.1931, + "step": 27947 + }, + { + "epoch": 76.78021978021978, + "grad_norm": 11.415987014770508, + "learning_rate": 1.160989010989011e-05, + "loss": 0.2657, + "step": 27948 + }, + { + "epoch": 76.78296703296704, + "grad_norm": 12.571821212768555, + "learning_rate": 1.1608516483516484e-05, + "loss": 0.1554, + "step": 27949 + }, + { + "epoch": 76.78571428571429, + "grad_norm": 6.102582931518555, + "learning_rate": 1.1607142857142857e-05, + "loss": 0.0717, + "step": 27950 + }, + { + "epoch": 76.78846153846153, + "grad_norm": 21.27472686767578, + "learning_rate": 1.160576923076923e-05, + "loss": 0.5066, + "step": 27951 + }, + { + "epoch": 76.79120879120879, + "grad_norm": 14.125455856323242, + "learning_rate": 1.1604395604395606e-05, + "loss": 0.1914, + "step": 27952 + }, + { + "epoch": 76.79395604395604, + "grad_norm": 6.513438701629639, + "learning_rate": 1.160302197802198e-05, + "loss": 0.093, + "step": 27953 + }, + { + "epoch": 76.7967032967033, + "grad_norm": 16.24409294128418, + "learning_rate": 1.1601648351648351e-05, + "loss": 0.3475, + "step": 27954 + }, + { + "epoch": 76.79945054945055, + "grad_norm": 9.582282066345215, + "learning_rate": 1.1600274725274726e-05, + "loss": 0.1785, + "step": 27955 + }, + { + "epoch": 76.8021978021978, + "grad_norm": 14.199445724487305, + "learning_rate": 1.15989010989011e-05, + "loss": 0.2251, + "step": 27956 + }, + { + "epoch": 76.80494505494505, + "grad_norm": 14.258306503295898, + "learning_rate": 1.1597527472527473e-05, + "loss": 0.2673, + "step": 27957 + }, + { + "epoch": 76.8076923076923, + "grad_norm": 16.456418991088867, + "learning_rate": 1.1596153846153846e-05, + "loss": 0.3905, + "step": 27958 + }, + { + "epoch": 76.81043956043956, + "grad_norm": 12.910300254821777, + "learning_rate": 1.159478021978022e-05, + "loss": 0.2281, + "step": 27959 + }, + { + "epoch": 76.81318681318682, + "grad_norm": 14.242810249328613, + "learning_rate": 1.1593406593406595e-05, + "loss": 0.1607, + "step": 27960 + }, + { + "epoch": 76.81593406593407, + "grad_norm": 5.231131076812744, + "learning_rate": 1.1592032967032968e-05, + "loss": 0.1494, + "step": 27961 + }, + { + "epoch": 76.81868131868131, + "grad_norm": 10.861379623413086, + "learning_rate": 1.159065934065934e-05, + "loss": 0.3069, + "step": 27962 + }, + { + "epoch": 76.82142857142857, + "grad_norm": 7.851099491119385, + "learning_rate": 1.1589285714285715e-05, + "loss": 0.2091, + "step": 27963 + }, + { + "epoch": 76.82417582417582, + "grad_norm": 9.891071319580078, + "learning_rate": 1.1587912087912088e-05, + "loss": 0.1014, + "step": 27964 + }, + { + "epoch": 76.82692307692308, + "grad_norm": 7.192659378051758, + "learning_rate": 1.1586538461538462e-05, + "loss": 0.1516, + "step": 27965 + }, + { + "epoch": 76.82967032967034, + "grad_norm": 6.554263591766357, + "learning_rate": 1.1585164835164835e-05, + "loss": 0.0809, + "step": 27966 + }, + { + "epoch": 76.83241758241758, + "grad_norm": 5.527948379516602, + "learning_rate": 1.1583791208791209e-05, + "loss": 0.1052, + "step": 27967 + }, + { + "epoch": 76.83516483516483, + "grad_norm": 17.755435943603516, + "learning_rate": 1.1582417582417584e-05, + "loss": 0.4248, + "step": 27968 + }, + { + "epoch": 76.83791208791209, + "grad_norm": 10.746665000915527, + "learning_rate": 1.1581043956043955e-05, + "loss": 0.0964, + "step": 27969 + }, + { + "epoch": 76.84065934065934, + "grad_norm": 0.7184454798698425, + "learning_rate": 1.157967032967033e-05, + "loss": 0.0081, + "step": 27970 + }, + { + "epoch": 76.8434065934066, + "grad_norm": 18.022144317626953, + "learning_rate": 1.1578296703296704e-05, + "loss": 0.1563, + "step": 27971 + }, + { + "epoch": 76.84615384615384, + "grad_norm": 12.6058931350708, + "learning_rate": 1.1576923076923077e-05, + "loss": 0.2656, + "step": 27972 + }, + { + "epoch": 76.8489010989011, + "grad_norm": 11.68241024017334, + "learning_rate": 1.157554945054945e-05, + "loss": 0.1585, + "step": 27973 + }, + { + "epoch": 76.85164835164835, + "grad_norm": 17.436626434326172, + "learning_rate": 1.1574175824175824e-05, + "loss": 0.3454, + "step": 27974 + }, + { + "epoch": 76.8543956043956, + "grad_norm": 12.440075874328613, + "learning_rate": 1.15728021978022e-05, + "loss": 0.1301, + "step": 27975 + }, + { + "epoch": 76.85714285714286, + "grad_norm": 16.050861358642578, + "learning_rate": 1.1571428571428573e-05, + "loss": 0.3959, + "step": 27976 + }, + { + "epoch": 76.85989010989012, + "grad_norm": 6.6599626541137695, + "learning_rate": 1.1570054945054944e-05, + "loss": 0.0993, + "step": 27977 + }, + { + "epoch": 76.86263736263736, + "grad_norm": 10.091055870056152, + "learning_rate": 1.156868131868132e-05, + "loss": 0.1658, + "step": 27978 + }, + { + "epoch": 76.86538461538461, + "grad_norm": 8.824481964111328, + "learning_rate": 1.1567307692307693e-05, + "loss": 0.1038, + "step": 27979 + }, + { + "epoch": 76.86813186813187, + "grad_norm": 7.720927715301514, + "learning_rate": 1.1565934065934066e-05, + "loss": 0.1212, + "step": 27980 + }, + { + "epoch": 76.87087912087912, + "grad_norm": 14.148653030395508, + "learning_rate": 1.156456043956044e-05, + "loss": 0.2721, + "step": 27981 + }, + { + "epoch": 76.87362637362638, + "grad_norm": 17.76926040649414, + "learning_rate": 1.1563186813186813e-05, + "loss": 0.4992, + "step": 27982 + }, + { + "epoch": 76.87637362637362, + "grad_norm": 12.584432601928711, + "learning_rate": 1.1561813186813188e-05, + "loss": 0.1888, + "step": 27983 + }, + { + "epoch": 76.87912087912088, + "grad_norm": 12.532758712768555, + "learning_rate": 1.156043956043956e-05, + "loss": 0.3899, + "step": 27984 + }, + { + "epoch": 76.88186813186813, + "grad_norm": 4.1859588623046875, + "learning_rate": 1.1559065934065935e-05, + "loss": 0.0428, + "step": 27985 + }, + { + "epoch": 76.88461538461539, + "grad_norm": 15.652146339416504, + "learning_rate": 1.1557692307692308e-05, + "loss": 0.4003, + "step": 27986 + }, + { + "epoch": 76.88736263736264, + "grad_norm": 4.342509746551514, + "learning_rate": 1.1556318681318682e-05, + "loss": 0.0636, + "step": 27987 + }, + { + "epoch": 76.89010989010988, + "grad_norm": 12.47365665435791, + "learning_rate": 1.1554945054945055e-05, + "loss": 0.2832, + "step": 27988 + }, + { + "epoch": 76.89285714285714, + "grad_norm": 3.7411909103393555, + "learning_rate": 1.1553571428571429e-05, + "loss": 0.0526, + "step": 27989 + }, + { + "epoch": 76.8956043956044, + "grad_norm": 10.448780059814453, + "learning_rate": 1.1552197802197804e-05, + "loss": 0.1286, + "step": 27990 + }, + { + "epoch": 76.89835164835165, + "grad_norm": 11.208159446716309, + "learning_rate": 1.1550824175824177e-05, + "loss": 0.2201, + "step": 27991 + }, + { + "epoch": 76.9010989010989, + "grad_norm": 6.088372230529785, + "learning_rate": 1.1549450549450549e-05, + "loss": 0.0734, + "step": 27992 + }, + { + "epoch": 76.90384615384616, + "grad_norm": 3.748103380203247, + "learning_rate": 1.1548076923076924e-05, + "loss": 0.0381, + "step": 27993 + }, + { + "epoch": 76.9065934065934, + "grad_norm": 7.2222795486450195, + "learning_rate": 1.1546703296703297e-05, + "loss": 0.2015, + "step": 27994 + }, + { + "epoch": 76.90934065934066, + "grad_norm": 20.932588577270508, + "learning_rate": 1.154532967032967e-05, + "loss": 0.2488, + "step": 27995 + }, + { + "epoch": 76.91208791208791, + "grad_norm": 14.306753158569336, + "learning_rate": 1.1543956043956044e-05, + "loss": 0.3687, + "step": 27996 + }, + { + "epoch": 76.91483516483517, + "grad_norm": 11.099555969238281, + "learning_rate": 1.1542582417582418e-05, + "loss": 0.2485, + "step": 27997 + }, + { + "epoch": 76.91758241758242, + "grad_norm": 2.154200315475464, + "learning_rate": 1.1541208791208793e-05, + "loss": 0.0218, + "step": 27998 + }, + { + "epoch": 76.92032967032966, + "grad_norm": 9.875628471374512, + "learning_rate": 1.1539835164835164e-05, + "loss": 0.2147, + "step": 27999 + }, + { + "epoch": 76.92307692307692, + "grad_norm": 5.419108867645264, + "learning_rate": 1.153846153846154e-05, + "loss": 0.0724, + "step": 28000 + }, + { + "epoch": 76.92582417582418, + "grad_norm": 19.18379020690918, + "learning_rate": 1.1537087912087913e-05, + "loss": 0.2652, + "step": 28001 + }, + { + "epoch": 76.92857142857143, + "grad_norm": 12.747611999511719, + "learning_rate": 1.1535714285714286e-05, + "loss": 0.4133, + "step": 28002 + }, + { + "epoch": 76.93131868131869, + "grad_norm": 8.671231269836426, + "learning_rate": 1.153434065934066e-05, + "loss": 0.1451, + "step": 28003 + }, + { + "epoch": 76.93406593406593, + "grad_norm": 3.752315044403076, + "learning_rate": 1.1532967032967033e-05, + "loss": 0.0343, + "step": 28004 + }, + { + "epoch": 76.93681318681318, + "grad_norm": 23.08833122253418, + "learning_rate": 1.1531593406593408e-05, + "loss": 0.4713, + "step": 28005 + }, + { + "epoch": 76.93956043956044, + "grad_norm": 6.752601623535156, + "learning_rate": 1.1530219780219782e-05, + "loss": 0.1335, + "step": 28006 + }, + { + "epoch": 76.9423076923077, + "grad_norm": 4.053488731384277, + "learning_rate": 1.1528846153846153e-05, + "loss": 0.0325, + "step": 28007 + }, + { + "epoch": 76.94505494505495, + "grad_norm": 5.303481578826904, + "learning_rate": 1.1527472527472528e-05, + "loss": 0.0539, + "step": 28008 + }, + { + "epoch": 76.9478021978022, + "grad_norm": 12.498834609985352, + "learning_rate": 1.1526098901098902e-05, + "loss": 0.2334, + "step": 28009 + }, + { + "epoch": 76.95054945054945, + "grad_norm": 8.532978057861328, + "learning_rate": 1.1524725274725275e-05, + "loss": 0.1438, + "step": 28010 + }, + { + "epoch": 76.9532967032967, + "grad_norm": 7.3258137702941895, + "learning_rate": 1.1523351648351649e-05, + "loss": 0.0986, + "step": 28011 + }, + { + "epoch": 76.95604395604396, + "grad_norm": 4.052186489105225, + "learning_rate": 1.1521978021978022e-05, + "loss": 0.0773, + "step": 28012 + }, + { + "epoch": 76.95879120879121, + "grad_norm": 9.716865539550781, + "learning_rate": 1.1520604395604397e-05, + "loss": 0.1054, + "step": 28013 + }, + { + "epoch": 76.96153846153847, + "grad_norm": 6.652029514312744, + "learning_rate": 1.1519230769230769e-05, + "loss": 0.111, + "step": 28014 + }, + { + "epoch": 76.96428571428571, + "grad_norm": 7.420628070831299, + "learning_rate": 1.1517857142857142e-05, + "loss": 0.2667, + "step": 28015 + }, + { + "epoch": 76.96703296703296, + "grad_norm": 8.072855949401855, + "learning_rate": 1.1516483516483517e-05, + "loss": 0.253, + "step": 28016 + }, + { + "epoch": 76.96978021978022, + "grad_norm": 12.035571098327637, + "learning_rate": 1.151510989010989e-05, + "loss": 0.1405, + "step": 28017 + }, + { + "epoch": 76.97252747252747, + "grad_norm": 4.119729995727539, + "learning_rate": 1.1513736263736264e-05, + "loss": 0.0439, + "step": 28018 + }, + { + "epoch": 76.97527472527473, + "grad_norm": 14.244101524353027, + "learning_rate": 1.1512362637362638e-05, + "loss": 0.2156, + "step": 28019 + }, + { + "epoch": 76.97802197802197, + "grad_norm": 17.05304527282715, + "learning_rate": 1.1510989010989011e-05, + "loss": 0.1279, + "step": 28020 + }, + { + "epoch": 76.98076923076923, + "grad_norm": 6.652619361877441, + "learning_rate": 1.1509615384615386e-05, + "loss": 0.0739, + "step": 28021 + }, + { + "epoch": 76.98351648351648, + "grad_norm": 5.786157608032227, + "learning_rate": 1.1508241758241758e-05, + "loss": 0.047, + "step": 28022 + }, + { + "epoch": 76.98626373626374, + "grad_norm": 8.168859481811523, + "learning_rate": 1.1506868131868133e-05, + "loss": 0.1038, + "step": 28023 + }, + { + "epoch": 76.98901098901099, + "grad_norm": 7.478990077972412, + "learning_rate": 1.1505494505494506e-05, + "loss": 0.1484, + "step": 28024 + }, + { + "epoch": 76.99175824175825, + "grad_norm": 14.141021728515625, + "learning_rate": 1.150412087912088e-05, + "loss": 0.1638, + "step": 28025 + }, + { + "epoch": 76.99450549450549, + "grad_norm": 10.633923530578613, + "learning_rate": 1.1502747252747253e-05, + "loss": 0.1873, + "step": 28026 + }, + { + "epoch": 76.99725274725274, + "grad_norm": 3.163170576095581, + "learning_rate": 1.1501373626373627e-05, + "loss": 0.0648, + "step": 28027 + }, + { + "epoch": 77.0, + "grad_norm": 45.0104866027832, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.8577, + "step": 28028 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.7520661157024794, + "eval_f1": 0.7285306439153886, + "eval_f1_DuraRiadoRio_64x64": 0.8, + "eval_f1_Mole_64x64": 0.7616438356164383, + "eval_f1_Quebrado_64x64": 0.8657718120805369, + "eval_f1_RiadoRio_64x64": 0.35467980295566504, + "eval_f1_RioFechado_64x64": 0.8605577689243028, + "eval_loss": 1.285569667816162, + "eval_precision": 0.7729800670646562, + "eval_precision_DuraRiadoRio_64x64": 0.7015706806282722, + "eval_precision_Mole_64x64": 0.6289592760180995, + "eval_precision_Quebrado_64x64": 0.8376623376623377, + "eval_precision_RiadoRio_64x64": 0.7058823529411765, + "eval_precision_RioFechado_64x64": 0.9908256880733946, + "eval_recall": 0.757814430442303, + "eval_recall_DuraRiadoRio_64x64": 0.9305555555555556, + "eval_recall_Mole_64x64": 0.9652777777777778, + "eval_recall_Quebrado_64x64": 0.8958333333333334, + "eval_recall_RiadoRio_64x64": 0.23684210526315788, + "eval_recall_RioFechado_64x64": 0.7605633802816901, + "eval_runtime": 1.7393, + "eval_samples_per_second": 417.417, + "eval_steps_per_second": 26.448, + "step": 28028 + }, + { + "epoch": 77.00274725274726, + "grad_norm": 10.435606002807617, + "learning_rate": 1.1498626373626373e-05, + "loss": 0.1682, + "step": 28029 + }, + { + "epoch": 77.00549450549451, + "grad_norm": 12.057585716247559, + "learning_rate": 1.1497252747252747e-05, + "loss": 0.2071, + "step": 28030 + }, + { + "epoch": 77.00824175824175, + "grad_norm": 26.605331420898438, + "learning_rate": 1.1495879120879122e-05, + "loss": 0.4379, + "step": 28031 + }, + { + "epoch": 77.01098901098901, + "grad_norm": 7.372236728668213, + "learning_rate": 1.1494505494505495e-05, + "loss": 0.0956, + "step": 28032 + }, + { + "epoch": 77.01373626373626, + "grad_norm": 4.115596294403076, + "learning_rate": 1.1493131868131869e-05, + "loss": 0.0289, + "step": 28033 + }, + { + "epoch": 77.01648351648352, + "grad_norm": 10.26063346862793, + "learning_rate": 1.1491758241758242e-05, + "loss": 0.1842, + "step": 28034 + }, + { + "epoch": 77.01923076923077, + "grad_norm": 8.765013694763184, + "learning_rate": 1.1490384615384616e-05, + "loss": 0.2099, + "step": 28035 + }, + { + "epoch": 77.02197802197803, + "grad_norm": 10.107518196105957, + "learning_rate": 1.148901098901099e-05, + "loss": 0.1475, + "step": 28036 + }, + { + "epoch": 77.02472527472527, + "grad_norm": 6.963620185852051, + "learning_rate": 1.1487637362637362e-05, + "loss": 0.1247, + "step": 28037 + }, + { + "epoch": 77.02747252747253, + "grad_norm": 11.477568626403809, + "learning_rate": 1.1486263736263737e-05, + "loss": 0.1706, + "step": 28038 + }, + { + "epoch": 77.03021978021978, + "grad_norm": 4.614752292633057, + "learning_rate": 1.1484890109890111e-05, + "loss": 0.0782, + "step": 28039 + }, + { + "epoch": 77.03296703296704, + "grad_norm": 4.205655097961426, + "learning_rate": 1.1483516483516484e-05, + "loss": 0.0355, + "step": 28040 + }, + { + "epoch": 77.03571428571429, + "grad_norm": 13.75878620147705, + "learning_rate": 1.1482142857142858e-05, + "loss": 0.293, + "step": 28041 + }, + { + "epoch": 77.03846153846153, + "grad_norm": 9.79483413696289, + "learning_rate": 1.1480769230769231e-05, + "loss": 0.1547, + "step": 28042 + }, + { + "epoch": 77.04120879120879, + "grad_norm": 15.803435325622559, + "learning_rate": 1.1479395604395606e-05, + "loss": 0.2599, + "step": 28043 + }, + { + "epoch": 77.04395604395604, + "grad_norm": 3.303567409515381, + "learning_rate": 1.1478021978021978e-05, + "loss": 0.0535, + "step": 28044 + }, + { + "epoch": 77.0467032967033, + "grad_norm": 10.183051109313965, + "learning_rate": 1.1476648351648351e-05, + "loss": 0.2491, + "step": 28045 + }, + { + "epoch": 77.04945054945055, + "grad_norm": 14.985603332519531, + "learning_rate": 1.1475274725274726e-05, + "loss": 0.2621, + "step": 28046 + }, + { + "epoch": 77.0521978021978, + "grad_norm": 6.335253715515137, + "learning_rate": 1.14739010989011e-05, + "loss": 0.1154, + "step": 28047 + }, + { + "epoch": 77.05494505494505, + "grad_norm": 2.614506721496582, + "learning_rate": 1.1472527472527473e-05, + "loss": 0.0306, + "step": 28048 + }, + { + "epoch": 77.0576923076923, + "grad_norm": 12.074390411376953, + "learning_rate": 1.1471153846153847e-05, + "loss": 0.2761, + "step": 28049 + }, + { + "epoch": 77.06043956043956, + "grad_norm": 1.2122243642807007, + "learning_rate": 1.146978021978022e-05, + "loss": 0.0133, + "step": 28050 + }, + { + "epoch": 77.06318681318682, + "grad_norm": 19.705713272094727, + "learning_rate": 1.1468406593406593e-05, + "loss": 0.7171, + "step": 28051 + }, + { + "epoch": 77.06593406593407, + "grad_norm": 14.841328620910645, + "learning_rate": 1.1467032967032967e-05, + "loss": 0.3573, + "step": 28052 + }, + { + "epoch": 77.06868131868131, + "grad_norm": 13.983466148376465, + "learning_rate": 1.1465659340659342e-05, + "loss": 0.1152, + "step": 28053 + }, + { + "epoch": 77.07142857142857, + "grad_norm": 10.81294059753418, + "learning_rate": 1.1464285714285715e-05, + "loss": 0.1008, + "step": 28054 + }, + { + "epoch": 77.07417582417582, + "grad_norm": 17.77077865600586, + "learning_rate": 1.1462912087912089e-05, + "loss": 0.5568, + "step": 28055 + }, + { + "epoch": 77.07692307692308, + "grad_norm": 13.28519344329834, + "learning_rate": 1.1461538461538462e-05, + "loss": 0.3224, + "step": 28056 + }, + { + "epoch": 77.07967032967034, + "grad_norm": 11.777801513671875, + "learning_rate": 1.1460164835164836e-05, + "loss": 0.2485, + "step": 28057 + }, + { + "epoch": 77.08241758241758, + "grad_norm": 15.586997032165527, + "learning_rate": 1.145879120879121e-05, + "loss": 0.3593, + "step": 28058 + }, + { + "epoch": 77.08516483516483, + "grad_norm": 7.205745220184326, + "learning_rate": 1.1457417582417582e-05, + "loss": 0.1144, + "step": 28059 + }, + { + "epoch": 77.08791208791209, + "grad_norm": 9.261515617370605, + "learning_rate": 1.1456043956043956e-05, + "loss": 0.0962, + "step": 28060 + }, + { + "epoch": 77.09065934065934, + "grad_norm": 5.467604160308838, + "learning_rate": 1.1454670329670331e-05, + "loss": 0.0584, + "step": 28061 + }, + { + "epoch": 77.0934065934066, + "grad_norm": 15.073156356811523, + "learning_rate": 1.1453296703296704e-05, + "loss": 0.3618, + "step": 28062 + }, + { + "epoch": 77.09615384615384, + "grad_norm": 12.402358055114746, + "learning_rate": 1.1451923076923078e-05, + "loss": 0.1497, + "step": 28063 + }, + { + "epoch": 77.0989010989011, + "grad_norm": 17.193632125854492, + "learning_rate": 1.1450549450549451e-05, + "loss": 0.4307, + "step": 28064 + }, + { + "epoch": 77.10164835164835, + "grad_norm": 15.76433277130127, + "learning_rate": 1.1449175824175825e-05, + "loss": 0.3373, + "step": 28065 + }, + { + "epoch": 77.1043956043956, + "grad_norm": 9.77039909362793, + "learning_rate": 1.1447802197802198e-05, + "loss": 0.1575, + "step": 28066 + }, + { + "epoch": 77.10714285714286, + "grad_norm": 19.03091049194336, + "learning_rate": 1.1446428571428571e-05, + "loss": 0.37, + "step": 28067 + }, + { + "epoch": 77.10989010989012, + "grad_norm": 14.928479194641113, + "learning_rate": 1.1445054945054945e-05, + "loss": 0.1841, + "step": 28068 + }, + { + "epoch": 77.11263736263736, + "grad_norm": 14.167680740356445, + "learning_rate": 1.144368131868132e-05, + "loss": 0.2394, + "step": 28069 + }, + { + "epoch": 77.11538461538461, + "grad_norm": 5.216747283935547, + "learning_rate": 1.1442307692307693e-05, + "loss": 0.0627, + "step": 28070 + }, + { + "epoch": 77.11813186813187, + "grad_norm": 12.238028526306152, + "learning_rate": 1.1440934065934067e-05, + "loss": 0.1362, + "step": 28071 + }, + { + "epoch": 77.12087912087912, + "grad_norm": 15.794205665588379, + "learning_rate": 1.143956043956044e-05, + "loss": 0.1729, + "step": 28072 + }, + { + "epoch": 77.12362637362638, + "grad_norm": 20.964204788208008, + "learning_rate": 1.1438186813186813e-05, + "loss": 0.9417, + "step": 28073 + }, + { + "epoch": 77.12637362637362, + "grad_norm": 12.333423614501953, + "learning_rate": 1.1436813186813187e-05, + "loss": 0.152, + "step": 28074 + }, + { + "epoch": 77.12912087912088, + "grad_norm": 13.898879051208496, + "learning_rate": 1.143543956043956e-05, + "loss": 0.2091, + "step": 28075 + }, + { + "epoch": 77.13186813186813, + "grad_norm": 18.233972549438477, + "learning_rate": 1.1434065934065935e-05, + "loss": 0.3784, + "step": 28076 + }, + { + "epoch": 77.13461538461539, + "grad_norm": 15.887351036071777, + "learning_rate": 1.1432692307692309e-05, + "loss": 0.1885, + "step": 28077 + }, + { + "epoch": 77.13736263736264, + "grad_norm": 5.370952129364014, + "learning_rate": 1.143131868131868e-05, + "loss": 0.0896, + "step": 28078 + }, + { + "epoch": 77.14010989010988, + "grad_norm": 18.991310119628906, + "learning_rate": 1.1429945054945056e-05, + "loss": 0.3963, + "step": 28079 + }, + { + "epoch": 77.14285714285714, + "grad_norm": 14.873839378356934, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.3275, + "step": 28080 + }, + { + "epoch": 77.1456043956044, + "grad_norm": 7.3480682373046875, + "learning_rate": 1.1427197802197802e-05, + "loss": 0.0949, + "step": 28081 + }, + { + "epoch": 77.14835164835165, + "grad_norm": 13.387149810791016, + "learning_rate": 1.1425824175824176e-05, + "loss": 0.2168, + "step": 28082 + }, + { + "epoch": 77.1510989010989, + "grad_norm": 16.835277557373047, + "learning_rate": 1.142445054945055e-05, + "loss": 0.248, + "step": 28083 + }, + { + "epoch": 77.15384615384616, + "grad_norm": 12.221673965454102, + "learning_rate": 1.1423076923076924e-05, + "loss": 0.1448, + "step": 28084 + }, + { + "epoch": 77.1565934065934, + "grad_norm": 4.719616889953613, + "learning_rate": 1.1421703296703298e-05, + "loss": 0.0671, + "step": 28085 + }, + { + "epoch": 77.15934065934066, + "grad_norm": 4.193248271942139, + "learning_rate": 1.1420329670329671e-05, + "loss": 0.0786, + "step": 28086 + }, + { + "epoch": 77.16208791208791, + "grad_norm": 3.1513543128967285, + "learning_rate": 1.1418956043956045e-05, + "loss": 0.0329, + "step": 28087 + }, + { + "epoch": 77.16483516483517, + "grad_norm": 14.124256134033203, + "learning_rate": 1.1417582417582418e-05, + "loss": 0.3223, + "step": 28088 + }, + { + "epoch": 77.16758241758242, + "grad_norm": 5.370558738708496, + "learning_rate": 1.1416208791208791e-05, + "loss": 0.1137, + "step": 28089 + }, + { + "epoch": 77.17032967032966, + "grad_norm": 8.624109268188477, + "learning_rate": 1.1414835164835165e-05, + "loss": 0.1078, + "step": 28090 + }, + { + "epoch": 77.17307692307692, + "grad_norm": 7.9644598960876465, + "learning_rate": 1.141346153846154e-05, + "loss": 0.1119, + "step": 28091 + }, + { + "epoch": 77.17582417582418, + "grad_norm": 4.289515018463135, + "learning_rate": 1.1412087912087913e-05, + "loss": 0.0789, + "step": 28092 + }, + { + "epoch": 77.17857142857143, + "grad_norm": 10.48188304901123, + "learning_rate": 1.1410714285714285e-05, + "loss": 0.1678, + "step": 28093 + }, + { + "epoch": 77.18131868131869, + "grad_norm": 13.499626159667969, + "learning_rate": 1.140934065934066e-05, + "loss": 0.3884, + "step": 28094 + }, + { + "epoch": 77.18406593406593, + "grad_norm": 13.243563652038574, + "learning_rate": 1.1407967032967034e-05, + "loss": 0.1219, + "step": 28095 + }, + { + "epoch": 77.18681318681318, + "grad_norm": 10.399181365966797, + "learning_rate": 1.1406593406593407e-05, + "loss": 0.2333, + "step": 28096 + }, + { + "epoch": 77.18956043956044, + "grad_norm": 7.3520050048828125, + "learning_rate": 1.140521978021978e-05, + "loss": 0.1093, + "step": 28097 + }, + { + "epoch": 77.1923076923077, + "grad_norm": 2.4334921836853027, + "learning_rate": 1.1403846153846154e-05, + "loss": 0.0299, + "step": 28098 + }, + { + "epoch": 77.19505494505495, + "grad_norm": 10.313475608825684, + "learning_rate": 1.1402472527472529e-05, + "loss": 0.1178, + "step": 28099 + }, + { + "epoch": 77.1978021978022, + "grad_norm": 13.734254837036133, + "learning_rate": 1.14010989010989e-05, + "loss": 0.3482, + "step": 28100 + }, + { + "epoch": 77.20054945054945, + "grad_norm": 10.44430160522461, + "learning_rate": 1.1399725274725276e-05, + "loss": 0.244, + "step": 28101 + }, + { + "epoch": 77.2032967032967, + "grad_norm": 14.495210647583008, + "learning_rate": 1.1398351648351649e-05, + "loss": 0.2306, + "step": 28102 + }, + { + "epoch": 77.20604395604396, + "grad_norm": 7.434367656707764, + "learning_rate": 1.1396978021978022e-05, + "loss": 0.1163, + "step": 28103 + }, + { + "epoch": 77.20879120879121, + "grad_norm": 5.582939624786377, + "learning_rate": 1.1395604395604396e-05, + "loss": 0.059, + "step": 28104 + }, + { + "epoch": 77.21153846153847, + "grad_norm": 15.147294998168945, + "learning_rate": 1.139423076923077e-05, + "loss": 0.1713, + "step": 28105 + }, + { + "epoch": 77.21428571428571, + "grad_norm": 14.86902904510498, + "learning_rate": 1.1392857142857144e-05, + "loss": 0.3158, + "step": 28106 + }, + { + "epoch": 77.21703296703296, + "grad_norm": 15.388725280761719, + "learning_rate": 1.1391483516483518e-05, + "loss": 0.3159, + "step": 28107 + }, + { + "epoch": 77.21978021978022, + "grad_norm": 1.9224565029144287, + "learning_rate": 1.139010989010989e-05, + "loss": 0.0246, + "step": 28108 + }, + { + "epoch": 77.22252747252747, + "grad_norm": 11.728530883789062, + "learning_rate": 1.1388736263736265e-05, + "loss": 0.1102, + "step": 28109 + }, + { + "epoch": 77.22527472527473, + "grad_norm": 10.31436538696289, + "learning_rate": 1.1387362637362638e-05, + "loss": 0.1039, + "step": 28110 + }, + { + "epoch": 77.22802197802197, + "grad_norm": 13.1934232711792, + "learning_rate": 1.1385989010989011e-05, + "loss": 0.3399, + "step": 28111 + }, + { + "epoch": 77.23076923076923, + "grad_norm": 20.645601272583008, + "learning_rate": 1.1384615384615385e-05, + "loss": 0.7189, + "step": 28112 + }, + { + "epoch": 77.23351648351648, + "grad_norm": 11.764884948730469, + "learning_rate": 1.1383241758241758e-05, + "loss": 0.2071, + "step": 28113 + }, + { + "epoch": 77.23626373626374, + "grad_norm": 2.75004506111145, + "learning_rate": 1.1381868131868133e-05, + "loss": 0.0337, + "step": 28114 + }, + { + "epoch": 77.23901098901099, + "grad_norm": 12.695024490356445, + "learning_rate": 1.1380494505494505e-05, + "loss": 0.4843, + "step": 28115 + }, + { + "epoch": 77.24175824175825, + "grad_norm": 10.811698913574219, + "learning_rate": 1.137912087912088e-05, + "loss": 0.1721, + "step": 28116 + }, + { + "epoch": 77.24450549450549, + "grad_norm": 10.575790405273438, + "learning_rate": 1.1377747252747254e-05, + "loss": 0.1156, + "step": 28117 + }, + { + "epoch": 77.24725274725274, + "grad_norm": 9.754258155822754, + "learning_rate": 1.1376373626373627e-05, + "loss": 0.2305, + "step": 28118 + }, + { + "epoch": 77.25, + "grad_norm": 16.888851165771484, + "learning_rate": 1.1375e-05, + "loss": 0.3784, + "step": 28119 + }, + { + "epoch": 77.25274725274726, + "grad_norm": 8.787158966064453, + "learning_rate": 1.1373626373626374e-05, + "loss": 0.2304, + "step": 28120 + }, + { + "epoch": 77.25549450549451, + "grad_norm": 3.672260046005249, + "learning_rate": 1.1372252747252749e-05, + "loss": 0.0413, + "step": 28121 + }, + { + "epoch": 77.25824175824175, + "grad_norm": 26.242605209350586, + "learning_rate": 1.1370879120879122e-05, + "loss": 0.7406, + "step": 28122 + }, + { + "epoch": 77.26098901098901, + "grad_norm": 2.3292226791381836, + "learning_rate": 1.1369505494505494e-05, + "loss": 0.0212, + "step": 28123 + }, + { + "epoch": 77.26373626373626, + "grad_norm": 1.5244972705841064, + "learning_rate": 1.1368131868131869e-05, + "loss": 0.0139, + "step": 28124 + }, + { + "epoch": 77.26648351648352, + "grad_norm": 16.695159912109375, + "learning_rate": 1.1366758241758242e-05, + "loss": 0.2858, + "step": 28125 + }, + { + "epoch": 77.26923076923077, + "grad_norm": 30.78032684326172, + "learning_rate": 1.1365384615384616e-05, + "loss": 0.7852, + "step": 28126 + }, + { + "epoch": 77.27197802197803, + "grad_norm": 1.778989553451538, + "learning_rate": 1.136401098901099e-05, + "loss": 0.0236, + "step": 28127 + }, + { + "epoch": 77.27472527472527, + "grad_norm": 7.544405460357666, + "learning_rate": 1.1362637362637363e-05, + "loss": 0.1408, + "step": 28128 + }, + { + "epoch": 77.27747252747253, + "grad_norm": 13.977721214294434, + "learning_rate": 1.1361263736263738e-05, + "loss": 0.2909, + "step": 28129 + }, + { + "epoch": 77.28021978021978, + "grad_norm": 3.2554314136505127, + "learning_rate": 1.135989010989011e-05, + "loss": 0.0411, + "step": 28130 + }, + { + "epoch": 77.28296703296704, + "grad_norm": 10.115612030029297, + "learning_rate": 1.1358516483516483e-05, + "loss": 0.27, + "step": 28131 + }, + { + "epoch": 77.28571428571429, + "grad_norm": 15.737025260925293, + "learning_rate": 1.1357142857142858e-05, + "loss": 0.1912, + "step": 28132 + }, + { + "epoch": 77.28846153846153, + "grad_norm": 16.752534866333008, + "learning_rate": 1.1355769230769231e-05, + "loss": 0.1083, + "step": 28133 + }, + { + "epoch": 77.29120879120879, + "grad_norm": 11.321660041809082, + "learning_rate": 1.1354395604395605e-05, + "loss": 0.1253, + "step": 28134 + }, + { + "epoch": 77.29395604395604, + "grad_norm": 12.9488525390625, + "learning_rate": 1.1353021978021978e-05, + "loss": 0.4854, + "step": 28135 + }, + { + "epoch": 77.2967032967033, + "grad_norm": 10.944698333740234, + "learning_rate": 1.1351648351648352e-05, + "loss": 0.2261, + "step": 28136 + }, + { + "epoch": 77.29945054945055, + "grad_norm": 12.987550735473633, + "learning_rate": 1.1350274725274727e-05, + "loss": 0.1991, + "step": 28137 + }, + { + "epoch": 77.3021978021978, + "grad_norm": 13.673171997070312, + "learning_rate": 1.1348901098901098e-05, + "loss": 0.2605, + "step": 28138 + }, + { + "epoch": 77.30494505494505, + "grad_norm": 24.01987648010254, + "learning_rate": 1.1347527472527474e-05, + "loss": 0.4748, + "step": 28139 + }, + { + "epoch": 77.3076923076923, + "grad_norm": 8.00019359588623, + "learning_rate": 1.1346153846153847e-05, + "loss": 0.151, + "step": 28140 + }, + { + "epoch": 77.31043956043956, + "grad_norm": 9.248379707336426, + "learning_rate": 1.134478021978022e-05, + "loss": 0.1315, + "step": 28141 + }, + { + "epoch": 77.31318681318682, + "grad_norm": 15.16353702545166, + "learning_rate": 1.1343406593406594e-05, + "loss": 0.4853, + "step": 28142 + }, + { + "epoch": 77.31593406593407, + "grad_norm": 12.925561904907227, + "learning_rate": 1.1342032967032967e-05, + "loss": 0.1934, + "step": 28143 + }, + { + "epoch": 77.31868131868131, + "grad_norm": 9.636494636535645, + "learning_rate": 1.1340659340659342e-05, + "loss": 0.177, + "step": 28144 + }, + { + "epoch": 77.32142857142857, + "grad_norm": 20.818742752075195, + "learning_rate": 1.1339285714285714e-05, + "loss": 0.5285, + "step": 28145 + }, + { + "epoch": 77.32417582417582, + "grad_norm": 14.621374130249023, + "learning_rate": 1.1337912087912087e-05, + "loss": 0.2766, + "step": 28146 + }, + { + "epoch": 77.32692307692308, + "grad_norm": 0.6176181435585022, + "learning_rate": 1.1336538461538463e-05, + "loss": 0.0075, + "step": 28147 + }, + { + "epoch": 77.32967032967034, + "grad_norm": 24.84885597229004, + "learning_rate": 1.1335164835164836e-05, + "loss": 0.3531, + "step": 28148 + }, + { + "epoch": 77.33241758241758, + "grad_norm": 11.929476737976074, + "learning_rate": 1.133379120879121e-05, + "loss": 0.1032, + "step": 28149 + }, + { + "epoch": 77.33516483516483, + "grad_norm": 14.415409088134766, + "learning_rate": 1.1332417582417583e-05, + "loss": 0.2172, + "step": 28150 + }, + { + "epoch": 77.33791208791209, + "grad_norm": 12.025885581970215, + "learning_rate": 1.1331043956043956e-05, + "loss": 0.2654, + "step": 28151 + }, + { + "epoch": 77.34065934065934, + "grad_norm": 12.513629913330078, + "learning_rate": 1.1329670329670331e-05, + "loss": 0.1627, + "step": 28152 + }, + { + "epoch": 77.3434065934066, + "grad_norm": 16.523197174072266, + "learning_rate": 1.1328296703296703e-05, + "loss": 0.1886, + "step": 28153 + }, + { + "epoch": 77.34615384615384, + "grad_norm": 17.665790557861328, + "learning_rate": 1.1326923076923078e-05, + "loss": 0.3189, + "step": 28154 + }, + { + "epoch": 77.3489010989011, + "grad_norm": 7.492313385009766, + "learning_rate": 1.1325549450549451e-05, + "loss": 0.091, + "step": 28155 + }, + { + "epoch": 77.35164835164835, + "grad_norm": 11.361276626586914, + "learning_rate": 1.1324175824175825e-05, + "loss": 0.1507, + "step": 28156 + }, + { + "epoch": 77.3543956043956, + "grad_norm": 1.9640862941741943, + "learning_rate": 1.1322802197802198e-05, + "loss": 0.0229, + "step": 28157 + }, + { + "epoch": 77.35714285714286, + "grad_norm": 19.667444229125977, + "learning_rate": 1.1321428571428572e-05, + "loss": 0.5329, + "step": 28158 + }, + { + "epoch": 77.35989010989012, + "grad_norm": 17.579504013061523, + "learning_rate": 1.1320054945054947e-05, + "loss": 0.3059, + "step": 28159 + }, + { + "epoch": 77.36263736263736, + "grad_norm": 18.493515014648438, + "learning_rate": 1.1318681318681319e-05, + "loss": 0.3741, + "step": 28160 + }, + { + "epoch": 77.36538461538461, + "grad_norm": 1.6209077835083008, + "learning_rate": 1.1317307692307692e-05, + "loss": 0.0223, + "step": 28161 + }, + { + "epoch": 77.36813186813187, + "grad_norm": 27.40620231628418, + "learning_rate": 1.1315934065934067e-05, + "loss": 0.4942, + "step": 28162 + }, + { + "epoch": 77.37087912087912, + "grad_norm": 15.064756393432617, + "learning_rate": 1.131456043956044e-05, + "loss": 0.3455, + "step": 28163 + }, + { + "epoch": 77.37362637362638, + "grad_norm": 9.003271102905273, + "learning_rate": 1.1313186813186814e-05, + "loss": 0.1094, + "step": 28164 + }, + { + "epoch": 77.37637362637362, + "grad_norm": 16.187705993652344, + "learning_rate": 1.1311813186813187e-05, + "loss": 0.3501, + "step": 28165 + }, + { + "epoch": 77.37912087912088, + "grad_norm": 16.30055809020996, + "learning_rate": 1.131043956043956e-05, + "loss": 0.1717, + "step": 28166 + }, + { + "epoch": 77.38186813186813, + "grad_norm": 8.720800399780273, + "learning_rate": 1.1309065934065936e-05, + "loss": 0.0976, + "step": 28167 + }, + { + "epoch": 77.38461538461539, + "grad_norm": 6.040255546569824, + "learning_rate": 1.1307692307692307e-05, + "loss": 0.0856, + "step": 28168 + }, + { + "epoch": 77.38736263736264, + "grad_norm": 21.551578521728516, + "learning_rate": 1.1306318681318683e-05, + "loss": 0.7177, + "step": 28169 + }, + { + "epoch": 77.39010989010988, + "grad_norm": 12.927807807922363, + "learning_rate": 1.1304945054945056e-05, + "loss": 0.3064, + "step": 28170 + }, + { + "epoch": 77.39285714285714, + "grad_norm": 21.202423095703125, + "learning_rate": 1.130357142857143e-05, + "loss": 0.628, + "step": 28171 + }, + { + "epoch": 77.3956043956044, + "grad_norm": 10.132420539855957, + "learning_rate": 1.1302197802197803e-05, + "loss": 0.3604, + "step": 28172 + }, + { + "epoch": 77.39835164835165, + "grad_norm": 9.215458869934082, + "learning_rate": 1.1300824175824176e-05, + "loss": 0.2057, + "step": 28173 + }, + { + "epoch": 77.4010989010989, + "grad_norm": 10.882725715637207, + "learning_rate": 1.1299450549450551e-05, + "loss": 0.2974, + "step": 28174 + }, + { + "epoch": 77.40384615384616, + "grad_norm": 0.7149457931518555, + "learning_rate": 1.1298076923076923e-05, + "loss": 0.0127, + "step": 28175 + }, + { + "epoch": 77.4065934065934, + "grad_norm": 10.764575958251953, + "learning_rate": 1.1296703296703296e-05, + "loss": 0.1422, + "step": 28176 + }, + { + "epoch": 77.40934065934066, + "grad_norm": 16.80950927734375, + "learning_rate": 1.1295329670329672e-05, + "loss": 0.5236, + "step": 28177 + }, + { + "epoch": 77.41208791208791, + "grad_norm": 14.230112075805664, + "learning_rate": 1.1293956043956045e-05, + "loss": 0.3601, + "step": 28178 + }, + { + "epoch": 77.41483516483517, + "grad_norm": 12.99996280670166, + "learning_rate": 1.1292582417582418e-05, + "loss": 0.2558, + "step": 28179 + }, + { + "epoch": 77.41758241758242, + "grad_norm": 18.92636489868164, + "learning_rate": 1.1291208791208792e-05, + "loss": 0.3184, + "step": 28180 + }, + { + "epoch": 77.42032967032966, + "grad_norm": 14.482269287109375, + "learning_rate": 1.1289835164835165e-05, + "loss": 0.1938, + "step": 28181 + }, + { + "epoch": 77.42307692307692, + "grad_norm": 11.051648139953613, + "learning_rate": 1.128846153846154e-05, + "loss": 0.2548, + "step": 28182 + }, + { + "epoch": 77.42582417582418, + "grad_norm": 4.941029071807861, + "learning_rate": 1.1287087912087912e-05, + "loss": 0.0565, + "step": 28183 + }, + { + "epoch": 77.42857142857143, + "grad_norm": 11.88016128540039, + "learning_rate": 1.1285714285714285e-05, + "loss": 0.2261, + "step": 28184 + }, + { + "epoch": 77.43131868131869, + "grad_norm": 11.433756828308105, + "learning_rate": 1.128434065934066e-05, + "loss": 0.3492, + "step": 28185 + }, + { + "epoch": 77.43406593406593, + "grad_norm": 11.544577598571777, + "learning_rate": 1.1282967032967034e-05, + "loss": 0.2266, + "step": 28186 + }, + { + "epoch": 77.43681318681318, + "grad_norm": 10.203167915344238, + "learning_rate": 1.1281593406593407e-05, + "loss": 0.1585, + "step": 28187 + }, + { + "epoch": 77.43956043956044, + "grad_norm": 6.112652778625488, + "learning_rate": 1.128021978021978e-05, + "loss": 0.0597, + "step": 28188 + }, + { + "epoch": 77.4423076923077, + "grad_norm": 19.690826416015625, + "learning_rate": 1.1278846153846154e-05, + "loss": 0.5742, + "step": 28189 + }, + { + "epoch": 77.44505494505495, + "grad_norm": 12.852571487426758, + "learning_rate": 1.1277472527472527e-05, + "loss": 0.318, + "step": 28190 + }, + { + "epoch": 77.4478021978022, + "grad_norm": 27.29437828063965, + "learning_rate": 1.1276098901098901e-05, + "loss": 0.5729, + "step": 28191 + }, + { + "epoch": 77.45054945054945, + "grad_norm": 10.420817375183105, + "learning_rate": 1.1274725274725276e-05, + "loss": 0.235, + "step": 28192 + }, + { + "epoch": 77.4532967032967, + "grad_norm": 17.092607498168945, + "learning_rate": 1.127335164835165e-05, + "loss": 0.5399, + "step": 28193 + }, + { + "epoch": 77.45604395604396, + "grad_norm": 20.68836212158203, + "learning_rate": 1.1271978021978021e-05, + "loss": 0.4467, + "step": 28194 + }, + { + "epoch": 77.45879120879121, + "grad_norm": 10.762768745422363, + "learning_rate": 1.1270604395604396e-05, + "loss": 0.2457, + "step": 28195 + }, + { + "epoch": 77.46153846153847, + "grad_norm": 19.672266006469727, + "learning_rate": 1.126923076923077e-05, + "loss": 0.4602, + "step": 28196 + }, + { + "epoch": 77.46428571428571, + "grad_norm": 17.62900733947754, + "learning_rate": 1.1267857142857145e-05, + "loss": 0.5762, + "step": 28197 + }, + { + "epoch": 77.46703296703296, + "grad_norm": 8.077208518981934, + "learning_rate": 1.1266483516483516e-05, + "loss": 0.0854, + "step": 28198 + }, + { + "epoch": 77.46978021978022, + "grad_norm": 11.01818561553955, + "learning_rate": 1.126510989010989e-05, + "loss": 0.2467, + "step": 28199 + }, + { + "epoch": 77.47252747252747, + "grad_norm": 10.168439865112305, + "learning_rate": 1.1263736263736265e-05, + "loss": 0.2098, + "step": 28200 + }, + { + "epoch": 77.47527472527473, + "grad_norm": 6.273926258087158, + "learning_rate": 1.1262362637362638e-05, + "loss": 0.0358, + "step": 28201 + }, + { + "epoch": 77.47802197802197, + "grad_norm": 18.639026641845703, + "learning_rate": 1.1260989010989012e-05, + "loss": 0.2742, + "step": 28202 + }, + { + "epoch": 77.48076923076923, + "grad_norm": 15.513934135437012, + "learning_rate": 1.1259615384615385e-05, + "loss": 0.2092, + "step": 28203 + }, + { + "epoch": 77.48351648351648, + "grad_norm": 7.367504596710205, + "learning_rate": 1.1258241758241759e-05, + "loss": 0.1646, + "step": 28204 + }, + { + "epoch": 77.48626373626374, + "grad_norm": 15.495489120483398, + "learning_rate": 1.1256868131868132e-05, + "loss": 0.4169, + "step": 28205 + }, + { + "epoch": 77.48901098901099, + "grad_norm": 8.59018325805664, + "learning_rate": 1.1255494505494505e-05, + "loss": 0.1928, + "step": 28206 + }, + { + "epoch": 77.49175824175825, + "grad_norm": 21.964935302734375, + "learning_rate": 1.125412087912088e-05, + "loss": 0.4142, + "step": 28207 + }, + { + "epoch": 77.49450549450549, + "grad_norm": 32.53031921386719, + "learning_rate": 1.1252747252747254e-05, + "loss": 0.6271, + "step": 28208 + }, + { + "epoch": 77.49725274725274, + "grad_norm": 2.6208043098449707, + "learning_rate": 1.1251373626373626e-05, + "loss": 0.0383, + "step": 28209 + }, + { + "epoch": 77.5, + "grad_norm": 6.2729926109313965, + "learning_rate": 1.125e-05, + "loss": 0.1347, + "step": 28210 + }, + { + "epoch": 77.50274725274726, + "grad_norm": 23.3748722076416, + "learning_rate": 1.1248626373626374e-05, + "loss": 0.8193, + "step": 28211 + }, + { + "epoch": 77.50549450549451, + "grad_norm": 13.429759979248047, + "learning_rate": 1.1247252747252748e-05, + "loss": 0.3843, + "step": 28212 + }, + { + "epoch": 77.50824175824175, + "grad_norm": 6.026500225067139, + "learning_rate": 1.1245879120879121e-05, + "loss": 0.1473, + "step": 28213 + }, + { + "epoch": 77.51098901098901, + "grad_norm": 5.158093452453613, + "learning_rate": 1.1244505494505494e-05, + "loss": 0.1188, + "step": 28214 + }, + { + "epoch": 77.51373626373626, + "grad_norm": 3.733670234680176, + "learning_rate": 1.124313186813187e-05, + "loss": 0.0361, + "step": 28215 + }, + { + "epoch": 77.51648351648352, + "grad_norm": 10.570321083068848, + "learning_rate": 1.1241758241758243e-05, + "loss": 0.1161, + "step": 28216 + }, + { + "epoch": 77.51923076923077, + "grad_norm": 24.867265701293945, + "learning_rate": 1.1240384615384616e-05, + "loss": 0.5781, + "step": 28217 + }, + { + "epoch": 77.52197802197803, + "grad_norm": 12.867334365844727, + "learning_rate": 1.123901098901099e-05, + "loss": 0.3213, + "step": 28218 + }, + { + "epoch": 77.52472527472527, + "grad_norm": 5.5539937019348145, + "learning_rate": 1.1237637362637363e-05, + "loss": 0.0588, + "step": 28219 + }, + { + "epoch": 77.52747252747253, + "grad_norm": 8.051011085510254, + "learning_rate": 1.1236263736263736e-05, + "loss": 0.1434, + "step": 28220 + }, + { + "epoch": 77.53021978021978, + "grad_norm": 9.262422561645508, + "learning_rate": 1.123489010989011e-05, + "loss": 0.1882, + "step": 28221 + }, + { + "epoch": 77.53296703296704, + "grad_norm": 15.025653839111328, + "learning_rate": 1.1233516483516485e-05, + "loss": 0.3312, + "step": 28222 + }, + { + "epoch": 77.53571428571429, + "grad_norm": 13.177765846252441, + "learning_rate": 1.1232142857142858e-05, + "loss": 0.2389, + "step": 28223 + }, + { + "epoch": 77.53846153846153, + "grad_norm": 14.939241409301758, + "learning_rate": 1.123076923076923e-05, + "loss": 0.3817, + "step": 28224 + }, + { + "epoch": 77.54120879120879, + "grad_norm": 19.622356414794922, + "learning_rate": 1.1229395604395605e-05, + "loss": 0.3116, + "step": 28225 + }, + { + "epoch": 77.54395604395604, + "grad_norm": 8.54556941986084, + "learning_rate": 1.1228021978021979e-05, + "loss": 0.1625, + "step": 28226 + }, + { + "epoch": 77.5467032967033, + "grad_norm": 6.093624114990234, + "learning_rate": 1.1226648351648352e-05, + "loss": 0.1058, + "step": 28227 + }, + { + "epoch": 77.54945054945055, + "grad_norm": 8.107036590576172, + "learning_rate": 1.1225274725274725e-05, + "loss": 0.1251, + "step": 28228 + }, + { + "epoch": 77.5521978021978, + "grad_norm": 12.12443733215332, + "learning_rate": 1.1223901098901099e-05, + "loss": 0.2275, + "step": 28229 + }, + { + "epoch": 77.55494505494505, + "grad_norm": 12.520650863647461, + "learning_rate": 1.1222527472527474e-05, + "loss": 0.193, + "step": 28230 + }, + { + "epoch": 77.5576923076923, + "grad_norm": 4.745795726776123, + "learning_rate": 1.1221153846153847e-05, + "loss": 0.0498, + "step": 28231 + }, + { + "epoch": 77.56043956043956, + "grad_norm": 9.775455474853516, + "learning_rate": 1.121978021978022e-05, + "loss": 0.1209, + "step": 28232 + }, + { + "epoch": 77.56318681318682, + "grad_norm": 20.697433471679688, + "learning_rate": 1.1218406593406594e-05, + "loss": 0.2846, + "step": 28233 + }, + { + "epoch": 77.56593406593407, + "grad_norm": 6.376312255859375, + "learning_rate": 1.1217032967032968e-05, + "loss": 0.0812, + "step": 28234 + }, + { + "epoch": 77.56868131868131, + "grad_norm": 4.986549377441406, + "learning_rate": 1.1215659340659341e-05, + "loss": 0.1247, + "step": 28235 + }, + { + "epoch": 77.57142857142857, + "grad_norm": 6.60926628112793, + "learning_rate": 1.1214285714285714e-05, + "loss": 0.1423, + "step": 28236 + }, + { + "epoch": 77.57417582417582, + "grad_norm": 2.790201425552368, + "learning_rate": 1.121291208791209e-05, + "loss": 0.0291, + "step": 28237 + }, + { + "epoch": 77.57692307692308, + "grad_norm": 4.4648213386535645, + "learning_rate": 1.1211538461538463e-05, + "loss": 0.0643, + "step": 28238 + }, + { + "epoch": 77.57967032967034, + "grad_norm": 7.6108012199401855, + "learning_rate": 1.1210164835164835e-05, + "loss": 0.1995, + "step": 28239 + }, + { + "epoch": 77.58241758241758, + "grad_norm": 8.022611618041992, + "learning_rate": 1.120879120879121e-05, + "loss": 0.2424, + "step": 28240 + }, + { + "epoch": 77.58516483516483, + "grad_norm": 11.157081604003906, + "learning_rate": 1.1207417582417583e-05, + "loss": 0.1775, + "step": 28241 + }, + { + "epoch": 77.58791208791209, + "grad_norm": 5.389464855194092, + "learning_rate": 1.1206043956043957e-05, + "loss": 0.1274, + "step": 28242 + }, + { + "epoch": 77.59065934065934, + "grad_norm": 6.506629943847656, + "learning_rate": 1.120467032967033e-05, + "loss": 0.1533, + "step": 28243 + }, + { + "epoch": 77.5934065934066, + "grad_norm": 18.278522491455078, + "learning_rate": 1.1203296703296703e-05, + "loss": 0.3121, + "step": 28244 + }, + { + "epoch": 77.59615384615384, + "grad_norm": 6.556300163269043, + "learning_rate": 1.1201923076923078e-05, + "loss": 0.1293, + "step": 28245 + }, + { + "epoch": 77.5989010989011, + "grad_norm": 25.990964889526367, + "learning_rate": 1.1200549450549452e-05, + "loss": 0.7662, + "step": 28246 + }, + { + "epoch": 77.60164835164835, + "grad_norm": 11.656476974487305, + "learning_rate": 1.1199175824175824e-05, + "loss": 0.3789, + "step": 28247 + }, + { + "epoch": 77.6043956043956, + "grad_norm": 2.5847325325012207, + "learning_rate": 1.1197802197802199e-05, + "loss": 0.0202, + "step": 28248 + }, + { + "epoch": 77.60714285714286, + "grad_norm": 14.809316635131836, + "learning_rate": 1.1196428571428572e-05, + "loss": 0.3511, + "step": 28249 + }, + { + "epoch": 77.60989010989012, + "grad_norm": 6.541518688201904, + "learning_rate": 1.1195054945054945e-05, + "loss": 0.099, + "step": 28250 + }, + { + "epoch": 77.61263736263736, + "grad_norm": 11.884820938110352, + "learning_rate": 1.1193681318681319e-05, + "loss": 0.2388, + "step": 28251 + }, + { + "epoch": 77.61538461538461, + "grad_norm": 16.846628189086914, + "learning_rate": 1.1192307692307692e-05, + "loss": 0.3049, + "step": 28252 + }, + { + "epoch": 77.61813186813187, + "grad_norm": 16.0613956451416, + "learning_rate": 1.1190934065934067e-05, + "loss": 0.6013, + "step": 28253 + }, + { + "epoch": 77.62087912087912, + "grad_norm": 5.050336837768555, + "learning_rate": 1.1189560439560439e-05, + "loss": 0.0697, + "step": 28254 + }, + { + "epoch": 77.62362637362638, + "grad_norm": 2.796792984008789, + "learning_rate": 1.1188186813186814e-05, + "loss": 0.0293, + "step": 28255 + }, + { + "epoch": 77.62637362637362, + "grad_norm": 12.05455493927002, + "learning_rate": 1.1186813186813188e-05, + "loss": 0.2697, + "step": 28256 + }, + { + "epoch": 77.62912087912088, + "grad_norm": 3.2954905033111572, + "learning_rate": 1.1185439560439561e-05, + "loss": 0.0394, + "step": 28257 + }, + { + "epoch": 77.63186813186813, + "grad_norm": 15.057764053344727, + "learning_rate": 1.1184065934065934e-05, + "loss": 0.3942, + "step": 28258 + }, + { + "epoch": 77.63461538461539, + "grad_norm": 14.047812461853027, + "learning_rate": 1.1182692307692308e-05, + "loss": 0.2347, + "step": 28259 + }, + { + "epoch": 77.63736263736264, + "grad_norm": 7.94659423828125, + "learning_rate": 1.1181318681318683e-05, + "loss": 0.1492, + "step": 28260 + }, + { + "epoch": 77.64010989010988, + "grad_norm": 3.8835408687591553, + "learning_rate": 1.1179945054945055e-05, + "loss": 0.051, + "step": 28261 + }, + { + "epoch": 77.64285714285714, + "grad_norm": 14.372825622558594, + "learning_rate": 1.1178571428571428e-05, + "loss": 0.3004, + "step": 28262 + }, + { + "epoch": 77.6456043956044, + "grad_norm": 12.063153266906738, + "learning_rate": 1.1177197802197803e-05, + "loss": 0.168, + "step": 28263 + }, + { + "epoch": 77.64835164835165, + "grad_norm": 14.175530433654785, + "learning_rate": 1.1175824175824177e-05, + "loss": 0.2117, + "step": 28264 + }, + { + "epoch": 77.6510989010989, + "grad_norm": 9.852896690368652, + "learning_rate": 1.117445054945055e-05, + "loss": 0.1547, + "step": 28265 + }, + { + "epoch": 77.65384615384616, + "grad_norm": 46.06901550292969, + "learning_rate": 1.1173076923076923e-05, + "loss": 1.0803, + "step": 28266 + }, + { + "epoch": 77.6565934065934, + "grad_norm": 21.168407440185547, + "learning_rate": 1.1171703296703297e-05, + "loss": 0.2934, + "step": 28267 + }, + { + "epoch": 77.65934065934066, + "grad_norm": 5.332469463348389, + "learning_rate": 1.1170329670329672e-05, + "loss": 0.0511, + "step": 28268 + }, + { + "epoch": 77.66208791208791, + "grad_norm": 13.910080909729004, + "learning_rate": 1.1168956043956044e-05, + "loss": 0.2859, + "step": 28269 + }, + { + "epoch": 77.66483516483517, + "grad_norm": 9.370195388793945, + "learning_rate": 1.1167582417582419e-05, + "loss": 0.0779, + "step": 28270 + }, + { + "epoch": 77.66758241758242, + "grad_norm": 14.497406005859375, + "learning_rate": 1.1166208791208792e-05, + "loss": 0.2574, + "step": 28271 + }, + { + "epoch": 77.67032967032966, + "grad_norm": 13.382574081420898, + "learning_rate": 1.1164835164835165e-05, + "loss": 0.1624, + "step": 28272 + }, + { + "epoch": 77.67307692307692, + "grad_norm": 19.44577980041504, + "learning_rate": 1.1163461538461539e-05, + "loss": 0.4639, + "step": 28273 + }, + { + "epoch": 77.67582417582418, + "grad_norm": 5.561519622802734, + "learning_rate": 1.1162087912087912e-05, + "loss": 0.0892, + "step": 28274 + }, + { + "epoch": 77.67857142857143, + "grad_norm": 7.169943809509277, + "learning_rate": 1.1160714285714287e-05, + "loss": 0.1031, + "step": 28275 + }, + { + "epoch": 77.68131868131869, + "grad_norm": 9.941286087036133, + "learning_rate": 1.1159340659340659e-05, + "loss": 0.1465, + "step": 28276 + }, + { + "epoch": 77.68406593406593, + "grad_norm": 11.416236877441406, + "learning_rate": 1.1157967032967033e-05, + "loss": 0.2529, + "step": 28277 + }, + { + "epoch": 77.68681318681318, + "grad_norm": 14.377035140991211, + "learning_rate": 1.1156593406593408e-05, + "loss": 0.2492, + "step": 28278 + }, + { + "epoch": 77.68956043956044, + "grad_norm": 10.105451583862305, + "learning_rate": 1.1155219780219781e-05, + "loss": 0.3546, + "step": 28279 + }, + { + "epoch": 77.6923076923077, + "grad_norm": 15.721519470214844, + "learning_rate": 1.1153846153846154e-05, + "loss": 0.3228, + "step": 28280 + }, + { + "epoch": 77.69505494505495, + "grad_norm": 16.89202880859375, + "learning_rate": 1.1152472527472528e-05, + "loss": 0.4144, + "step": 28281 + }, + { + "epoch": 77.6978021978022, + "grad_norm": 24.964073181152344, + "learning_rate": 1.1151098901098901e-05, + "loss": 0.9628, + "step": 28282 + }, + { + "epoch": 77.70054945054945, + "grad_norm": 17.729869842529297, + "learning_rate": 1.1149725274725276e-05, + "loss": 0.2538, + "step": 28283 + }, + { + "epoch": 77.7032967032967, + "grad_norm": 2.38802170753479, + "learning_rate": 1.1148351648351648e-05, + "loss": 0.0277, + "step": 28284 + }, + { + "epoch": 77.70604395604396, + "grad_norm": 11.625123023986816, + "learning_rate": 1.1146978021978023e-05, + "loss": 0.4275, + "step": 28285 + }, + { + "epoch": 77.70879120879121, + "grad_norm": 16.304737091064453, + "learning_rate": 1.1145604395604397e-05, + "loss": 0.4536, + "step": 28286 + }, + { + "epoch": 77.71153846153847, + "grad_norm": 9.578475952148438, + "learning_rate": 1.114423076923077e-05, + "loss": 0.1951, + "step": 28287 + }, + { + "epoch": 77.71428571428571, + "grad_norm": 6.666933059692383, + "learning_rate": 1.1142857142857143e-05, + "loss": 0.1527, + "step": 28288 + }, + { + "epoch": 77.71703296703296, + "grad_norm": 4.671026229858398, + "learning_rate": 1.1141483516483517e-05, + "loss": 0.1041, + "step": 28289 + }, + { + "epoch": 77.71978021978022, + "grad_norm": 4.333520889282227, + "learning_rate": 1.1140109890109892e-05, + "loss": 0.1132, + "step": 28290 + }, + { + "epoch": 77.72252747252747, + "grad_norm": 2.718353509902954, + "learning_rate": 1.1138736263736264e-05, + "loss": 0.0399, + "step": 28291 + }, + { + "epoch": 77.72527472527473, + "grad_norm": 4.060091018676758, + "learning_rate": 1.1137362637362637e-05, + "loss": 0.0626, + "step": 28292 + }, + { + "epoch": 77.72802197802197, + "grad_norm": 4.398006439208984, + "learning_rate": 1.1135989010989012e-05, + "loss": 0.0542, + "step": 28293 + }, + { + "epoch": 77.73076923076923, + "grad_norm": 10.518853187561035, + "learning_rate": 1.1134615384615386e-05, + "loss": 0.1448, + "step": 28294 + }, + { + "epoch": 77.73351648351648, + "grad_norm": 8.71200942993164, + "learning_rate": 1.1133241758241759e-05, + "loss": 0.1801, + "step": 28295 + }, + { + "epoch": 77.73626373626374, + "grad_norm": 22.38730239868164, + "learning_rate": 1.1131868131868132e-05, + "loss": 0.4806, + "step": 28296 + }, + { + "epoch": 77.73901098901099, + "grad_norm": 5.8858232498168945, + "learning_rate": 1.1130494505494506e-05, + "loss": 0.158, + "step": 28297 + }, + { + "epoch": 77.74175824175825, + "grad_norm": 13.420526504516602, + "learning_rate": 1.112912087912088e-05, + "loss": 0.2795, + "step": 28298 + }, + { + "epoch": 77.74450549450549, + "grad_norm": 19.030921936035156, + "learning_rate": 1.1127747252747253e-05, + "loss": 0.4491, + "step": 28299 + }, + { + "epoch": 77.74725274725274, + "grad_norm": 6.9449591636657715, + "learning_rate": 1.1126373626373626e-05, + "loss": 0.1381, + "step": 28300 + }, + { + "epoch": 77.75, + "grad_norm": 4.881898403167725, + "learning_rate": 1.1125000000000001e-05, + "loss": 0.0556, + "step": 28301 + }, + { + "epoch": 77.75274725274726, + "grad_norm": 4.180384159088135, + "learning_rate": 1.1123626373626374e-05, + "loss": 0.0706, + "step": 28302 + }, + { + "epoch": 77.75549450549451, + "grad_norm": 2.794762134552002, + "learning_rate": 1.1122252747252748e-05, + "loss": 0.05, + "step": 28303 + }, + { + "epoch": 77.75824175824175, + "grad_norm": 8.02059268951416, + "learning_rate": 1.1120879120879121e-05, + "loss": 0.2164, + "step": 28304 + }, + { + "epoch": 77.76098901098901, + "grad_norm": 14.051980972290039, + "learning_rate": 1.1119505494505495e-05, + "loss": 0.218, + "step": 28305 + }, + { + "epoch": 77.76373626373626, + "grad_norm": 6.011813163757324, + "learning_rate": 1.1118131868131868e-05, + "loss": 0.1133, + "step": 28306 + }, + { + "epoch": 77.76648351648352, + "grad_norm": 9.075386047363281, + "learning_rate": 1.1116758241758241e-05, + "loss": 0.165, + "step": 28307 + }, + { + "epoch": 77.76923076923077, + "grad_norm": 17.14100456237793, + "learning_rate": 1.1115384615384617e-05, + "loss": 0.3454, + "step": 28308 + }, + { + "epoch": 77.77197802197803, + "grad_norm": 13.102543830871582, + "learning_rate": 1.111401098901099e-05, + "loss": 0.216, + "step": 28309 + }, + { + "epoch": 77.77472527472527, + "grad_norm": 21.116735458374023, + "learning_rate": 1.1112637362637362e-05, + "loss": 0.3219, + "step": 28310 + }, + { + "epoch": 77.77747252747253, + "grad_norm": 15.85663890838623, + "learning_rate": 1.1111263736263737e-05, + "loss": 0.2576, + "step": 28311 + }, + { + "epoch": 77.78021978021978, + "grad_norm": 9.94375228881836, + "learning_rate": 1.110989010989011e-05, + "loss": 0.1268, + "step": 28312 + }, + { + "epoch": 77.78296703296704, + "grad_norm": 13.918717384338379, + "learning_rate": 1.1108516483516485e-05, + "loss": 0.4391, + "step": 28313 + }, + { + "epoch": 77.78571428571429, + "grad_norm": 9.654563903808594, + "learning_rate": 1.1107142857142857e-05, + "loss": 0.155, + "step": 28314 + }, + { + "epoch": 77.78846153846153, + "grad_norm": 3.4856760501861572, + "learning_rate": 1.110576923076923e-05, + "loss": 0.0603, + "step": 28315 + }, + { + "epoch": 77.79120879120879, + "grad_norm": 10.594024658203125, + "learning_rate": 1.1104395604395606e-05, + "loss": 0.1386, + "step": 28316 + }, + { + "epoch": 77.79395604395604, + "grad_norm": 22.125619888305664, + "learning_rate": 1.1103021978021979e-05, + "loss": 0.6109, + "step": 28317 + }, + { + "epoch": 77.7967032967033, + "grad_norm": 14.464761734008789, + "learning_rate": 1.1101648351648352e-05, + "loss": 0.1521, + "step": 28318 + }, + { + "epoch": 77.79945054945055, + "grad_norm": 21.18519401550293, + "learning_rate": 1.1100274725274726e-05, + "loss": 0.3234, + "step": 28319 + }, + { + "epoch": 77.8021978021978, + "grad_norm": 11.756026268005371, + "learning_rate": 1.10989010989011e-05, + "loss": 0.1469, + "step": 28320 + }, + { + "epoch": 77.80494505494505, + "grad_norm": 14.63730525970459, + "learning_rate": 1.1097527472527473e-05, + "loss": 0.2412, + "step": 28321 + }, + { + "epoch": 77.8076923076923, + "grad_norm": 21.26866340637207, + "learning_rate": 1.1096153846153846e-05, + "loss": 0.4456, + "step": 28322 + }, + { + "epoch": 77.81043956043956, + "grad_norm": 4.590928077697754, + "learning_rate": 1.1094780219780221e-05, + "loss": 0.0659, + "step": 28323 + }, + { + "epoch": 77.81318681318682, + "grad_norm": 11.335259437561035, + "learning_rate": 1.1093406593406594e-05, + "loss": 0.1638, + "step": 28324 + }, + { + "epoch": 77.81593406593407, + "grad_norm": 14.322003364562988, + "learning_rate": 1.1092032967032966e-05, + "loss": 0.2754, + "step": 28325 + }, + { + "epoch": 77.81868131868131, + "grad_norm": 14.713507652282715, + "learning_rate": 1.1090659340659341e-05, + "loss": 0.2919, + "step": 28326 + }, + { + "epoch": 77.82142857142857, + "grad_norm": 20.96221351623535, + "learning_rate": 1.1089285714285715e-05, + "loss": 0.1956, + "step": 28327 + }, + { + "epoch": 77.82417582417582, + "grad_norm": 15.71528148651123, + "learning_rate": 1.108791208791209e-05, + "loss": 0.2673, + "step": 28328 + }, + { + "epoch": 77.82692307692308, + "grad_norm": 10.54509162902832, + "learning_rate": 1.1086538461538462e-05, + "loss": 0.1902, + "step": 28329 + }, + { + "epoch": 77.82967032967034, + "grad_norm": 26.074970245361328, + "learning_rate": 1.1085164835164835e-05, + "loss": 0.7137, + "step": 28330 + }, + { + "epoch": 77.83241758241758, + "grad_norm": 11.836885452270508, + "learning_rate": 1.108379120879121e-05, + "loss": 0.2105, + "step": 28331 + }, + { + "epoch": 77.83516483516483, + "grad_norm": 12.156384468078613, + "learning_rate": 1.1082417582417583e-05, + "loss": 0.1217, + "step": 28332 + }, + { + "epoch": 77.83791208791209, + "grad_norm": 12.11740779876709, + "learning_rate": 1.1081043956043957e-05, + "loss": 0.1019, + "step": 28333 + }, + { + "epoch": 77.84065934065934, + "grad_norm": 10.340848922729492, + "learning_rate": 1.107967032967033e-05, + "loss": 0.1441, + "step": 28334 + }, + { + "epoch": 77.8434065934066, + "grad_norm": 5.969644546508789, + "learning_rate": 1.1078296703296704e-05, + "loss": 0.1189, + "step": 28335 + }, + { + "epoch": 77.84615384615384, + "grad_norm": 13.867819786071777, + "learning_rate": 1.1076923076923077e-05, + "loss": 0.2976, + "step": 28336 + }, + { + "epoch": 77.8489010989011, + "grad_norm": 12.78471851348877, + "learning_rate": 1.107554945054945e-05, + "loss": 0.222, + "step": 28337 + }, + { + "epoch": 77.85164835164835, + "grad_norm": 7.370469570159912, + "learning_rate": 1.1074175824175826e-05, + "loss": 0.2406, + "step": 28338 + }, + { + "epoch": 77.8543956043956, + "grad_norm": 9.537551879882812, + "learning_rate": 1.1072802197802199e-05, + "loss": 0.1536, + "step": 28339 + }, + { + "epoch": 77.85714285714286, + "grad_norm": 14.240036010742188, + "learning_rate": 1.107142857142857e-05, + "loss": 0.3771, + "step": 28340 + }, + { + "epoch": 77.85989010989012, + "grad_norm": 13.585836410522461, + "learning_rate": 1.1070054945054946e-05, + "loss": 0.1881, + "step": 28341 + }, + { + "epoch": 77.86263736263736, + "grad_norm": 11.17854118347168, + "learning_rate": 1.106868131868132e-05, + "loss": 0.217, + "step": 28342 + }, + { + "epoch": 77.86538461538461, + "grad_norm": 3.9933507442474365, + "learning_rate": 1.1067307692307694e-05, + "loss": 0.0788, + "step": 28343 + }, + { + "epoch": 77.86813186813187, + "grad_norm": 2.1797075271606445, + "learning_rate": 1.1065934065934066e-05, + "loss": 0.0397, + "step": 28344 + }, + { + "epoch": 77.87087912087912, + "grad_norm": 7.31032657623291, + "learning_rate": 1.106456043956044e-05, + "loss": 0.1455, + "step": 28345 + }, + { + "epoch": 77.87362637362638, + "grad_norm": 21.038265228271484, + "learning_rate": 1.1063186813186815e-05, + "loss": 0.1926, + "step": 28346 + }, + { + "epoch": 77.87637362637362, + "grad_norm": 12.137332916259766, + "learning_rate": 1.1061813186813188e-05, + "loss": 0.2726, + "step": 28347 + }, + { + "epoch": 77.87912087912088, + "grad_norm": 12.660771369934082, + "learning_rate": 1.1060439560439561e-05, + "loss": 0.2947, + "step": 28348 + }, + { + "epoch": 77.88186813186813, + "grad_norm": 10.043153762817383, + "learning_rate": 1.1059065934065935e-05, + "loss": 0.1802, + "step": 28349 + }, + { + "epoch": 77.88461538461539, + "grad_norm": 13.211250305175781, + "learning_rate": 1.1057692307692308e-05, + "loss": 0.2554, + "step": 28350 + }, + { + "epoch": 77.88736263736264, + "grad_norm": 19.05400276184082, + "learning_rate": 1.1056318681318682e-05, + "loss": 0.1857, + "step": 28351 + }, + { + "epoch": 77.89010989010988, + "grad_norm": 6.571267127990723, + "learning_rate": 1.1054945054945055e-05, + "loss": 0.1472, + "step": 28352 + }, + { + "epoch": 77.89285714285714, + "grad_norm": 8.574284553527832, + "learning_rate": 1.1053571428571428e-05, + "loss": 0.1129, + "step": 28353 + }, + { + "epoch": 77.8956043956044, + "grad_norm": 16.899616241455078, + "learning_rate": 1.1052197802197803e-05, + "loss": 0.4299, + "step": 28354 + }, + { + "epoch": 77.89835164835165, + "grad_norm": 4.803437232971191, + "learning_rate": 1.1050824175824175e-05, + "loss": 0.0646, + "step": 28355 + }, + { + "epoch": 77.9010989010989, + "grad_norm": 9.0137300491333, + "learning_rate": 1.104945054945055e-05, + "loss": 0.1346, + "step": 28356 + }, + { + "epoch": 77.90384615384616, + "grad_norm": 11.952821731567383, + "learning_rate": 1.1048076923076924e-05, + "loss": 0.2474, + "step": 28357 + }, + { + "epoch": 77.9065934065934, + "grad_norm": 8.620626449584961, + "learning_rate": 1.1046703296703297e-05, + "loss": 0.1488, + "step": 28358 + }, + { + "epoch": 77.90934065934066, + "grad_norm": 12.103801727294922, + "learning_rate": 1.104532967032967e-05, + "loss": 0.2492, + "step": 28359 + }, + { + "epoch": 77.91208791208791, + "grad_norm": 17.921367645263672, + "learning_rate": 1.1043956043956044e-05, + "loss": 0.2925, + "step": 28360 + }, + { + "epoch": 77.91483516483517, + "grad_norm": 25.720945358276367, + "learning_rate": 1.1042582417582419e-05, + "loss": 0.5434, + "step": 28361 + }, + { + "epoch": 77.91758241758242, + "grad_norm": 7.420403003692627, + "learning_rate": 1.1041208791208792e-05, + "loss": 0.0647, + "step": 28362 + }, + { + "epoch": 77.92032967032966, + "grad_norm": 10.061771392822266, + "learning_rate": 1.1039835164835164e-05, + "loss": 0.2729, + "step": 28363 + }, + { + "epoch": 77.92307692307692, + "grad_norm": 12.672178268432617, + "learning_rate": 1.103846153846154e-05, + "loss": 0.1602, + "step": 28364 + }, + { + "epoch": 77.92582417582418, + "grad_norm": 8.621257781982422, + "learning_rate": 1.1037087912087913e-05, + "loss": 0.2137, + "step": 28365 + }, + { + "epoch": 77.92857142857143, + "grad_norm": 14.97386646270752, + "learning_rate": 1.1035714285714286e-05, + "loss": 0.1361, + "step": 28366 + }, + { + "epoch": 77.93131868131869, + "grad_norm": 3.597400665283203, + "learning_rate": 1.103434065934066e-05, + "loss": 0.0383, + "step": 28367 + }, + { + "epoch": 77.93406593406593, + "grad_norm": 10.083992004394531, + "learning_rate": 1.1032967032967033e-05, + "loss": 0.2179, + "step": 28368 + }, + { + "epoch": 77.93681318681318, + "grad_norm": 4.799915790557861, + "learning_rate": 1.1031593406593408e-05, + "loss": 0.097, + "step": 28369 + }, + { + "epoch": 77.93956043956044, + "grad_norm": 17.925064086914062, + "learning_rate": 1.103021978021978e-05, + "loss": 0.4673, + "step": 28370 + }, + { + "epoch": 77.9423076923077, + "grad_norm": 12.155341148376465, + "learning_rate": 1.1028846153846155e-05, + "loss": 0.225, + "step": 28371 + }, + { + "epoch": 77.94505494505495, + "grad_norm": 12.366073608398438, + "learning_rate": 1.1027472527472528e-05, + "loss": 0.2518, + "step": 28372 + }, + { + "epoch": 77.9478021978022, + "grad_norm": 4.580443859100342, + "learning_rate": 1.1026098901098902e-05, + "loss": 0.0664, + "step": 28373 + }, + { + "epoch": 77.95054945054945, + "grad_norm": 3.762528419494629, + "learning_rate": 1.1024725274725275e-05, + "loss": 0.0305, + "step": 28374 + }, + { + "epoch": 77.9532967032967, + "grad_norm": 2.745556592941284, + "learning_rate": 1.1023351648351648e-05, + "loss": 0.0211, + "step": 28375 + }, + { + "epoch": 77.95604395604396, + "grad_norm": 7.316079616546631, + "learning_rate": 1.1021978021978024e-05, + "loss": 0.2381, + "step": 28376 + }, + { + "epoch": 77.95879120879121, + "grad_norm": 20.120302200317383, + "learning_rate": 1.1020604395604397e-05, + "loss": 0.5938, + "step": 28377 + }, + { + "epoch": 77.96153846153847, + "grad_norm": 13.973296165466309, + "learning_rate": 1.1019230769230769e-05, + "loss": 0.1826, + "step": 28378 + }, + { + "epoch": 77.96428571428571, + "grad_norm": 3.9423177242279053, + "learning_rate": 1.1017857142857144e-05, + "loss": 0.0665, + "step": 28379 + }, + { + "epoch": 77.96703296703296, + "grad_norm": 7.634961128234863, + "learning_rate": 1.1016483516483517e-05, + "loss": 0.1787, + "step": 28380 + }, + { + "epoch": 77.96978021978022, + "grad_norm": 16.648984909057617, + "learning_rate": 1.101510989010989e-05, + "loss": 0.2889, + "step": 28381 + }, + { + "epoch": 77.97252747252747, + "grad_norm": 12.685479164123535, + "learning_rate": 1.1013736263736264e-05, + "loss": 0.123, + "step": 28382 + }, + { + "epoch": 77.97527472527473, + "grad_norm": 3.728074312210083, + "learning_rate": 1.1012362637362637e-05, + "loss": 0.0607, + "step": 28383 + }, + { + "epoch": 77.97802197802197, + "grad_norm": 16.775373458862305, + "learning_rate": 1.1010989010989012e-05, + "loss": 0.5657, + "step": 28384 + }, + { + "epoch": 77.98076923076923, + "grad_norm": 7.561381816864014, + "learning_rate": 1.1009615384615384e-05, + "loss": 0.0886, + "step": 28385 + }, + { + "epoch": 77.98351648351648, + "grad_norm": 5.248262882232666, + "learning_rate": 1.100824175824176e-05, + "loss": 0.0607, + "step": 28386 + }, + { + "epoch": 77.98626373626374, + "grad_norm": 11.7221040725708, + "learning_rate": 1.1006868131868133e-05, + "loss": 0.1509, + "step": 28387 + }, + { + "epoch": 77.98901098901099, + "grad_norm": 7.768197536468506, + "learning_rate": 1.1005494505494506e-05, + "loss": 0.2101, + "step": 28388 + }, + { + "epoch": 77.99175824175825, + "grad_norm": 16.883344650268555, + "learning_rate": 1.100412087912088e-05, + "loss": 0.4034, + "step": 28389 + }, + { + "epoch": 77.99450549450549, + "grad_norm": 12.4401216506958, + "learning_rate": 1.1002747252747253e-05, + "loss": 0.408, + "step": 28390 + }, + { + "epoch": 77.99725274725274, + "grad_norm": 6.5413360595703125, + "learning_rate": 1.1001373626373628e-05, + "loss": 0.1557, + "step": 28391 + }, + { + "epoch": 78.0, + "grad_norm": 32.61581039428711, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.3569, + "step": 28392 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.7741046831955923, + "eval_f1": 0.7849982652196468, + "eval_f1_DuraRiadoRio_64x64": 0.6844444444444444, + "eval_f1_Mole_64x64": 0.8725868725868726, + "eval_f1_Quebrado_64x64": 0.8439716312056738, + "eval_f1_RiadoRio_64x64": 0.6407766990291263, + "eval_f1_RioFechado_64x64": 0.8832116788321168, + "eval_loss": 1.0885872840881348, + "eval_precision": 0.8439807589082952, + "eval_precision_DuraRiadoRio_64x64": 0.9506172839506173, + "eval_precision_Mole_64x64": 0.9826086956521739, + "eval_precision_Quebrado_64x64": 0.8623188405797102, + "eval_precision_RiadoRio_64x64": 0.5076923076923077, + "eval_precision_RioFechado_64x64": 0.9166666666666666, + "eval_recall": 0.77327341240425, + "eval_recall_DuraRiadoRio_64x64": 0.5347222222222222, + "eval_recall_Mole_64x64": 0.7847222222222222, + "eval_recall_Quebrado_64x64": 0.8263888888888888, + "eval_recall_RiadoRio_64x64": 0.868421052631579, + "eval_recall_RioFechado_64x64": 0.852112676056338, + "eval_runtime": 1.7822, + "eval_samples_per_second": 407.352, + "eval_steps_per_second": 25.81, + "step": 28392 + }, + { + "epoch": 78.00274725274726, + "grad_norm": 7.270179271697998, + "learning_rate": 1.0998626373626373e-05, + "loss": 0.0758, + "step": 28393 + }, + { + "epoch": 78.00549450549451, + "grad_norm": 13.280516624450684, + "learning_rate": 1.0997252747252748e-05, + "loss": 0.3734, + "step": 28394 + }, + { + "epoch": 78.00824175824175, + "grad_norm": 7.049779891967773, + "learning_rate": 1.0995879120879122e-05, + "loss": 0.1259, + "step": 28395 + }, + { + "epoch": 78.01098901098901, + "grad_norm": 8.635693550109863, + "learning_rate": 1.0994505494505495e-05, + "loss": 0.0793, + "step": 28396 + }, + { + "epoch": 78.01373626373626, + "grad_norm": 11.534846305847168, + "learning_rate": 1.0993131868131868e-05, + "loss": 0.1176, + "step": 28397 + }, + { + "epoch": 78.01648351648352, + "grad_norm": 4.466638088226318, + "learning_rate": 1.0991758241758242e-05, + "loss": 0.0649, + "step": 28398 + }, + { + "epoch": 78.01923076923077, + "grad_norm": 13.606951713562012, + "learning_rate": 1.0990384615384617e-05, + "loss": 0.1302, + "step": 28399 + }, + { + "epoch": 78.02197802197803, + "grad_norm": 8.434967041015625, + "learning_rate": 1.0989010989010989e-05, + "loss": 0.0728, + "step": 28400 + }, + { + "epoch": 78.02472527472527, + "grad_norm": 11.745201110839844, + "learning_rate": 1.0987637362637364e-05, + "loss": 0.2711, + "step": 28401 + }, + { + "epoch": 78.02747252747253, + "grad_norm": 15.974102973937988, + "learning_rate": 1.0986263736263737e-05, + "loss": 0.2051, + "step": 28402 + }, + { + "epoch": 78.03021978021978, + "grad_norm": 13.747352600097656, + "learning_rate": 1.098489010989011e-05, + "loss": 0.3418, + "step": 28403 + }, + { + "epoch": 78.03296703296704, + "grad_norm": 17.109827041625977, + "learning_rate": 1.0983516483516484e-05, + "loss": 0.547, + "step": 28404 + }, + { + "epoch": 78.03571428571429, + "grad_norm": 15.960591316223145, + "learning_rate": 1.0982142857142857e-05, + "loss": 0.2501, + "step": 28405 + }, + { + "epoch": 78.03846153846153, + "grad_norm": 11.587904930114746, + "learning_rate": 1.0980769230769232e-05, + "loss": 0.3171, + "step": 28406 + }, + { + "epoch": 78.04120879120879, + "grad_norm": 13.251763343811035, + "learning_rate": 1.0979395604395604e-05, + "loss": 0.1769, + "step": 28407 + }, + { + "epoch": 78.04395604395604, + "grad_norm": 11.341858863830566, + "learning_rate": 1.0978021978021978e-05, + "loss": 0.184, + "step": 28408 + }, + { + "epoch": 78.0467032967033, + "grad_norm": 14.359148979187012, + "learning_rate": 1.0976648351648353e-05, + "loss": 0.1519, + "step": 28409 + }, + { + "epoch": 78.04945054945055, + "grad_norm": 8.153712272644043, + "learning_rate": 1.0975274725274726e-05, + "loss": 0.2144, + "step": 28410 + }, + { + "epoch": 78.0521978021978, + "grad_norm": 3.528658628463745, + "learning_rate": 1.09739010989011e-05, + "loss": 0.0518, + "step": 28411 + }, + { + "epoch": 78.05494505494505, + "grad_norm": 9.608787536621094, + "learning_rate": 1.0972527472527473e-05, + "loss": 0.1171, + "step": 28412 + }, + { + "epoch": 78.0576923076923, + "grad_norm": 11.701498031616211, + "learning_rate": 1.0971153846153846e-05, + "loss": 0.1396, + "step": 28413 + }, + { + "epoch": 78.06043956043956, + "grad_norm": 14.786850929260254, + "learning_rate": 1.0969780219780221e-05, + "loss": 0.3246, + "step": 28414 + }, + { + "epoch": 78.06318681318682, + "grad_norm": 4.706472873687744, + "learning_rate": 1.0968406593406593e-05, + "loss": 0.0437, + "step": 28415 + }, + { + "epoch": 78.06593406593407, + "grad_norm": 17.41584587097168, + "learning_rate": 1.0967032967032967e-05, + "loss": 0.2262, + "step": 28416 + }, + { + "epoch": 78.06868131868131, + "grad_norm": 8.801676750183105, + "learning_rate": 1.0965659340659342e-05, + "loss": 0.0876, + "step": 28417 + }, + { + "epoch": 78.07142857142857, + "grad_norm": 5.791764259338379, + "learning_rate": 1.0964285714285715e-05, + "loss": 0.119, + "step": 28418 + }, + { + "epoch": 78.07417582417582, + "grad_norm": 9.516949653625488, + "learning_rate": 1.0962912087912088e-05, + "loss": 0.1588, + "step": 28419 + }, + { + "epoch": 78.07692307692308, + "grad_norm": 5.898062229156494, + "learning_rate": 1.0961538461538462e-05, + "loss": 0.0966, + "step": 28420 + }, + { + "epoch": 78.07967032967034, + "grad_norm": 10.296358108520508, + "learning_rate": 1.0960164835164835e-05, + "loss": 0.192, + "step": 28421 + }, + { + "epoch": 78.08241758241758, + "grad_norm": 11.997774124145508, + "learning_rate": 1.0958791208791209e-05, + "loss": 0.1202, + "step": 28422 + }, + { + "epoch": 78.08516483516483, + "grad_norm": 17.32938575744629, + "learning_rate": 1.0957417582417582e-05, + "loss": 0.418, + "step": 28423 + }, + { + "epoch": 78.08791208791209, + "grad_norm": 11.29260540008545, + "learning_rate": 1.0956043956043957e-05, + "loss": 0.139, + "step": 28424 + }, + { + "epoch": 78.09065934065934, + "grad_norm": 4.429274559020996, + "learning_rate": 1.095467032967033e-05, + "loss": 0.0441, + "step": 28425 + }, + { + "epoch": 78.0934065934066, + "grad_norm": 5.935614585876465, + "learning_rate": 1.0953296703296704e-05, + "loss": 0.1034, + "step": 28426 + }, + { + "epoch": 78.09615384615384, + "grad_norm": 9.675201416015625, + "learning_rate": 1.0951923076923077e-05, + "loss": 0.3443, + "step": 28427 + }, + { + "epoch": 78.0989010989011, + "grad_norm": 13.015592575073242, + "learning_rate": 1.095054945054945e-05, + "loss": 0.2002, + "step": 28428 + }, + { + "epoch": 78.10164835164835, + "grad_norm": 9.351004600524902, + "learning_rate": 1.0949175824175826e-05, + "loss": 0.1051, + "step": 28429 + }, + { + "epoch": 78.1043956043956, + "grad_norm": 9.86568546295166, + "learning_rate": 1.0947802197802198e-05, + "loss": 0.1853, + "step": 28430 + }, + { + "epoch": 78.10714285714286, + "grad_norm": 4.610857009887695, + "learning_rate": 1.0946428571428571e-05, + "loss": 0.0381, + "step": 28431 + }, + { + "epoch": 78.10989010989012, + "grad_norm": 15.447037696838379, + "learning_rate": 1.0945054945054946e-05, + "loss": 0.3583, + "step": 28432 + }, + { + "epoch": 78.11263736263736, + "grad_norm": 7.596652984619141, + "learning_rate": 1.094368131868132e-05, + "loss": 0.1495, + "step": 28433 + }, + { + "epoch": 78.11538461538461, + "grad_norm": 1.752076268196106, + "learning_rate": 1.0942307692307693e-05, + "loss": 0.0242, + "step": 28434 + }, + { + "epoch": 78.11813186813187, + "grad_norm": 8.491939544677734, + "learning_rate": 1.0940934065934066e-05, + "loss": 0.1113, + "step": 28435 + }, + { + "epoch": 78.12087912087912, + "grad_norm": 11.045376777648926, + "learning_rate": 1.093956043956044e-05, + "loss": 0.2692, + "step": 28436 + }, + { + "epoch": 78.12362637362638, + "grad_norm": 2.8355712890625, + "learning_rate": 1.0938186813186813e-05, + "loss": 0.0489, + "step": 28437 + }, + { + "epoch": 78.12637362637362, + "grad_norm": 21.642410278320312, + "learning_rate": 1.0936813186813187e-05, + "loss": 0.4406, + "step": 28438 + }, + { + "epoch": 78.12912087912088, + "grad_norm": 8.086395263671875, + "learning_rate": 1.0935439560439562e-05, + "loss": 0.1153, + "step": 28439 + }, + { + "epoch": 78.13186813186813, + "grad_norm": 6.53212833404541, + "learning_rate": 1.0934065934065935e-05, + "loss": 0.0827, + "step": 28440 + }, + { + "epoch": 78.13461538461539, + "grad_norm": 3.2529287338256836, + "learning_rate": 1.0932692307692309e-05, + "loss": 0.0346, + "step": 28441 + }, + { + "epoch": 78.13736263736264, + "grad_norm": 7.3922224044799805, + "learning_rate": 1.0931318681318682e-05, + "loss": 0.0602, + "step": 28442 + }, + { + "epoch": 78.14010989010988, + "grad_norm": 7.518048286437988, + "learning_rate": 1.0929945054945055e-05, + "loss": 0.0609, + "step": 28443 + }, + { + "epoch": 78.14285714285714, + "grad_norm": 7.971233367919922, + "learning_rate": 1.092857142857143e-05, + "loss": 0.1109, + "step": 28444 + }, + { + "epoch": 78.1456043956044, + "grad_norm": 14.819413185119629, + "learning_rate": 1.0927197802197802e-05, + "loss": 0.2293, + "step": 28445 + }, + { + "epoch": 78.14835164835165, + "grad_norm": 13.496788024902344, + "learning_rate": 1.0925824175824176e-05, + "loss": 0.2717, + "step": 28446 + }, + { + "epoch": 78.1510989010989, + "grad_norm": 14.890445709228516, + "learning_rate": 1.092445054945055e-05, + "loss": 0.4151, + "step": 28447 + }, + { + "epoch": 78.15384615384616, + "grad_norm": 4.388632297515869, + "learning_rate": 1.0923076923076924e-05, + "loss": 0.1167, + "step": 28448 + }, + { + "epoch": 78.1565934065934, + "grad_norm": 16.591367721557617, + "learning_rate": 1.0921703296703297e-05, + "loss": 0.3339, + "step": 28449 + }, + { + "epoch": 78.15934065934066, + "grad_norm": 5.018261909484863, + "learning_rate": 1.0920329670329671e-05, + "loss": 0.0778, + "step": 28450 + }, + { + "epoch": 78.16208791208791, + "grad_norm": 5.470570087432861, + "learning_rate": 1.0918956043956044e-05, + "loss": 0.0846, + "step": 28451 + }, + { + "epoch": 78.16483516483517, + "grad_norm": 8.240656852722168, + "learning_rate": 1.0917582417582418e-05, + "loss": 0.0899, + "step": 28452 + }, + { + "epoch": 78.16758241758242, + "grad_norm": 16.347381591796875, + "learning_rate": 1.0916208791208791e-05, + "loss": 0.333, + "step": 28453 + }, + { + "epoch": 78.17032967032966, + "grad_norm": 12.972336769104004, + "learning_rate": 1.0914835164835166e-05, + "loss": 0.3137, + "step": 28454 + }, + { + "epoch": 78.17307692307692, + "grad_norm": 14.404387474060059, + "learning_rate": 1.091346153846154e-05, + "loss": 0.2806, + "step": 28455 + }, + { + "epoch": 78.17582417582418, + "grad_norm": 7.72488260269165, + "learning_rate": 1.0912087912087911e-05, + "loss": 0.0918, + "step": 28456 + }, + { + "epoch": 78.17857142857143, + "grad_norm": 4.023770809173584, + "learning_rate": 1.0910714285714286e-05, + "loss": 0.0874, + "step": 28457 + }, + { + "epoch": 78.18131868131869, + "grad_norm": 12.333089828491211, + "learning_rate": 1.090934065934066e-05, + "loss": 0.1542, + "step": 28458 + }, + { + "epoch": 78.18406593406593, + "grad_norm": 5.830773830413818, + "learning_rate": 1.0907967032967035e-05, + "loss": 0.0364, + "step": 28459 + }, + { + "epoch": 78.18681318681318, + "grad_norm": 24.09468650817871, + "learning_rate": 1.0906593406593407e-05, + "loss": 0.6133, + "step": 28460 + }, + { + "epoch": 78.18956043956044, + "grad_norm": 12.135278701782227, + "learning_rate": 1.090521978021978e-05, + "loss": 0.2007, + "step": 28461 + }, + { + "epoch": 78.1923076923077, + "grad_norm": 0.8553310632705688, + "learning_rate": 1.0903846153846155e-05, + "loss": 0.0062, + "step": 28462 + }, + { + "epoch": 78.19505494505495, + "grad_norm": 19.53973960876465, + "learning_rate": 1.0902472527472529e-05, + "loss": 0.4286, + "step": 28463 + }, + { + "epoch": 78.1978021978022, + "grad_norm": 14.428004264831543, + "learning_rate": 1.0901098901098902e-05, + "loss": 0.1937, + "step": 28464 + }, + { + "epoch": 78.20054945054945, + "grad_norm": 4.1329216957092285, + "learning_rate": 1.0899725274725275e-05, + "loss": 0.0379, + "step": 28465 + }, + { + "epoch": 78.2032967032967, + "grad_norm": 13.510704040527344, + "learning_rate": 1.0898351648351649e-05, + "loss": 0.1993, + "step": 28466 + }, + { + "epoch": 78.20604395604396, + "grad_norm": 18.487695693969727, + "learning_rate": 1.0896978021978022e-05, + "loss": 0.419, + "step": 28467 + }, + { + "epoch": 78.20879120879121, + "grad_norm": 11.918209075927734, + "learning_rate": 1.0895604395604396e-05, + "loss": 0.2371, + "step": 28468 + }, + { + "epoch": 78.21153846153847, + "grad_norm": 22.074748992919922, + "learning_rate": 1.0894230769230769e-05, + "loss": 0.7902, + "step": 28469 + }, + { + "epoch": 78.21428571428571, + "grad_norm": 10.286581039428711, + "learning_rate": 1.0892857142857144e-05, + "loss": 0.1358, + "step": 28470 + }, + { + "epoch": 78.21703296703296, + "grad_norm": 16.550430297851562, + "learning_rate": 1.0891483516483516e-05, + "loss": 0.3108, + "step": 28471 + }, + { + "epoch": 78.21978021978022, + "grad_norm": 22.751506805419922, + "learning_rate": 1.0890109890109891e-05, + "loss": 0.4553, + "step": 28472 + }, + { + "epoch": 78.22252747252747, + "grad_norm": 19.579729080200195, + "learning_rate": 1.0888736263736264e-05, + "loss": 0.6763, + "step": 28473 + }, + { + "epoch": 78.22527472527473, + "grad_norm": 12.294625282287598, + "learning_rate": 1.0887362637362638e-05, + "loss": 0.1853, + "step": 28474 + }, + { + "epoch": 78.22802197802197, + "grad_norm": 6.239440441131592, + "learning_rate": 1.0885989010989011e-05, + "loss": 0.0733, + "step": 28475 + }, + { + "epoch": 78.23076923076923, + "grad_norm": 4.163633823394775, + "learning_rate": 1.0884615384615385e-05, + "loss": 0.0689, + "step": 28476 + }, + { + "epoch": 78.23351648351648, + "grad_norm": 14.412269592285156, + "learning_rate": 1.088324175824176e-05, + "loss": 0.1448, + "step": 28477 + }, + { + "epoch": 78.23626373626374, + "grad_norm": 19.381038665771484, + "learning_rate": 1.0881868131868133e-05, + "loss": 0.27, + "step": 28478 + }, + { + "epoch": 78.23901098901099, + "grad_norm": 4.531855583190918, + "learning_rate": 1.0880494505494505e-05, + "loss": 0.0743, + "step": 28479 + }, + { + "epoch": 78.24175824175825, + "grad_norm": 6.967342376708984, + "learning_rate": 1.087912087912088e-05, + "loss": 0.1736, + "step": 28480 + }, + { + "epoch": 78.24450549450549, + "grad_norm": 14.853705406188965, + "learning_rate": 1.0877747252747253e-05, + "loss": 0.4431, + "step": 28481 + }, + { + "epoch": 78.24725274725274, + "grad_norm": 1.9508992433547974, + "learning_rate": 1.0876373626373627e-05, + "loss": 0.0221, + "step": 28482 + }, + { + "epoch": 78.25, + "grad_norm": 9.206430435180664, + "learning_rate": 1.0875e-05, + "loss": 0.1875, + "step": 28483 + }, + { + "epoch": 78.25274725274726, + "grad_norm": 15.374541282653809, + "learning_rate": 1.0873626373626373e-05, + "loss": 0.2508, + "step": 28484 + }, + { + "epoch": 78.25549450549451, + "grad_norm": 9.138091087341309, + "learning_rate": 1.0872252747252749e-05, + "loss": 0.2797, + "step": 28485 + }, + { + "epoch": 78.25824175824175, + "grad_norm": 16.495437622070312, + "learning_rate": 1.087087912087912e-05, + "loss": 0.1489, + "step": 28486 + }, + { + "epoch": 78.26098901098901, + "grad_norm": 5.102478981018066, + "learning_rate": 1.0869505494505495e-05, + "loss": 0.0822, + "step": 28487 + }, + { + "epoch": 78.26373626373626, + "grad_norm": 13.82779312133789, + "learning_rate": 1.0868131868131869e-05, + "loss": 0.1486, + "step": 28488 + }, + { + "epoch": 78.26648351648352, + "grad_norm": 5.47669792175293, + "learning_rate": 1.0866758241758242e-05, + "loss": 0.0643, + "step": 28489 + }, + { + "epoch": 78.26923076923077, + "grad_norm": 11.418723106384277, + "learning_rate": 1.0865384615384616e-05, + "loss": 0.2216, + "step": 28490 + }, + { + "epoch": 78.27197802197803, + "grad_norm": 9.77953815460205, + "learning_rate": 1.0864010989010989e-05, + "loss": 0.1203, + "step": 28491 + }, + { + "epoch": 78.27472527472527, + "grad_norm": 13.809550285339355, + "learning_rate": 1.0862637362637364e-05, + "loss": 0.2069, + "step": 28492 + }, + { + "epoch": 78.27747252747253, + "grad_norm": 13.761414527893066, + "learning_rate": 1.0861263736263738e-05, + "loss": 0.4501, + "step": 28493 + }, + { + "epoch": 78.28021978021978, + "grad_norm": 9.437807083129883, + "learning_rate": 1.085989010989011e-05, + "loss": 0.2571, + "step": 28494 + }, + { + "epoch": 78.28296703296704, + "grad_norm": 7.57227087020874, + "learning_rate": 1.0858516483516484e-05, + "loss": 0.177, + "step": 28495 + }, + { + "epoch": 78.28571428571429, + "grad_norm": 11.07795524597168, + "learning_rate": 1.0857142857142858e-05, + "loss": 0.1323, + "step": 28496 + }, + { + "epoch": 78.28846153846153, + "grad_norm": 12.978365898132324, + "learning_rate": 1.0855769230769231e-05, + "loss": 0.3323, + "step": 28497 + }, + { + "epoch": 78.29120879120879, + "grad_norm": 4.7328338623046875, + "learning_rate": 1.0854395604395605e-05, + "loss": 0.0399, + "step": 28498 + }, + { + "epoch": 78.29395604395604, + "grad_norm": 15.311824798583984, + "learning_rate": 1.0853021978021978e-05, + "loss": 0.3127, + "step": 28499 + }, + { + "epoch": 78.2967032967033, + "grad_norm": 7.126338005065918, + "learning_rate": 1.0851648351648353e-05, + "loss": 0.1693, + "step": 28500 + }, + { + "epoch": 78.29945054945055, + "grad_norm": 9.584226608276367, + "learning_rate": 1.0850274725274725e-05, + "loss": 0.0914, + "step": 28501 + }, + { + "epoch": 78.3021978021978, + "grad_norm": 20.6700382232666, + "learning_rate": 1.08489010989011e-05, + "loss": 0.2865, + "step": 28502 + }, + { + "epoch": 78.30494505494505, + "grad_norm": 8.71817398071289, + "learning_rate": 1.0847527472527473e-05, + "loss": 0.0834, + "step": 28503 + }, + { + "epoch": 78.3076923076923, + "grad_norm": 9.588979721069336, + "learning_rate": 1.0846153846153847e-05, + "loss": 0.1972, + "step": 28504 + }, + { + "epoch": 78.31043956043956, + "grad_norm": 10.832694053649902, + "learning_rate": 1.084478021978022e-05, + "loss": 0.1249, + "step": 28505 + }, + { + "epoch": 78.31318681318682, + "grad_norm": 6.93396520614624, + "learning_rate": 1.0843406593406594e-05, + "loss": 0.079, + "step": 28506 + }, + { + "epoch": 78.31593406593407, + "grad_norm": 15.952017784118652, + "learning_rate": 1.0842032967032969e-05, + "loss": 0.2001, + "step": 28507 + }, + { + "epoch": 78.31868131868131, + "grad_norm": 3.2632505893707275, + "learning_rate": 1.0840659340659342e-05, + "loss": 0.0421, + "step": 28508 + }, + { + "epoch": 78.32142857142857, + "grad_norm": 16.487747192382812, + "learning_rate": 1.0839285714285714e-05, + "loss": 0.4356, + "step": 28509 + }, + { + "epoch": 78.32417582417582, + "grad_norm": 17.87125587463379, + "learning_rate": 1.0837912087912089e-05, + "loss": 0.3712, + "step": 28510 + }, + { + "epoch": 78.32692307692308, + "grad_norm": 19.11536407470703, + "learning_rate": 1.0836538461538462e-05, + "loss": 0.2484, + "step": 28511 + }, + { + "epoch": 78.32967032967034, + "grad_norm": 18.04901123046875, + "learning_rate": 1.0835164835164836e-05, + "loss": 0.3733, + "step": 28512 + }, + { + "epoch": 78.33241758241758, + "grad_norm": 9.11542797088623, + "learning_rate": 1.0833791208791209e-05, + "loss": 0.1014, + "step": 28513 + }, + { + "epoch": 78.33516483516483, + "grad_norm": 21.954633712768555, + "learning_rate": 1.0832417582417582e-05, + "loss": 0.423, + "step": 28514 + }, + { + "epoch": 78.33791208791209, + "grad_norm": 9.870946884155273, + "learning_rate": 1.0831043956043958e-05, + "loss": 0.1537, + "step": 28515 + }, + { + "epoch": 78.34065934065934, + "grad_norm": 15.505875587463379, + "learning_rate": 1.082967032967033e-05, + "loss": 0.3914, + "step": 28516 + }, + { + "epoch": 78.3434065934066, + "grad_norm": 10.426435470581055, + "learning_rate": 1.0828296703296704e-05, + "loss": 0.2322, + "step": 28517 + }, + { + "epoch": 78.34615384615384, + "grad_norm": 9.169631958007812, + "learning_rate": 1.0826923076923078e-05, + "loss": 0.2453, + "step": 28518 + }, + { + "epoch": 78.3489010989011, + "grad_norm": 5.248072624206543, + "learning_rate": 1.0825549450549451e-05, + "loss": 0.0719, + "step": 28519 + }, + { + "epoch": 78.35164835164835, + "grad_norm": 13.924786567687988, + "learning_rate": 1.0824175824175825e-05, + "loss": 0.2254, + "step": 28520 + }, + { + "epoch": 78.3543956043956, + "grad_norm": 6.889886856079102, + "learning_rate": 1.0822802197802198e-05, + "loss": 0.1187, + "step": 28521 + }, + { + "epoch": 78.35714285714286, + "grad_norm": 10.670398712158203, + "learning_rate": 1.0821428571428573e-05, + "loss": 0.1126, + "step": 28522 + }, + { + "epoch": 78.35989010989012, + "grad_norm": 13.971577644348145, + "learning_rate": 1.0820054945054947e-05, + "loss": 0.4354, + "step": 28523 + }, + { + "epoch": 78.36263736263736, + "grad_norm": 9.048605918884277, + "learning_rate": 1.0818681318681318e-05, + "loss": 0.1782, + "step": 28524 + }, + { + "epoch": 78.36538461538461, + "grad_norm": 2.7851593494415283, + "learning_rate": 1.0817307692307693e-05, + "loss": 0.0219, + "step": 28525 + }, + { + "epoch": 78.36813186813187, + "grad_norm": 7.471950054168701, + "learning_rate": 1.0815934065934067e-05, + "loss": 0.1882, + "step": 28526 + }, + { + "epoch": 78.37087912087912, + "grad_norm": 8.023275375366211, + "learning_rate": 1.081456043956044e-05, + "loss": 0.2048, + "step": 28527 + }, + { + "epoch": 78.37362637362638, + "grad_norm": 12.291218757629395, + "learning_rate": 1.0813186813186814e-05, + "loss": 0.1717, + "step": 28528 + }, + { + "epoch": 78.37637362637362, + "grad_norm": 6.656983375549316, + "learning_rate": 1.0811813186813187e-05, + "loss": 0.1747, + "step": 28529 + }, + { + "epoch": 78.37912087912088, + "grad_norm": 19.726917266845703, + "learning_rate": 1.0810439560439562e-05, + "loss": 0.2321, + "step": 28530 + }, + { + "epoch": 78.38186813186813, + "grad_norm": 6.8968658447265625, + "learning_rate": 1.0809065934065934e-05, + "loss": 0.226, + "step": 28531 + }, + { + "epoch": 78.38461538461539, + "grad_norm": 7.232378005981445, + "learning_rate": 1.0807692307692307e-05, + "loss": 0.0551, + "step": 28532 + }, + { + "epoch": 78.38736263736264, + "grad_norm": 1.5060935020446777, + "learning_rate": 1.0806318681318682e-05, + "loss": 0.0146, + "step": 28533 + }, + { + "epoch": 78.39010989010988, + "grad_norm": 10.283232688903809, + "learning_rate": 1.0804945054945056e-05, + "loss": 0.1825, + "step": 28534 + }, + { + "epoch": 78.39285714285714, + "grad_norm": 9.634363174438477, + "learning_rate": 1.0803571428571429e-05, + "loss": 0.1892, + "step": 28535 + }, + { + "epoch": 78.3956043956044, + "grad_norm": 9.578495025634766, + "learning_rate": 1.0802197802197802e-05, + "loss": 0.1444, + "step": 28536 + }, + { + "epoch": 78.39835164835165, + "grad_norm": 22.338165283203125, + "learning_rate": 1.0800824175824176e-05, + "loss": 0.3809, + "step": 28537 + }, + { + "epoch": 78.4010989010989, + "grad_norm": 15.268969535827637, + "learning_rate": 1.0799450549450551e-05, + "loss": 0.2815, + "step": 28538 + }, + { + "epoch": 78.40384615384616, + "grad_norm": 20.309181213378906, + "learning_rate": 1.0798076923076923e-05, + "loss": 0.5691, + "step": 28539 + }, + { + "epoch": 78.4065934065934, + "grad_norm": 13.074712753295898, + "learning_rate": 1.0796703296703298e-05, + "loss": 0.3017, + "step": 28540 + }, + { + "epoch": 78.40934065934066, + "grad_norm": 9.272672653198242, + "learning_rate": 1.0795329670329671e-05, + "loss": 0.1854, + "step": 28541 + }, + { + "epoch": 78.41208791208791, + "grad_norm": 6.95124626159668, + "learning_rate": 1.0793956043956045e-05, + "loss": 0.0563, + "step": 28542 + }, + { + "epoch": 78.41483516483517, + "grad_norm": 9.539600372314453, + "learning_rate": 1.0792582417582418e-05, + "loss": 0.1144, + "step": 28543 + }, + { + "epoch": 78.41758241758242, + "grad_norm": 13.253532409667969, + "learning_rate": 1.0791208791208791e-05, + "loss": 0.1073, + "step": 28544 + }, + { + "epoch": 78.42032967032966, + "grad_norm": 11.919879913330078, + "learning_rate": 1.0789835164835167e-05, + "loss": 0.2171, + "step": 28545 + }, + { + "epoch": 78.42307692307692, + "grad_norm": 15.56723403930664, + "learning_rate": 1.0788461538461538e-05, + "loss": 0.1521, + "step": 28546 + }, + { + "epoch": 78.42582417582418, + "grad_norm": 7.255734920501709, + "learning_rate": 1.0787087912087912e-05, + "loss": 0.0537, + "step": 28547 + }, + { + "epoch": 78.42857142857143, + "grad_norm": 9.328462600708008, + "learning_rate": 1.0785714285714287e-05, + "loss": 0.1667, + "step": 28548 + }, + { + "epoch": 78.43131868131869, + "grad_norm": 4.868224620819092, + "learning_rate": 1.078434065934066e-05, + "loss": 0.0462, + "step": 28549 + }, + { + "epoch": 78.43406593406593, + "grad_norm": 14.682310104370117, + "learning_rate": 1.0782967032967034e-05, + "loss": 0.2818, + "step": 28550 + }, + { + "epoch": 78.43681318681318, + "grad_norm": 30.686948776245117, + "learning_rate": 1.0781593406593407e-05, + "loss": 0.8772, + "step": 28551 + }, + { + "epoch": 78.43956043956044, + "grad_norm": 12.079132080078125, + "learning_rate": 1.078021978021978e-05, + "loss": 0.4695, + "step": 28552 + }, + { + "epoch": 78.4423076923077, + "grad_norm": 25.58263397216797, + "learning_rate": 1.0778846153846155e-05, + "loss": 0.6072, + "step": 28553 + }, + { + "epoch": 78.44505494505495, + "grad_norm": 10.481478691101074, + "learning_rate": 1.0777472527472527e-05, + "loss": 0.1777, + "step": 28554 + }, + { + "epoch": 78.4478021978022, + "grad_norm": 12.365968704223633, + "learning_rate": 1.0776098901098902e-05, + "loss": 0.219, + "step": 28555 + }, + { + "epoch": 78.45054945054945, + "grad_norm": 5.108955383300781, + "learning_rate": 1.0774725274725276e-05, + "loss": 0.0537, + "step": 28556 + }, + { + "epoch": 78.4532967032967, + "grad_norm": 7.38879919052124, + "learning_rate": 1.0773351648351649e-05, + "loss": 0.0668, + "step": 28557 + }, + { + "epoch": 78.45604395604396, + "grad_norm": 12.373868942260742, + "learning_rate": 1.0771978021978023e-05, + "loss": 0.2359, + "step": 28558 + }, + { + "epoch": 78.45879120879121, + "grad_norm": 4.686483860015869, + "learning_rate": 1.0770604395604396e-05, + "loss": 0.0679, + "step": 28559 + }, + { + "epoch": 78.46153846153847, + "grad_norm": 12.43596363067627, + "learning_rate": 1.0769230769230771e-05, + "loss": 0.1119, + "step": 28560 + }, + { + "epoch": 78.46428571428571, + "grad_norm": 15.957292556762695, + "learning_rate": 1.0767857142857143e-05, + "loss": 0.5099, + "step": 28561 + }, + { + "epoch": 78.46703296703296, + "grad_norm": 10.25995922088623, + "learning_rate": 1.0766483516483516e-05, + "loss": 0.184, + "step": 28562 + }, + { + "epoch": 78.46978021978022, + "grad_norm": 12.084883689880371, + "learning_rate": 1.0765109890109891e-05, + "loss": 0.1936, + "step": 28563 + }, + { + "epoch": 78.47252747252747, + "grad_norm": 1.2676329612731934, + "learning_rate": 1.0763736263736265e-05, + "loss": 0.0124, + "step": 28564 + }, + { + "epoch": 78.47527472527473, + "grad_norm": 10.575923919677734, + "learning_rate": 1.0762362637362638e-05, + "loss": 0.1797, + "step": 28565 + }, + { + "epoch": 78.47802197802197, + "grad_norm": 4.377483367919922, + "learning_rate": 1.0760989010989011e-05, + "loss": 0.1159, + "step": 28566 + }, + { + "epoch": 78.48076923076923, + "grad_norm": 19.325237274169922, + "learning_rate": 1.0759615384615385e-05, + "loss": 0.2123, + "step": 28567 + }, + { + "epoch": 78.48351648351648, + "grad_norm": 9.954073905944824, + "learning_rate": 1.0758241758241758e-05, + "loss": 0.2178, + "step": 28568 + }, + { + "epoch": 78.48626373626374, + "grad_norm": 14.61103630065918, + "learning_rate": 1.0756868131868132e-05, + "loss": 0.1765, + "step": 28569 + }, + { + "epoch": 78.48901098901099, + "grad_norm": 2.1279609203338623, + "learning_rate": 1.0755494505494507e-05, + "loss": 0.0233, + "step": 28570 + }, + { + "epoch": 78.49175824175825, + "grad_norm": 4.154682636260986, + "learning_rate": 1.075412087912088e-05, + "loss": 0.0496, + "step": 28571 + }, + { + "epoch": 78.49450549450549, + "grad_norm": 13.973250389099121, + "learning_rate": 1.0752747252747254e-05, + "loss": 0.3087, + "step": 28572 + }, + { + "epoch": 78.49725274725274, + "grad_norm": 16.914867401123047, + "learning_rate": 1.0751373626373627e-05, + "loss": 0.4839, + "step": 28573 + }, + { + "epoch": 78.5, + "grad_norm": 14.636693954467773, + "learning_rate": 1.075e-05, + "loss": 0.3267, + "step": 28574 + }, + { + "epoch": 78.50274725274726, + "grad_norm": 4.401904582977295, + "learning_rate": 1.0748626373626376e-05, + "loss": 0.0569, + "step": 28575 + }, + { + "epoch": 78.50549450549451, + "grad_norm": 16.095840454101562, + "learning_rate": 1.0747252747252747e-05, + "loss": 0.3039, + "step": 28576 + }, + { + "epoch": 78.50824175824175, + "grad_norm": 5.411073207855225, + "learning_rate": 1.074587912087912e-05, + "loss": 0.1251, + "step": 28577 + }, + { + "epoch": 78.51098901098901, + "grad_norm": 8.618315696716309, + "learning_rate": 1.0744505494505496e-05, + "loss": 0.248, + "step": 28578 + }, + { + "epoch": 78.51373626373626, + "grad_norm": 7.635754108428955, + "learning_rate": 1.0743131868131869e-05, + "loss": 0.0716, + "step": 28579 + }, + { + "epoch": 78.51648351648352, + "grad_norm": 12.455293655395508, + "learning_rate": 1.0741758241758241e-05, + "loss": 0.4016, + "step": 28580 + }, + { + "epoch": 78.51923076923077, + "grad_norm": 4.295360088348389, + "learning_rate": 1.0740384615384616e-05, + "loss": 0.0482, + "step": 28581 + }, + { + "epoch": 78.52197802197803, + "grad_norm": 6.837073802947998, + "learning_rate": 1.073901098901099e-05, + "loss": 0.2288, + "step": 28582 + }, + { + "epoch": 78.52472527472527, + "grad_norm": 6.428343772888184, + "learning_rate": 1.0737637362637363e-05, + "loss": 0.0856, + "step": 28583 + }, + { + "epoch": 78.52747252747253, + "grad_norm": 21.836814880371094, + "learning_rate": 1.0736263736263736e-05, + "loss": 0.6022, + "step": 28584 + }, + { + "epoch": 78.53021978021978, + "grad_norm": 14.776762008666992, + "learning_rate": 1.073489010989011e-05, + "loss": 0.4222, + "step": 28585 + }, + { + "epoch": 78.53296703296704, + "grad_norm": 12.981698036193848, + "learning_rate": 1.0733516483516485e-05, + "loss": 0.2584, + "step": 28586 + }, + { + "epoch": 78.53571428571429, + "grad_norm": 5.599978446960449, + "learning_rate": 1.0732142857142858e-05, + "loss": 0.0674, + "step": 28587 + }, + { + "epoch": 78.53846153846153, + "grad_norm": 5.2140727043151855, + "learning_rate": 1.0730769230769231e-05, + "loss": 0.1035, + "step": 28588 + }, + { + "epoch": 78.54120879120879, + "grad_norm": 1.2784539461135864, + "learning_rate": 1.0729395604395605e-05, + "loss": 0.0178, + "step": 28589 + }, + { + "epoch": 78.54395604395604, + "grad_norm": 11.772889137268066, + "learning_rate": 1.0728021978021978e-05, + "loss": 0.2698, + "step": 28590 + }, + { + "epoch": 78.5467032967033, + "grad_norm": 3.1390843391418457, + "learning_rate": 1.0726648351648352e-05, + "loss": 0.0365, + "step": 28591 + }, + { + "epoch": 78.54945054945055, + "grad_norm": 13.717885971069336, + "learning_rate": 1.0725274725274725e-05, + "loss": 0.317, + "step": 28592 + }, + { + "epoch": 78.5521978021978, + "grad_norm": 13.937647819519043, + "learning_rate": 1.07239010989011e-05, + "loss": 0.2717, + "step": 28593 + }, + { + "epoch": 78.55494505494505, + "grad_norm": 18.381088256835938, + "learning_rate": 1.0722527472527474e-05, + "loss": 0.2846, + "step": 28594 + }, + { + "epoch": 78.5576923076923, + "grad_norm": 1.1495072841644287, + "learning_rate": 1.0721153846153845e-05, + "loss": 0.0158, + "step": 28595 + }, + { + "epoch": 78.56043956043956, + "grad_norm": 7.077305793762207, + "learning_rate": 1.071978021978022e-05, + "loss": 0.0833, + "step": 28596 + }, + { + "epoch": 78.56318681318682, + "grad_norm": 7.477952003479004, + "learning_rate": 1.0718406593406594e-05, + "loss": 0.167, + "step": 28597 + }, + { + "epoch": 78.56593406593407, + "grad_norm": 3.2650136947631836, + "learning_rate": 1.0717032967032967e-05, + "loss": 0.0542, + "step": 28598 + }, + { + "epoch": 78.56868131868131, + "grad_norm": 7.009597301483154, + "learning_rate": 1.071565934065934e-05, + "loss": 0.1098, + "step": 28599 + }, + { + "epoch": 78.57142857142857, + "grad_norm": 7.37956428527832, + "learning_rate": 1.0714285714285714e-05, + "loss": 0.1943, + "step": 28600 + }, + { + "epoch": 78.57417582417582, + "grad_norm": 23.9207706451416, + "learning_rate": 1.071291208791209e-05, + "loss": 0.4537, + "step": 28601 + }, + { + "epoch": 78.57692307692308, + "grad_norm": 13.07784366607666, + "learning_rate": 1.0711538461538463e-05, + "loss": 0.2566, + "step": 28602 + }, + { + "epoch": 78.57967032967034, + "grad_norm": 20.200382232666016, + "learning_rate": 1.0710164835164836e-05, + "loss": 0.3616, + "step": 28603 + }, + { + "epoch": 78.58241758241758, + "grad_norm": 14.24914264678955, + "learning_rate": 1.070879120879121e-05, + "loss": 0.1482, + "step": 28604 + }, + { + "epoch": 78.58516483516483, + "grad_norm": 6.469478607177734, + "learning_rate": 1.0707417582417583e-05, + "loss": 0.0702, + "step": 28605 + }, + { + "epoch": 78.58791208791209, + "grad_norm": 5.485107898712158, + "learning_rate": 1.0706043956043956e-05, + "loss": 0.1151, + "step": 28606 + }, + { + "epoch": 78.59065934065934, + "grad_norm": 16.836471557617188, + "learning_rate": 1.070467032967033e-05, + "loss": 0.2353, + "step": 28607 + }, + { + "epoch": 78.5934065934066, + "grad_norm": 11.0061616897583, + "learning_rate": 1.0703296703296705e-05, + "loss": 0.3144, + "step": 28608 + }, + { + "epoch": 78.59615384615384, + "grad_norm": 12.654011726379395, + "learning_rate": 1.0701923076923078e-05, + "loss": 0.4715, + "step": 28609 + }, + { + "epoch": 78.5989010989011, + "grad_norm": 17.464693069458008, + "learning_rate": 1.070054945054945e-05, + "loss": 0.2725, + "step": 28610 + }, + { + "epoch": 78.60164835164835, + "grad_norm": 2.498603582382202, + "learning_rate": 1.0699175824175825e-05, + "loss": 0.021, + "step": 28611 + }, + { + "epoch": 78.6043956043956, + "grad_norm": 13.6719388961792, + "learning_rate": 1.0697802197802198e-05, + "loss": 0.2169, + "step": 28612 + }, + { + "epoch": 78.60714285714286, + "grad_norm": 9.0199613571167, + "learning_rate": 1.0696428571428572e-05, + "loss": 0.1327, + "step": 28613 + }, + { + "epoch": 78.60989010989012, + "grad_norm": 4.755559921264648, + "learning_rate": 1.0695054945054945e-05, + "loss": 0.0438, + "step": 28614 + }, + { + "epoch": 78.61263736263736, + "grad_norm": 17.38884735107422, + "learning_rate": 1.0693681318681319e-05, + "loss": 0.2344, + "step": 28615 + }, + { + "epoch": 78.61538461538461, + "grad_norm": 7.196464538574219, + "learning_rate": 1.0692307692307694e-05, + "loss": 0.0863, + "step": 28616 + }, + { + "epoch": 78.61813186813187, + "grad_norm": 15.435376167297363, + "learning_rate": 1.0690934065934065e-05, + "loss": 0.162, + "step": 28617 + }, + { + "epoch": 78.62087912087912, + "grad_norm": 10.633848190307617, + "learning_rate": 1.068956043956044e-05, + "loss": 0.1538, + "step": 28618 + }, + { + "epoch": 78.62362637362638, + "grad_norm": 1.4431902170181274, + "learning_rate": 1.0688186813186814e-05, + "loss": 0.0145, + "step": 28619 + }, + { + "epoch": 78.62637362637362, + "grad_norm": 13.22232723236084, + "learning_rate": 1.0686813186813187e-05, + "loss": 0.3215, + "step": 28620 + }, + { + "epoch": 78.62912087912088, + "grad_norm": 16.694904327392578, + "learning_rate": 1.068543956043956e-05, + "loss": 0.2907, + "step": 28621 + }, + { + "epoch": 78.63186813186813, + "grad_norm": 5.173967361450195, + "learning_rate": 1.0684065934065934e-05, + "loss": 0.1578, + "step": 28622 + }, + { + "epoch": 78.63461538461539, + "grad_norm": 11.298432350158691, + "learning_rate": 1.068269230769231e-05, + "loss": 0.1747, + "step": 28623 + }, + { + "epoch": 78.63736263736264, + "grad_norm": 9.911998748779297, + "learning_rate": 1.0681318681318683e-05, + "loss": 0.1363, + "step": 28624 + }, + { + "epoch": 78.64010989010988, + "grad_norm": 14.39776611328125, + "learning_rate": 1.0679945054945054e-05, + "loss": 0.2371, + "step": 28625 + }, + { + "epoch": 78.64285714285714, + "grad_norm": 21.293495178222656, + "learning_rate": 1.067857142857143e-05, + "loss": 0.9558, + "step": 28626 + }, + { + "epoch": 78.6456043956044, + "grad_norm": 9.297347068786621, + "learning_rate": 1.0677197802197803e-05, + "loss": 0.1335, + "step": 28627 + }, + { + "epoch": 78.64835164835165, + "grad_norm": 4.659127712249756, + "learning_rate": 1.0675824175824176e-05, + "loss": 0.0319, + "step": 28628 + }, + { + "epoch": 78.6510989010989, + "grad_norm": 14.309032440185547, + "learning_rate": 1.067445054945055e-05, + "loss": 0.4388, + "step": 28629 + }, + { + "epoch": 78.65384615384616, + "grad_norm": 7.360416889190674, + "learning_rate": 1.0673076923076923e-05, + "loss": 0.1071, + "step": 28630 + }, + { + "epoch": 78.6565934065934, + "grad_norm": 2.183342218399048, + "learning_rate": 1.0671703296703298e-05, + "loss": 0.0264, + "step": 28631 + }, + { + "epoch": 78.65934065934066, + "grad_norm": 7.805089950561523, + "learning_rate": 1.067032967032967e-05, + "loss": 0.1615, + "step": 28632 + }, + { + "epoch": 78.66208791208791, + "grad_norm": 10.27297592163086, + "learning_rate": 1.0668956043956045e-05, + "loss": 0.173, + "step": 28633 + }, + { + "epoch": 78.66483516483517, + "grad_norm": 12.139376640319824, + "learning_rate": 1.0667582417582418e-05, + "loss": 0.2446, + "step": 28634 + }, + { + "epoch": 78.66758241758242, + "grad_norm": 13.023794174194336, + "learning_rate": 1.0666208791208792e-05, + "loss": 0.2342, + "step": 28635 + }, + { + "epoch": 78.67032967032966, + "grad_norm": 23.10525131225586, + "learning_rate": 1.0664835164835165e-05, + "loss": 0.6657, + "step": 28636 + }, + { + "epoch": 78.67307692307692, + "grad_norm": 8.325578689575195, + "learning_rate": 1.0663461538461539e-05, + "loss": 0.1629, + "step": 28637 + }, + { + "epoch": 78.67582417582418, + "grad_norm": 8.640538215637207, + "learning_rate": 1.0662087912087912e-05, + "loss": 0.104, + "step": 28638 + }, + { + "epoch": 78.67857142857143, + "grad_norm": 23.805816650390625, + "learning_rate": 1.0660714285714287e-05, + "loss": 0.7639, + "step": 28639 + }, + { + "epoch": 78.68131868131869, + "grad_norm": 5.84270715713501, + "learning_rate": 1.0659340659340659e-05, + "loss": 0.1085, + "step": 28640 + }, + { + "epoch": 78.68406593406593, + "grad_norm": 11.223360061645508, + "learning_rate": 1.0657967032967034e-05, + "loss": 0.1863, + "step": 28641 + }, + { + "epoch": 78.68681318681318, + "grad_norm": 12.392409324645996, + "learning_rate": 1.0656593406593407e-05, + "loss": 0.2107, + "step": 28642 + }, + { + "epoch": 78.68956043956044, + "grad_norm": 11.231454849243164, + "learning_rate": 1.065521978021978e-05, + "loss": 0.2363, + "step": 28643 + }, + { + "epoch": 78.6923076923077, + "grad_norm": 14.248379707336426, + "learning_rate": 1.0653846153846154e-05, + "loss": 0.1492, + "step": 28644 + }, + { + "epoch": 78.69505494505495, + "grad_norm": 11.811687469482422, + "learning_rate": 1.0652472527472528e-05, + "loss": 0.2082, + "step": 28645 + }, + { + "epoch": 78.6978021978022, + "grad_norm": 15.617963790893555, + "learning_rate": 1.0651098901098903e-05, + "loss": 0.1208, + "step": 28646 + }, + { + "epoch": 78.70054945054945, + "grad_norm": 23.90797233581543, + "learning_rate": 1.0649725274725274e-05, + "loss": 0.334, + "step": 28647 + }, + { + "epoch": 78.7032967032967, + "grad_norm": 2.3628652095794678, + "learning_rate": 1.0648351648351648e-05, + "loss": 0.0305, + "step": 28648 + }, + { + "epoch": 78.70604395604396, + "grad_norm": 13.396020889282227, + "learning_rate": 1.0646978021978023e-05, + "loss": 0.2501, + "step": 28649 + }, + { + "epoch": 78.70879120879121, + "grad_norm": 3.2752435207366943, + "learning_rate": 1.0645604395604396e-05, + "loss": 0.0648, + "step": 28650 + }, + { + "epoch": 78.71153846153847, + "grad_norm": 11.269340515136719, + "learning_rate": 1.064423076923077e-05, + "loss": 0.1268, + "step": 28651 + }, + { + "epoch": 78.71428571428571, + "grad_norm": 13.441386222839355, + "learning_rate": 1.0642857142857143e-05, + "loss": 0.1994, + "step": 28652 + }, + { + "epoch": 78.71703296703296, + "grad_norm": 9.355125427246094, + "learning_rate": 1.0641483516483516e-05, + "loss": 0.1547, + "step": 28653 + }, + { + "epoch": 78.71978021978022, + "grad_norm": 16.308427810668945, + "learning_rate": 1.0640109890109892e-05, + "loss": 0.4163, + "step": 28654 + }, + { + "epoch": 78.72252747252747, + "grad_norm": 14.933327674865723, + "learning_rate": 1.0638736263736263e-05, + "loss": 0.1579, + "step": 28655 + }, + { + "epoch": 78.72527472527473, + "grad_norm": 5.467692852020264, + "learning_rate": 1.0637362637362638e-05, + "loss": 0.0909, + "step": 28656 + }, + { + "epoch": 78.72802197802197, + "grad_norm": 10.073440551757812, + "learning_rate": 1.0635989010989012e-05, + "loss": 0.2882, + "step": 28657 + }, + { + "epoch": 78.73076923076923, + "grad_norm": 12.739872932434082, + "learning_rate": 1.0634615384615385e-05, + "loss": 0.1939, + "step": 28658 + }, + { + "epoch": 78.73351648351648, + "grad_norm": 20.596424102783203, + "learning_rate": 1.0633241758241759e-05, + "loss": 0.5533, + "step": 28659 + }, + { + "epoch": 78.73626373626374, + "grad_norm": 6.7950263023376465, + "learning_rate": 1.0631868131868132e-05, + "loss": 0.0578, + "step": 28660 + }, + { + "epoch": 78.73901098901099, + "grad_norm": 20.18094825744629, + "learning_rate": 1.0630494505494507e-05, + "loss": 0.4367, + "step": 28661 + }, + { + "epoch": 78.74175824175825, + "grad_norm": 9.33734130859375, + "learning_rate": 1.0629120879120879e-05, + "loss": 0.0926, + "step": 28662 + }, + { + "epoch": 78.74450549450549, + "grad_norm": 15.583137512207031, + "learning_rate": 1.0627747252747252e-05, + "loss": 0.2157, + "step": 28663 + }, + { + "epoch": 78.74725274725274, + "grad_norm": 6.171241283416748, + "learning_rate": 1.0626373626373627e-05, + "loss": 0.1162, + "step": 28664 + }, + { + "epoch": 78.75, + "grad_norm": 9.191527366638184, + "learning_rate": 1.0625e-05, + "loss": 0.2102, + "step": 28665 + }, + { + "epoch": 78.75274725274726, + "grad_norm": 4.3916015625, + "learning_rate": 1.0623626373626374e-05, + "loss": 0.076, + "step": 28666 + }, + { + "epoch": 78.75549450549451, + "grad_norm": 0.7688259482383728, + "learning_rate": 1.0622252747252748e-05, + "loss": 0.0102, + "step": 28667 + }, + { + "epoch": 78.75824175824175, + "grad_norm": 14.621155738830566, + "learning_rate": 1.0620879120879121e-05, + "loss": 0.3723, + "step": 28668 + }, + { + "epoch": 78.76098901098901, + "grad_norm": 7.742239475250244, + "learning_rate": 1.0619505494505496e-05, + "loss": 0.1715, + "step": 28669 + }, + { + "epoch": 78.76373626373626, + "grad_norm": 6.300751686096191, + "learning_rate": 1.0618131868131868e-05, + "loss": 0.072, + "step": 28670 + }, + { + "epoch": 78.76648351648352, + "grad_norm": 13.67669677734375, + "learning_rate": 1.0616758241758243e-05, + "loss": 0.1034, + "step": 28671 + }, + { + "epoch": 78.76923076923077, + "grad_norm": 12.384087562561035, + "learning_rate": 1.0615384615384616e-05, + "loss": 0.1521, + "step": 28672 + }, + { + "epoch": 78.77197802197803, + "grad_norm": 12.826700210571289, + "learning_rate": 1.061401098901099e-05, + "loss": 0.3913, + "step": 28673 + }, + { + "epoch": 78.77472527472527, + "grad_norm": 11.206147193908691, + "learning_rate": 1.0612637362637363e-05, + "loss": 0.2221, + "step": 28674 + }, + { + "epoch": 78.77747252747253, + "grad_norm": 5.772002696990967, + "learning_rate": 1.0611263736263737e-05, + "loss": 0.0817, + "step": 28675 + }, + { + "epoch": 78.78021978021978, + "grad_norm": 7.847929954528809, + "learning_rate": 1.0609890109890112e-05, + "loss": 0.1825, + "step": 28676 + }, + { + "epoch": 78.78296703296704, + "grad_norm": 1.3292872905731201, + "learning_rate": 1.0608516483516483e-05, + "loss": 0.0197, + "step": 28677 + }, + { + "epoch": 78.78571428571429, + "grad_norm": 9.910809516906738, + "learning_rate": 1.0607142857142857e-05, + "loss": 0.1841, + "step": 28678 + }, + { + "epoch": 78.78846153846153, + "grad_norm": 0.5790701508522034, + "learning_rate": 1.0605769230769232e-05, + "loss": 0.0083, + "step": 28679 + }, + { + "epoch": 78.79120879120879, + "grad_norm": 3.1183159351348877, + "learning_rate": 1.0604395604395605e-05, + "loss": 0.028, + "step": 28680 + }, + { + "epoch": 78.79395604395604, + "grad_norm": 16.729280471801758, + "learning_rate": 1.0603021978021979e-05, + "loss": 0.5856, + "step": 28681 + }, + { + "epoch": 78.7967032967033, + "grad_norm": 19.758024215698242, + "learning_rate": 1.0601648351648352e-05, + "loss": 0.3682, + "step": 28682 + }, + { + "epoch": 78.79945054945055, + "grad_norm": 8.047880172729492, + "learning_rate": 1.0600274725274725e-05, + "loss": 0.1097, + "step": 28683 + }, + { + "epoch": 78.8021978021978, + "grad_norm": 3.6054391860961914, + "learning_rate": 1.05989010989011e-05, + "loss": 0.0334, + "step": 28684 + }, + { + "epoch": 78.80494505494505, + "grad_norm": 17.717636108398438, + "learning_rate": 1.0597527472527472e-05, + "loss": 0.2588, + "step": 28685 + }, + { + "epoch": 78.8076923076923, + "grad_norm": 8.41138744354248, + "learning_rate": 1.0596153846153847e-05, + "loss": 0.1079, + "step": 28686 + }, + { + "epoch": 78.81043956043956, + "grad_norm": 20.352279663085938, + "learning_rate": 1.059478021978022e-05, + "loss": 0.4256, + "step": 28687 + }, + { + "epoch": 78.81318681318682, + "grad_norm": 15.689826011657715, + "learning_rate": 1.0593406593406594e-05, + "loss": 0.3594, + "step": 28688 + }, + { + "epoch": 78.81593406593407, + "grad_norm": 14.297574996948242, + "learning_rate": 1.0592032967032968e-05, + "loss": 0.3446, + "step": 28689 + }, + { + "epoch": 78.81868131868131, + "grad_norm": 12.275252342224121, + "learning_rate": 1.0590659340659341e-05, + "loss": 0.1855, + "step": 28690 + }, + { + "epoch": 78.82142857142857, + "grad_norm": 19.843095779418945, + "learning_rate": 1.0589285714285716e-05, + "loss": 0.4616, + "step": 28691 + }, + { + "epoch": 78.82417582417582, + "grad_norm": 21.558738708496094, + "learning_rate": 1.0587912087912088e-05, + "loss": 0.3536, + "step": 28692 + }, + { + "epoch": 78.82692307692308, + "grad_norm": 9.79169750213623, + "learning_rate": 1.0586538461538461e-05, + "loss": 0.3311, + "step": 28693 + }, + { + "epoch": 78.82967032967034, + "grad_norm": 17.93295669555664, + "learning_rate": 1.0585164835164836e-05, + "loss": 0.4051, + "step": 28694 + }, + { + "epoch": 78.83241758241758, + "grad_norm": 3.73934268951416, + "learning_rate": 1.058379120879121e-05, + "loss": 0.046, + "step": 28695 + }, + { + "epoch": 78.83516483516483, + "grad_norm": 17.695068359375, + "learning_rate": 1.0582417582417581e-05, + "loss": 0.4814, + "step": 28696 + }, + { + "epoch": 78.83791208791209, + "grad_norm": 18.906747817993164, + "learning_rate": 1.0581043956043957e-05, + "loss": 0.5462, + "step": 28697 + }, + { + "epoch": 78.84065934065934, + "grad_norm": 10.068121910095215, + "learning_rate": 1.057967032967033e-05, + "loss": 0.1357, + "step": 28698 + }, + { + "epoch": 78.8434065934066, + "grad_norm": 6.4634199142456055, + "learning_rate": 1.0578296703296705e-05, + "loss": 0.0619, + "step": 28699 + }, + { + "epoch": 78.84615384615384, + "grad_norm": 24.191991806030273, + "learning_rate": 1.0576923076923077e-05, + "loss": 0.6333, + "step": 28700 + }, + { + "epoch": 78.8489010989011, + "grad_norm": 14.468682289123535, + "learning_rate": 1.057554945054945e-05, + "loss": 0.1511, + "step": 28701 + }, + { + "epoch": 78.85164835164835, + "grad_norm": 18.544321060180664, + "learning_rate": 1.0574175824175825e-05, + "loss": 0.37, + "step": 28702 + }, + { + "epoch": 78.8543956043956, + "grad_norm": 10.55964469909668, + "learning_rate": 1.0572802197802199e-05, + "loss": 0.1528, + "step": 28703 + }, + { + "epoch": 78.85714285714286, + "grad_norm": 20.279891967773438, + "learning_rate": 1.0571428571428572e-05, + "loss": 0.4622, + "step": 28704 + }, + { + "epoch": 78.85989010989012, + "grad_norm": 15.29450798034668, + "learning_rate": 1.0570054945054946e-05, + "loss": 0.3925, + "step": 28705 + }, + { + "epoch": 78.86263736263736, + "grad_norm": 12.613886833190918, + "learning_rate": 1.0568681318681319e-05, + "loss": 0.1522, + "step": 28706 + }, + { + "epoch": 78.86538461538461, + "grad_norm": 11.656271934509277, + "learning_rate": 1.0567307692307692e-05, + "loss": 0.1846, + "step": 28707 + }, + { + "epoch": 78.86813186813187, + "grad_norm": 4.1560235023498535, + "learning_rate": 1.0565934065934066e-05, + "loss": 0.0437, + "step": 28708 + }, + { + "epoch": 78.87087912087912, + "grad_norm": 25.90196418762207, + "learning_rate": 1.056456043956044e-05, + "loss": 0.546, + "step": 28709 + }, + { + "epoch": 78.87362637362638, + "grad_norm": 26.127595901489258, + "learning_rate": 1.0563186813186814e-05, + "loss": 0.5662, + "step": 28710 + }, + { + "epoch": 78.87637362637362, + "grad_norm": 4.406006813049316, + "learning_rate": 1.0561813186813186e-05, + "loss": 0.0723, + "step": 28711 + }, + { + "epoch": 78.87912087912088, + "grad_norm": 14.519124031066895, + "learning_rate": 1.0560439560439561e-05, + "loss": 0.402, + "step": 28712 + }, + { + "epoch": 78.88186813186813, + "grad_norm": 9.92822551727295, + "learning_rate": 1.0559065934065934e-05, + "loss": 0.1274, + "step": 28713 + }, + { + "epoch": 78.88461538461539, + "grad_norm": 11.286690711975098, + "learning_rate": 1.055769230769231e-05, + "loss": 0.1353, + "step": 28714 + }, + { + "epoch": 78.88736263736264, + "grad_norm": 20.822546005249023, + "learning_rate": 1.0556318681318681e-05, + "loss": 0.379, + "step": 28715 + }, + { + "epoch": 78.89010989010988, + "grad_norm": 9.848407745361328, + "learning_rate": 1.0554945054945055e-05, + "loss": 0.1214, + "step": 28716 + }, + { + "epoch": 78.89285714285714, + "grad_norm": 4.057932376861572, + "learning_rate": 1.055357142857143e-05, + "loss": 0.0566, + "step": 28717 + }, + { + "epoch": 78.8956043956044, + "grad_norm": 22.197114944458008, + "learning_rate": 1.0552197802197803e-05, + "loss": 0.537, + "step": 28718 + }, + { + "epoch": 78.89835164835165, + "grad_norm": 16.596120834350586, + "learning_rate": 1.0550824175824177e-05, + "loss": 0.5285, + "step": 28719 + }, + { + "epoch": 78.9010989010989, + "grad_norm": 17.094877243041992, + "learning_rate": 1.054945054945055e-05, + "loss": 0.2812, + "step": 28720 + }, + { + "epoch": 78.90384615384616, + "grad_norm": 4.58076810836792, + "learning_rate": 1.0548076923076923e-05, + "loss": 0.0673, + "step": 28721 + }, + { + "epoch": 78.9065934065934, + "grad_norm": 21.154277801513672, + "learning_rate": 1.0546703296703297e-05, + "loss": 0.2834, + "step": 28722 + }, + { + "epoch": 78.90934065934066, + "grad_norm": 16.52703285217285, + "learning_rate": 1.054532967032967e-05, + "loss": 0.174, + "step": 28723 + }, + { + "epoch": 78.91208791208791, + "grad_norm": 6.751414775848389, + "learning_rate": 1.0543956043956045e-05, + "loss": 0.1915, + "step": 28724 + }, + { + "epoch": 78.91483516483517, + "grad_norm": 12.25816822052002, + "learning_rate": 1.0542582417582419e-05, + "loss": 0.1719, + "step": 28725 + }, + { + "epoch": 78.91758241758242, + "grad_norm": 15.286357879638672, + "learning_rate": 1.054120879120879e-05, + "loss": 0.4365, + "step": 28726 + }, + { + "epoch": 78.92032967032966, + "grad_norm": 25.27597999572754, + "learning_rate": 1.0539835164835166e-05, + "loss": 0.4938, + "step": 28727 + }, + { + "epoch": 78.92307692307692, + "grad_norm": 7.781228065490723, + "learning_rate": 1.0538461538461539e-05, + "loss": 0.2065, + "step": 28728 + }, + { + "epoch": 78.92582417582418, + "grad_norm": 8.360974311828613, + "learning_rate": 1.0537087912087912e-05, + "loss": 0.2012, + "step": 28729 + }, + { + "epoch": 78.92857142857143, + "grad_norm": 2.6388423442840576, + "learning_rate": 1.0535714285714286e-05, + "loss": 0.0397, + "step": 28730 + }, + { + "epoch": 78.93131868131869, + "grad_norm": 14.285639762878418, + "learning_rate": 1.053434065934066e-05, + "loss": 0.1941, + "step": 28731 + }, + { + "epoch": 78.93406593406593, + "grad_norm": 10.992071151733398, + "learning_rate": 1.0532967032967034e-05, + "loss": 0.1347, + "step": 28732 + }, + { + "epoch": 78.93681318681318, + "grad_norm": 6.459771156311035, + "learning_rate": 1.0531593406593408e-05, + "loss": 0.0425, + "step": 28733 + }, + { + "epoch": 78.93956043956044, + "grad_norm": 6.873717308044434, + "learning_rate": 1.0530219780219781e-05, + "loss": 0.1074, + "step": 28734 + }, + { + "epoch": 78.9423076923077, + "grad_norm": 34.450843811035156, + "learning_rate": 1.0528846153846154e-05, + "loss": 0.5296, + "step": 28735 + }, + { + "epoch": 78.94505494505495, + "grad_norm": 4.762022018432617, + "learning_rate": 1.0527472527472528e-05, + "loss": 0.0791, + "step": 28736 + }, + { + "epoch": 78.9478021978022, + "grad_norm": 13.045933723449707, + "learning_rate": 1.0526098901098901e-05, + "loss": 0.2874, + "step": 28737 + }, + { + "epoch": 78.95054945054945, + "grad_norm": 13.832926750183105, + "learning_rate": 1.0524725274725275e-05, + "loss": 0.2097, + "step": 28738 + }, + { + "epoch": 78.9532967032967, + "grad_norm": 3.4765496253967285, + "learning_rate": 1.052335164835165e-05, + "loss": 0.0535, + "step": 28739 + }, + { + "epoch": 78.95604395604396, + "grad_norm": 23.554489135742188, + "learning_rate": 1.0521978021978023e-05, + "loss": 0.5991, + "step": 28740 + }, + { + "epoch": 78.95879120879121, + "grad_norm": 20.332443237304688, + "learning_rate": 1.0520604395604395e-05, + "loss": 0.2632, + "step": 28741 + }, + { + "epoch": 78.96153846153847, + "grad_norm": 17.532182693481445, + "learning_rate": 1.051923076923077e-05, + "loss": 0.31, + "step": 28742 + }, + { + "epoch": 78.96428571428571, + "grad_norm": 15.624727249145508, + "learning_rate": 1.0517857142857143e-05, + "loss": 0.2292, + "step": 28743 + }, + { + "epoch": 78.96703296703296, + "grad_norm": 9.988225936889648, + "learning_rate": 1.0516483516483517e-05, + "loss": 0.0985, + "step": 28744 + }, + { + "epoch": 78.96978021978022, + "grad_norm": 14.822542190551758, + "learning_rate": 1.051510989010989e-05, + "loss": 0.4426, + "step": 28745 + }, + { + "epoch": 78.97252747252747, + "grad_norm": 20.66379737854004, + "learning_rate": 1.0513736263736264e-05, + "loss": 0.5887, + "step": 28746 + }, + { + "epoch": 78.97527472527473, + "grad_norm": 10.805072784423828, + "learning_rate": 1.0512362637362639e-05, + "loss": 0.1931, + "step": 28747 + }, + { + "epoch": 78.97802197802197, + "grad_norm": 12.342293739318848, + "learning_rate": 1.0510989010989012e-05, + "loss": 0.2032, + "step": 28748 + }, + { + "epoch": 78.98076923076923, + "grad_norm": 9.687700271606445, + "learning_rate": 1.0509615384615386e-05, + "loss": 0.0746, + "step": 28749 + }, + { + "epoch": 78.98351648351648, + "grad_norm": 10.633805274963379, + "learning_rate": 1.0508241758241759e-05, + "loss": 0.2376, + "step": 28750 + }, + { + "epoch": 78.98626373626374, + "grad_norm": 3.8219401836395264, + "learning_rate": 1.0506868131868132e-05, + "loss": 0.067, + "step": 28751 + }, + { + "epoch": 78.98901098901099, + "grad_norm": 5.440643310546875, + "learning_rate": 1.0505494505494506e-05, + "loss": 0.1246, + "step": 28752 + }, + { + "epoch": 78.99175824175825, + "grad_norm": 4.414373397827148, + "learning_rate": 1.050412087912088e-05, + "loss": 0.0412, + "step": 28753 + }, + { + "epoch": 78.99450549450549, + "grad_norm": 6.176924705505371, + "learning_rate": 1.0502747252747253e-05, + "loss": 0.0914, + "step": 28754 + }, + { + "epoch": 78.99725274725274, + "grad_norm": 13.402215957641602, + "learning_rate": 1.0501373626373628e-05, + "loss": 0.2601, + "step": 28755 + }, + { + "epoch": 79.0, + "grad_norm": 58.146305084228516, + "learning_rate": 1.05e-05, + "loss": 0.8389, + "step": 28756 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.6225895316804407, + "eval_f1": 0.6470438943261303, + "eval_f1_DuraRiadoRio_64x64": 0.6175115207373272, + "eval_f1_Mole_64x64": 0.7447698744769874, + "eval_f1_Quebrado_64x64": 0.7575757575757576, + "eval_f1_RiadoRio_64x64": 0.5066666666666667, + "eval_f1_RioFechado_64x64": 0.6086956521739131, + "eval_loss": 2.0285401344299316, + "eval_precision": 0.8027565583233205, + "eval_precision_DuraRiadoRio_64x64": 0.9178082191780822, + "eval_precision_Mole_64x64": 0.9368421052631579, + "eval_precision_Quebrado_64x64": 0.8333333333333334, + "eval_precision_RiadoRio_64x64": 0.35656836461126007, + "eval_precision_RioFechado_64x64": 0.9692307692307692, + "eval_recall": 0.6192879499217527, + "eval_recall_DuraRiadoRio_64x64": 0.4652777777777778, + "eval_recall_Mole_64x64": 0.6180555555555556, + "eval_recall_Quebrado_64x64": 0.6944444444444444, + "eval_recall_RiadoRio_64x64": 0.875, + "eval_recall_RioFechado_64x64": 0.44366197183098594, + "eval_runtime": 1.7319, + "eval_samples_per_second": 419.198, + "eval_steps_per_second": 26.561, + "step": 28756 + }, + { + "epoch": 79.00274725274726, + "grad_norm": 12.316533088684082, + "learning_rate": 1.0498626373626375e-05, + "loss": 0.0701, + "step": 28757 + }, + { + "epoch": 79.00549450549451, + "grad_norm": 18.88165283203125, + "learning_rate": 1.0497252747252748e-05, + "loss": 0.3772, + "step": 28758 + }, + { + "epoch": 79.00824175824175, + "grad_norm": 10.21099853515625, + "learning_rate": 1.0495879120879121e-05, + "loss": 0.2078, + "step": 28759 + }, + { + "epoch": 79.01098901098901, + "grad_norm": 16.408409118652344, + "learning_rate": 1.0494505494505495e-05, + "loss": 0.2598, + "step": 28760 + }, + { + "epoch": 79.01373626373626, + "grad_norm": 17.45269775390625, + "learning_rate": 1.0493131868131868e-05, + "loss": 0.3634, + "step": 28761 + }, + { + "epoch": 79.01648351648352, + "grad_norm": 8.363073348999023, + "learning_rate": 1.0491758241758243e-05, + "loss": 0.0943, + "step": 28762 + }, + { + "epoch": 79.01923076923077, + "grad_norm": 15.526485443115234, + "learning_rate": 1.0490384615384617e-05, + "loss": 0.4514, + "step": 28763 + }, + { + "epoch": 79.02197802197803, + "grad_norm": 7.43272066116333, + "learning_rate": 1.0489010989010988e-05, + "loss": 0.1926, + "step": 28764 + }, + { + "epoch": 79.02472527472527, + "grad_norm": 1.0187451839447021, + "learning_rate": 1.0487637362637363e-05, + "loss": 0.0139, + "step": 28765 + }, + { + "epoch": 79.02747252747253, + "grad_norm": 16.38477897644043, + "learning_rate": 1.0486263736263737e-05, + "loss": 0.2771, + "step": 28766 + }, + { + "epoch": 79.03021978021978, + "grad_norm": 17.230070114135742, + "learning_rate": 1.048489010989011e-05, + "loss": 0.1956, + "step": 28767 + }, + { + "epoch": 79.03296703296704, + "grad_norm": 15.032294273376465, + "learning_rate": 1.0483516483516484e-05, + "loss": 0.3483, + "step": 28768 + }, + { + "epoch": 79.03571428571429, + "grad_norm": 1.7374014854431152, + "learning_rate": 1.0482142857142857e-05, + "loss": 0.0187, + "step": 28769 + }, + { + "epoch": 79.03846153846153, + "grad_norm": 9.043998718261719, + "learning_rate": 1.0480769230769232e-05, + "loss": 0.1827, + "step": 28770 + }, + { + "epoch": 79.04120879120879, + "grad_norm": 18.495018005371094, + "learning_rate": 1.0479395604395604e-05, + "loss": 0.3733, + "step": 28771 + }, + { + "epoch": 79.04395604395604, + "grad_norm": 8.785847663879395, + "learning_rate": 1.0478021978021979e-05, + "loss": 0.1047, + "step": 28772 + }, + { + "epoch": 79.0467032967033, + "grad_norm": 15.663786888122559, + "learning_rate": 1.0476648351648352e-05, + "loss": 0.1627, + "step": 28773 + }, + { + "epoch": 79.04945054945055, + "grad_norm": 8.718441009521484, + "learning_rate": 1.0475274725274726e-05, + "loss": 0.0887, + "step": 28774 + }, + { + "epoch": 79.0521978021978, + "grad_norm": 12.268106460571289, + "learning_rate": 1.04739010989011e-05, + "loss": 0.1617, + "step": 28775 + }, + { + "epoch": 79.05494505494505, + "grad_norm": 4.477203369140625, + "learning_rate": 1.0472527472527473e-05, + "loss": 0.0875, + "step": 28776 + }, + { + "epoch": 79.0576923076923, + "grad_norm": 7.953845024108887, + "learning_rate": 1.0471153846153848e-05, + "loss": 0.2278, + "step": 28777 + }, + { + "epoch": 79.06043956043956, + "grad_norm": 19.51457977294922, + "learning_rate": 1.046978021978022e-05, + "loss": 0.2153, + "step": 28778 + }, + { + "epoch": 79.06318681318682, + "grad_norm": 11.848151206970215, + "learning_rate": 1.0468406593406593e-05, + "loss": 0.1753, + "step": 28779 + }, + { + "epoch": 79.06593406593407, + "grad_norm": 8.519011497497559, + "learning_rate": 1.0467032967032968e-05, + "loss": 0.0757, + "step": 28780 + }, + { + "epoch": 79.06868131868131, + "grad_norm": 20.009496688842773, + "learning_rate": 1.0465659340659341e-05, + "loss": 0.5396, + "step": 28781 + }, + { + "epoch": 79.07142857142857, + "grad_norm": 4.10584831237793, + "learning_rate": 1.0464285714285715e-05, + "loss": 0.0523, + "step": 28782 + }, + { + "epoch": 79.07417582417582, + "grad_norm": 13.750300407409668, + "learning_rate": 1.0462912087912088e-05, + "loss": 0.2225, + "step": 28783 + }, + { + "epoch": 79.07692307692308, + "grad_norm": 12.881426811218262, + "learning_rate": 1.0461538461538462e-05, + "loss": 0.1356, + "step": 28784 + }, + { + "epoch": 79.07967032967034, + "grad_norm": 20.435611724853516, + "learning_rate": 1.0460164835164837e-05, + "loss": 0.529, + "step": 28785 + }, + { + "epoch": 79.08241758241758, + "grad_norm": 6.4919047355651855, + "learning_rate": 1.0458791208791208e-05, + "loss": 0.1167, + "step": 28786 + }, + { + "epoch": 79.08516483516483, + "grad_norm": 32.47161102294922, + "learning_rate": 1.0457417582417584e-05, + "loss": 1.0618, + "step": 28787 + }, + { + "epoch": 79.08791208791209, + "grad_norm": 14.670849800109863, + "learning_rate": 1.0456043956043957e-05, + "loss": 0.1748, + "step": 28788 + }, + { + "epoch": 79.09065934065934, + "grad_norm": 4.356411933898926, + "learning_rate": 1.045467032967033e-05, + "loss": 0.0597, + "step": 28789 + }, + { + "epoch": 79.0934065934066, + "grad_norm": 13.168397903442383, + "learning_rate": 1.0453296703296704e-05, + "loss": 0.3207, + "step": 28790 + }, + { + "epoch": 79.09615384615384, + "grad_norm": 9.729029655456543, + "learning_rate": 1.0451923076923077e-05, + "loss": 0.1726, + "step": 28791 + }, + { + "epoch": 79.0989010989011, + "grad_norm": 10.45697021484375, + "learning_rate": 1.0450549450549452e-05, + "loss": 0.2372, + "step": 28792 + }, + { + "epoch": 79.10164835164835, + "grad_norm": 18.300128936767578, + "learning_rate": 1.0449175824175824e-05, + "loss": 0.5386, + "step": 28793 + }, + { + "epoch": 79.1043956043956, + "grad_norm": 10.926653861999512, + "learning_rate": 1.0447802197802197e-05, + "loss": 0.4102, + "step": 28794 + }, + { + "epoch": 79.10714285714286, + "grad_norm": 3.889495849609375, + "learning_rate": 1.0446428571428572e-05, + "loss": 0.0586, + "step": 28795 + }, + { + "epoch": 79.10989010989012, + "grad_norm": 17.725662231445312, + "learning_rate": 1.0445054945054946e-05, + "loss": 0.3134, + "step": 28796 + }, + { + "epoch": 79.11263736263736, + "grad_norm": 8.418335914611816, + "learning_rate": 1.044368131868132e-05, + "loss": 0.1122, + "step": 28797 + }, + { + "epoch": 79.11538461538461, + "grad_norm": 10.623477935791016, + "learning_rate": 1.0442307692307693e-05, + "loss": 0.1194, + "step": 28798 + }, + { + "epoch": 79.11813186813187, + "grad_norm": 11.597265243530273, + "learning_rate": 1.0440934065934066e-05, + "loss": 0.1699, + "step": 28799 + }, + { + "epoch": 79.12087912087912, + "grad_norm": 17.845510482788086, + "learning_rate": 1.0439560439560441e-05, + "loss": 0.2723, + "step": 28800 + }, + { + "epoch": 79.12362637362638, + "grad_norm": 10.987794876098633, + "learning_rate": 1.0438186813186813e-05, + "loss": 0.1321, + "step": 28801 + }, + { + "epoch": 79.12637362637362, + "grad_norm": 11.27120304107666, + "learning_rate": 1.0436813186813188e-05, + "loss": 0.1685, + "step": 28802 + }, + { + "epoch": 79.12912087912088, + "grad_norm": 7.73786735534668, + "learning_rate": 1.0435439560439561e-05, + "loss": 0.1534, + "step": 28803 + }, + { + "epoch": 79.13186813186813, + "grad_norm": 14.11752986907959, + "learning_rate": 1.0434065934065935e-05, + "loss": 0.2801, + "step": 28804 + }, + { + "epoch": 79.13461538461539, + "grad_norm": 18.86206817626953, + "learning_rate": 1.0432692307692308e-05, + "loss": 0.618, + "step": 28805 + }, + { + "epoch": 79.13736263736264, + "grad_norm": 11.334796905517578, + "learning_rate": 1.0431318681318682e-05, + "loss": 0.0706, + "step": 28806 + }, + { + "epoch": 79.14010989010988, + "grad_norm": 14.262494087219238, + "learning_rate": 1.0429945054945057e-05, + "loss": 0.4956, + "step": 28807 + }, + { + "epoch": 79.14285714285714, + "grad_norm": 17.24493408203125, + "learning_rate": 1.0428571428571428e-05, + "loss": 0.2845, + "step": 28808 + }, + { + "epoch": 79.1456043956044, + "grad_norm": 8.234838485717773, + "learning_rate": 1.0427197802197802e-05, + "loss": 0.1697, + "step": 28809 + }, + { + "epoch": 79.14835164835165, + "grad_norm": 10.413204193115234, + "learning_rate": 1.0425824175824177e-05, + "loss": 0.1437, + "step": 28810 + }, + { + "epoch": 79.1510989010989, + "grad_norm": 1.0454576015472412, + "learning_rate": 1.042445054945055e-05, + "loss": 0.0107, + "step": 28811 + }, + { + "epoch": 79.15384615384616, + "grad_norm": 4.5639753341674805, + "learning_rate": 1.0423076923076922e-05, + "loss": 0.0605, + "step": 28812 + }, + { + "epoch": 79.1565934065934, + "grad_norm": 22.073577880859375, + "learning_rate": 1.0421703296703297e-05, + "loss": 0.3572, + "step": 28813 + }, + { + "epoch": 79.15934065934066, + "grad_norm": 18.056476593017578, + "learning_rate": 1.042032967032967e-05, + "loss": 0.4025, + "step": 28814 + }, + { + "epoch": 79.16208791208791, + "grad_norm": 7.62082052230835, + "learning_rate": 1.0418956043956046e-05, + "loss": 0.1005, + "step": 28815 + }, + { + "epoch": 79.16483516483517, + "grad_norm": 1.733957052230835, + "learning_rate": 1.0417582417582417e-05, + "loss": 0.0179, + "step": 28816 + }, + { + "epoch": 79.16758241758242, + "grad_norm": 14.277861595153809, + "learning_rate": 1.041620879120879e-05, + "loss": 0.1877, + "step": 28817 + }, + { + "epoch": 79.17032967032966, + "grad_norm": 21.470447540283203, + "learning_rate": 1.0414835164835166e-05, + "loss": 0.4803, + "step": 28818 + }, + { + "epoch": 79.17307692307692, + "grad_norm": 9.127077102661133, + "learning_rate": 1.041346153846154e-05, + "loss": 0.1953, + "step": 28819 + }, + { + "epoch": 79.17582417582418, + "grad_norm": 1.3974425792694092, + "learning_rate": 1.0412087912087913e-05, + "loss": 0.018, + "step": 28820 + }, + { + "epoch": 79.17857142857143, + "grad_norm": 18.626283645629883, + "learning_rate": 1.0410714285714286e-05, + "loss": 0.5798, + "step": 28821 + }, + { + "epoch": 79.18131868131869, + "grad_norm": 23.036718368530273, + "learning_rate": 1.040934065934066e-05, + "loss": 0.3824, + "step": 28822 + }, + { + "epoch": 79.18406593406593, + "grad_norm": 8.237749099731445, + "learning_rate": 1.0407967032967033e-05, + "loss": 0.1267, + "step": 28823 + }, + { + "epoch": 79.18681318681318, + "grad_norm": 10.189342498779297, + "learning_rate": 1.0406593406593406e-05, + "loss": 0.1916, + "step": 28824 + }, + { + "epoch": 79.18956043956044, + "grad_norm": 20.51076316833496, + "learning_rate": 1.0405219780219781e-05, + "loss": 0.5095, + "step": 28825 + }, + { + "epoch": 79.1923076923077, + "grad_norm": 10.204483985900879, + "learning_rate": 1.0403846153846155e-05, + "loss": 0.1433, + "step": 28826 + }, + { + "epoch": 79.19505494505495, + "grad_norm": 16.14784812927246, + "learning_rate": 1.0402472527472527e-05, + "loss": 0.5134, + "step": 28827 + }, + { + "epoch": 79.1978021978022, + "grad_norm": 29.66990852355957, + "learning_rate": 1.0401098901098902e-05, + "loss": 0.5403, + "step": 28828 + }, + { + "epoch": 79.20054945054945, + "grad_norm": 5.098108291625977, + "learning_rate": 1.0399725274725275e-05, + "loss": 0.0463, + "step": 28829 + }, + { + "epoch": 79.2032967032967, + "grad_norm": 2.8597326278686523, + "learning_rate": 1.039835164835165e-05, + "loss": 0.0453, + "step": 28830 + }, + { + "epoch": 79.20604395604396, + "grad_norm": 18.53744888305664, + "learning_rate": 1.0396978021978022e-05, + "loss": 0.3546, + "step": 28831 + }, + { + "epoch": 79.20879120879121, + "grad_norm": 16.133678436279297, + "learning_rate": 1.0395604395604395e-05, + "loss": 0.327, + "step": 28832 + }, + { + "epoch": 79.21153846153847, + "grad_norm": 2.260986804962158, + "learning_rate": 1.039423076923077e-05, + "loss": 0.0215, + "step": 28833 + }, + { + "epoch": 79.21428571428571, + "grad_norm": 4.3896002769470215, + "learning_rate": 1.0392857142857144e-05, + "loss": 0.0636, + "step": 28834 + }, + { + "epoch": 79.21703296703296, + "grad_norm": 13.841695785522461, + "learning_rate": 1.0391483516483517e-05, + "loss": 0.1863, + "step": 28835 + }, + { + "epoch": 79.21978021978022, + "grad_norm": 0.7332051396369934, + "learning_rate": 1.039010989010989e-05, + "loss": 0.0108, + "step": 28836 + }, + { + "epoch": 79.22252747252747, + "grad_norm": 13.208416938781738, + "learning_rate": 1.0388736263736264e-05, + "loss": 0.2254, + "step": 28837 + }, + { + "epoch": 79.22527472527473, + "grad_norm": 10.929474830627441, + "learning_rate": 1.0387362637362637e-05, + "loss": 0.1116, + "step": 28838 + }, + { + "epoch": 79.22802197802197, + "grad_norm": 19.88324737548828, + "learning_rate": 1.038598901098901e-05, + "loss": 0.5547, + "step": 28839 + }, + { + "epoch": 79.23076923076923, + "grad_norm": 24.165735244750977, + "learning_rate": 1.0384615384615386e-05, + "loss": 0.8408, + "step": 28840 + }, + { + "epoch": 79.23351648351648, + "grad_norm": 12.92985725402832, + "learning_rate": 1.038324175824176e-05, + "loss": 0.3289, + "step": 28841 + }, + { + "epoch": 79.23626373626374, + "grad_norm": 13.82563304901123, + "learning_rate": 1.0381868131868131e-05, + "loss": 0.1921, + "step": 28842 + }, + { + "epoch": 79.23901098901099, + "grad_norm": 4.977993488311768, + "learning_rate": 1.0380494505494506e-05, + "loss": 0.0896, + "step": 28843 + }, + { + "epoch": 79.24175824175825, + "grad_norm": 16.09482765197754, + "learning_rate": 1.037912087912088e-05, + "loss": 0.2485, + "step": 28844 + }, + { + "epoch": 79.24450549450549, + "grad_norm": 11.90679931640625, + "learning_rate": 1.0377747252747255e-05, + "loss": 0.2341, + "step": 28845 + }, + { + "epoch": 79.24725274725274, + "grad_norm": 14.019968032836914, + "learning_rate": 1.0376373626373626e-05, + "loss": 0.3001, + "step": 28846 + }, + { + "epoch": 79.25, + "grad_norm": 8.028596878051758, + "learning_rate": 1.0375e-05, + "loss": 0.1516, + "step": 28847 + }, + { + "epoch": 79.25274725274726, + "grad_norm": 20.31247901916504, + "learning_rate": 1.0373626373626375e-05, + "loss": 0.6636, + "step": 28848 + }, + { + "epoch": 79.25549450549451, + "grad_norm": 11.395339012145996, + "learning_rate": 1.0372252747252748e-05, + "loss": 0.1979, + "step": 28849 + }, + { + "epoch": 79.25824175824175, + "grad_norm": 9.372941970825195, + "learning_rate": 1.0370879120879122e-05, + "loss": 0.1116, + "step": 28850 + }, + { + "epoch": 79.26098901098901, + "grad_norm": 12.190629959106445, + "learning_rate": 1.0369505494505495e-05, + "loss": 0.1029, + "step": 28851 + }, + { + "epoch": 79.26373626373626, + "grad_norm": 9.244059562683105, + "learning_rate": 1.0368131868131868e-05, + "loss": 0.1927, + "step": 28852 + }, + { + "epoch": 79.26648351648352, + "grad_norm": 8.032268524169922, + "learning_rate": 1.0366758241758242e-05, + "loss": 0.0972, + "step": 28853 + }, + { + "epoch": 79.26923076923077, + "grad_norm": 29.999834060668945, + "learning_rate": 1.0365384615384615e-05, + "loss": 0.3614, + "step": 28854 + }, + { + "epoch": 79.27197802197803, + "grad_norm": 22.135671615600586, + "learning_rate": 1.036401098901099e-05, + "loss": 0.6407, + "step": 28855 + }, + { + "epoch": 79.27472527472527, + "grad_norm": 8.397968292236328, + "learning_rate": 1.0362637362637364e-05, + "loss": 0.179, + "step": 28856 + }, + { + "epoch": 79.27747252747253, + "grad_norm": 16.85465431213379, + "learning_rate": 1.0361263736263736e-05, + "loss": 0.2928, + "step": 28857 + }, + { + "epoch": 79.28021978021978, + "grad_norm": 9.224672317504883, + "learning_rate": 1.035989010989011e-05, + "loss": 0.1566, + "step": 28858 + }, + { + "epoch": 79.28296703296704, + "grad_norm": 5.878480434417725, + "learning_rate": 1.0358516483516484e-05, + "loss": 0.1321, + "step": 28859 + }, + { + "epoch": 79.28571428571429, + "grad_norm": 10.397378921508789, + "learning_rate": 1.0357142857142859e-05, + "loss": 0.217, + "step": 28860 + }, + { + "epoch": 79.28846153846153, + "grad_norm": 9.352264404296875, + "learning_rate": 1.0355769230769231e-05, + "loss": 0.1675, + "step": 28861 + }, + { + "epoch": 79.29120879120879, + "grad_norm": 6.6083269119262695, + "learning_rate": 1.0354395604395604e-05, + "loss": 0.1157, + "step": 28862 + }, + { + "epoch": 79.29395604395604, + "grad_norm": 12.412050247192383, + "learning_rate": 1.035302197802198e-05, + "loss": 0.1718, + "step": 28863 + }, + { + "epoch": 79.2967032967033, + "grad_norm": 17.326841354370117, + "learning_rate": 1.0351648351648353e-05, + "loss": 0.2587, + "step": 28864 + }, + { + "epoch": 79.29945054945055, + "grad_norm": 14.712471961975098, + "learning_rate": 1.0350274725274724e-05, + "loss": 0.1936, + "step": 28865 + }, + { + "epoch": 79.3021978021978, + "grad_norm": 3.0892434120178223, + "learning_rate": 1.03489010989011e-05, + "loss": 0.0268, + "step": 28866 + }, + { + "epoch": 79.30494505494505, + "grad_norm": 9.101234436035156, + "learning_rate": 1.0347527472527473e-05, + "loss": 0.1263, + "step": 28867 + }, + { + "epoch": 79.3076923076923, + "grad_norm": 16.261314392089844, + "learning_rate": 1.0346153846153846e-05, + "loss": 0.3921, + "step": 28868 + }, + { + "epoch": 79.31043956043956, + "grad_norm": 10.403803825378418, + "learning_rate": 1.034478021978022e-05, + "loss": 0.163, + "step": 28869 + }, + { + "epoch": 79.31318681318682, + "grad_norm": 11.610979080200195, + "learning_rate": 1.0343406593406593e-05, + "loss": 0.1564, + "step": 28870 + }, + { + "epoch": 79.31593406593407, + "grad_norm": 15.266168594360352, + "learning_rate": 1.0342032967032968e-05, + "loss": 0.3789, + "step": 28871 + }, + { + "epoch": 79.31868131868131, + "grad_norm": 15.18602466583252, + "learning_rate": 1.034065934065934e-05, + "loss": 0.4247, + "step": 28872 + }, + { + "epoch": 79.32142857142857, + "grad_norm": 18.22274398803711, + "learning_rate": 1.0339285714285715e-05, + "loss": 0.5897, + "step": 28873 + }, + { + "epoch": 79.32417582417582, + "grad_norm": 19.31199836730957, + "learning_rate": 1.0337912087912089e-05, + "loss": 0.2456, + "step": 28874 + }, + { + "epoch": 79.32692307692308, + "grad_norm": 15.402201652526855, + "learning_rate": 1.0336538461538462e-05, + "loss": 0.2792, + "step": 28875 + }, + { + "epoch": 79.32967032967034, + "grad_norm": 7.70722770690918, + "learning_rate": 1.0335164835164835e-05, + "loss": 0.0865, + "step": 28876 + }, + { + "epoch": 79.33241758241758, + "grad_norm": 9.694029808044434, + "learning_rate": 1.0333791208791209e-05, + "loss": 0.132, + "step": 28877 + }, + { + "epoch": 79.33516483516483, + "grad_norm": 5.712386608123779, + "learning_rate": 1.0332417582417584e-05, + "loss": 0.1112, + "step": 28878 + }, + { + "epoch": 79.33791208791209, + "grad_norm": 7.389815330505371, + "learning_rate": 1.0331043956043957e-05, + "loss": 0.0931, + "step": 28879 + }, + { + "epoch": 79.34065934065934, + "grad_norm": 11.37219524383545, + "learning_rate": 1.0329670329670329e-05, + "loss": 0.3857, + "step": 28880 + }, + { + "epoch": 79.3434065934066, + "grad_norm": 14.410364151000977, + "learning_rate": 1.0328296703296704e-05, + "loss": 0.3102, + "step": 28881 + }, + { + "epoch": 79.34615384615384, + "grad_norm": 3.1359543800354004, + "learning_rate": 1.0326923076923077e-05, + "loss": 0.054, + "step": 28882 + }, + { + "epoch": 79.3489010989011, + "grad_norm": 18.30877113342285, + "learning_rate": 1.0325549450549451e-05, + "loss": 0.4276, + "step": 28883 + }, + { + "epoch": 79.35164835164835, + "grad_norm": 25.645183563232422, + "learning_rate": 1.0324175824175824e-05, + "loss": 0.7325, + "step": 28884 + }, + { + "epoch": 79.3543956043956, + "grad_norm": 13.54090404510498, + "learning_rate": 1.0322802197802198e-05, + "loss": 0.2587, + "step": 28885 + }, + { + "epoch": 79.35714285714286, + "grad_norm": 9.11857795715332, + "learning_rate": 1.0321428571428573e-05, + "loss": 0.1747, + "step": 28886 + }, + { + "epoch": 79.35989010989012, + "grad_norm": 18.35404396057129, + "learning_rate": 1.0320054945054945e-05, + "loss": 0.5282, + "step": 28887 + }, + { + "epoch": 79.36263736263736, + "grad_norm": 15.986690521240234, + "learning_rate": 1.031868131868132e-05, + "loss": 0.3774, + "step": 28888 + }, + { + "epoch": 79.36538461538461, + "grad_norm": 10.119482040405273, + "learning_rate": 1.0317307692307693e-05, + "loss": 0.1138, + "step": 28889 + }, + { + "epoch": 79.36813186813187, + "grad_norm": 16.91822624206543, + "learning_rate": 1.0315934065934066e-05, + "loss": 0.3884, + "step": 28890 + }, + { + "epoch": 79.37087912087912, + "grad_norm": 6.529543876647949, + "learning_rate": 1.031456043956044e-05, + "loss": 0.0813, + "step": 28891 + }, + { + "epoch": 79.37362637362638, + "grad_norm": 21.826013565063477, + "learning_rate": 1.0313186813186813e-05, + "loss": 0.2858, + "step": 28892 + }, + { + "epoch": 79.37637362637362, + "grad_norm": 0.9900150299072266, + "learning_rate": 1.0311813186813188e-05, + "loss": 0.0147, + "step": 28893 + }, + { + "epoch": 79.37912087912088, + "grad_norm": 15.274240493774414, + "learning_rate": 1.0310439560439562e-05, + "loss": 0.1841, + "step": 28894 + }, + { + "epoch": 79.38186813186813, + "grad_norm": 6.5430097579956055, + "learning_rate": 1.0309065934065933e-05, + "loss": 0.0712, + "step": 28895 + }, + { + "epoch": 79.38461538461539, + "grad_norm": 8.77660846710205, + "learning_rate": 1.0307692307692309e-05, + "loss": 0.1238, + "step": 28896 + }, + { + "epoch": 79.38736263736264, + "grad_norm": 10.901819229125977, + "learning_rate": 1.0306318681318682e-05, + "loss": 0.1094, + "step": 28897 + }, + { + "epoch": 79.39010989010988, + "grad_norm": 19.756135940551758, + "learning_rate": 1.0304945054945055e-05, + "loss": 0.5405, + "step": 28898 + }, + { + "epoch": 79.39285714285714, + "grad_norm": 9.428825378417969, + "learning_rate": 1.0303571428571429e-05, + "loss": 0.1436, + "step": 28899 + }, + { + "epoch": 79.3956043956044, + "grad_norm": 12.207120895385742, + "learning_rate": 1.0302197802197802e-05, + "loss": 0.2929, + "step": 28900 + }, + { + "epoch": 79.39835164835165, + "grad_norm": 10.873129844665527, + "learning_rate": 1.0300824175824177e-05, + "loss": 0.2609, + "step": 28901 + }, + { + "epoch": 79.4010989010989, + "grad_norm": 7.358376502990723, + "learning_rate": 1.0299450549450549e-05, + "loss": 0.2034, + "step": 28902 + }, + { + "epoch": 79.40384615384616, + "grad_norm": 15.190788269042969, + "learning_rate": 1.0298076923076924e-05, + "loss": 0.3339, + "step": 28903 + }, + { + "epoch": 79.4065934065934, + "grad_norm": 10.614666938781738, + "learning_rate": 1.0296703296703298e-05, + "loss": 0.0786, + "step": 28904 + }, + { + "epoch": 79.40934065934066, + "grad_norm": 14.06961727142334, + "learning_rate": 1.0295329670329671e-05, + "loss": 0.1713, + "step": 28905 + }, + { + "epoch": 79.41208791208791, + "grad_norm": 16.0543155670166, + "learning_rate": 1.0293956043956044e-05, + "loss": 0.2775, + "step": 28906 + }, + { + "epoch": 79.41483516483517, + "grad_norm": 23.87683868408203, + "learning_rate": 1.0292582417582418e-05, + "loss": 0.5892, + "step": 28907 + }, + { + "epoch": 79.41758241758242, + "grad_norm": 27.408878326416016, + "learning_rate": 1.0291208791208793e-05, + "loss": 0.3993, + "step": 28908 + }, + { + "epoch": 79.42032967032966, + "grad_norm": 6.104647159576416, + "learning_rate": 1.0289835164835166e-05, + "loss": 0.0774, + "step": 28909 + }, + { + "epoch": 79.42307692307692, + "grad_norm": 17.748125076293945, + "learning_rate": 1.0288461538461538e-05, + "loss": 0.4262, + "step": 28910 + }, + { + "epoch": 79.42582417582418, + "grad_norm": 8.75183391571045, + "learning_rate": 1.0287087912087913e-05, + "loss": 0.1035, + "step": 28911 + }, + { + "epoch": 79.42857142857143, + "grad_norm": 24.1508846282959, + "learning_rate": 1.0285714285714286e-05, + "loss": 0.7388, + "step": 28912 + }, + { + "epoch": 79.43131868131869, + "grad_norm": 15.484065055847168, + "learning_rate": 1.028434065934066e-05, + "loss": 0.4607, + "step": 28913 + }, + { + "epoch": 79.43406593406593, + "grad_norm": 13.20391845703125, + "learning_rate": 1.0282967032967033e-05, + "loss": 0.3401, + "step": 28914 + }, + { + "epoch": 79.43681318681318, + "grad_norm": 7.176555633544922, + "learning_rate": 1.0281593406593407e-05, + "loss": 0.0645, + "step": 28915 + }, + { + "epoch": 79.43956043956044, + "grad_norm": 4.2505927085876465, + "learning_rate": 1.0280219780219782e-05, + "loss": 0.065, + "step": 28916 + }, + { + "epoch": 79.4423076923077, + "grad_norm": 19.537948608398438, + "learning_rate": 1.0278846153846153e-05, + "loss": 0.355, + "step": 28917 + }, + { + "epoch": 79.44505494505495, + "grad_norm": 20.075124740600586, + "learning_rate": 1.0277472527472529e-05, + "loss": 0.5662, + "step": 28918 + }, + { + "epoch": 79.4478021978022, + "grad_norm": 7.204807758331299, + "learning_rate": 1.0276098901098902e-05, + "loss": 0.1464, + "step": 28919 + }, + { + "epoch": 79.45054945054945, + "grad_norm": 5.495880603790283, + "learning_rate": 1.0274725274725275e-05, + "loss": 0.08, + "step": 28920 + }, + { + "epoch": 79.4532967032967, + "grad_norm": 2.9757161140441895, + "learning_rate": 1.0273351648351649e-05, + "loss": 0.0771, + "step": 28921 + }, + { + "epoch": 79.45604395604396, + "grad_norm": 14.952569961547852, + "learning_rate": 1.0271978021978022e-05, + "loss": 0.3204, + "step": 28922 + }, + { + "epoch": 79.45879120879121, + "grad_norm": 10.633283615112305, + "learning_rate": 1.0270604395604396e-05, + "loss": 0.1923, + "step": 28923 + }, + { + "epoch": 79.46153846153847, + "grad_norm": 6.920191287994385, + "learning_rate": 1.0269230769230769e-05, + "loss": 0.0831, + "step": 28924 + }, + { + "epoch": 79.46428571428571, + "grad_norm": 11.512249946594238, + "learning_rate": 1.0267857142857142e-05, + "loss": 0.3455, + "step": 28925 + }, + { + "epoch": 79.46703296703296, + "grad_norm": 10.75846004486084, + "learning_rate": 1.0266483516483518e-05, + "loss": 0.1824, + "step": 28926 + }, + { + "epoch": 79.46978021978022, + "grad_norm": 21.67021942138672, + "learning_rate": 1.0265109890109891e-05, + "loss": 0.6968, + "step": 28927 + }, + { + "epoch": 79.47252747252747, + "grad_norm": 18.092851638793945, + "learning_rate": 1.0263736263736264e-05, + "loss": 0.3839, + "step": 28928 + }, + { + "epoch": 79.47527472527473, + "grad_norm": 7.323409080505371, + "learning_rate": 1.0262362637362638e-05, + "loss": 0.1504, + "step": 28929 + }, + { + "epoch": 79.47802197802197, + "grad_norm": 9.141854286193848, + "learning_rate": 1.0260989010989011e-05, + "loss": 0.1039, + "step": 28930 + }, + { + "epoch": 79.48076923076923, + "grad_norm": 15.852733612060547, + "learning_rate": 1.0259615384615386e-05, + "loss": 0.2162, + "step": 28931 + }, + { + "epoch": 79.48351648351648, + "grad_norm": 16.652332305908203, + "learning_rate": 1.0258241758241758e-05, + "loss": 0.2065, + "step": 28932 + }, + { + "epoch": 79.48626373626374, + "grad_norm": 2.0610082149505615, + "learning_rate": 1.0256868131868131e-05, + "loss": 0.0263, + "step": 28933 + }, + { + "epoch": 79.48901098901099, + "grad_norm": 9.207290649414062, + "learning_rate": 1.0255494505494506e-05, + "loss": 0.1771, + "step": 28934 + }, + { + "epoch": 79.49175824175825, + "grad_norm": 14.257007598876953, + "learning_rate": 1.025412087912088e-05, + "loss": 0.1477, + "step": 28935 + }, + { + "epoch": 79.49450549450549, + "grad_norm": 7.881979465484619, + "learning_rate": 1.0252747252747253e-05, + "loss": 0.1655, + "step": 28936 + }, + { + "epoch": 79.49725274725274, + "grad_norm": 14.364538192749023, + "learning_rate": 1.0251373626373627e-05, + "loss": 0.2335, + "step": 28937 + }, + { + "epoch": 79.5, + "grad_norm": 3.0548179149627686, + "learning_rate": 1.025e-05, + "loss": 0.0354, + "step": 28938 + }, + { + "epoch": 79.50274725274726, + "grad_norm": 17.121259689331055, + "learning_rate": 1.0248626373626374e-05, + "loss": 0.2863, + "step": 28939 + }, + { + "epoch": 79.50549450549451, + "grad_norm": 3.8766021728515625, + "learning_rate": 1.0247252747252747e-05, + "loss": 0.0333, + "step": 28940 + }, + { + "epoch": 79.50824175824175, + "grad_norm": 19.007692337036133, + "learning_rate": 1.0245879120879122e-05, + "loss": 0.2119, + "step": 28941 + }, + { + "epoch": 79.51098901098901, + "grad_norm": 6.997519016265869, + "learning_rate": 1.0244505494505495e-05, + "loss": 0.0452, + "step": 28942 + }, + { + "epoch": 79.51373626373626, + "grad_norm": 16.686920166015625, + "learning_rate": 1.0243131868131869e-05, + "loss": 0.2422, + "step": 28943 + }, + { + "epoch": 79.51648351648352, + "grad_norm": 12.169967651367188, + "learning_rate": 1.0241758241758242e-05, + "loss": 0.2455, + "step": 28944 + }, + { + "epoch": 79.51923076923077, + "grad_norm": 29.761497497558594, + "learning_rate": 1.0240384615384616e-05, + "loss": 0.6264, + "step": 28945 + }, + { + "epoch": 79.52197802197803, + "grad_norm": 17.08046531677246, + "learning_rate": 1.023901098901099e-05, + "loss": 0.2118, + "step": 28946 + }, + { + "epoch": 79.52472527472527, + "grad_norm": 17.645959854125977, + "learning_rate": 1.0237637362637362e-05, + "loss": 0.2482, + "step": 28947 + }, + { + "epoch": 79.52747252747253, + "grad_norm": 11.81316089630127, + "learning_rate": 1.0236263736263736e-05, + "loss": 0.1353, + "step": 28948 + }, + { + "epoch": 79.53021978021978, + "grad_norm": 4.848506450653076, + "learning_rate": 1.0234890109890111e-05, + "loss": 0.0675, + "step": 28949 + }, + { + "epoch": 79.53296703296704, + "grad_norm": 9.343018531799316, + "learning_rate": 1.0233516483516484e-05, + "loss": 0.2851, + "step": 28950 + }, + { + "epoch": 79.53571428571429, + "grad_norm": 10.60819149017334, + "learning_rate": 1.0232142857142858e-05, + "loss": 0.2089, + "step": 28951 + }, + { + "epoch": 79.53846153846153, + "grad_norm": 8.154520034790039, + "learning_rate": 1.0230769230769231e-05, + "loss": 0.2001, + "step": 28952 + }, + { + "epoch": 79.54120879120879, + "grad_norm": 14.324722290039062, + "learning_rate": 1.0229395604395605e-05, + "loss": 0.2121, + "step": 28953 + }, + { + "epoch": 79.54395604395604, + "grad_norm": 25.33856773376465, + "learning_rate": 1.0228021978021978e-05, + "loss": 0.4279, + "step": 28954 + }, + { + "epoch": 79.5467032967033, + "grad_norm": 5.070753574371338, + "learning_rate": 1.0226648351648351e-05, + "loss": 0.0549, + "step": 28955 + }, + { + "epoch": 79.54945054945055, + "grad_norm": 10.725859642028809, + "learning_rate": 1.0225274725274727e-05, + "loss": 0.2178, + "step": 28956 + }, + { + "epoch": 79.5521978021978, + "grad_norm": 11.193706512451172, + "learning_rate": 1.02239010989011e-05, + "loss": 0.1734, + "step": 28957 + }, + { + "epoch": 79.55494505494505, + "grad_norm": 18.883464813232422, + "learning_rate": 1.0222527472527473e-05, + "loss": 0.3094, + "step": 28958 + }, + { + "epoch": 79.5576923076923, + "grad_norm": 7.342922210693359, + "learning_rate": 1.0221153846153847e-05, + "loss": 0.078, + "step": 28959 + }, + { + "epoch": 79.56043956043956, + "grad_norm": 5.484202861785889, + "learning_rate": 1.021978021978022e-05, + "loss": 0.0779, + "step": 28960 + }, + { + "epoch": 79.56318681318682, + "grad_norm": 22.30341339111328, + "learning_rate": 1.0218406593406595e-05, + "loss": 0.7504, + "step": 28961 + }, + { + "epoch": 79.56593406593407, + "grad_norm": 9.621374130249023, + "learning_rate": 1.0217032967032967e-05, + "loss": 0.1401, + "step": 28962 + }, + { + "epoch": 79.56868131868131, + "grad_norm": 25.823646545410156, + "learning_rate": 1.021565934065934e-05, + "loss": 0.4887, + "step": 28963 + }, + { + "epoch": 79.57142857142857, + "grad_norm": 6.233109951019287, + "learning_rate": 1.0214285714285715e-05, + "loss": 0.103, + "step": 28964 + }, + { + "epoch": 79.57417582417582, + "grad_norm": 16.378604888916016, + "learning_rate": 1.0212912087912089e-05, + "loss": 0.2519, + "step": 28965 + }, + { + "epoch": 79.57692307692308, + "grad_norm": 11.431565284729004, + "learning_rate": 1.0211538461538462e-05, + "loss": 0.1613, + "step": 28966 + }, + { + "epoch": 79.57967032967034, + "grad_norm": 24.085811614990234, + "learning_rate": 1.0210164835164836e-05, + "loss": 0.6818, + "step": 28967 + }, + { + "epoch": 79.58241758241758, + "grad_norm": 7.547903537750244, + "learning_rate": 1.0208791208791209e-05, + "loss": 0.1719, + "step": 28968 + }, + { + "epoch": 79.58516483516483, + "grad_norm": 9.709396362304688, + "learning_rate": 1.0207417582417583e-05, + "loss": 0.2061, + "step": 28969 + }, + { + "epoch": 79.58791208791209, + "grad_norm": 20.531593322753906, + "learning_rate": 1.0206043956043956e-05, + "loss": 0.568, + "step": 28970 + }, + { + "epoch": 79.59065934065934, + "grad_norm": 14.584039688110352, + "learning_rate": 1.0204670329670331e-05, + "loss": 0.4336, + "step": 28971 + }, + { + "epoch": 79.5934065934066, + "grad_norm": 26.704153060913086, + "learning_rate": 1.0203296703296704e-05, + "loss": 0.743, + "step": 28972 + }, + { + "epoch": 79.59615384615384, + "grad_norm": 13.123160362243652, + "learning_rate": 1.0201923076923076e-05, + "loss": 0.2544, + "step": 28973 + }, + { + "epoch": 79.5989010989011, + "grad_norm": 6.366422176361084, + "learning_rate": 1.0200549450549451e-05, + "loss": 0.1513, + "step": 28974 + }, + { + "epoch": 79.60164835164835, + "grad_norm": 20.432302474975586, + "learning_rate": 1.0199175824175825e-05, + "loss": 0.5588, + "step": 28975 + }, + { + "epoch": 79.6043956043956, + "grad_norm": 6.914139747619629, + "learning_rate": 1.01978021978022e-05, + "loss": 0.1367, + "step": 28976 + }, + { + "epoch": 79.60714285714286, + "grad_norm": 17.68869400024414, + "learning_rate": 1.0196428571428571e-05, + "loss": 0.5221, + "step": 28977 + }, + { + "epoch": 79.60989010989012, + "grad_norm": 3.918828248977661, + "learning_rate": 1.0195054945054945e-05, + "loss": 0.0362, + "step": 28978 + }, + { + "epoch": 79.61263736263736, + "grad_norm": 6.303725242614746, + "learning_rate": 1.019368131868132e-05, + "loss": 0.1141, + "step": 28979 + }, + { + "epoch": 79.61538461538461, + "grad_norm": 18.908960342407227, + "learning_rate": 1.0192307692307693e-05, + "loss": 0.3004, + "step": 28980 + }, + { + "epoch": 79.61813186813187, + "grad_norm": 10.899432182312012, + "learning_rate": 1.0190934065934065e-05, + "loss": 0.1563, + "step": 28981 + }, + { + "epoch": 79.62087912087912, + "grad_norm": 18.77685546875, + "learning_rate": 1.018956043956044e-05, + "loss": 0.3949, + "step": 28982 + }, + { + "epoch": 79.62362637362638, + "grad_norm": 8.820027351379395, + "learning_rate": 1.0188186813186814e-05, + "loss": 0.0638, + "step": 28983 + }, + { + "epoch": 79.62637362637362, + "grad_norm": 3.820117235183716, + "learning_rate": 1.0186813186813187e-05, + "loss": 0.0524, + "step": 28984 + }, + { + "epoch": 79.62912087912088, + "grad_norm": 13.279606819152832, + "learning_rate": 1.018543956043956e-05, + "loss": 0.2566, + "step": 28985 + }, + { + "epoch": 79.63186813186813, + "grad_norm": 13.388962745666504, + "learning_rate": 1.0184065934065934e-05, + "loss": 0.2286, + "step": 28986 + }, + { + "epoch": 79.63461538461539, + "grad_norm": 7.130218029022217, + "learning_rate": 1.0182692307692309e-05, + "loss": 0.1068, + "step": 28987 + }, + { + "epoch": 79.63736263736264, + "grad_norm": 13.772055625915527, + "learning_rate": 1.018131868131868e-05, + "loss": 0.2862, + "step": 28988 + }, + { + "epoch": 79.64010989010988, + "grad_norm": 4.606718063354492, + "learning_rate": 1.0179945054945056e-05, + "loss": 0.0705, + "step": 28989 + }, + { + "epoch": 79.64285714285714, + "grad_norm": 17.38550567626953, + "learning_rate": 1.0178571428571429e-05, + "loss": 0.185, + "step": 28990 + }, + { + "epoch": 79.6456043956044, + "grad_norm": 6.29503870010376, + "learning_rate": 1.0177197802197803e-05, + "loss": 0.138, + "step": 28991 + }, + { + "epoch": 79.64835164835165, + "grad_norm": 11.752053260803223, + "learning_rate": 1.0175824175824176e-05, + "loss": 0.3101, + "step": 28992 + }, + { + "epoch": 79.6510989010989, + "grad_norm": 7.247312068939209, + "learning_rate": 1.017445054945055e-05, + "loss": 0.0867, + "step": 28993 + }, + { + "epoch": 79.65384615384616, + "grad_norm": 0.9473925232887268, + "learning_rate": 1.0173076923076924e-05, + "loss": 0.0101, + "step": 28994 + }, + { + "epoch": 79.6565934065934, + "grad_norm": 14.837204933166504, + "learning_rate": 1.0171703296703298e-05, + "loss": 0.1883, + "step": 28995 + }, + { + "epoch": 79.65934065934066, + "grad_norm": 15.653611183166504, + "learning_rate": 1.017032967032967e-05, + "loss": 0.4248, + "step": 28996 + }, + { + "epoch": 79.66208791208791, + "grad_norm": 5.576277256011963, + "learning_rate": 1.0168956043956045e-05, + "loss": 0.0653, + "step": 28997 + }, + { + "epoch": 79.66483516483517, + "grad_norm": 13.96340560913086, + "learning_rate": 1.0167582417582418e-05, + "loss": 0.2339, + "step": 28998 + }, + { + "epoch": 79.66758241758242, + "grad_norm": 7.506217956542969, + "learning_rate": 1.0166208791208791e-05, + "loss": 0.147, + "step": 28999 + }, + { + "epoch": 79.67032967032966, + "grad_norm": 7.701479434967041, + "learning_rate": 1.0164835164835165e-05, + "loss": 0.108, + "step": 29000 + }, + { + "epoch": 79.67307692307692, + "grad_norm": 2.962552070617676, + "learning_rate": 1.0163461538461538e-05, + "loss": 0.0434, + "step": 29001 + }, + { + "epoch": 79.67582417582418, + "grad_norm": 2.045346260070801, + "learning_rate": 1.0162087912087913e-05, + "loss": 0.0288, + "step": 29002 + }, + { + "epoch": 79.67857142857143, + "grad_norm": 1.5041002035140991, + "learning_rate": 1.0160714285714285e-05, + "loss": 0.0193, + "step": 29003 + }, + { + "epoch": 79.68131868131869, + "grad_norm": 3.5227696895599365, + "learning_rate": 1.015934065934066e-05, + "loss": 0.0369, + "step": 29004 + }, + { + "epoch": 79.68406593406593, + "grad_norm": 8.3223876953125, + "learning_rate": 1.0157967032967034e-05, + "loss": 0.2076, + "step": 29005 + }, + { + "epoch": 79.68681318681318, + "grad_norm": 1.4957783222198486, + "learning_rate": 1.0156593406593407e-05, + "loss": 0.0241, + "step": 29006 + }, + { + "epoch": 79.68956043956044, + "grad_norm": 10.75940990447998, + "learning_rate": 1.015521978021978e-05, + "loss": 0.1743, + "step": 29007 + }, + { + "epoch": 79.6923076923077, + "grad_norm": 9.220489501953125, + "learning_rate": 1.0153846153846154e-05, + "loss": 0.092, + "step": 29008 + }, + { + "epoch": 79.69505494505495, + "grad_norm": 5.7736310958862305, + "learning_rate": 1.0152472527472529e-05, + "loss": 0.0965, + "step": 29009 + }, + { + "epoch": 79.6978021978022, + "grad_norm": 16.75299072265625, + "learning_rate": 1.0151098901098902e-05, + "loss": 0.5234, + "step": 29010 + }, + { + "epoch": 79.70054945054945, + "grad_norm": 13.571329116821289, + "learning_rate": 1.0149725274725274e-05, + "loss": 0.1826, + "step": 29011 + }, + { + "epoch": 79.7032967032967, + "grad_norm": 20.32160186767578, + "learning_rate": 1.014835164835165e-05, + "loss": 0.3366, + "step": 29012 + }, + { + "epoch": 79.70604395604396, + "grad_norm": 14.183712005615234, + "learning_rate": 1.0146978021978023e-05, + "loss": 0.2084, + "step": 29013 + }, + { + "epoch": 79.70879120879121, + "grad_norm": 2.5512278079986572, + "learning_rate": 1.0145604395604396e-05, + "loss": 0.0363, + "step": 29014 + }, + { + "epoch": 79.71153846153847, + "grad_norm": 10.243437767028809, + "learning_rate": 1.014423076923077e-05, + "loss": 0.1485, + "step": 29015 + }, + { + "epoch": 79.71428571428571, + "grad_norm": 22.599950790405273, + "learning_rate": 1.0142857142857143e-05, + "loss": 0.5879, + "step": 29016 + }, + { + "epoch": 79.71703296703296, + "grad_norm": 6.1728620529174805, + "learning_rate": 1.0141483516483518e-05, + "loss": 0.0611, + "step": 29017 + }, + { + "epoch": 79.71978021978022, + "grad_norm": 9.43416976928711, + "learning_rate": 1.014010989010989e-05, + "loss": 0.2261, + "step": 29018 + }, + { + "epoch": 79.72252747252747, + "grad_norm": 3.4319818019866943, + "learning_rate": 1.0138736263736265e-05, + "loss": 0.0469, + "step": 29019 + }, + { + "epoch": 79.72527472527473, + "grad_norm": 9.785188674926758, + "learning_rate": 1.0137362637362638e-05, + "loss": 0.2809, + "step": 29020 + }, + { + "epoch": 79.72802197802197, + "grad_norm": 18.30122184753418, + "learning_rate": 1.0135989010989012e-05, + "loss": 0.263, + "step": 29021 + }, + { + "epoch": 79.73076923076923, + "grad_norm": 13.00859260559082, + "learning_rate": 1.0134615384615385e-05, + "loss": 0.2285, + "step": 29022 + }, + { + "epoch": 79.73351648351648, + "grad_norm": 12.232083320617676, + "learning_rate": 1.0133241758241758e-05, + "loss": 0.2984, + "step": 29023 + }, + { + "epoch": 79.73626373626374, + "grad_norm": 9.210023880004883, + "learning_rate": 1.0131868131868133e-05, + "loss": 0.2376, + "step": 29024 + }, + { + "epoch": 79.73901098901099, + "grad_norm": 23.872886657714844, + "learning_rate": 1.0130494505494507e-05, + "loss": 0.4499, + "step": 29025 + }, + { + "epoch": 79.74175824175825, + "grad_norm": 2.2801830768585205, + "learning_rate": 1.0129120879120879e-05, + "loss": 0.0272, + "step": 29026 + }, + { + "epoch": 79.74450549450549, + "grad_norm": 8.922776222229004, + "learning_rate": 1.0127747252747254e-05, + "loss": 0.0699, + "step": 29027 + }, + { + "epoch": 79.74725274725274, + "grad_norm": 24.4091739654541, + "learning_rate": 1.0126373626373627e-05, + "loss": 0.5952, + "step": 29028 + }, + { + "epoch": 79.75, + "grad_norm": 4.665346622467041, + "learning_rate": 1.0125e-05, + "loss": 0.0573, + "step": 29029 + }, + { + "epoch": 79.75274725274726, + "grad_norm": 17.29485321044922, + "learning_rate": 1.0123626373626374e-05, + "loss": 0.1866, + "step": 29030 + }, + { + "epoch": 79.75549450549451, + "grad_norm": 7.226638317108154, + "learning_rate": 1.0122252747252747e-05, + "loss": 0.0759, + "step": 29031 + }, + { + "epoch": 79.75824175824175, + "grad_norm": 17.324975967407227, + "learning_rate": 1.0120879120879122e-05, + "loss": 0.4768, + "step": 29032 + }, + { + "epoch": 79.76098901098901, + "grad_norm": 8.888871192932129, + "learning_rate": 1.0119505494505494e-05, + "loss": 0.0964, + "step": 29033 + }, + { + "epoch": 79.76373626373626, + "grad_norm": 5.434460163116455, + "learning_rate": 1.011813186813187e-05, + "loss": 0.0509, + "step": 29034 + }, + { + "epoch": 79.76648351648352, + "grad_norm": 2.8107690811157227, + "learning_rate": 1.0116758241758243e-05, + "loss": 0.0339, + "step": 29035 + }, + { + "epoch": 79.76923076923077, + "grad_norm": 28.845014572143555, + "learning_rate": 1.0115384615384616e-05, + "loss": 0.4525, + "step": 29036 + }, + { + "epoch": 79.77197802197803, + "grad_norm": 5.242373466491699, + "learning_rate": 1.011401098901099e-05, + "loss": 0.068, + "step": 29037 + }, + { + "epoch": 79.77472527472527, + "grad_norm": 14.707155227661133, + "learning_rate": 1.0112637362637363e-05, + "loss": 0.1521, + "step": 29038 + }, + { + "epoch": 79.77747252747253, + "grad_norm": 5.141895771026611, + "learning_rate": 1.0111263736263736e-05, + "loss": 0.0863, + "step": 29039 + }, + { + "epoch": 79.78021978021978, + "grad_norm": 10.205036163330078, + "learning_rate": 1.0109890109890111e-05, + "loss": 0.1148, + "step": 29040 + }, + { + "epoch": 79.78296703296704, + "grad_norm": 20.50568962097168, + "learning_rate": 1.0108516483516483e-05, + "loss": 0.2356, + "step": 29041 + }, + { + "epoch": 79.78571428571429, + "grad_norm": 11.198080062866211, + "learning_rate": 1.0107142857142858e-05, + "loss": 0.1429, + "step": 29042 + }, + { + "epoch": 79.78846153846153, + "grad_norm": 14.730034828186035, + "learning_rate": 1.0105769230769232e-05, + "loss": 0.2399, + "step": 29043 + }, + { + "epoch": 79.79120879120879, + "grad_norm": 19.937170028686523, + "learning_rate": 1.0104395604395605e-05, + "loss": 0.3579, + "step": 29044 + }, + { + "epoch": 79.79395604395604, + "grad_norm": 22.838199615478516, + "learning_rate": 1.0103021978021978e-05, + "loss": 0.5047, + "step": 29045 + }, + { + "epoch": 79.7967032967033, + "grad_norm": 20.919044494628906, + "learning_rate": 1.0101648351648352e-05, + "loss": 0.3026, + "step": 29046 + }, + { + "epoch": 79.79945054945055, + "grad_norm": 10.754884719848633, + "learning_rate": 1.0100274725274727e-05, + "loss": 0.2187, + "step": 29047 + }, + { + "epoch": 79.8021978021978, + "grad_norm": 8.689825057983398, + "learning_rate": 1.0098901098901099e-05, + "loss": 0.1269, + "step": 29048 + }, + { + "epoch": 79.80494505494505, + "grad_norm": 4.694169521331787, + "learning_rate": 1.0097527472527472e-05, + "loss": 0.0524, + "step": 29049 + }, + { + "epoch": 79.8076923076923, + "grad_norm": 14.196124076843262, + "learning_rate": 1.0096153846153847e-05, + "loss": 0.3979, + "step": 29050 + }, + { + "epoch": 79.81043956043956, + "grad_norm": 12.470989227294922, + "learning_rate": 1.009478021978022e-05, + "loss": 0.1851, + "step": 29051 + }, + { + "epoch": 79.81318681318682, + "grad_norm": 1.5218652486801147, + "learning_rate": 1.0093406593406594e-05, + "loss": 0.0175, + "step": 29052 + }, + { + "epoch": 79.81593406593407, + "grad_norm": 21.466829299926758, + "learning_rate": 1.0092032967032967e-05, + "loss": 0.3973, + "step": 29053 + }, + { + "epoch": 79.81868131868131, + "grad_norm": 8.670783042907715, + "learning_rate": 1.009065934065934e-05, + "loss": 0.2289, + "step": 29054 + }, + { + "epoch": 79.82142857142857, + "grad_norm": 15.038566589355469, + "learning_rate": 1.0089285714285716e-05, + "loss": 0.4485, + "step": 29055 + }, + { + "epoch": 79.82417582417582, + "grad_norm": 5.949440956115723, + "learning_rate": 1.0087912087912088e-05, + "loss": 0.0461, + "step": 29056 + }, + { + "epoch": 79.82692307692308, + "grad_norm": 6.325222969055176, + "learning_rate": 1.0086538461538463e-05, + "loss": 0.127, + "step": 29057 + }, + { + "epoch": 79.82967032967034, + "grad_norm": 6.8847527503967285, + "learning_rate": 1.0085164835164836e-05, + "loss": 0.1766, + "step": 29058 + }, + { + "epoch": 79.83241758241758, + "grad_norm": 20.448841094970703, + "learning_rate": 1.008379120879121e-05, + "loss": 0.7602, + "step": 29059 + }, + { + "epoch": 79.83516483516483, + "grad_norm": 12.9929780960083, + "learning_rate": 1.0082417582417583e-05, + "loss": 0.1164, + "step": 29060 + }, + { + "epoch": 79.83791208791209, + "grad_norm": 6.882672309875488, + "learning_rate": 1.0081043956043956e-05, + "loss": 0.1347, + "step": 29061 + }, + { + "epoch": 79.84065934065934, + "grad_norm": 11.807244300842285, + "learning_rate": 1.0079670329670331e-05, + "loss": 0.2541, + "step": 29062 + }, + { + "epoch": 79.8434065934066, + "grad_norm": 7.638205051422119, + "learning_rate": 1.0078296703296703e-05, + "loss": 0.1196, + "step": 29063 + }, + { + "epoch": 79.84615384615384, + "grad_norm": 24.802284240722656, + "learning_rate": 1.0076923076923076e-05, + "loss": 0.633, + "step": 29064 + }, + { + "epoch": 79.8489010989011, + "grad_norm": 9.106727600097656, + "learning_rate": 1.0075549450549452e-05, + "loss": 0.1531, + "step": 29065 + }, + { + "epoch": 79.85164835164835, + "grad_norm": 4.420281410217285, + "learning_rate": 1.0074175824175825e-05, + "loss": 0.0711, + "step": 29066 + }, + { + "epoch": 79.8543956043956, + "grad_norm": 27.43755531311035, + "learning_rate": 1.0072802197802198e-05, + "loss": 0.5808, + "step": 29067 + }, + { + "epoch": 79.85714285714286, + "grad_norm": 19.94127082824707, + "learning_rate": 1.0071428571428572e-05, + "loss": 0.3156, + "step": 29068 + }, + { + "epoch": 79.85989010989012, + "grad_norm": 20.75020980834961, + "learning_rate": 1.0070054945054945e-05, + "loss": 0.604, + "step": 29069 + }, + { + "epoch": 79.86263736263736, + "grad_norm": 17.815364837646484, + "learning_rate": 1.006868131868132e-05, + "loss": 0.3216, + "step": 29070 + }, + { + "epoch": 79.86538461538461, + "grad_norm": 21.564922332763672, + "learning_rate": 1.0067307692307692e-05, + "loss": 0.4033, + "step": 29071 + }, + { + "epoch": 79.86813186813187, + "grad_norm": 14.932955741882324, + "learning_rate": 1.0065934065934067e-05, + "loss": 0.4234, + "step": 29072 + }, + { + "epoch": 79.87087912087912, + "grad_norm": 14.249954223632812, + "learning_rate": 1.006456043956044e-05, + "loss": 0.2077, + "step": 29073 + }, + { + "epoch": 79.87362637362638, + "grad_norm": 8.31333065032959, + "learning_rate": 1.0063186813186814e-05, + "loss": 0.1262, + "step": 29074 + }, + { + "epoch": 79.87637362637362, + "grad_norm": 30.272916793823242, + "learning_rate": 1.0061813186813187e-05, + "loss": 0.6473, + "step": 29075 + }, + { + "epoch": 79.87912087912088, + "grad_norm": 8.940181732177734, + "learning_rate": 1.006043956043956e-05, + "loss": 0.176, + "step": 29076 + }, + { + "epoch": 79.88186813186813, + "grad_norm": 16.97651481628418, + "learning_rate": 1.0059065934065936e-05, + "loss": 0.476, + "step": 29077 + }, + { + "epoch": 79.88461538461539, + "grad_norm": 12.478543281555176, + "learning_rate": 1.0057692307692308e-05, + "loss": 0.2441, + "step": 29078 + }, + { + "epoch": 79.88736263736264, + "grad_norm": 3.8102941513061523, + "learning_rate": 1.0056318681318681e-05, + "loss": 0.0479, + "step": 29079 + }, + { + "epoch": 79.89010989010988, + "grad_norm": 22.76463508605957, + "learning_rate": 1.0054945054945056e-05, + "loss": 0.3286, + "step": 29080 + }, + { + "epoch": 79.89285714285714, + "grad_norm": 6.075450897216797, + "learning_rate": 1.005357142857143e-05, + "loss": 0.1047, + "step": 29081 + }, + { + "epoch": 79.8956043956044, + "grad_norm": 12.646438598632812, + "learning_rate": 1.0052197802197803e-05, + "loss": 0.1297, + "step": 29082 + }, + { + "epoch": 79.89835164835165, + "grad_norm": 10.127481460571289, + "learning_rate": 1.0050824175824176e-05, + "loss": 0.0982, + "step": 29083 + }, + { + "epoch": 79.9010989010989, + "grad_norm": 5.124300003051758, + "learning_rate": 1.004945054945055e-05, + "loss": 0.0608, + "step": 29084 + }, + { + "epoch": 79.90384615384616, + "grad_norm": 24.1794376373291, + "learning_rate": 1.0048076923076923e-05, + "loss": 0.85, + "step": 29085 + }, + { + "epoch": 79.9065934065934, + "grad_norm": 17.097169876098633, + "learning_rate": 1.0046703296703297e-05, + "loss": 0.396, + "step": 29086 + }, + { + "epoch": 79.90934065934066, + "grad_norm": 12.647696495056152, + "learning_rate": 1.0045329670329672e-05, + "loss": 0.2005, + "step": 29087 + }, + { + "epoch": 79.91208791208791, + "grad_norm": 7.191840171813965, + "learning_rate": 1.0043956043956045e-05, + "loss": 0.0932, + "step": 29088 + }, + { + "epoch": 79.91483516483517, + "grad_norm": 10.85921573638916, + "learning_rate": 1.0042582417582418e-05, + "loss": 0.3123, + "step": 29089 + }, + { + "epoch": 79.91758241758242, + "grad_norm": 10.14858341217041, + "learning_rate": 1.0041208791208792e-05, + "loss": 0.2029, + "step": 29090 + }, + { + "epoch": 79.92032967032966, + "grad_norm": 8.028667449951172, + "learning_rate": 1.0039835164835165e-05, + "loss": 0.161, + "step": 29091 + }, + { + "epoch": 79.92307692307692, + "grad_norm": 4.335801124572754, + "learning_rate": 1.0038461538461539e-05, + "loss": 0.0836, + "step": 29092 + }, + { + "epoch": 79.92582417582418, + "grad_norm": 1.9515889883041382, + "learning_rate": 1.0037087912087912e-05, + "loss": 0.0209, + "step": 29093 + }, + { + "epoch": 79.92857142857143, + "grad_norm": 6.599313735961914, + "learning_rate": 1.0035714285714285e-05, + "loss": 0.0544, + "step": 29094 + }, + { + "epoch": 79.93131868131869, + "grad_norm": 3.135371685028076, + "learning_rate": 1.003434065934066e-05, + "loss": 0.0398, + "step": 29095 + }, + { + "epoch": 79.93406593406593, + "grad_norm": 6.868593215942383, + "learning_rate": 1.0032967032967034e-05, + "loss": 0.0943, + "step": 29096 + }, + { + "epoch": 79.93681318681318, + "grad_norm": 3.8406717777252197, + "learning_rate": 1.0031593406593406e-05, + "loss": 0.0343, + "step": 29097 + }, + { + "epoch": 79.93956043956044, + "grad_norm": 12.822412490844727, + "learning_rate": 1.003021978021978e-05, + "loss": 0.1893, + "step": 29098 + }, + { + "epoch": 79.9423076923077, + "grad_norm": 3.138834238052368, + "learning_rate": 1.0028846153846154e-05, + "loss": 0.0279, + "step": 29099 + }, + { + "epoch": 79.94505494505495, + "grad_norm": 15.967358589172363, + "learning_rate": 1.0027472527472528e-05, + "loss": 0.2154, + "step": 29100 + }, + { + "epoch": 79.9478021978022, + "grad_norm": 23.337562561035156, + "learning_rate": 1.0026098901098901e-05, + "loss": 0.2861, + "step": 29101 + }, + { + "epoch": 79.95054945054945, + "grad_norm": 9.120481491088867, + "learning_rate": 1.0024725274725274e-05, + "loss": 0.1539, + "step": 29102 + }, + { + "epoch": 79.9532967032967, + "grad_norm": 10.954529762268066, + "learning_rate": 1.002335164835165e-05, + "loss": 0.1564, + "step": 29103 + }, + { + "epoch": 79.95604395604396, + "grad_norm": 10.205493927001953, + "learning_rate": 1.0021978021978023e-05, + "loss": 0.1812, + "step": 29104 + }, + { + "epoch": 79.95879120879121, + "grad_norm": 5.770230293273926, + "learning_rate": 1.0020604395604396e-05, + "loss": 0.0745, + "step": 29105 + }, + { + "epoch": 79.96153846153847, + "grad_norm": 6.514898300170898, + "learning_rate": 1.001923076923077e-05, + "loss": 0.1209, + "step": 29106 + }, + { + "epoch": 79.96428571428571, + "grad_norm": 12.20121955871582, + "learning_rate": 1.0017857142857143e-05, + "loss": 0.1482, + "step": 29107 + }, + { + "epoch": 79.96703296703296, + "grad_norm": 16.6519775390625, + "learning_rate": 1.0016483516483517e-05, + "loss": 0.5009, + "step": 29108 + }, + { + "epoch": 79.96978021978022, + "grad_norm": 7.093507289886475, + "learning_rate": 1.001510989010989e-05, + "loss": 0.0537, + "step": 29109 + }, + { + "epoch": 79.97252747252747, + "grad_norm": 2.8011434078216553, + "learning_rate": 1.0013736263736265e-05, + "loss": 0.0391, + "step": 29110 + }, + { + "epoch": 79.97527472527473, + "grad_norm": 14.303596496582031, + "learning_rate": 1.0012362637362638e-05, + "loss": 0.2825, + "step": 29111 + }, + { + "epoch": 79.97802197802197, + "grad_norm": 15.571295738220215, + "learning_rate": 1.001098901098901e-05, + "loss": 0.4507, + "step": 29112 + }, + { + "epoch": 79.98076923076923, + "grad_norm": 6.70890998840332, + "learning_rate": 1.0009615384615385e-05, + "loss": 0.1783, + "step": 29113 + }, + { + "epoch": 79.98351648351648, + "grad_norm": 16.470523834228516, + "learning_rate": 1.0008241758241759e-05, + "loss": 0.2987, + "step": 29114 + }, + { + "epoch": 79.98626373626374, + "grad_norm": 7.99337100982666, + "learning_rate": 1.0006868131868132e-05, + "loss": 0.0899, + "step": 29115 + }, + { + "epoch": 79.98901098901099, + "grad_norm": 24.546483993530273, + "learning_rate": 1.0005494505494505e-05, + "loss": 0.905, + "step": 29116 + }, + { + "epoch": 79.99175824175825, + "grad_norm": 4.1190185546875, + "learning_rate": 1.0004120879120879e-05, + "loss": 0.0599, + "step": 29117 + }, + { + "epoch": 79.99450549450549, + "grad_norm": 4.927531719207764, + "learning_rate": 1.0002747252747254e-05, + "loss": 0.088, + "step": 29118 + }, + { + "epoch": 79.99725274725274, + "grad_norm": 11.578052520751953, + "learning_rate": 1.0001373626373627e-05, + "loss": 0.2703, + "step": 29119 + }, + { + "epoch": 80.0, + "grad_norm": 70.24449157714844, + "learning_rate": 1e-05, + "loss": 1.0882, + "step": 29120 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.696969696969697, + "eval_f1": 0.6928607941177056, + "eval_f1_DuraRiadoRio_64x64": 0.6609442060085837, + "eval_f1_Mole_64x64": 0.5353535353535354, + "eval_f1_Quebrado_64x64": 0.8047337278106509, + "eval_f1_RiadoRio_64x64": 0.6150234741784038, + "eval_f1_RioFechado_64x64": 0.8482490272373541, + "eval_loss": 1.580288290977478, + "eval_precision": 0.7947218450759839, + "eval_precision_DuraRiadoRio_64x64": 0.8651685393258427, + "eval_precision_Mole_64x64": 0.9814814814814815, + "eval_precision_Quebrado_64x64": 0.7010309278350515, + "eval_precision_RiadoRio_64x64": 0.4781021897810219, + "eval_precision_RioFechado_64x64": 0.9478260869565217, + "eval_recall": 0.6953339922576395, + "eval_recall_DuraRiadoRio_64x64": 0.5347222222222222, + "eval_recall_Mole_64x64": 0.3680555555555556, + "eval_recall_Quebrado_64x64": 0.9444444444444444, + "eval_recall_RiadoRio_64x64": 0.8618421052631579, + "eval_recall_RioFechado_64x64": 0.7676056338028169, + "eval_runtime": 1.7888, + "eval_samples_per_second": 405.869, + "eval_steps_per_second": 25.716, + "step": 29120 + }, + { + "epoch": 80.00274725274726, + "grad_norm": 13.753767013549805, + "learning_rate": 9.998626373626374e-06, + "loss": 0.2717, + "step": 29121 + }, + { + "epoch": 80.00549450549451, + "grad_norm": 5.520532608032227, + "learning_rate": 9.997252747252748e-06, + "loss": 0.115, + "step": 29122 + }, + { + "epoch": 80.00824175824175, + "grad_norm": 3.9829206466674805, + "learning_rate": 9.995879120879121e-06, + "loss": 0.0602, + "step": 29123 + }, + { + "epoch": 80.01098901098901, + "grad_norm": 12.083115577697754, + "learning_rate": 9.994505494505494e-06, + "loss": 0.2973, + "step": 29124 + }, + { + "epoch": 80.01373626373626, + "grad_norm": 13.953746795654297, + "learning_rate": 9.99313186813187e-06, + "loss": 0.2867, + "step": 29125 + }, + { + "epoch": 80.01648351648352, + "grad_norm": 10.747942924499512, + "learning_rate": 9.991758241758243e-06, + "loss": 0.2042, + "step": 29126 + }, + { + "epoch": 80.01923076923077, + "grad_norm": 8.805489540100098, + "learning_rate": 9.990384615384615e-06, + "loss": 0.1639, + "step": 29127 + }, + { + "epoch": 80.02197802197803, + "grad_norm": 14.530471801757812, + "learning_rate": 9.98901098901099e-06, + "loss": 0.142, + "step": 29128 + }, + { + "epoch": 80.02472527472527, + "grad_norm": 3.970728874206543, + "learning_rate": 9.987637362637363e-06, + "loss": 0.111, + "step": 29129 + }, + { + "epoch": 80.02747252747253, + "grad_norm": 17.90984535217285, + "learning_rate": 9.986263736263737e-06, + "loss": 0.368, + "step": 29130 + }, + { + "epoch": 80.03021978021978, + "grad_norm": 21.244722366333008, + "learning_rate": 9.98489010989011e-06, + "loss": 0.5426, + "step": 29131 + }, + { + "epoch": 80.03296703296704, + "grad_norm": 14.65149211883545, + "learning_rate": 9.983516483516483e-06, + "loss": 0.2059, + "step": 29132 + }, + { + "epoch": 80.03571428571429, + "grad_norm": 14.041821479797363, + "learning_rate": 9.982142857142858e-06, + "loss": 0.2992, + "step": 29133 + }, + { + "epoch": 80.03846153846153, + "grad_norm": 6.1945109367370605, + "learning_rate": 9.98076923076923e-06, + "loss": 0.0855, + "step": 29134 + }, + { + "epoch": 80.04120879120879, + "grad_norm": 13.586568832397461, + "learning_rate": 9.979395604395605e-06, + "loss": 0.1935, + "step": 29135 + }, + { + "epoch": 80.04395604395604, + "grad_norm": 3.4826748371124268, + "learning_rate": 9.978021978021979e-06, + "loss": 0.0563, + "step": 29136 + }, + { + "epoch": 80.0467032967033, + "grad_norm": 5.972961902618408, + "learning_rate": 9.976648351648352e-06, + "loss": 0.0941, + "step": 29137 + }, + { + "epoch": 80.04945054945055, + "grad_norm": 10.869324684143066, + "learning_rate": 9.975274725274726e-06, + "loss": 0.2895, + "step": 29138 + }, + { + "epoch": 80.0521978021978, + "grad_norm": 6.107985973358154, + "learning_rate": 9.973901098901099e-06, + "loss": 0.086, + "step": 29139 + }, + { + "epoch": 80.05494505494505, + "grad_norm": 10.685150146484375, + "learning_rate": 9.972527472527474e-06, + "loss": 0.2146, + "step": 29140 + }, + { + "epoch": 80.0576923076923, + "grad_norm": 23.00834083557129, + "learning_rate": 9.971153846153847e-06, + "loss": 0.4576, + "step": 29141 + }, + { + "epoch": 80.06043956043956, + "grad_norm": 18.43146514892578, + "learning_rate": 9.96978021978022e-06, + "loss": 0.623, + "step": 29142 + }, + { + "epoch": 80.06318681318682, + "grad_norm": 8.881059646606445, + "learning_rate": 9.968406593406594e-06, + "loss": 0.1408, + "step": 29143 + }, + { + "epoch": 80.06593406593407, + "grad_norm": 3.543087959289551, + "learning_rate": 9.967032967032968e-06, + "loss": 0.0624, + "step": 29144 + }, + { + "epoch": 80.06868131868131, + "grad_norm": 9.827858924865723, + "learning_rate": 9.965659340659341e-06, + "loss": 0.1607, + "step": 29145 + }, + { + "epoch": 80.07142857142857, + "grad_norm": 22.215213775634766, + "learning_rate": 9.964285714285714e-06, + "loss": 0.6592, + "step": 29146 + }, + { + "epoch": 80.07417582417582, + "grad_norm": 13.179256439208984, + "learning_rate": 9.962912087912088e-06, + "loss": 0.372, + "step": 29147 + }, + { + "epoch": 80.07692307692308, + "grad_norm": 9.706077575683594, + "learning_rate": 9.961538461538463e-06, + "loss": 0.125, + "step": 29148 + }, + { + "epoch": 80.07967032967034, + "grad_norm": 3.823148488998413, + "learning_rate": 9.960164835164835e-06, + "loss": 0.0442, + "step": 29149 + }, + { + "epoch": 80.08241758241758, + "grad_norm": 16.53067970275879, + "learning_rate": 9.958791208791208e-06, + "loss": 0.3641, + "step": 29150 + }, + { + "epoch": 80.08516483516483, + "grad_norm": 11.615096092224121, + "learning_rate": 9.957417582417583e-06, + "loss": 0.2094, + "step": 29151 + }, + { + "epoch": 80.08791208791209, + "grad_norm": 11.453584671020508, + "learning_rate": 9.956043956043957e-06, + "loss": 0.2874, + "step": 29152 + }, + { + "epoch": 80.09065934065934, + "grad_norm": 7.390404224395752, + "learning_rate": 9.95467032967033e-06, + "loss": 0.0988, + "step": 29153 + }, + { + "epoch": 80.0934065934066, + "grad_norm": 13.241072654724121, + "learning_rate": 9.953296703296703e-06, + "loss": 0.1216, + "step": 29154 + }, + { + "epoch": 80.09615384615384, + "grad_norm": 12.579798698425293, + "learning_rate": 9.951923076923077e-06, + "loss": 0.2199, + "step": 29155 + }, + { + "epoch": 80.0989010989011, + "grad_norm": 3.3533148765563965, + "learning_rate": 9.950549450549452e-06, + "loss": 0.0508, + "step": 29156 + }, + { + "epoch": 80.10164835164835, + "grad_norm": 6.089837074279785, + "learning_rate": 9.949175824175824e-06, + "loss": 0.1002, + "step": 29157 + }, + { + "epoch": 80.1043956043956, + "grad_norm": 2.559314250946045, + "learning_rate": 9.947802197802199e-06, + "loss": 0.0241, + "step": 29158 + }, + { + "epoch": 80.10714285714286, + "grad_norm": 7.556962490081787, + "learning_rate": 9.946428571428572e-06, + "loss": 0.1245, + "step": 29159 + }, + { + "epoch": 80.10989010989012, + "grad_norm": 6.381978511810303, + "learning_rate": 9.945054945054946e-06, + "loss": 0.0921, + "step": 29160 + }, + { + "epoch": 80.11263736263736, + "grad_norm": 9.003220558166504, + "learning_rate": 9.943681318681319e-06, + "loss": 0.1252, + "step": 29161 + }, + { + "epoch": 80.11538461538461, + "grad_norm": 13.994909286499023, + "learning_rate": 9.942307692307692e-06, + "loss": 0.1601, + "step": 29162 + }, + { + "epoch": 80.11813186813187, + "grad_norm": 14.641736030578613, + "learning_rate": 9.940934065934067e-06, + "loss": 0.216, + "step": 29163 + }, + { + "epoch": 80.12087912087912, + "grad_norm": 11.765027046203613, + "learning_rate": 9.93956043956044e-06, + "loss": 0.1872, + "step": 29164 + }, + { + "epoch": 80.12362637362638, + "grad_norm": 9.224932670593262, + "learning_rate": 9.938186813186813e-06, + "loss": 0.2123, + "step": 29165 + }, + { + "epoch": 80.12637362637362, + "grad_norm": 16.530231475830078, + "learning_rate": 9.936813186813188e-06, + "loss": 0.2303, + "step": 29166 + }, + { + "epoch": 80.12912087912088, + "grad_norm": 9.919100761413574, + "learning_rate": 9.935439560439561e-06, + "loss": 0.1431, + "step": 29167 + }, + { + "epoch": 80.13186813186813, + "grad_norm": 12.877706527709961, + "learning_rate": 9.934065934065935e-06, + "loss": 0.254, + "step": 29168 + }, + { + "epoch": 80.13461538461539, + "grad_norm": 8.988946914672852, + "learning_rate": 9.932692307692308e-06, + "loss": 0.2072, + "step": 29169 + }, + { + "epoch": 80.13736263736264, + "grad_norm": 9.43252944946289, + "learning_rate": 9.931318681318681e-06, + "loss": 0.1454, + "step": 29170 + }, + { + "epoch": 80.14010989010988, + "grad_norm": 21.768442153930664, + "learning_rate": 9.929945054945056e-06, + "loss": 0.5488, + "step": 29171 + }, + { + "epoch": 80.14285714285714, + "grad_norm": 20.646530151367188, + "learning_rate": 9.928571428571428e-06, + "loss": 0.4107, + "step": 29172 + }, + { + "epoch": 80.1456043956044, + "grad_norm": 12.456029891967773, + "learning_rate": 9.927197802197803e-06, + "loss": 0.1586, + "step": 29173 + }, + { + "epoch": 80.14835164835165, + "grad_norm": 10.577435493469238, + "learning_rate": 9.925824175824177e-06, + "loss": 0.1521, + "step": 29174 + }, + { + "epoch": 80.1510989010989, + "grad_norm": 11.402676582336426, + "learning_rate": 9.92445054945055e-06, + "loss": 0.1982, + "step": 29175 + }, + { + "epoch": 80.15384615384616, + "grad_norm": 12.963523864746094, + "learning_rate": 9.923076923076923e-06, + "loss": 0.1683, + "step": 29176 + }, + { + "epoch": 80.1565934065934, + "grad_norm": 27.629308700561523, + "learning_rate": 9.921703296703297e-06, + "loss": 0.636, + "step": 29177 + }, + { + "epoch": 80.15934065934066, + "grad_norm": 1.6393176317214966, + "learning_rate": 9.920329670329672e-06, + "loss": 0.0267, + "step": 29178 + }, + { + "epoch": 80.16208791208791, + "grad_norm": 21.960840225219727, + "learning_rate": 9.918956043956044e-06, + "loss": 0.3969, + "step": 29179 + }, + { + "epoch": 80.16483516483517, + "grad_norm": 13.917092323303223, + "learning_rate": 9.917582417582417e-06, + "loss": 0.4314, + "step": 29180 + }, + { + "epoch": 80.16758241758242, + "grad_norm": 23.363615036010742, + "learning_rate": 9.916208791208792e-06, + "loss": 0.3991, + "step": 29181 + }, + { + "epoch": 80.17032967032966, + "grad_norm": 3.739307165145874, + "learning_rate": 9.914835164835166e-06, + "loss": 0.0599, + "step": 29182 + }, + { + "epoch": 80.17307692307692, + "grad_norm": 1.0855937004089355, + "learning_rate": 9.913461538461539e-06, + "loss": 0.0168, + "step": 29183 + }, + { + "epoch": 80.17582417582418, + "grad_norm": 10.75086784362793, + "learning_rate": 9.912087912087912e-06, + "loss": 0.1411, + "step": 29184 + }, + { + "epoch": 80.17857142857143, + "grad_norm": 19.90286636352539, + "learning_rate": 9.910714285714286e-06, + "loss": 0.5786, + "step": 29185 + }, + { + "epoch": 80.18131868131869, + "grad_norm": 4.556629657745361, + "learning_rate": 9.909340659340661e-06, + "loss": 0.1735, + "step": 29186 + }, + { + "epoch": 80.18406593406593, + "grad_norm": 4.002642631530762, + "learning_rate": 9.907967032967033e-06, + "loss": 0.085, + "step": 29187 + }, + { + "epoch": 80.18681318681318, + "grad_norm": 9.927996635437012, + "learning_rate": 9.906593406593408e-06, + "loss": 0.1637, + "step": 29188 + }, + { + "epoch": 80.18956043956044, + "grad_norm": 13.99307918548584, + "learning_rate": 9.905219780219781e-06, + "loss": 0.329, + "step": 29189 + }, + { + "epoch": 80.1923076923077, + "grad_norm": 5.3906049728393555, + "learning_rate": 9.903846153846155e-06, + "loss": 0.0691, + "step": 29190 + }, + { + "epoch": 80.19505494505495, + "grad_norm": 10.877334594726562, + "learning_rate": 9.902472527472528e-06, + "loss": 0.173, + "step": 29191 + }, + { + "epoch": 80.1978021978022, + "grad_norm": 11.268006324768066, + "learning_rate": 9.901098901098901e-06, + "loss": 0.2135, + "step": 29192 + }, + { + "epoch": 80.20054945054945, + "grad_norm": 7.374366283416748, + "learning_rate": 9.899725274725276e-06, + "loss": 0.0684, + "step": 29193 + }, + { + "epoch": 80.2032967032967, + "grad_norm": 14.594680786132812, + "learning_rate": 9.898351648351648e-06, + "loss": 0.4291, + "step": 29194 + }, + { + "epoch": 80.20604395604396, + "grad_norm": 8.127217292785645, + "learning_rate": 9.896978021978022e-06, + "loss": 0.1046, + "step": 29195 + }, + { + "epoch": 80.20879120879121, + "grad_norm": 8.389016151428223, + "learning_rate": 9.895604395604397e-06, + "loss": 0.0982, + "step": 29196 + }, + { + "epoch": 80.21153846153847, + "grad_norm": 14.416391372680664, + "learning_rate": 9.89423076923077e-06, + "loss": 0.2877, + "step": 29197 + }, + { + "epoch": 80.21428571428571, + "grad_norm": 15.785037994384766, + "learning_rate": 9.892857142857143e-06, + "loss": 0.1951, + "step": 29198 + }, + { + "epoch": 80.21703296703296, + "grad_norm": 13.524820327758789, + "learning_rate": 9.891483516483517e-06, + "loss": 0.3209, + "step": 29199 + }, + { + "epoch": 80.21978021978022, + "grad_norm": 7.347943305969238, + "learning_rate": 9.89010989010989e-06, + "loss": 0.1163, + "step": 29200 + }, + { + "epoch": 80.22252747252747, + "grad_norm": 5.3170905113220215, + "learning_rate": 9.888736263736265e-06, + "loss": 0.0804, + "step": 29201 + }, + { + "epoch": 80.22527472527473, + "grad_norm": 10.889620780944824, + "learning_rate": 9.887362637362637e-06, + "loss": 0.1338, + "step": 29202 + }, + { + "epoch": 80.22802197802197, + "grad_norm": 12.383596420288086, + "learning_rate": 9.885989010989012e-06, + "loss": 0.1766, + "step": 29203 + }, + { + "epoch": 80.23076923076923, + "grad_norm": 13.293070793151855, + "learning_rate": 9.884615384615386e-06, + "loss": 0.3703, + "step": 29204 + }, + { + "epoch": 80.23351648351648, + "grad_norm": 9.86467456817627, + "learning_rate": 9.883241758241759e-06, + "loss": 0.1266, + "step": 29205 + }, + { + "epoch": 80.23626373626374, + "grad_norm": 10.742542266845703, + "learning_rate": 9.881868131868132e-06, + "loss": 0.1971, + "step": 29206 + }, + { + "epoch": 80.23901098901099, + "grad_norm": 15.390788078308105, + "learning_rate": 9.880494505494506e-06, + "loss": 0.4371, + "step": 29207 + }, + { + "epoch": 80.24175824175825, + "grad_norm": 18.080360412597656, + "learning_rate": 9.87912087912088e-06, + "loss": 0.3344, + "step": 29208 + }, + { + "epoch": 80.24450549450549, + "grad_norm": 17.051687240600586, + "learning_rate": 9.877747252747253e-06, + "loss": 0.2899, + "step": 29209 + }, + { + "epoch": 80.24725274725274, + "grad_norm": 11.847455024719238, + "learning_rate": 9.876373626373626e-06, + "loss": 0.1729, + "step": 29210 + }, + { + "epoch": 80.25, + "grad_norm": 3.5501348972320557, + "learning_rate": 9.875000000000001e-06, + "loss": 0.0634, + "step": 29211 + }, + { + "epoch": 80.25274725274726, + "grad_norm": 13.641237258911133, + "learning_rate": 9.873626373626375e-06, + "loss": 0.1694, + "step": 29212 + }, + { + "epoch": 80.25549450549451, + "grad_norm": 19.193639755249023, + "learning_rate": 9.872252747252746e-06, + "loss": 0.5846, + "step": 29213 + }, + { + "epoch": 80.25824175824175, + "grad_norm": 5.099453449249268, + "learning_rate": 9.870879120879121e-06, + "loss": 0.085, + "step": 29214 + }, + { + "epoch": 80.26098901098901, + "grad_norm": 6.208006381988525, + "learning_rate": 9.869505494505495e-06, + "loss": 0.0554, + "step": 29215 + }, + { + "epoch": 80.26373626373626, + "grad_norm": 4.987484931945801, + "learning_rate": 9.86813186813187e-06, + "loss": 0.0988, + "step": 29216 + }, + { + "epoch": 80.26648351648352, + "grad_norm": 10.808810234069824, + "learning_rate": 9.866758241758242e-06, + "loss": 0.158, + "step": 29217 + }, + { + "epoch": 80.26923076923077, + "grad_norm": 7.616715908050537, + "learning_rate": 9.865384615384615e-06, + "loss": 0.057, + "step": 29218 + }, + { + "epoch": 80.27197802197803, + "grad_norm": 10.891408920288086, + "learning_rate": 9.86401098901099e-06, + "loss": 0.1044, + "step": 29219 + }, + { + "epoch": 80.27472527472527, + "grad_norm": 12.869203567504883, + "learning_rate": 9.862637362637364e-06, + "loss": 0.2315, + "step": 29220 + }, + { + "epoch": 80.27747252747253, + "grad_norm": 12.654982566833496, + "learning_rate": 9.861263736263737e-06, + "loss": 0.2208, + "step": 29221 + }, + { + "epoch": 80.28021978021978, + "grad_norm": 5.834089756011963, + "learning_rate": 9.85989010989011e-06, + "loss": 0.0667, + "step": 29222 + }, + { + "epoch": 80.28296703296704, + "grad_norm": 18.923858642578125, + "learning_rate": 9.858516483516484e-06, + "loss": 0.6047, + "step": 29223 + }, + { + "epoch": 80.28571428571429, + "grad_norm": 12.137539863586426, + "learning_rate": 9.857142857142857e-06, + "loss": 0.1952, + "step": 29224 + }, + { + "epoch": 80.28846153846153, + "grad_norm": 2.1734671592712402, + "learning_rate": 9.85576923076923e-06, + "loss": 0.0286, + "step": 29225 + }, + { + "epoch": 80.29120879120879, + "grad_norm": 5.20550537109375, + "learning_rate": 9.854395604395606e-06, + "loss": 0.0785, + "step": 29226 + }, + { + "epoch": 80.29395604395604, + "grad_norm": 7.603967666625977, + "learning_rate": 9.853021978021979e-06, + "loss": 0.1485, + "step": 29227 + }, + { + "epoch": 80.2967032967033, + "grad_norm": 6.897307395935059, + "learning_rate": 9.85164835164835e-06, + "loss": 0.113, + "step": 29228 + }, + { + "epoch": 80.29945054945055, + "grad_norm": 21.28206443786621, + "learning_rate": 9.850274725274726e-06, + "loss": 0.3919, + "step": 29229 + }, + { + "epoch": 80.3021978021978, + "grad_norm": 10.39546012878418, + "learning_rate": 9.8489010989011e-06, + "loss": 0.1163, + "step": 29230 + }, + { + "epoch": 80.30494505494505, + "grad_norm": 11.710118293762207, + "learning_rate": 9.847527472527474e-06, + "loss": 0.1581, + "step": 29231 + }, + { + "epoch": 80.3076923076923, + "grad_norm": 7.00641393661499, + "learning_rate": 9.846153846153846e-06, + "loss": 0.1916, + "step": 29232 + }, + { + "epoch": 80.31043956043956, + "grad_norm": 11.789597511291504, + "learning_rate": 9.84478021978022e-06, + "loss": 0.1669, + "step": 29233 + }, + { + "epoch": 80.31318681318682, + "grad_norm": 16.198598861694336, + "learning_rate": 9.843406593406595e-06, + "loss": 0.4335, + "step": 29234 + }, + { + "epoch": 80.31593406593407, + "grad_norm": 0.270883709192276, + "learning_rate": 9.842032967032968e-06, + "loss": 0.0033, + "step": 29235 + }, + { + "epoch": 80.31868131868131, + "grad_norm": 6.34710168838501, + "learning_rate": 9.840659340659341e-06, + "loss": 0.0914, + "step": 29236 + }, + { + "epoch": 80.32142857142857, + "grad_norm": 4.291672229766846, + "learning_rate": 9.839285714285715e-06, + "loss": 0.0841, + "step": 29237 + }, + { + "epoch": 80.32417582417582, + "grad_norm": 9.630982398986816, + "learning_rate": 9.837912087912088e-06, + "loss": 0.2502, + "step": 29238 + }, + { + "epoch": 80.32692307692308, + "grad_norm": 5.82620906829834, + "learning_rate": 9.836538461538462e-06, + "loss": 0.1116, + "step": 29239 + }, + { + "epoch": 80.32967032967034, + "grad_norm": 5.141600608825684, + "learning_rate": 9.835164835164835e-06, + "loss": 0.0774, + "step": 29240 + }, + { + "epoch": 80.33241758241758, + "grad_norm": 12.605371475219727, + "learning_rate": 9.83379120879121e-06, + "loss": 0.1682, + "step": 29241 + }, + { + "epoch": 80.33516483516483, + "grad_norm": 3.209467887878418, + "learning_rate": 9.832417582417584e-06, + "loss": 0.0423, + "step": 29242 + }, + { + "epoch": 80.33791208791209, + "grad_norm": 2.1639206409454346, + "learning_rate": 9.831043956043955e-06, + "loss": 0.0312, + "step": 29243 + }, + { + "epoch": 80.34065934065934, + "grad_norm": 11.577252388000488, + "learning_rate": 9.82967032967033e-06, + "loss": 0.1761, + "step": 29244 + }, + { + "epoch": 80.3434065934066, + "grad_norm": 7.583005428314209, + "learning_rate": 9.828296703296704e-06, + "loss": 0.1356, + "step": 29245 + }, + { + "epoch": 80.34615384615384, + "grad_norm": 13.296930313110352, + "learning_rate": 9.826923076923077e-06, + "loss": 0.3134, + "step": 29246 + }, + { + "epoch": 80.3489010989011, + "grad_norm": 4.351614952087402, + "learning_rate": 9.82554945054945e-06, + "loss": 0.06, + "step": 29247 + }, + { + "epoch": 80.35164835164835, + "grad_norm": 12.752705574035645, + "learning_rate": 9.824175824175824e-06, + "loss": 0.1361, + "step": 29248 + }, + { + "epoch": 80.3543956043956, + "grad_norm": 13.14079475402832, + "learning_rate": 9.822802197802199e-06, + "loss": 0.14, + "step": 29249 + }, + { + "epoch": 80.35714285714286, + "grad_norm": 1.8130478858947754, + "learning_rate": 9.821428571428573e-06, + "loss": 0.0237, + "step": 29250 + }, + { + "epoch": 80.35989010989012, + "grad_norm": 6.299769878387451, + "learning_rate": 9.820054945054946e-06, + "loss": 0.1174, + "step": 29251 + }, + { + "epoch": 80.36263736263736, + "grad_norm": 3.3494975566864014, + "learning_rate": 9.81868131868132e-06, + "loss": 0.0601, + "step": 29252 + }, + { + "epoch": 80.36538461538461, + "grad_norm": 14.692989349365234, + "learning_rate": 9.817307692307693e-06, + "loss": 0.2478, + "step": 29253 + }, + { + "epoch": 80.36813186813187, + "grad_norm": 7.404515743255615, + "learning_rate": 9.815934065934066e-06, + "loss": 0.1528, + "step": 29254 + }, + { + "epoch": 80.37087912087912, + "grad_norm": 19.4010009765625, + "learning_rate": 9.81456043956044e-06, + "loss": 0.6128, + "step": 29255 + }, + { + "epoch": 80.37362637362638, + "grad_norm": 4.893151760101318, + "learning_rate": 9.813186813186815e-06, + "loss": 0.0658, + "step": 29256 + }, + { + "epoch": 80.37637362637362, + "grad_norm": 12.457228660583496, + "learning_rate": 9.811813186813188e-06, + "loss": 0.1475, + "step": 29257 + }, + { + "epoch": 80.37912087912088, + "grad_norm": 8.536072731018066, + "learning_rate": 9.81043956043956e-06, + "loss": 0.1096, + "step": 29258 + }, + { + "epoch": 80.38186813186813, + "grad_norm": 10.206452369689941, + "learning_rate": 9.809065934065935e-06, + "loss": 0.2253, + "step": 29259 + }, + { + "epoch": 80.38461538461539, + "grad_norm": 2.8617191314697266, + "learning_rate": 9.807692307692308e-06, + "loss": 0.042, + "step": 29260 + }, + { + "epoch": 80.38736263736264, + "grad_norm": 14.879427909851074, + "learning_rate": 9.806318681318682e-06, + "loss": 0.5662, + "step": 29261 + }, + { + "epoch": 80.39010989010988, + "grad_norm": 0.7870120406150818, + "learning_rate": 9.804945054945055e-06, + "loss": 0.0069, + "step": 29262 + }, + { + "epoch": 80.39285714285714, + "grad_norm": 24.019453048706055, + "learning_rate": 9.803571428571428e-06, + "loss": 0.4712, + "step": 29263 + }, + { + "epoch": 80.3956043956044, + "grad_norm": 10.218300819396973, + "learning_rate": 9.802197802197804e-06, + "loss": 0.1466, + "step": 29264 + }, + { + "epoch": 80.39835164835165, + "grad_norm": 2.6675009727478027, + "learning_rate": 9.800824175824177e-06, + "loss": 0.0553, + "step": 29265 + }, + { + "epoch": 80.4010989010989, + "grad_norm": 13.888413429260254, + "learning_rate": 9.799450549450549e-06, + "loss": 0.3663, + "step": 29266 + }, + { + "epoch": 80.40384615384616, + "grad_norm": 12.286564826965332, + "learning_rate": 9.798076923076924e-06, + "loss": 0.1631, + "step": 29267 + }, + { + "epoch": 80.4065934065934, + "grad_norm": 25.019691467285156, + "learning_rate": 9.796703296703297e-06, + "loss": 0.7732, + "step": 29268 + }, + { + "epoch": 80.40934065934066, + "grad_norm": 7.3142924308776855, + "learning_rate": 9.79532967032967e-06, + "loss": 0.0783, + "step": 29269 + }, + { + "epoch": 80.41208791208791, + "grad_norm": 11.843361854553223, + "learning_rate": 9.793956043956044e-06, + "loss": 0.2907, + "step": 29270 + }, + { + "epoch": 80.41483516483517, + "grad_norm": 20.958656311035156, + "learning_rate": 9.792582417582417e-06, + "loss": 0.4308, + "step": 29271 + }, + { + "epoch": 80.41758241758242, + "grad_norm": 13.591405868530273, + "learning_rate": 9.791208791208793e-06, + "loss": 0.1425, + "step": 29272 + }, + { + "epoch": 80.42032967032966, + "grad_norm": 18.616378784179688, + "learning_rate": 9.789835164835164e-06, + "loss": 0.4761, + "step": 29273 + }, + { + "epoch": 80.42307692307692, + "grad_norm": 12.207666397094727, + "learning_rate": 9.78846153846154e-06, + "loss": 0.2057, + "step": 29274 + }, + { + "epoch": 80.42582417582418, + "grad_norm": 9.430135726928711, + "learning_rate": 9.787087912087913e-06, + "loss": 0.1876, + "step": 29275 + }, + { + "epoch": 80.42857142857143, + "grad_norm": 1.3842182159423828, + "learning_rate": 9.785714285714286e-06, + "loss": 0.0166, + "step": 29276 + }, + { + "epoch": 80.43131868131869, + "grad_norm": 14.558428764343262, + "learning_rate": 9.78434065934066e-06, + "loss": 0.224, + "step": 29277 + }, + { + "epoch": 80.43406593406593, + "grad_norm": 13.478791236877441, + "learning_rate": 9.782967032967033e-06, + "loss": 0.3694, + "step": 29278 + }, + { + "epoch": 80.43681318681318, + "grad_norm": 16.882625579833984, + "learning_rate": 9.781593406593408e-06, + "loss": 0.6784, + "step": 29279 + }, + { + "epoch": 80.43956043956044, + "grad_norm": 16.226661682128906, + "learning_rate": 9.78021978021978e-06, + "loss": 0.2639, + "step": 29280 + }, + { + "epoch": 80.4423076923077, + "grad_norm": 16.171581268310547, + "learning_rate": 9.778846153846153e-06, + "loss": 0.3391, + "step": 29281 + }, + { + "epoch": 80.44505494505495, + "grad_norm": 17.284255981445312, + "learning_rate": 9.777472527472528e-06, + "loss": 0.3223, + "step": 29282 + }, + { + "epoch": 80.4478021978022, + "grad_norm": 4.543748378753662, + "learning_rate": 9.776098901098902e-06, + "loss": 0.0432, + "step": 29283 + }, + { + "epoch": 80.45054945054945, + "grad_norm": 16.592674255371094, + "learning_rate": 9.774725274725275e-06, + "loss": 0.1836, + "step": 29284 + }, + { + "epoch": 80.4532967032967, + "grad_norm": 5.826227188110352, + "learning_rate": 9.773351648351649e-06, + "loss": 0.0613, + "step": 29285 + }, + { + "epoch": 80.45604395604396, + "grad_norm": 8.22659969329834, + "learning_rate": 9.771978021978022e-06, + "loss": 0.1125, + "step": 29286 + }, + { + "epoch": 80.45879120879121, + "grad_norm": 4.108090877532959, + "learning_rate": 9.770604395604397e-06, + "loss": 0.0756, + "step": 29287 + }, + { + "epoch": 80.46153846153847, + "grad_norm": 29.9693546295166, + "learning_rate": 9.769230769230769e-06, + "loss": 0.7913, + "step": 29288 + }, + { + "epoch": 80.46428571428571, + "grad_norm": 11.83562183380127, + "learning_rate": 9.767857142857144e-06, + "loss": 0.1209, + "step": 29289 + }, + { + "epoch": 80.46703296703296, + "grad_norm": 4.260273456573486, + "learning_rate": 9.766483516483517e-06, + "loss": 0.0534, + "step": 29290 + }, + { + "epoch": 80.46978021978022, + "grad_norm": 13.794897079467773, + "learning_rate": 9.76510989010989e-06, + "loss": 0.3146, + "step": 29291 + }, + { + "epoch": 80.47252747252747, + "grad_norm": 17.73317527770996, + "learning_rate": 9.763736263736264e-06, + "loss": 0.1671, + "step": 29292 + }, + { + "epoch": 80.47527472527473, + "grad_norm": 12.854863166809082, + "learning_rate": 9.762362637362637e-06, + "loss": 0.1999, + "step": 29293 + }, + { + "epoch": 80.47802197802197, + "grad_norm": 14.694795608520508, + "learning_rate": 9.760989010989013e-06, + "loss": 0.235, + "step": 29294 + }, + { + "epoch": 80.48076923076923, + "grad_norm": 19.6026554107666, + "learning_rate": 9.759615384615384e-06, + "loss": 0.5045, + "step": 29295 + }, + { + "epoch": 80.48351648351648, + "grad_norm": 13.721379280090332, + "learning_rate": 9.758241758241758e-06, + "loss": 0.1392, + "step": 29296 + }, + { + "epoch": 80.48626373626374, + "grad_norm": 5.4649176597595215, + "learning_rate": 9.756868131868133e-06, + "loss": 0.0689, + "step": 29297 + }, + { + "epoch": 80.48901098901099, + "grad_norm": 14.253954887390137, + "learning_rate": 9.755494505494506e-06, + "loss": 0.1843, + "step": 29298 + }, + { + "epoch": 80.49175824175825, + "grad_norm": 6.00343656539917, + "learning_rate": 9.75412087912088e-06, + "loss": 0.064, + "step": 29299 + }, + { + "epoch": 80.49450549450549, + "grad_norm": 15.221083641052246, + "learning_rate": 9.752747252747253e-06, + "loss": 0.3279, + "step": 29300 + }, + { + "epoch": 80.49725274725274, + "grad_norm": 15.78162670135498, + "learning_rate": 9.751373626373626e-06, + "loss": 0.4405, + "step": 29301 + }, + { + "epoch": 80.5, + "grad_norm": 13.591249465942383, + "learning_rate": 9.750000000000002e-06, + "loss": 0.1552, + "step": 29302 + }, + { + "epoch": 80.50274725274726, + "grad_norm": 15.573287010192871, + "learning_rate": 9.748626373626373e-06, + "loss": 0.3447, + "step": 29303 + }, + { + "epoch": 80.50549450549451, + "grad_norm": 6.039116382598877, + "learning_rate": 9.747252747252748e-06, + "loss": 0.1355, + "step": 29304 + }, + { + "epoch": 80.50824175824175, + "grad_norm": 11.539793968200684, + "learning_rate": 9.745879120879122e-06, + "loss": 0.3184, + "step": 29305 + }, + { + "epoch": 80.51098901098901, + "grad_norm": 5.88655948638916, + "learning_rate": 9.744505494505495e-06, + "loss": 0.1452, + "step": 29306 + }, + { + "epoch": 80.51373626373626, + "grad_norm": 14.091215133666992, + "learning_rate": 9.743131868131869e-06, + "loss": 0.2372, + "step": 29307 + }, + { + "epoch": 80.51648351648352, + "grad_norm": 12.313985824584961, + "learning_rate": 9.741758241758242e-06, + "loss": 0.3383, + "step": 29308 + }, + { + "epoch": 80.51923076923077, + "grad_norm": 22.118629455566406, + "learning_rate": 9.740384615384617e-06, + "loss": 0.6174, + "step": 29309 + }, + { + "epoch": 80.52197802197803, + "grad_norm": 19.63990020751953, + "learning_rate": 9.739010989010989e-06, + "loss": 0.4412, + "step": 29310 + }, + { + "epoch": 80.52472527472527, + "grad_norm": 5.467000961303711, + "learning_rate": 9.737637362637362e-06, + "loss": 0.1538, + "step": 29311 + }, + { + "epoch": 80.52747252747253, + "grad_norm": 14.62368392944336, + "learning_rate": 9.736263736263737e-06, + "loss": 0.5183, + "step": 29312 + }, + { + "epoch": 80.53021978021978, + "grad_norm": 4.634358882904053, + "learning_rate": 9.73489010989011e-06, + "loss": 0.0654, + "step": 29313 + }, + { + "epoch": 80.53296703296704, + "grad_norm": 14.271537780761719, + "learning_rate": 9.733516483516484e-06, + "loss": 0.2194, + "step": 29314 + }, + { + "epoch": 80.53571428571429, + "grad_norm": 11.011309623718262, + "learning_rate": 9.732142857142858e-06, + "loss": 0.1838, + "step": 29315 + }, + { + "epoch": 80.53846153846153, + "grad_norm": 15.821576118469238, + "learning_rate": 9.730769230769231e-06, + "loss": 0.4472, + "step": 29316 + }, + { + "epoch": 80.54120879120879, + "grad_norm": 8.757980346679688, + "learning_rate": 9.729395604395606e-06, + "loss": 0.1183, + "step": 29317 + }, + { + "epoch": 80.54395604395604, + "grad_norm": 4.779523849487305, + "learning_rate": 9.728021978021978e-06, + "loss": 0.0561, + "step": 29318 + }, + { + "epoch": 80.5467032967033, + "grad_norm": 15.682072639465332, + "learning_rate": 9.726648351648353e-06, + "loss": 0.1687, + "step": 29319 + }, + { + "epoch": 80.54945054945055, + "grad_norm": 11.159175872802734, + "learning_rate": 9.725274725274726e-06, + "loss": 0.2086, + "step": 29320 + }, + { + "epoch": 80.5521978021978, + "grad_norm": 13.98366928100586, + "learning_rate": 9.7239010989011e-06, + "loss": 0.3713, + "step": 29321 + }, + { + "epoch": 80.55494505494505, + "grad_norm": 11.387481689453125, + "learning_rate": 9.722527472527473e-06, + "loss": 0.2158, + "step": 29322 + }, + { + "epoch": 80.5576923076923, + "grad_norm": 19.34577751159668, + "learning_rate": 9.721153846153846e-06, + "loss": 0.5572, + "step": 29323 + }, + { + "epoch": 80.56043956043956, + "grad_norm": 9.227910041809082, + "learning_rate": 9.71978021978022e-06, + "loss": 0.1565, + "step": 29324 + }, + { + "epoch": 80.56318681318682, + "grad_norm": 4.152676105499268, + "learning_rate": 9.718406593406593e-06, + "loss": 0.0693, + "step": 29325 + }, + { + "epoch": 80.56593406593407, + "grad_norm": 12.860344886779785, + "learning_rate": 9.717032967032967e-06, + "loss": 0.1128, + "step": 29326 + }, + { + "epoch": 80.56868131868131, + "grad_norm": 2.9203226566314697, + "learning_rate": 9.715659340659342e-06, + "loss": 0.0411, + "step": 29327 + }, + { + "epoch": 80.57142857142857, + "grad_norm": 16.68308448791504, + "learning_rate": 9.714285714285715e-06, + "loss": 0.3428, + "step": 29328 + }, + { + "epoch": 80.57417582417582, + "grad_norm": 29.88984489440918, + "learning_rate": 9.712912087912087e-06, + "loss": 0.4533, + "step": 29329 + }, + { + "epoch": 80.57692307692308, + "grad_norm": 24.37156867980957, + "learning_rate": 9.711538461538462e-06, + "loss": 0.7747, + "step": 29330 + }, + { + "epoch": 80.57967032967034, + "grad_norm": 11.059270858764648, + "learning_rate": 9.710164835164835e-06, + "loss": 0.2438, + "step": 29331 + }, + { + "epoch": 80.58241758241758, + "grad_norm": 6.711821556091309, + "learning_rate": 9.70879120879121e-06, + "loss": 0.1053, + "step": 29332 + }, + { + "epoch": 80.58516483516483, + "grad_norm": 9.800851821899414, + "learning_rate": 9.707417582417582e-06, + "loss": 0.1742, + "step": 29333 + }, + { + "epoch": 80.58791208791209, + "grad_norm": 7.629771709442139, + "learning_rate": 9.706043956043956e-06, + "loss": 0.0922, + "step": 29334 + }, + { + "epoch": 80.59065934065934, + "grad_norm": 6.87375020980835, + "learning_rate": 9.70467032967033e-06, + "loss": 0.1149, + "step": 29335 + }, + { + "epoch": 80.5934065934066, + "grad_norm": 5.71623420715332, + "learning_rate": 9.703296703296704e-06, + "loss": 0.0976, + "step": 29336 + }, + { + "epoch": 80.59615384615384, + "grad_norm": 8.217935562133789, + "learning_rate": 9.701923076923078e-06, + "loss": 0.0789, + "step": 29337 + }, + { + "epoch": 80.5989010989011, + "grad_norm": 2.7134299278259277, + "learning_rate": 9.700549450549451e-06, + "loss": 0.0311, + "step": 29338 + }, + { + "epoch": 80.60164835164835, + "grad_norm": 15.811721801757812, + "learning_rate": 9.699175824175824e-06, + "loss": 0.4849, + "step": 29339 + }, + { + "epoch": 80.6043956043956, + "grad_norm": 4.304319381713867, + "learning_rate": 9.697802197802198e-06, + "loss": 0.0599, + "step": 29340 + }, + { + "epoch": 80.60714285714286, + "grad_norm": 8.539910316467285, + "learning_rate": 9.696428571428571e-06, + "loss": 0.136, + "step": 29341 + }, + { + "epoch": 80.60989010989012, + "grad_norm": 6.304854393005371, + "learning_rate": 9.695054945054946e-06, + "loss": 0.1116, + "step": 29342 + }, + { + "epoch": 80.61263736263736, + "grad_norm": 7.6602983474731445, + "learning_rate": 9.69368131868132e-06, + "loss": 0.2252, + "step": 29343 + }, + { + "epoch": 80.61538461538461, + "grad_norm": 17.00025749206543, + "learning_rate": 9.692307692307691e-06, + "loss": 0.2847, + "step": 29344 + }, + { + "epoch": 80.61813186813187, + "grad_norm": 5.0390543937683105, + "learning_rate": 9.690934065934066e-06, + "loss": 0.2115, + "step": 29345 + }, + { + "epoch": 80.62087912087912, + "grad_norm": 14.11386489868164, + "learning_rate": 9.68956043956044e-06, + "loss": 0.5147, + "step": 29346 + }, + { + "epoch": 80.62362637362638, + "grad_norm": 18.91289520263672, + "learning_rate": 9.688186813186815e-06, + "loss": 0.5311, + "step": 29347 + }, + { + "epoch": 80.62637362637362, + "grad_norm": 16.8136043548584, + "learning_rate": 9.686813186813187e-06, + "loss": 0.304, + "step": 29348 + }, + { + "epoch": 80.62912087912088, + "grad_norm": 13.655534744262695, + "learning_rate": 9.68543956043956e-06, + "loss": 0.3754, + "step": 29349 + }, + { + "epoch": 80.63186813186813, + "grad_norm": 1.371415376663208, + "learning_rate": 9.684065934065935e-06, + "loss": 0.0181, + "step": 29350 + }, + { + "epoch": 80.63461538461539, + "grad_norm": 18.285446166992188, + "learning_rate": 9.682692307692309e-06, + "loss": 0.4824, + "step": 29351 + }, + { + "epoch": 80.63736263736264, + "grad_norm": 9.776632308959961, + "learning_rate": 9.681318681318682e-06, + "loss": 0.1425, + "step": 29352 + }, + { + "epoch": 80.64010989010988, + "grad_norm": 18.063329696655273, + "learning_rate": 9.679945054945055e-06, + "loss": 0.3267, + "step": 29353 + }, + { + "epoch": 80.64285714285714, + "grad_norm": 3.6810200214385986, + "learning_rate": 9.678571428571429e-06, + "loss": 0.0698, + "step": 29354 + }, + { + "epoch": 80.6456043956044, + "grad_norm": 4.32224178314209, + "learning_rate": 9.677197802197802e-06, + "loss": 0.0577, + "step": 29355 + }, + { + "epoch": 80.64835164835165, + "grad_norm": 9.075992584228516, + "learning_rate": 9.675824175824176e-06, + "loss": 0.1373, + "step": 29356 + }, + { + "epoch": 80.6510989010989, + "grad_norm": 9.979231834411621, + "learning_rate": 9.67445054945055e-06, + "loss": 0.2079, + "step": 29357 + }, + { + "epoch": 80.65384615384616, + "grad_norm": 17.97949981689453, + "learning_rate": 9.673076923076924e-06, + "loss": 0.2673, + "step": 29358 + }, + { + "epoch": 80.6565934065934, + "grad_norm": 7.416726112365723, + "learning_rate": 9.671703296703296e-06, + "loss": 0.1006, + "step": 29359 + }, + { + "epoch": 80.65934065934066, + "grad_norm": 18.53956413269043, + "learning_rate": 9.670329670329671e-06, + "loss": 0.4008, + "step": 29360 + }, + { + "epoch": 80.66208791208791, + "grad_norm": 16.148103713989258, + "learning_rate": 9.668956043956044e-06, + "loss": 0.2776, + "step": 29361 + }, + { + "epoch": 80.66483516483517, + "grad_norm": 8.948052406311035, + "learning_rate": 9.66758241758242e-06, + "loss": 0.1711, + "step": 29362 + }, + { + "epoch": 80.66758241758242, + "grad_norm": 8.868388175964355, + "learning_rate": 9.666208791208791e-06, + "loss": 0.1795, + "step": 29363 + }, + { + "epoch": 80.67032967032966, + "grad_norm": 7.694831848144531, + "learning_rate": 9.664835164835165e-06, + "loss": 0.1347, + "step": 29364 + }, + { + "epoch": 80.67307692307692, + "grad_norm": 9.56712532043457, + "learning_rate": 9.66346153846154e-06, + "loss": 0.1776, + "step": 29365 + }, + { + "epoch": 80.67582417582418, + "grad_norm": 13.563800811767578, + "learning_rate": 9.662087912087913e-06, + "loss": 0.2422, + "step": 29366 + }, + { + "epoch": 80.67857142857143, + "grad_norm": 18.46065330505371, + "learning_rate": 9.660714285714287e-06, + "loss": 0.418, + "step": 29367 + }, + { + "epoch": 80.68131868131869, + "grad_norm": 12.223705291748047, + "learning_rate": 9.65934065934066e-06, + "loss": 0.1544, + "step": 29368 + }, + { + "epoch": 80.68406593406593, + "grad_norm": 22.984264373779297, + "learning_rate": 9.657967032967033e-06, + "loss": 0.3361, + "step": 29369 + }, + { + "epoch": 80.68681318681318, + "grad_norm": 17.191503524780273, + "learning_rate": 9.656593406593407e-06, + "loss": 0.4756, + "step": 29370 + }, + { + "epoch": 80.68956043956044, + "grad_norm": 25.03338623046875, + "learning_rate": 9.65521978021978e-06, + "loss": 0.3671, + "step": 29371 + }, + { + "epoch": 80.6923076923077, + "grad_norm": 13.27880573272705, + "learning_rate": 9.653846153846155e-06, + "loss": 0.1538, + "step": 29372 + }, + { + "epoch": 80.69505494505495, + "grad_norm": 14.899308204650879, + "learning_rate": 9.652472527472529e-06, + "loss": 0.265, + "step": 29373 + }, + { + "epoch": 80.6978021978022, + "grad_norm": 1.7547012567520142, + "learning_rate": 9.6510989010989e-06, + "loss": 0.0204, + "step": 29374 + }, + { + "epoch": 80.70054945054945, + "grad_norm": 9.041918754577637, + "learning_rate": 9.649725274725275e-06, + "loss": 0.1101, + "step": 29375 + }, + { + "epoch": 80.7032967032967, + "grad_norm": 15.82408618927002, + "learning_rate": 9.648351648351649e-06, + "loss": 0.3823, + "step": 29376 + }, + { + "epoch": 80.70604395604396, + "grad_norm": 1.2726091146469116, + "learning_rate": 9.646978021978022e-06, + "loss": 0.0133, + "step": 29377 + }, + { + "epoch": 80.70879120879121, + "grad_norm": 17.1958065032959, + "learning_rate": 9.645604395604396e-06, + "loss": 0.3822, + "step": 29378 + }, + { + "epoch": 80.71153846153847, + "grad_norm": 14.375933647155762, + "learning_rate": 9.644230769230769e-06, + "loss": 0.3595, + "step": 29379 + }, + { + "epoch": 80.71428571428571, + "grad_norm": 11.19936466217041, + "learning_rate": 9.642857142857144e-06, + "loss": 0.1472, + "step": 29380 + }, + { + "epoch": 80.71703296703296, + "grad_norm": 8.323290824890137, + "learning_rate": 9.641483516483518e-06, + "loss": 0.285, + "step": 29381 + }, + { + "epoch": 80.71978021978022, + "grad_norm": 5.550174236297607, + "learning_rate": 9.64010989010989e-06, + "loss": 0.0503, + "step": 29382 + }, + { + "epoch": 80.72252747252747, + "grad_norm": 18.795766830444336, + "learning_rate": 9.638736263736264e-06, + "loss": 0.3432, + "step": 29383 + }, + { + "epoch": 80.72527472527473, + "grad_norm": 12.53823471069336, + "learning_rate": 9.637362637362638e-06, + "loss": 0.3196, + "step": 29384 + }, + { + "epoch": 80.72802197802197, + "grad_norm": 21.535234451293945, + "learning_rate": 9.635989010989011e-06, + "loss": 0.5486, + "step": 29385 + }, + { + "epoch": 80.73076923076923, + "grad_norm": 8.754448890686035, + "learning_rate": 9.634615384615385e-06, + "loss": 0.0962, + "step": 29386 + }, + { + "epoch": 80.73351648351648, + "grad_norm": 17.771865844726562, + "learning_rate": 9.633241758241758e-06, + "loss": 0.5277, + "step": 29387 + }, + { + "epoch": 80.73626373626374, + "grad_norm": 9.028647422790527, + "learning_rate": 9.631868131868133e-06, + "loss": 0.1989, + "step": 29388 + }, + { + "epoch": 80.73901098901099, + "grad_norm": 6.506931304931641, + "learning_rate": 9.630494505494505e-06, + "loss": 0.1326, + "step": 29389 + }, + { + "epoch": 80.74175824175825, + "grad_norm": 3.2651939392089844, + "learning_rate": 9.62912087912088e-06, + "loss": 0.0282, + "step": 29390 + }, + { + "epoch": 80.74450549450549, + "grad_norm": 13.00995922088623, + "learning_rate": 9.627747252747253e-06, + "loss": 0.3247, + "step": 29391 + }, + { + "epoch": 80.74725274725274, + "grad_norm": 5.400407314300537, + "learning_rate": 9.626373626373627e-06, + "loss": 0.0666, + "step": 29392 + }, + { + "epoch": 80.75, + "grad_norm": 10.802421569824219, + "learning_rate": 9.625e-06, + "loss": 0.1826, + "step": 29393 + }, + { + "epoch": 80.75274725274726, + "grad_norm": 3.601060628890991, + "learning_rate": 9.623626373626374e-06, + "loss": 0.0555, + "step": 29394 + }, + { + "epoch": 80.75549450549451, + "grad_norm": 23.4086856842041, + "learning_rate": 9.622252747252749e-06, + "loss": 0.3074, + "step": 29395 + }, + { + "epoch": 80.75824175824175, + "grad_norm": 11.25174617767334, + "learning_rate": 9.620879120879122e-06, + "loss": 0.1612, + "step": 29396 + }, + { + "epoch": 80.76098901098901, + "grad_norm": 5.093877792358398, + "learning_rate": 9.619505494505494e-06, + "loss": 0.0622, + "step": 29397 + }, + { + "epoch": 80.76373626373626, + "grad_norm": 3.0042223930358887, + "learning_rate": 9.618131868131869e-06, + "loss": 0.0363, + "step": 29398 + }, + { + "epoch": 80.76648351648352, + "grad_norm": 16.95855712890625, + "learning_rate": 9.616758241758242e-06, + "loss": 0.3936, + "step": 29399 + }, + { + "epoch": 80.76923076923077, + "grad_norm": 19.553918838500977, + "learning_rate": 9.615384615384616e-06, + "loss": 0.4724, + "step": 29400 + }, + { + "epoch": 80.77197802197803, + "grad_norm": 10.712830543518066, + "learning_rate": 9.614010989010989e-06, + "loss": 0.1078, + "step": 29401 + }, + { + "epoch": 80.77472527472527, + "grad_norm": 11.16818618774414, + "learning_rate": 9.612637362637363e-06, + "loss": 0.1112, + "step": 29402 + }, + { + "epoch": 80.77747252747253, + "grad_norm": 5.542724609375, + "learning_rate": 9.611263736263738e-06, + "loss": 0.1314, + "step": 29403 + }, + { + "epoch": 80.78021978021978, + "grad_norm": 13.28181266784668, + "learning_rate": 9.60989010989011e-06, + "loss": 0.2723, + "step": 29404 + }, + { + "epoch": 80.78296703296704, + "grad_norm": 6.6991167068481445, + "learning_rate": 9.608516483516484e-06, + "loss": 0.139, + "step": 29405 + }, + { + "epoch": 80.78571428571429, + "grad_norm": 4.519719123840332, + "learning_rate": 9.607142857142858e-06, + "loss": 0.1014, + "step": 29406 + }, + { + "epoch": 80.78846153846153, + "grad_norm": 1.4329198598861694, + "learning_rate": 9.605769230769231e-06, + "loss": 0.022, + "step": 29407 + }, + { + "epoch": 80.79120879120879, + "grad_norm": 17.54999351501465, + "learning_rate": 9.604395604395605e-06, + "loss": 0.2635, + "step": 29408 + }, + { + "epoch": 80.79395604395604, + "grad_norm": 12.97570514678955, + "learning_rate": 9.603021978021978e-06, + "loss": 0.3737, + "step": 29409 + }, + { + "epoch": 80.7967032967033, + "grad_norm": 20.753162384033203, + "learning_rate": 9.601648351648353e-06, + "loss": 0.8235, + "step": 29410 + }, + { + "epoch": 80.79945054945055, + "grad_norm": 5.608880519866943, + "learning_rate": 9.600274725274727e-06, + "loss": 0.0715, + "step": 29411 + }, + { + "epoch": 80.8021978021978, + "grad_norm": 6.6687703132629395, + "learning_rate": 9.598901098901098e-06, + "loss": 0.0656, + "step": 29412 + }, + { + "epoch": 80.80494505494505, + "grad_norm": 21.02163314819336, + "learning_rate": 9.597527472527473e-06, + "loss": 0.2997, + "step": 29413 + }, + { + "epoch": 80.8076923076923, + "grad_norm": 9.827447891235352, + "learning_rate": 9.596153846153847e-06, + "loss": 0.2758, + "step": 29414 + }, + { + "epoch": 80.81043956043956, + "grad_norm": 2.6544604301452637, + "learning_rate": 9.59478021978022e-06, + "loss": 0.041, + "step": 29415 + }, + { + "epoch": 80.81318681318682, + "grad_norm": 22.18245506286621, + "learning_rate": 9.593406593406594e-06, + "loss": 0.6297, + "step": 29416 + }, + { + "epoch": 80.81593406593407, + "grad_norm": 10.384445190429688, + "learning_rate": 9.592032967032967e-06, + "loss": 0.1763, + "step": 29417 + }, + { + "epoch": 80.81868131868131, + "grad_norm": 16.158103942871094, + "learning_rate": 9.590659340659342e-06, + "loss": 0.2581, + "step": 29418 + }, + { + "epoch": 80.82142857142857, + "grad_norm": 7.5559210777282715, + "learning_rate": 9.589285714285714e-06, + "loss": 0.1334, + "step": 29419 + }, + { + "epoch": 80.82417582417582, + "grad_norm": 7.869043827056885, + "learning_rate": 9.587912087912089e-06, + "loss": 0.0783, + "step": 29420 + }, + { + "epoch": 80.82692307692308, + "grad_norm": 4.246941089630127, + "learning_rate": 9.586538461538462e-06, + "loss": 0.0821, + "step": 29421 + }, + { + "epoch": 80.82967032967034, + "grad_norm": 10.716242790222168, + "learning_rate": 9.585164835164836e-06, + "loss": 0.2409, + "step": 29422 + }, + { + "epoch": 80.83241758241758, + "grad_norm": 19.10564613342285, + "learning_rate": 9.58379120879121e-06, + "loss": 0.3374, + "step": 29423 + }, + { + "epoch": 80.83516483516483, + "grad_norm": 7.127635955810547, + "learning_rate": 9.582417582417583e-06, + "loss": 0.0806, + "step": 29424 + }, + { + "epoch": 80.83791208791209, + "grad_norm": 5.823753833770752, + "learning_rate": 9.581043956043958e-06, + "loss": 0.084, + "step": 29425 + }, + { + "epoch": 80.84065934065934, + "grad_norm": 20.994112014770508, + "learning_rate": 9.579670329670331e-06, + "loss": 0.4344, + "step": 29426 + }, + { + "epoch": 80.8434065934066, + "grad_norm": 9.065673828125, + "learning_rate": 9.578296703296703e-06, + "loss": 0.099, + "step": 29427 + }, + { + "epoch": 80.84615384615384, + "grad_norm": 11.72497272491455, + "learning_rate": 9.576923076923078e-06, + "loss": 0.1604, + "step": 29428 + }, + { + "epoch": 80.8489010989011, + "grad_norm": 13.37994384765625, + "learning_rate": 9.575549450549451e-06, + "loss": 0.3915, + "step": 29429 + }, + { + "epoch": 80.85164835164835, + "grad_norm": 15.62470531463623, + "learning_rate": 9.574175824175825e-06, + "loss": 0.2886, + "step": 29430 + }, + { + "epoch": 80.8543956043956, + "grad_norm": 12.392782211303711, + "learning_rate": 9.572802197802198e-06, + "loss": 0.2411, + "step": 29431 + }, + { + "epoch": 80.85714285714286, + "grad_norm": 13.650404930114746, + "learning_rate": 9.571428571428572e-06, + "loss": 0.2065, + "step": 29432 + }, + { + "epoch": 80.85989010989012, + "grad_norm": 5.075901508331299, + "learning_rate": 9.570054945054947e-06, + "loss": 0.0827, + "step": 29433 + }, + { + "epoch": 80.86263736263736, + "grad_norm": 16.785476684570312, + "learning_rate": 9.568681318681318e-06, + "loss": 0.649, + "step": 29434 + }, + { + "epoch": 80.86538461538461, + "grad_norm": 6.544099807739258, + "learning_rate": 9.567307692307692e-06, + "loss": 0.1001, + "step": 29435 + }, + { + "epoch": 80.86813186813187, + "grad_norm": 11.69880485534668, + "learning_rate": 9.565934065934067e-06, + "loss": 0.1503, + "step": 29436 + }, + { + "epoch": 80.87087912087912, + "grad_norm": 18.199256896972656, + "learning_rate": 9.56456043956044e-06, + "loss": 0.2013, + "step": 29437 + }, + { + "epoch": 80.87362637362638, + "grad_norm": 8.23401927947998, + "learning_rate": 9.563186813186814e-06, + "loss": 0.0765, + "step": 29438 + }, + { + "epoch": 80.87637362637362, + "grad_norm": 11.19907283782959, + "learning_rate": 9.561813186813187e-06, + "loss": 0.1261, + "step": 29439 + }, + { + "epoch": 80.87912087912088, + "grad_norm": 22.14192771911621, + "learning_rate": 9.56043956043956e-06, + "loss": 0.6994, + "step": 29440 + }, + { + "epoch": 80.88186813186813, + "grad_norm": 11.202120780944824, + "learning_rate": 9.559065934065934e-06, + "loss": 0.173, + "step": 29441 + }, + { + "epoch": 80.88461538461539, + "grad_norm": 22.35061264038086, + "learning_rate": 9.557692307692307e-06, + "loss": 0.4326, + "step": 29442 + }, + { + "epoch": 80.88736263736264, + "grad_norm": 16.655832290649414, + "learning_rate": 9.556318681318682e-06, + "loss": 0.1905, + "step": 29443 + }, + { + "epoch": 80.89010989010988, + "grad_norm": 16.799543380737305, + "learning_rate": 9.554945054945056e-06, + "loss": 0.3954, + "step": 29444 + }, + { + "epoch": 80.89285714285714, + "grad_norm": 16.74554443359375, + "learning_rate": 9.55357142857143e-06, + "loss": 0.1791, + "step": 29445 + }, + { + "epoch": 80.8956043956044, + "grad_norm": 13.464826583862305, + "learning_rate": 9.552197802197803e-06, + "loss": 0.0993, + "step": 29446 + }, + { + "epoch": 80.89835164835165, + "grad_norm": 11.741179466247559, + "learning_rate": 9.550824175824176e-06, + "loss": 0.1712, + "step": 29447 + }, + { + "epoch": 80.9010989010989, + "grad_norm": 6.886359691619873, + "learning_rate": 9.549450549450551e-06, + "loss": 0.1196, + "step": 29448 + }, + { + "epoch": 80.90384615384616, + "grad_norm": 8.512632369995117, + "learning_rate": 9.548076923076923e-06, + "loss": 0.0863, + "step": 29449 + }, + { + "epoch": 80.9065934065934, + "grad_norm": 9.617587089538574, + "learning_rate": 9.546703296703296e-06, + "loss": 0.1349, + "step": 29450 + }, + { + "epoch": 80.90934065934066, + "grad_norm": 5.240663528442383, + "learning_rate": 9.545329670329671e-06, + "loss": 0.0867, + "step": 29451 + }, + { + "epoch": 80.91208791208791, + "grad_norm": 18.317716598510742, + "learning_rate": 9.543956043956045e-06, + "loss": 0.349, + "step": 29452 + }, + { + "epoch": 80.91483516483517, + "grad_norm": 19.087810516357422, + "learning_rate": 9.542582417582418e-06, + "loss": 0.3369, + "step": 29453 + }, + { + "epoch": 80.91758241758242, + "grad_norm": 4.8888983726501465, + "learning_rate": 9.541208791208792e-06, + "loss": 0.0813, + "step": 29454 + }, + { + "epoch": 80.92032967032966, + "grad_norm": 16.371335983276367, + "learning_rate": 9.539835164835165e-06, + "loss": 0.246, + "step": 29455 + }, + { + "epoch": 80.92307692307692, + "grad_norm": 9.07483196258545, + "learning_rate": 9.538461538461538e-06, + "loss": 0.0911, + "step": 29456 + }, + { + "epoch": 80.92582417582418, + "grad_norm": 5.377566337585449, + "learning_rate": 9.537087912087912e-06, + "loss": 0.0867, + "step": 29457 + }, + { + "epoch": 80.92857142857143, + "grad_norm": 12.659698486328125, + "learning_rate": 9.535714285714287e-06, + "loss": 0.1948, + "step": 29458 + }, + { + "epoch": 80.93131868131869, + "grad_norm": 14.201236724853516, + "learning_rate": 9.53434065934066e-06, + "loss": 0.1722, + "step": 29459 + }, + { + "epoch": 80.93406593406593, + "grad_norm": 7.530688285827637, + "learning_rate": 9.532967032967034e-06, + "loss": 0.0983, + "step": 29460 + }, + { + "epoch": 80.93681318681318, + "grad_norm": 7.141036033630371, + "learning_rate": 9.531593406593407e-06, + "loss": 0.2254, + "step": 29461 + }, + { + "epoch": 80.93956043956044, + "grad_norm": 7.723754405975342, + "learning_rate": 9.53021978021978e-06, + "loss": 0.0845, + "step": 29462 + }, + { + "epoch": 80.9423076923077, + "grad_norm": 12.353116989135742, + "learning_rate": 9.528846153846156e-06, + "loss": 0.2488, + "step": 29463 + }, + { + "epoch": 80.94505494505495, + "grad_norm": 2.4282634258270264, + "learning_rate": 9.527472527472527e-06, + "loss": 0.0245, + "step": 29464 + }, + { + "epoch": 80.9478021978022, + "grad_norm": 3.6923842430114746, + "learning_rate": 9.5260989010989e-06, + "loss": 0.1867, + "step": 29465 + }, + { + "epoch": 80.95054945054945, + "grad_norm": 7.839773178100586, + "learning_rate": 9.524725274725276e-06, + "loss": 0.0856, + "step": 29466 + }, + { + "epoch": 80.9532967032967, + "grad_norm": 17.92125701904297, + "learning_rate": 9.52335164835165e-06, + "loss": 0.2065, + "step": 29467 + }, + { + "epoch": 80.95604395604396, + "grad_norm": 11.149209022521973, + "learning_rate": 9.521978021978023e-06, + "loss": 0.1175, + "step": 29468 + }, + { + "epoch": 80.95879120879121, + "grad_norm": 15.602204322814941, + "learning_rate": 9.520604395604396e-06, + "loss": 0.2762, + "step": 29469 + }, + { + "epoch": 80.96153846153847, + "grad_norm": 17.63224983215332, + "learning_rate": 9.51923076923077e-06, + "loss": 0.4209, + "step": 29470 + }, + { + "epoch": 80.96428571428571, + "grad_norm": 14.82719898223877, + "learning_rate": 9.517857142857143e-06, + "loss": 0.3097, + "step": 29471 + }, + { + "epoch": 80.96703296703296, + "grad_norm": 3.4675989151000977, + "learning_rate": 9.516483516483516e-06, + "loss": 0.0479, + "step": 29472 + }, + { + "epoch": 80.96978021978022, + "grad_norm": 4.121267318725586, + "learning_rate": 9.515109890109891e-06, + "loss": 0.0544, + "step": 29473 + }, + { + "epoch": 80.97252747252747, + "grad_norm": 23.722503662109375, + "learning_rate": 9.513736263736265e-06, + "loss": 0.4541, + "step": 29474 + }, + { + "epoch": 80.97527472527473, + "grad_norm": 12.065597534179688, + "learning_rate": 9.512362637362638e-06, + "loss": 0.1037, + "step": 29475 + }, + { + "epoch": 80.97802197802197, + "grad_norm": 12.301457405090332, + "learning_rate": 9.510989010989012e-06, + "loss": 0.3456, + "step": 29476 + }, + { + "epoch": 80.98076923076923, + "grad_norm": 14.907705307006836, + "learning_rate": 9.509615384615385e-06, + "loss": 0.2118, + "step": 29477 + }, + { + "epoch": 80.98351648351648, + "grad_norm": 20.836313247680664, + "learning_rate": 9.50824175824176e-06, + "loss": 0.4342, + "step": 29478 + }, + { + "epoch": 80.98626373626374, + "grad_norm": 3.83235502243042, + "learning_rate": 9.506868131868132e-06, + "loss": 0.0583, + "step": 29479 + }, + { + "epoch": 80.98901098901099, + "grad_norm": 1.4690136909484863, + "learning_rate": 9.505494505494505e-06, + "loss": 0.0185, + "step": 29480 + }, + { + "epoch": 80.99175824175825, + "grad_norm": 8.820075035095215, + "learning_rate": 9.50412087912088e-06, + "loss": 0.1785, + "step": 29481 + }, + { + "epoch": 80.99450549450549, + "grad_norm": 28.081153869628906, + "learning_rate": 9.502747252747254e-06, + "loss": 0.3243, + "step": 29482 + }, + { + "epoch": 80.99725274725274, + "grad_norm": 16.692096710205078, + "learning_rate": 9.501373626373627e-06, + "loss": 0.3364, + "step": 29483 + }, + { + "epoch": 81.0, + "grad_norm": 39.77339553833008, + "learning_rate": 9.5e-06, + "loss": 0.6209, + "step": 29484 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.7796143250688705, + "eval_f1": 0.7647173380727462, + "eval_f1_DuraRiadoRio_64x64": 0.7249357326478149, + "eval_f1_Mole_64x64": 0.8447653429602888, + "eval_f1_Quebrado_64x64": 0.8956228956228957, + "eval_f1_RiadoRio_64x64": 0.44660194174757284, + "eval_f1_RioFechado_64x64": 0.911660777385159, + "eval_loss": 1.203691005706787, + "eval_precision": 0.8182471933653392, + "eval_precision_DuraRiadoRio_64x64": 0.5755102040816327, + "eval_precision_Mole_64x64": 0.8796992481203008, + "eval_precision_Quebrado_64x64": 0.869281045751634, + "eval_precision_RiadoRio_64x64": 0.8518518518518519, + "eval_precision_RioFechado_64x64": 0.9148936170212766, + "eval_recall": 0.7852720121900997, + "eval_recall_DuraRiadoRio_64x64": 0.9791666666666666, + "eval_recall_Mole_64x64": 0.8125, + "eval_recall_Quebrado_64x64": 0.9236111111111112, + "eval_recall_RiadoRio_64x64": 0.3026315789473684, + "eval_recall_RioFechado_64x64": 0.9084507042253521, + "eval_runtime": 1.7587, + "eval_samples_per_second": 412.804, + "eval_steps_per_second": 26.156, + "step": 29484 + }, + { + "epoch": 81.00274725274726, + "grad_norm": 9.547040939331055, + "learning_rate": 9.498626373626374e-06, + "loss": 0.1163, + "step": 29485 + }, + { + "epoch": 81.00549450549451, + "grad_norm": 4.381016731262207, + "learning_rate": 9.497252747252747e-06, + "loss": 0.0905, + "step": 29486 + }, + { + "epoch": 81.00824175824175, + "grad_norm": 21.36003875732422, + "learning_rate": 9.49587912087912e-06, + "loss": 0.5262, + "step": 29487 + }, + { + "epoch": 81.01098901098901, + "grad_norm": 17.556604385375977, + "learning_rate": 9.494505494505496e-06, + "loss": 0.2763, + "step": 29488 + }, + { + "epoch": 81.01373626373626, + "grad_norm": 10.8814697265625, + "learning_rate": 9.49313186813187e-06, + "loss": 0.1944, + "step": 29489 + }, + { + "epoch": 81.01648351648352, + "grad_norm": 11.369833946228027, + "learning_rate": 9.491758241758241e-06, + "loss": 0.133, + "step": 29490 + }, + { + "epoch": 81.01923076923077, + "grad_norm": 8.942181587219238, + "learning_rate": 9.490384615384616e-06, + "loss": 0.1218, + "step": 29491 + }, + { + "epoch": 81.02197802197803, + "grad_norm": 26.774776458740234, + "learning_rate": 9.48901098901099e-06, + "loss": 0.612, + "step": 29492 + }, + { + "epoch": 81.02472527472527, + "grad_norm": 6.445992469787598, + "learning_rate": 9.487637362637363e-06, + "loss": 0.0795, + "step": 29493 + }, + { + "epoch": 81.02747252747253, + "grad_norm": 21.954124450683594, + "learning_rate": 9.486263736263736e-06, + "loss": 0.5711, + "step": 29494 + }, + { + "epoch": 81.03021978021978, + "grad_norm": 2.5384459495544434, + "learning_rate": 9.48489010989011e-06, + "loss": 0.0329, + "step": 29495 + }, + { + "epoch": 81.03296703296704, + "grad_norm": 21.649322509765625, + "learning_rate": 9.483516483516485e-06, + "loss": 0.3483, + "step": 29496 + }, + { + "epoch": 81.03571428571429, + "grad_norm": 15.108762741088867, + "learning_rate": 9.482142857142858e-06, + "loss": 0.3878, + "step": 29497 + }, + { + "epoch": 81.03846153846153, + "grad_norm": 9.747364044189453, + "learning_rate": 9.48076923076923e-06, + "loss": 0.2705, + "step": 29498 + }, + { + "epoch": 81.04120879120879, + "grad_norm": 14.751167297363281, + "learning_rate": 9.479395604395605e-06, + "loss": 0.3373, + "step": 29499 + }, + { + "epoch": 81.04395604395604, + "grad_norm": 11.81246280670166, + "learning_rate": 9.478021978021978e-06, + "loss": 0.1642, + "step": 29500 + }, + { + "epoch": 81.0467032967033, + "grad_norm": 17.755226135253906, + "learning_rate": 9.476648351648352e-06, + "loss": 0.3975, + "step": 29501 + }, + { + "epoch": 81.04945054945055, + "grad_norm": 1.829217791557312, + "learning_rate": 9.475274725274725e-06, + "loss": 0.0245, + "step": 29502 + }, + { + "epoch": 81.0521978021978, + "grad_norm": 11.319655418395996, + "learning_rate": 9.473901098901099e-06, + "loss": 0.2447, + "step": 29503 + }, + { + "epoch": 81.05494505494505, + "grad_norm": 28.218242645263672, + "learning_rate": 9.472527472527474e-06, + "loss": 0.2879, + "step": 29504 + }, + { + "epoch": 81.0576923076923, + "grad_norm": 9.4248628616333, + "learning_rate": 9.471153846153845e-06, + "loss": 0.1215, + "step": 29505 + }, + { + "epoch": 81.06043956043956, + "grad_norm": 11.714912414550781, + "learning_rate": 9.46978021978022e-06, + "loss": 0.1542, + "step": 29506 + }, + { + "epoch": 81.06318681318682, + "grad_norm": 42.94487762451172, + "learning_rate": 9.468406593406594e-06, + "loss": 0.4284, + "step": 29507 + }, + { + "epoch": 81.06593406593407, + "grad_norm": 8.43618106842041, + "learning_rate": 9.467032967032967e-06, + "loss": 0.1519, + "step": 29508 + }, + { + "epoch": 81.06868131868131, + "grad_norm": 21.430849075317383, + "learning_rate": 9.46565934065934e-06, + "loss": 0.3036, + "step": 29509 + }, + { + "epoch": 81.07142857142857, + "grad_norm": 1.7501678466796875, + "learning_rate": 9.464285714285714e-06, + "loss": 0.0202, + "step": 29510 + }, + { + "epoch": 81.07417582417582, + "grad_norm": 14.365086555480957, + "learning_rate": 9.46291208791209e-06, + "loss": 0.2888, + "step": 29511 + }, + { + "epoch": 81.07692307692308, + "grad_norm": 17.356014251708984, + "learning_rate": 9.461538461538463e-06, + "loss": 0.5284, + "step": 29512 + }, + { + "epoch": 81.07967032967034, + "grad_norm": 8.765056610107422, + "learning_rate": 9.460164835164834e-06, + "loss": 0.154, + "step": 29513 + }, + { + "epoch": 81.08241758241758, + "grad_norm": 12.05852222442627, + "learning_rate": 9.45879120879121e-06, + "loss": 0.3108, + "step": 29514 + }, + { + "epoch": 81.08516483516483, + "grad_norm": 12.861404418945312, + "learning_rate": 9.457417582417583e-06, + "loss": 0.1771, + "step": 29515 + }, + { + "epoch": 81.08791208791209, + "grad_norm": 7.107258319854736, + "learning_rate": 9.456043956043956e-06, + "loss": 0.0718, + "step": 29516 + }, + { + "epoch": 81.09065934065934, + "grad_norm": 15.195696830749512, + "learning_rate": 9.45467032967033e-06, + "loss": 0.3323, + "step": 29517 + }, + { + "epoch": 81.0934065934066, + "grad_norm": 3.5087199211120605, + "learning_rate": 9.453296703296703e-06, + "loss": 0.0454, + "step": 29518 + }, + { + "epoch": 81.09615384615384, + "grad_norm": 12.992219924926758, + "learning_rate": 9.451923076923078e-06, + "loss": 0.2823, + "step": 29519 + }, + { + "epoch": 81.0989010989011, + "grad_norm": 0.9976471662521362, + "learning_rate": 9.45054945054945e-06, + "loss": 0.0174, + "step": 29520 + }, + { + "epoch": 81.10164835164835, + "grad_norm": 6.145142078399658, + "learning_rate": 9.449175824175825e-06, + "loss": 0.1013, + "step": 29521 + }, + { + "epoch": 81.1043956043956, + "grad_norm": 23.291156768798828, + "learning_rate": 9.447802197802198e-06, + "loss": 0.3401, + "step": 29522 + }, + { + "epoch": 81.10714285714286, + "grad_norm": 17.478717803955078, + "learning_rate": 9.446428571428572e-06, + "loss": 0.3547, + "step": 29523 + }, + { + "epoch": 81.10989010989012, + "grad_norm": 15.399103164672852, + "learning_rate": 9.445054945054945e-06, + "loss": 0.3335, + "step": 29524 + }, + { + "epoch": 81.11263736263736, + "grad_norm": 8.626708030700684, + "learning_rate": 9.443681318681319e-06, + "loss": 0.2133, + "step": 29525 + }, + { + "epoch": 81.11538461538461, + "grad_norm": 1.6369565725326538, + "learning_rate": 9.442307692307694e-06, + "loss": 0.0182, + "step": 29526 + }, + { + "epoch": 81.11813186813187, + "grad_norm": 8.370542526245117, + "learning_rate": 9.440934065934067e-06, + "loss": 0.1988, + "step": 29527 + }, + { + "epoch": 81.12087912087912, + "grad_norm": 3.838625907897949, + "learning_rate": 9.439560439560439e-06, + "loss": 0.0993, + "step": 29528 + }, + { + "epoch": 81.12362637362638, + "grad_norm": 12.245165824890137, + "learning_rate": 9.438186813186814e-06, + "loss": 0.2077, + "step": 29529 + }, + { + "epoch": 81.12637362637362, + "grad_norm": 11.826454162597656, + "learning_rate": 9.436813186813187e-06, + "loss": 0.3142, + "step": 29530 + }, + { + "epoch": 81.12912087912088, + "grad_norm": 8.776468276977539, + "learning_rate": 9.43543956043956e-06, + "loss": 0.0747, + "step": 29531 + }, + { + "epoch": 81.13186813186813, + "grad_norm": 21.757251739501953, + "learning_rate": 9.434065934065934e-06, + "loss": 1.5228, + "step": 29532 + }, + { + "epoch": 81.13461538461539, + "grad_norm": 2.8522732257843018, + "learning_rate": 9.432692307692308e-06, + "loss": 0.0314, + "step": 29533 + }, + { + "epoch": 81.13736263736264, + "grad_norm": 6.76032018661499, + "learning_rate": 9.431318681318683e-06, + "loss": 0.0905, + "step": 29534 + }, + { + "epoch": 81.14010989010988, + "grad_norm": 14.436067581176758, + "learning_rate": 9.429945054945054e-06, + "loss": 0.3053, + "step": 29535 + }, + { + "epoch": 81.14285714285714, + "grad_norm": 11.172998428344727, + "learning_rate": 9.42857142857143e-06, + "loss": 0.168, + "step": 29536 + }, + { + "epoch": 81.1456043956044, + "grad_norm": 6.273331165313721, + "learning_rate": 9.427197802197803e-06, + "loss": 0.1437, + "step": 29537 + }, + { + "epoch": 81.14835164835165, + "grad_norm": 6.38698673248291, + "learning_rate": 9.425824175824176e-06, + "loss": 0.0931, + "step": 29538 + }, + { + "epoch": 81.1510989010989, + "grad_norm": 17.066743850708008, + "learning_rate": 9.42445054945055e-06, + "loss": 0.2843, + "step": 29539 + }, + { + "epoch": 81.15384615384616, + "grad_norm": 19.412370681762695, + "learning_rate": 9.423076923076923e-06, + "loss": 0.3822, + "step": 29540 + }, + { + "epoch": 81.1565934065934, + "grad_norm": 13.608819961547852, + "learning_rate": 9.421703296703298e-06, + "loss": 0.2089, + "step": 29541 + }, + { + "epoch": 81.15934065934066, + "grad_norm": 23.044719696044922, + "learning_rate": 9.420329670329672e-06, + "loss": 0.5474, + "step": 29542 + }, + { + "epoch": 81.16208791208791, + "grad_norm": 11.086199760437012, + "learning_rate": 9.418956043956043e-06, + "loss": 0.4573, + "step": 29543 + }, + { + "epoch": 81.16483516483517, + "grad_norm": 13.51689624786377, + "learning_rate": 9.417582417582418e-06, + "loss": 0.3199, + "step": 29544 + }, + { + "epoch": 81.16758241758242, + "grad_norm": 16.033597946166992, + "learning_rate": 9.416208791208792e-06, + "loss": 0.1989, + "step": 29545 + }, + { + "epoch": 81.17032967032966, + "grad_norm": 13.94384765625, + "learning_rate": 9.414835164835165e-06, + "loss": 0.197, + "step": 29546 + }, + { + "epoch": 81.17307692307692, + "grad_norm": 13.933124542236328, + "learning_rate": 9.413461538461539e-06, + "loss": 0.233, + "step": 29547 + }, + { + "epoch": 81.17582417582418, + "grad_norm": 4.2272772789001465, + "learning_rate": 9.412087912087912e-06, + "loss": 0.0643, + "step": 29548 + }, + { + "epoch": 81.17857142857143, + "grad_norm": 17.20306968688965, + "learning_rate": 9.410714285714287e-06, + "loss": 0.5897, + "step": 29549 + }, + { + "epoch": 81.18131868131869, + "grad_norm": 11.397491455078125, + "learning_rate": 9.409340659340659e-06, + "loss": 0.2474, + "step": 29550 + }, + { + "epoch": 81.18406593406593, + "grad_norm": 10.826942443847656, + "learning_rate": 9.407967032967032e-06, + "loss": 0.2711, + "step": 29551 + }, + { + "epoch": 81.18681318681318, + "grad_norm": 8.21532917022705, + "learning_rate": 9.406593406593407e-06, + "loss": 0.112, + "step": 29552 + }, + { + "epoch": 81.18956043956044, + "grad_norm": 5.9926533699035645, + "learning_rate": 9.40521978021978e-06, + "loss": 0.0736, + "step": 29553 + }, + { + "epoch": 81.1923076923077, + "grad_norm": 2.0617311000823975, + "learning_rate": 9.403846153846154e-06, + "loss": 0.0151, + "step": 29554 + }, + { + "epoch": 81.19505494505495, + "grad_norm": 11.938844680786133, + "learning_rate": 9.402472527472528e-06, + "loss": 0.2319, + "step": 29555 + }, + { + "epoch": 81.1978021978022, + "grad_norm": 12.545372009277344, + "learning_rate": 9.401098901098901e-06, + "loss": 0.1795, + "step": 29556 + }, + { + "epoch": 81.20054945054945, + "grad_norm": 13.42886734008789, + "learning_rate": 9.399725274725276e-06, + "loss": 0.2364, + "step": 29557 + }, + { + "epoch": 81.2032967032967, + "grad_norm": 11.079425811767578, + "learning_rate": 9.398351648351648e-06, + "loss": 0.179, + "step": 29558 + }, + { + "epoch": 81.20604395604396, + "grad_norm": 13.466181755065918, + "learning_rate": 9.396978021978023e-06, + "loss": 0.2045, + "step": 29559 + }, + { + "epoch": 81.20879120879121, + "grad_norm": 8.302312850952148, + "learning_rate": 9.395604395604396e-06, + "loss": 0.1134, + "step": 29560 + }, + { + "epoch": 81.21153846153847, + "grad_norm": 12.407926559448242, + "learning_rate": 9.39423076923077e-06, + "loss": 0.1851, + "step": 29561 + }, + { + "epoch": 81.21428571428571, + "grad_norm": 3.883545398712158, + "learning_rate": 9.392857142857143e-06, + "loss": 0.058, + "step": 29562 + }, + { + "epoch": 81.21703296703296, + "grad_norm": 15.732755661010742, + "learning_rate": 9.391483516483517e-06, + "loss": 0.1783, + "step": 29563 + }, + { + "epoch": 81.21978021978022, + "grad_norm": 11.047286987304688, + "learning_rate": 9.390109890109892e-06, + "loss": 0.2434, + "step": 29564 + }, + { + "epoch": 81.22252747252747, + "grad_norm": 10.513762474060059, + "learning_rate": 9.388736263736263e-06, + "loss": 0.265, + "step": 29565 + }, + { + "epoch": 81.22527472527473, + "grad_norm": 16.78224754333496, + "learning_rate": 9.387362637362637e-06, + "loss": 0.3708, + "step": 29566 + }, + { + "epoch": 81.22802197802197, + "grad_norm": 17.645580291748047, + "learning_rate": 9.385989010989012e-06, + "loss": 0.372, + "step": 29567 + }, + { + "epoch": 81.23076923076923, + "grad_norm": 11.252803802490234, + "learning_rate": 9.384615384615385e-06, + "loss": 0.2388, + "step": 29568 + }, + { + "epoch": 81.23351648351648, + "grad_norm": 16.0618953704834, + "learning_rate": 9.383241758241759e-06, + "loss": 0.3205, + "step": 29569 + }, + { + "epoch": 81.23626373626374, + "grad_norm": 6.458106517791748, + "learning_rate": 9.381868131868132e-06, + "loss": 0.1336, + "step": 29570 + }, + { + "epoch": 81.23901098901099, + "grad_norm": 1.4464991092681885, + "learning_rate": 9.380494505494506e-06, + "loss": 0.0222, + "step": 29571 + }, + { + "epoch": 81.24175824175825, + "grad_norm": 9.768431663513184, + "learning_rate": 9.37912087912088e-06, + "loss": 0.1829, + "step": 29572 + }, + { + "epoch": 81.24450549450549, + "grad_norm": 7.1089887619018555, + "learning_rate": 9.377747252747252e-06, + "loss": 0.0779, + "step": 29573 + }, + { + "epoch": 81.24725274725274, + "grad_norm": 16.00839614868164, + "learning_rate": 9.376373626373627e-06, + "loss": 0.2769, + "step": 29574 + }, + { + "epoch": 81.25, + "grad_norm": 12.504105567932129, + "learning_rate": 9.375000000000001e-06, + "loss": 0.3977, + "step": 29575 + }, + { + "epoch": 81.25274725274726, + "grad_norm": 14.996858596801758, + "learning_rate": 9.373626373626374e-06, + "loss": 0.2057, + "step": 29576 + }, + { + "epoch": 81.25549450549451, + "grad_norm": 6.066094875335693, + "learning_rate": 9.372252747252748e-06, + "loss": 0.0984, + "step": 29577 + }, + { + "epoch": 81.25824175824175, + "grad_norm": 9.909998893737793, + "learning_rate": 9.370879120879121e-06, + "loss": 0.0906, + "step": 29578 + }, + { + "epoch": 81.26098901098901, + "grad_norm": 12.73479175567627, + "learning_rate": 9.369505494505496e-06, + "loss": 0.199, + "step": 29579 + }, + { + "epoch": 81.26373626373626, + "grad_norm": 0.38662147521972656, + "learning_rate": 9.368131868131868e-06, + "loss": 0.0066, + "step": 29580 + }, + { + "epoch": 81.26648351648352, + "grad_norm": 16.674890518188477, + "learning_rate": 9.366758241758241e-06, + "loss": 0.3119, + "step": 29581 + }, + { + "epoch": 81.26923076923077, + "grad_norm": 3.953341245651245, + "learning_rate": 9.365384615384616e-06, + "loss": 0.0546, + "step": 29582 + }, + { + "epoch": 81.27197802197803, + "grad_norm": 11.112137794494629, + "learning_rate": 9.36401098901099e-06, + "loss": 0.181, + "step": 29583 + }, + { + "epoch": 81.27472527472527, + "grad_norm": 15.763240814208984, + "learning_rate": 9.362637362637363e-06, + "loss": 0.3618, + "step": 29584 + }, + { + "epoch": 81.27747252747253, + "grad_norm": 15.573064804077148, + "learning_rate": 9.361263736263737e-06, + "loss": 0.3441, + "step": 29585 + }, + { + "epoch": 81.28021978021978, + "grad_norm": 12.793564796447754, + "learning_rate": 9.35989010989011e-06, + "loss": 0.4139, + "step": 29586 + }, + { + "epoch": 81.28296703296704, + "grad_norm": 3.052877426147461, + "learning_rate": 9.358516483516485e-06, + "loss": 0.033, + "step": 29587 + }, + { + "epoch": 81.28571428571429, + "grad_norm": 12.379615783691406, + "learning_rate": 9.357142857142857e-06, + "loss": 0.2628, + "step": 29588 + }, + { + "epoch": 81.28846153846153, + "grad_norm": 17.213071823120117, + "learning_rate": 9.355769230769232e-06, + "loss": 0.4105, + "step": 29589 + }, + { + "epoch": 81.29120879120879, + "grad_norm": 12.51457405090332, + "learning_rate": 9.354395604395605e-06, + "loss": 0.3034, + "step": 29590 + }, + { + "epoch": 81.29395604395604, + "grad_norm": 13.356691360473633, + "learning_rate": 9.353021978021979e-06, + "loss": 0.2465, + "step": 29591 + }, + { + "epoch": 81.2967032967033, + "grad_norm": 11.252567291259766, + "learning_rate": 9.351648351648352e-06, + "loss": 0.3393, + "step": 29592 + }, + { + "epoch": 81.29945054945055, + "grad_norm": 4.724492073059082, + "learning_rate": 9.350274725274726e-06, + "loss": 0.0714, + "step": 29593 + }, + { + "epoch": 81.3021978021978, + "grad_norm": 9.682013511657715, + "learning_rate": 9.3489010989011e-06, + "loss": 0.1864, + "step": 29594 + }, + { + "epoch": 81.30494505494505, + "grad_norm": 17.266605377197266, + "learning_rate": 9.347527472527472e-06, + "loss": 0.3482, + "step": 29595 + }, + { + "epoch": 81.3076923076923, + "grad_norm": 5.705070972442627, + "learning_rate": 9.346153846153846e-06, + "loss": 0.0433, + "step": 29596 + }, + { + "epoch": 81.31043956043956, + "grad_norm": 7.245028972625732, + "learning_rate": 9.344780219780221e-06, + "loss": 0.1499, + "step": 29597 + }, + { + "epoch": 81.31318681318682, + "grad_norm": 18.079761505126953, + "learning_rate": 9.343406593406594e-06, + "loss": 0.2136, + "step": 29598 + }, + { + "epoch": 81.31593406593407, + "grad_norm": 5.62375545501709, + "learning_rate": 9.342032967032968e-06, + "loss": 0.1081, + "step": 29599 + }, + { + "epoch": 81.31868131868131, + "grad_norm": 22.178613662719727, + "learning_rate": 9.340659340659341e-06, + "loss": 0.5331, + "step": 29600 + }, + { + "epoch": 81.32142857142857, + "grad_norm": 28.81971549987793, + "learning_rate": 9.339285714285715e-06, + "loss": 0.6013, + "step": 29601 + }, + { + "epoch": 81.32417582417582, + "grad_norm": 17.348861694335938, + "learning_rate": 9.337912087912088e-06, + "loss": 0.2455, + "step": 29602 + }, + { + "epoch": 81.32692307692308, + "grad_norm": 8.008465766906738, + "learning_rate": 9.336538461538461e-06, + "loss": 0.1391, + "step": 29603 + }, + { + "epoch": 81.32967032967034, + "grad_norm": 12.620539665222168, + "learning_rate": 9.335164835164835e-06, + "loss": 0.3193, + "step": 29604 + }, + { + "epoch": 81.33241758241758, + "grad_norm": 16.755136489868164, + "learning_rate": 9.33379120879121e-06, + "loss": 0.4797, + "step": 29605 + }, + { + "epoch": 81.33516483516483, + "grad_norm": 6.685769557952881, + "learning_rate": 9.332417582417583e-06, + "loss": 0.0742, + "step": 29606 + }, + { + "epoch": 81.33791208791209, + "grad_norm": 13.081544876098633, + "learning_rate": 9.331043956043957e-06, + "loss": 0.3208, + "step": 29607 + }, + { + "epoch": 81.34065934065934, + "grad_norm": 11.390356063842773, + "learning_rate": 9.32967032967033e-06, + "loss": 0.213, + "step": 29608 + }, + { + "epoch": 81.3434065934066, + "grad_norm": 3.998708963394165, + "learning_rate": 9.328296703296703e-06, + "loss": 0.0408, + "step": 29609 + }, + { + "epoch": 81.34615384615384, + "grad_norm": 22.167768478393555, + "learning_rate": 9.326923076923077e-06, + "loss": 0.6118, + "step": 29610 + }, + { + "epoch": 81.3489010989011, + "grad_norm": 7.329930305480957, + "learning_rate": 9.32554945054945e-06, + "loss": 0.1164, + "step": 29611 + }, + { + "epoch": 81.35164835164835, + "grad_norm": 16.609451293945312, + "learning_rate": 9.324175824175825e-06, + "loss": 0.2597, + "step": 29612 + }, + { + "epoch": 81.3543956043956, + "grad_norm": 18.570865631103516, + "learning_rate": 9.322802197802199e-06, + "loss": 0.199, + "step": 29613 + }, + { + "epoch": 81.35714285714286, + "grad_norm": 12.283082008361816, + "learning_rate": 9.32142857142857e-06, + "loss": 0.1913, + "step": 29614 + }, + { + "epoch": 81.35989010989012, + "grad_norm": 21.37423324584961, + "learning_rate": 9.320054945054946e-06, + "loss": 0.5789, + "step": 29615 + }, + { + "epoch": 81.36263736263736, + "grad_norm": 16.899293899536133, + "learning_rate": 9.318681318681319e-06, + "loss": 0.2055, + "step": 29616 + }, + { + "epoch": 81.36538461538461, + "grad_norm": 7.534045219421387, + "learning_rate": 9.317307692307692e-06, + "loss": 0.0822, + "step": 29617 + }, + { + "epoch": 81.36813186813187, + "grad_norm": 15.419196128845215, + "learning_rate": 9.315934065934066e-06, + "loss": 0.4096, + "step": 29618 + }, + { + "epoch": 81.37087912087912, + "grad_norm": 9.22896671295166, + "learning_rate": 9.31456043956044e-06, + "loss": 0.1653, + "step": 29619 + }, + { + "epoch": 81.37362637362638, + "grad_norm": 3.7526416778564453, + "learning_rate": 9.313186813186814e-06, + "loss": 0.0251, + "step": 29620 + }, + { + "epoch": 81.37637362637362, + "grad_norm": 11.807997703552246, + "learning_rate": 9.311813186813188e-06, + "loss": 0.2696, + "step": 29621 + }, + { + "epoch": 81.37912087912088, + "grad_norm": 3.989027738571167, + "learning_rate": 9.310439560439561e-06, + "loss": 0.069, + "step": 29622 + }, + { + "epoch": 81.38186813186813, + "grad_norm": 6.571648120880127, + "learning_rate": 9.309065934065935e-06, + "loss": 0.13, + "step": 29623 + }, + { + "epoch": 81.38461538461539, + "grad_norm": 15.604351997375488, + "learning_rate": 9.307692307692308e-06, + "loss": 0.2173, + "step": 29624 + }, + { + "epoch": 81.38736263736264, + "grad_norm": 13.34501838684082, + "learning_rate": 9.306318681318681e-06, + "loss": 0.1523, + "step": 29625 + }, + { + "epoch": 81.39010989010988, + "grad_norm": 14.920321464538574, + "learning_rate": 9.304945054945055e-06, + "loss": 0.1561, + "step": 29626 + }, + { + "epoch": 81.39285714285714, + "grad_norm": 9.791935920715332, + "learning_rate": 9.30357142857143e-06, + "loss": 0.132, + "step": 29627 + }, + { + "epoch": 81.3956043956044, + "grad_norm": 16.612668991088867, + "learning_rate": 9.302197802197803e-06, + "loss": 0.295, + "step": 29628 + }, + { + "epoch": 81.39835164835165, + "grad_norm": 19.31301498413086, + "learning_rate": 9.300824175824175e-06, + "loss": 0.7881, + "step": 29629 + }, + { + "epoch": 81.4010989010989, + "grad_norm": 5.294517993927002, + "learning_rate": 9.29945054945055e-06, + "loss": 0.0679, + "step": 29630 + }, + { + "epoch": 81.40384615384616, + "grad_norm": 9.1686372756958, + "learning_rate": 9.298076923076924e-06, + "loss": 0.1464, + "step": 29631 + }, + { + "epoch": 81.4065934065934, + "grad_norm": 10.024951934814453, + "learning_rate": 9.296703296703297e-06, + "loss": 0.2353, + "step": 29632 + }, + { + "epoch": 81.40934065934066, + "grad_norm": 1.6474553346633911, + "learning_rate": 9.29532967032967e-06, + "loss": 0.0279, + "step": 29633 + }, + { + "epoch": 81.41208791208791, + "grad_norm": 15.875367164611816, + "learning_rate": 9.293956043956044e-06, + "loss": 0.4004, + "step": 29634 + }, + { + "epoch": 81.41483516483517, + "grad_norm": 11.986905097961426, + "learning_rate": 9.292582417582419e-06, + "loss": 0.1571, + "step": 29635 + }, + { + "epoch": 81.41758241758242, + "grad_norm": 14.80717945098877, + "learning_rate": 9.291208791208792e-06, + "loss": 0.2646, + "step": 29636 + }, + { + "epoch": 81.42032967032966, + "grad_norm": 8.623741149902344, + "learning_rate": 9.289835164835166e-06, + "loss": 0.1621, + "step": 29637 + }, + { + "epoch": 81.42307692307692, + "grad_norm": 10.636881828308105, + "learning_rate": 9.288461538461539e-06, + "loss": 0.1694, + "step": 29638 + }, + { + "epoch": 81.42582417582418, + "grad_norm": 7.92732048034668, + "learning_rate": 9.287087912087912e-06, + "loss": 0.1549, + "step": 29639 + }, + { + "epoch": 81.42857142857143, + "grad_norm": 6.7358198165893555, + "learning_rate": 9.285714285714286e-06, + "loss": 0.0755, + "step": 29640 + }, + { + "epoch": 81.43131868131869, + "grad_norm": 9.731193542480469, + "learning_rate": 9.28434065934066e-06, + "loss": 0.1736, + "step": 29641 + }, + { + "epoch": 81.43406593406593, + "grad_norm": 9.390179634094238, + "learning_rate": 9.282967032967034e-06, + "loss": 0.2538, + "step": 29642 + }, + { + "epoch": 81.43681318681318, + "grad_norm": 5.743276119232178, + "learning_rate": 9.281593406593408e-06, + "loss": 0.1484, + "step": 29643 + }, + { + "epoch": 81.43956043956044, + "grad_norm": 15.30923843383789, + "learning_rate": 9.28021978021978e-06, + "loss": 0.4043, + "step": 29644 + }, + { + "epoch": 81.4423076923077, + "grad_norm": 18.25392723083496, + "learning_rate": 9.278846153846155e-06, + "loss": 0.3214, + "step": 29645 + }, + { + "epoch": 81.44505494505495, + "grad_norm": 14.626312255859375, + "learning_rate": 9.277472527472528e-06, + "loss": 0.3602, + "step": 29646 + }, + { + "epoch": 81.4478021978022, + "grad_norm": 5.347764492034912, + "learning_rate": 9.276098901098901e-06, + "loss": 0.0687, + "step": 29647 + }, + { + "epoch": 81.45054945054945, + "grad_norm": 14.469752311706543, + "learning_rate": 9.274725274725275e-06, + "loss": 0.4697, + "step": 29648 + }, + { + "epoch": 81.4532967032967, + "grad_norm": 17.06294822692871, + "learning_rate": 9.273351648351648e-06, + "loss": 0.3597, + "step": 29649 + }, + { + "epoch": 81.45604395604396, + "grad_norm": 8.329708099365234, + "learning_rate": 9.271978021978023e-06, + "loss": 0.1974, + "step": 29650 + }, + { + "epoch": 81.45879120879121, + "grad_norm": 8.198826789855957, + "learning_rate": 9.270604395604395e-06, + "loss": 0.0694, + "step": 29651 + }, + { + "epoch": 81.46153846153847, + "grad_norm": 12.050628662109375, + "learning_rate": 9.26923076923077e-06, + "loss": 0.2597, + "step": 29652 + }, + { + "epoch": 81.46428571428571, + "grad_norm": 20.16263198852539, + "learning_rate": 9.267857142857144e-06, + "loss": 0.3492, + "step": 29653 + }, + { + "epoch": 81.46703296703296, + "grad_norm": 14.861381530761719, + "learning_rate": 9.266483516483517e-06, + "loss": 0.0856, + "step": 29654 + }, + { + "epoch": 81.46978021978022, + "grad_norm": 10.625910758972168, + "learning_rate": 9.26510989010989e-06, + "loss": 0.2094, + "step": 29655 + }, + { + "epoch": 81.47252747252747, + "grad_norm": 10.646719932556152, + "learning_rate": 9.263736263736264e-06, + "loss": 0.2497, + "step": 29656 + }, + { + "epoch": 81.47527472527473, + "grad_norm": 14.803814888000488, + "learning_rate": 9.262362637362639e-06, + "loss": 0.3481, + "step": 29657 + }, + { + "epoch": 81.47802197802197, + "grad_norm": 11.172062873840332, + "learning_rate": 9.260989010989012e-06, + "loss": 0.1495, + "step": 29658 + }, + { + "epoch": 81.48076923076923, + "grad_norm": 14.388806343078613, + "learning_rate": 9.259615384615384e-06, + "loss": 0.2772, + "step": 29659 + }, + { + "epoch": 81.48351648351648, + "grad_norm": 3.7162740230560303, + "learning_rate": 9.258241758241759e-06, + "loss": 0.0425, + "step": 29660 + }, + { + "epoch": 81.48626373626374, + "grad_norm": 9.366735458374023, + "learning_rate": 9.256868131868132e-06, + "loss": 0.1075, + "step": 29661 + }, + { + "epoch": 81.48901098901099, + "grad_norm": 1.5865247249603271, + "learning_rate": 9.255494505494506e-06, + "loss": 0.0207, + "step": 29662 + }, + { + "epoch": 81.49175824175825, + "grad_norm": 10.043803215026855, + "learning_rate": 9.25412087912088e-06, + "loss": 0.2177, + "step": 29663 + }, + { + "epoch": 81.49450549450549, + "grad_norm": 5.623903751373291, + "learning_rate": 9.252747252747253e-06, + "loss": 0.0649, + "step": 29664 + }, + { + "epoch": 81.49725274725274, + "grad_norm": 2.657656192779541, + "learning_rate": 9.251373626373628e-06, + "loss": 0.0519, + "step": 29665 + }, + { + "epoch": 81.5, + "grad_norm": 3.9402294158935547, + "learning_rate": 9.25e-06, + "loss": 0.0423, + "step": 29666 + }, + { + "epoch": 81.50274725274726, + "grad_norm": 12.446121215820312, + "learning_rate": 9.248626373626373e-06, + "loss": 0.2316, + "step": 29667 + }, + { + "epoch": 81.50549450549451, + "grad_norm": 4.873765468597412, + "learning_rate": 9.247252747252748e-06, + "loss": 0.0533, + "step": 29668 + }, + { + "epoch": 81.50824175824175, + "grad_norm": 9.666868209838867, + "learning_rate": 9.245879120879121e-06, + "loss": 0.1977, + "step": 29669 + }, + { + "epoch": 81.51098901098901, + "grad_norm": 8.025799751281738, + "learning_rate": 9.244505494505495e-06, + "loss": 0.1283, + "step": 29670 + }, + { + "epoch": 81.51373626373626, + "grad_norm": 9.41679859161377, + "learning_rate": 9.243131868131868e-06, + "loss": 0.2025, + "step": 29671 + }, + { + "epoch": 81.51648351648352, + "grad_norm": 8.8191556930542, + "learning_rate": 9.241758241758242e-06, + "loss": 0.2072, + "step": 29672 + }, + { + "epoch": 81.51923076923077, + "grad_norm": 9.751553535461426, + "learning_rate": 9.240384615384617e-06, + "loss": 0.1733, + "step": 29673 + }, + { + "epoch": 81.52197802197803, + "grad_norm": 8.154651641845703, + "learning_rate": 9.239010989010988e-06, + "loss": 0.0754, + "step": 29674 + }, + { + "epoch": 81.52472527472527, + "grad_norm": 7.3186845779418945, + "learning_rate": 9.237637362637364e-06, + "loss": 0.1352, + "step": 29675 + }, + { + "epoch": 81.52747252747253, + "grad_norm": 10.683660507202148, + "learning_rate": 9.236263736263737e-06, + "loss": 0.2423, + "step": 29676 + }, + { + "epoch": 81.53021978021978, + "grad_norm": 13.196404457092285, + "learning_rate": 9.23489010989011e-06, + "loss": 0.2229, + "step": 29677 + }, + { + "epoch": 81.53296703296704, + "grad_norm": 5.500785827636719, + "learning_rate": 9.233516483516484e-06, + "loss": 0.0617, + "step": 29678 + }, + { + "epoch": 81.53571428571429, + "grad_norm": 10.983386039733887, + "learning_rate": 9.232142857142857e-06, + "loss": 0.1091, + "step": 29679 + }, + { + "epoch": 81.53846153846153, + "grad_norm": 6.510824203491211, + "learning_rate": 9.230769230769232e-06, + "loss": 0.1572, + "step": 29680 + }, + { + "epoch": 81.54120879120879, + "grad_norm": 14.586164474487305, + "learning_rate": 9.229395604395604e-06, + "loss": 0.3055, + "step": 29681 + }, + { + "epoch": 81.54395604395604, + "grad_norm": 6.242335796356201, + "learning_rate": 9.228021978021977e-06, + "loss": 0.0906, + "step": 29682 + }, + { + "epoch": 81.5467032967033, + "grad_norm": 12.392960548400879, + "learning_rate": 9.226648351648353e-06, + "loss": 0.2306, + "step": 29683 + }, + { + "epoch": 81.54945054945055, + "grad_norm": 8.408507347106934, + "learning_rate": 9.225274725274726e-06, + "loss": 0.1296, + "step": 29684 + }, + { + "epoch": 81.5521978021978, + "grad_norm": 9.322798728942871, + "learning_rate": 9.2239010989011e-06, + "loss": 0.2316, + "step": 29685 + }, + { + "epoch": 81.55494505494505, + "grad_norm": 12.42827033996582, + "learning_rate": 9.222527472527473e-06, + "loss": 0.5641, + "step": 29686 + }, + { + "epoch": 81.5576923076923, + "grad_norm": 9.132275581359863, + "learning_rate": 9.221153846153846e-06, + "loss": 0.1697, + "step": 29687 + }, + { + "epoch": 81.56043956043956, + "grad_norm": 12.021127700805664, + "learning_rate": 9.219780219780221e-06, + "loss": 0.2984, + "step": 29688 + }, + { + "epoch": 81.56318681318682, + "grad_norm": 21.30250358581543, + "learning_rate": 9.218406593406593e-06, + "loss": 0.4127, + "step": 29689 + }, + { + "epoch": 81.56593406593407, + "grad_norm": 18.909971237182617, + "learning_rate": 9.217032967032968e-06, + "loss": 0.2257, + "step": 29690 + }, + { + "epoch": 81.56868131868131, + "grad_norm": 7.3842692375183105, + "learning_rate": 9.215659340659341e-06, + "loss": 0.2216, + "step": 29691 + }, + { + "epoch": 81.57142857142857, + "grad_norm": 12.26123046875, + "learning_rate": 9.214285714285715e-06, + "loss": 0.2402, + "step": 29692 + }, + { + "epoch": 81.57417582417582, + "grad_norm": 10.15934944152832, + "learning_rate": 9.212912087912088e-06, + "loss": 0.2636, + "step": 29693 + }, + { + "epoch": 81.57692307692308, + "grad_norm": 20.045392990112305, + "learning_rate": 9.211538461538462e-06, + "loss": 0.5706, + "step": 29694 + }, + { + "epoch": 81.57967032967034, + "grad_norm": 4.595564842224121, + "learning_rate": 9.210164835164837e-06, + "loss": 0.0687, + "step": 29695 + }, + { + "epoch": 81.58241758241758, + "grad_norm": 35.916534423828125, + "learning_rate": 9.208791208791209e-06, + "loss": 0.7982, + "step": 29696 + }, + { + "epoch": 81.58516483516483, + "grad_norm": 12.146058082580566, + "learning_rate": 9.207417582417582e-06, + "loss": 0.3788, + "step": 29697 + }, + { + "epoch": 81.58791208791209, + "grad_norm": 8.242033958435059, + "learning_rate": 9.206043956043957e-06, + "loss": 0.0678, + "step": 29698 + }, + { + "epoch": 81.59065934065934, + "grad_norm": 3.176490068435669, + "learning_rate": 9.20467032967033e-06, + "loss": 0.0385, + "step": 29699 + }, + { + "epoch": 81.5934065934066, + "grad_norm": 4.567854404449463, + "learning_rate": 9.203296703296704e-06, + "loss": 0.0706, + "step": 29700 + }, + { + "epoch": 81.59615384615384, + "grad_norm": 11.976652145385742, + "learning_rate": 9.201923076923077e-06, + "loss": 0.2698, + "step": 29701 + }, + { + "epoch": 81.5989010989011, + "grad_norm": 25.0039005279541, + "learning_rate": 9.20054945054945e-06, + "loss": 0.4279, + "step": 29702 + }, + { + "epoch": 81.60164835164835, + "grad_norm": 28.99030876159668, + "learning_rate": 9.199175824175826e-06, + "loss": 0.7135, + "step": 29703 + }, + { + "epoch": 81.6043956043956, + "grad_norm": 15.36738109588623, + "learning_rate": 9.197802197802197e-06, + "loss": 0.4057, + "step": 29704 + }, + { + "epoch": 81.60714285714286, + "grad_norm": 8.464847564697266, + "learning_rate": 9.196428571428573e-06, + "loss": 0.1191, + "step": 29705 + }, + { + "epoch": 81.60989010989012, + "grad_norm": 15.307613372802734, + "learning_rate": 9.195054945054946e-06, + "loss": 0.2023, + "step": 29706 + }, + { + "epoch": 81.61263736263736, + "grad_norm": 12.922510147094727, + "learning_rate": 9.19368131868132e-06, + "loss": 0.2072, + "step": 29707 + }, + { + "epoch": 81.61538461538461, + "grad_norm": 10.29030704498291, + "learning_rate": 9.192307692307693e-06, + "loss": 0.1819, + "step": 29708 + }, + { + "epoch": 81.61813186813187, + "grad_norm": 5.7027177810668945, + "learning_rate": 9.190934065934066e-06, + "loss": 0.0798, + "step": 29709 + }, + { + "epoch": 81.62087912087912, + "grad_norm": 11.192991256713867, + "learning_rate": 9.189560439560441e-06, + "loss": 0.1946, + "step": 29710 + }, + { + "epoch": 81.62362637362638, + "grad_norm": 1.517898440361023, + "learning_rate": 9.188186813186813e-06, + "loss": 0.0166, + "step": 29711 + }, + { + "epoch": 81.62637362637362, + "grad_norm": 3.1364715099334717, + "learning_rate": 9.186813186813186e-06, + "loss": 0.0241, + "step": 29712 + }, + { + "epoch": 81.62912087912088, + "grad_norm": 7.309556484222412, + "learning_rate": 9.185439560439562e-06, + "loss": 0.1399, + "step": 29713 + }, + { + "epoch": 81.63186813186813, + "grad_norm": 11.581066131591797, + "learning_rate": 9.184065934065935e-06, + "loss": 0.1046, + "step": 29714 + }, + { + "epoch": 81.63461538461539, + "grad_norm": 10.042021751403809, + "learning_rate": 9.182692307692308e-06, + "loss": 0.117, + "step": 29715 + }, + { + "epoch": 81.63736263736264, + "grad_norm": 9.437715530395508, + "learning_rate": 9.181318681318682e-06, + "loss": 0.1194, + "step": 29716 + }, + { + "epoch": 81.64010989010988, + "grad_norm": 3.790153980255127, + "learning_rate": 9.179945054945055e-06, + "loss": 0.0856, + "step": 29717 + }, + { + "epoch": 81.64285714285714, + "grad_norm": 18.17926597595215, + "learning_rate": 9.17857142857143e-06, + "loss": 0.4866, + "step": 29718 + }, + { + "epoch": 81.6456043956044, + "grad_norm": 5.882901668548584, + "learning_rate": 9.177197802197802e-06, + "loss": 0.155, + "step": 29719 + }, + { + "epoch": 81.64835164835165, + "grad_norm": 21.985536575317383, + "learning_rate": 9.175824175824175e-06, + "loss": 0.4874, + "step": 29720 + }, + { + "epoch": 81.6510989010989, + "grad_norm": 8.43673038482666, + "learning_rate": 9.17445054945055e-06, + "loss": 0.1544, + "step": 29721 + }, + { + "epoch": 81.65384615384616, + "grad_norm": 5.8228044509887695, + "learning_rate": 9.173076923076924e-06, + "loss": 0.0916, + "step": 29722 + }, + { + "epoch": 81.6565934065934, + "grad_norm": 16.70033836364746, + "learning_rate": 9.171703296703297e-06, + "loss": 0.4465, + "step": 29723 + }, + { + "epoch": 81.65934065934066, + "grad_norm": 10.680474281311035, + "learning_rate": 9.17032967032967e-06, + "loss": 0.2104, + "step": 29724 + }, + { + "epoch": 81.66208791208791, + "grad_norm": 11.338929176330566, + "learning_rate": 9.168956043956044e-06, + "loss": 0.1483, + "step": 29725 + }, + { + "epoch": 81.66483516483517, + "grad_norm": 6.650384426116943, + "learning_rate": 9.167582417582417e-06, + "loss": 0.0939, + "step": 29726 + }, + { + "epoch": 81.66758241758242, + "grad_norm": 15.439958572387695, + "learning_rate": 9.166208791208791e-06, + "loss": 0.2707, + "step": 29727 + }, + { + "epoch": 81.67032967032966, + "grad_norm": 15.8975830078125, + "learning_rate": 9.164835164835166e-06, + "loss": 0.2896, + "step": 29728 + }, + { + "epoch": 81.67307692307692, + "grad_norm": 1.5773977041244507, + "learning_rate": 9.16346153846154e-06, + "loss": 0.0247, + "step": 29729 + }, + { + "epoch": 81.67582417582418, + "grad_norm": 32.09116744995117, + "learning_rate": 9.162087912087911e-06, + "loss": 0.3234, + "step": 29730 + }, + { + "epoch": 81.67857142857143, + "grad_norm": 7.277284145355225, + "learning_rate": 9.160714285714286e-06, + "loss": 0.14, + "step": 29731 + }, + { + "epoch": 81.68131868131869, + "grad_norm": 15.601374626159668, + "learning_rate": 9.15934065934066e-06, + "loss": 0.1836, + "step": 29732 + }, + { + "epoch": 81.68406593406593, + "grad_norm": 3.5187206268310547, + "learning_rate": 9.157967032967035e-06, + "loss": 0.0398, + "step": 29733 + }, + { + "epoch": 81.68681318681318, + "grad_norm": 12.676444053649902, + "learning_rate": 9.156593406593406e-06, + "loss": 0.3062, + "step": 29734 + }, + { + "epoch": 81.68956043956044, + "grad_norm": 8.102384567260742, + "learning_rate": 9.15521978021978e-06, + "loss": 0.12, + "step": 29735 + }, + { + "epoch": 81.6923076923077, + "grad_norm": 15.174544334411621, + "learning_rate": 9.153846153846155e-06, + "loss": 0.2713, + "step": 29736 + }, + { + "epoch": 81.69505494505495, + "grad_norm": 3.373382091522217, + "learning_rate": 9.152472527472528e-06, + "loss": 0.0257, + "step": 29737 + }, + { + "epoch": 81.6978021978022, + "grad_norm": 9.933296203613281, + "learning_rate": 9.151098901098902e-06, + "loss": 0.0889, + "step": 29738 + }, + { + "epoch": 81.70054945054945, + "grad_norm": 19.484121322631836, + "learning_rate": 9.149725274725275e-06, + "loss": 0.3494, + "step": 29739 + }, + { + "epoch": 81.7032967032967, + "grad_norm": 17.560190200805664, + "learning_rate": 9.148351648351649e-06, + "loss": 0.5396, + "step": 29740 + }, + { + "epoch": 81.70604395604396, + "grad_norm": 10.454878807067871, + "learning_rate": 9.146978021978022e-06, + "loss": 0.1929, + "step": 29741 + }, + { + "epoch": 81.70879120879121, + "grad_norm": 6.930241107940674, + "learning_rate": 9.145604395604395e-06, + "loss": 0.107, + "step": 29742 + }, + { + "epoch": 81.71153846153847, + "grad_norm": 7.3662824630737305, + "learning_rate": 9.14423076923077e-06, + "loss": 0.1563, + "step": 29743 + }, + { + "epoch": 81.71428571428571, + "grad_norm": 14.282007217407227, + "learning_rate": 9.142857142857144e-06, + "loss": 0.2066, + "step": 29744 + }, + { + "epoch": 81.71703296703296, + "grad_norm": 9.916183471679688, + "learning_rate": 9.141483516483516e-06, + "loss": 0.27, + "step": 29745 + }, + { + "epoch": 81.71978021978022, + "grad_norm": 8.140249252319336, + "learning_rate": 9.14010989010989e-06, + "loss": 0.1891, + "step": 29746 + }, + { + "epoch": 81.72252747252747, + "grad_norm": 25.50809097290039, + "learning_rate": 9.138736263736264e-06, + "loss": 0.7486, + "step": 29747 + }, + { + "epoch": 81.72527472527473, + "grad_norm": 6.841641426086426, + "learning_rate": 9.137362637362638e-06, + "loss": 0.1185, + "step": 29748 + }, + { + "epoch": 81.72802197802197, + "grad_norm": 8.68935489654541, + "learning_rate": 9.135989010989011e-06, + "loss": 0.0946, + "step": 29749 + }, + { + "epoch": 81.73076923076923, + "grad_norm": 13.396393775939941, + "learning_rate": 9.134615384615384e-06, + "loss": 0.1529, + "step": 29750 + }, + { + "epoch": 81.73351648351648, + "grad_norm": 5.5937089920043945, + "learning_rate": 9.13324175824176e-06, + "loss": 0.0934, + "step": 29751 + }, + { + "epoch": 81.73626373626374, + "grad_norm": 8.904897689819336, + "learning_rate": 9.131868131868133e-06, + "loss": 0.0984, + "step": 29752 + }, + { + "epoch": 81.73901098901099, + "grad_norm": 18.737546920776367, + "learning_rate": 9.130494505494506e-06, + "loss": 0.3273, + "step": 29753 + }, + { + "epoch": 81.74175824175825, + "grad_norm": 8.840217590332031, + "learning_rate": 9.12912087912088e-06, + "loss": 0.209, + "step": 29754 + }, + { + "epoch": 81.74450549450549, + "grad_norm": 5.662876129150391, + "learning_rate": 9.127747252747253e-06, + "loss": 0.0761, + "step": 29755 + }, + { + "epoch": 81.74725274725274, + "grad_norm": 21.56210708618164, + "learning_rate": 9.126373626373626e-06, + "loss": 0.5642, + "step": 29756 + }, + { + "epoch": 81.75, + "grad_norm": 12.442275047302246, + "learning_rate": 9.125e-06, + "loss": 0.468, + "step": 29757 + }, + { + "epoch": 81.75274725274726, + "grad_norm": 17.526132583618164, + "learning_rate": 9.123626373626375e-06, + "loss": 0.3104, + "step": 29758 + }, + { + "epoch": 81.75549450549451, + "grad_norm": 28.221973419189453, + "learning_rate": 9.122252747252748e-06, + "loss": 0.7653, + "step": 29759 + }, + { + "epoch": 81.75824175824175, + "grad_norm": 7.2147216796875, + "learning_rate": 9.12087912087912e-06, + "loss": 0.114, + "step": 29760 + }, + { + "epoch": 81.76098901098901, + "grad_norm": 9.081796646118164, + "learning_rate": 9.119505494505495e-06, + "loss": 0.1041, + "step": 29761 + }, + { + "epoch": 81.76373626373626, + "grad_norm": 22.833637237548828, + "learning_rate": 9.118131868131869e-06, + "loss": 0.7915, + "step": 29762 + }, + { + "epoch": 81.76648351648352, + "grad_norm": 14.471006393432617, + "learning_rate": 9.116758241758242e-06, + "loss": 0.2141, + "step": 29763 + }, + { + "epoch": 81.76923076923077, + "grad_norm": 8.189794540405273, + "learning_rate": 9.115384615384615e-06, + "loss": 0.1057, + "step": 29764 + }, + { + "epoch": 81.77197802197803, + "grad_norm": 9.375631332397461, + "learning_rate": 9.114010989010989e-06, + "loss": 0.2012, + "step": 29765 + }, + { + "epoch": 81.77472527472527, + "grad_norm": 4.022426128387451, + "learning_rate": 9.112637362637364e-06, + "loss": 0.0634, + "step": 29766 + }, + { + "epoch": 81.77747252747253, + "grad_norm": 6.385841369628906, + "learning_rate": 9.111263736263737e-06, + "loss": 0.1287, + "step": 29767 + }, + { + "epoch": 81.78021978021978, + "grad_norm": 27.32982635498047, + "learning_rate": 9.10989010989011e-06, + "loss": 0.9544, + "step": 29768 + }, + { + "epoch": 81.78296703296704, + "grad_norm": 8.167019844055176, + "learning_rate": 9.108516483516484e-06, + "loss": 0.1238, + "step": 29769 + }, + { + "epoch": 81.78571428571429, + "grad_norm": 9.014791488647461, + "learning_rate": 9.107142857142858e-06, + "loss": 0.1551, + "step": 29770 + }, + { + "epoch": 81.78846153846153, + "grad_norm": 4.210240840911865, + "learning_rate": 9.105769230769231e-06, + "loss": 0.0459, + "step": 29771 + }, + { + "epoch": 81.79120879120879, + "grad_norm": 12.794259071350098, + "learning_rate": 9.104395604395604e-06, + "loss": 0.3712, + "step": 29772 + }, + { + "epoch": 81.79395604395604, + "grad_norm": 25.17439842224121, + "learning_rate": 9.10302197802198e-06, + "loss": 0.5259, + "step": 29773 + }, + { + "epoch": 81.7967032967033, + "grad_norm": 10.618755340576172, + "learning_rate": 9.101648351648353e-06, + "loss": 0.096, + "step": 29774 + }, + { + "epoch": 81.79945054945055, + "grad_norm": 1.5502750873565674, + "learning_rate": 9.100274725274725e-06, + "loss": 0.0162, + "step": 29775 + }, + { + "epoch": 81.8021978021978, + "grad_norm": 21.272537231445312, + "learning_rate": 9.0989010989011e-06, + "loss": 0.5319, + "step": 29776 + }, + { + "epoch": 81.80494505494505, + "grad_norm": 7.027003765106201, + "learning_rate": 9.097527472527473e-06, + "loss": 0.149, + "step": 29777 + }, + { + "epoch": 81.8076923076923, + "grad_norm": 12.454917907714844, + "learning_rate": 9.096153846153847e-06, + "loss": 0.2507, + "step": 29778 + }, + { + "epoch": 81.81043956043956, + "grad_norm": 1.4255303144454956, + "learning_rate": 9.09478021978022e-06, + "loss": 0.0214, + "step": 29779 + }, + { + "epoch": 81.81318681318682, + "grad_norm": 15.46281909942627, + "learning_rate": 9.093406593406593e-06, + "loss": 0.3286, + "step": 29780 + }, + { + "epoch": 81.81593406593407, + "grad_norm": 10.912935256958008, + "learning_rate": 9.092032967032968e-06, + "loss": 0.1718, + "step": 29781 + }, + { + "epoch": 81.81868131868131, + "grad_norm": 15.859305381774902, + "learning_rate": 9.090659340659342e-06, + "loss": 0.5043, + "step": 29782 + }, + { + "epoch": 81.82142857142857, + "grad_norm": 5.477272033691406, + "learning_rate": 9.089285714285714e-06, + "loss": 0.0647, + "step": 29783 + }, + { + "epoch": 81.82417582417582, + "grad_norm": 10.083358764648438, + "learning_rate": 9.087912087912089e-06, + "loss": 0.1495, + "step": 29784 + }, + { + "epoch": 81.82692307692308, + "grad_norm": 18.280649185180664, + "learning_rate": 9.086538461538462e-06, + "loss": 0.3278, + "step": 29785 + }, + { + "epoch": 81.82967032967034, + "grad_norm": 5.766756534576416, + "learning_rate": 9.085164835164835e-06, + "loss": 0.0889, + "step": 29786 + }, + { + "epoch": 81.83241758241758, + "grad_norm": 12.28933334350586, + "learning_rate": 9.083791208791209e-06, + "loss": 0.1608, + "step": 29787 + }, + { + "epoch": 81.83516483516483, + "grad_norm": 14.68857479095459, + "learning_rate": 9.082417582417582e-06, + "loss": 0.3991, + "step": 29788 + }, + { + "epoch": 81.83791208791209, + "grad_norm": 11.485030174255371, + "learning_rate": 9.081043956043957e-06, + "loss": 0.2029, + "step": 29789 + }, + { + "epoch": 81.84065934065934, + "grad_norm": 13.92287826538086, + "learning_rate": 9.079670329670329e-06, + "loss": 0.2339, + "step": 29790 + }, + { + "epoch": 81.8434065934066, + "grad_norm": 26.605995178222656, + "learning_rate": 9.078296703296704e-06, + "loss": 0.55, + "step": 29791 + }, + { + "epoch": 81.84615384615384, + "grad_norm": 15.486800193786621, + "learning_rate": 9.076923076923078e-06, + "loss": 0.33, + "step": 29792 + }, + { + "epoch": 81.8489010989011, + "grad_norm": 5.415383338928223, + "learning_rate": 9.075549450549451e-06, + "loss": 0.0709, + "step": 29793 + }, + { + "epoch": 81.85164835164835, + "grad_norm": 21.461490631103516, + "learning_rate": 9.074175824175824e-06, + "loss": 0.4188, + "step": 29794 + }, + { + "epoch": 81.8543956043956, + "grad_norm": 6.13110876083374, + "learning_rate": 9.072802197802198e-06, + "loss": 0.0536, + "step": 29795 + }, + { + "epoch": 81.85714285714286, + "grad_norm": 10.794280052185059, + "learning_rate": 9.071428571428573e-06, + "loss": 0.2662, + "step": 29796 + }, + { + "epoch": 81.85989010989012, + "grad_norm": 4.11927604675293, + "learning_rate": 9.070054945054945e-06, + "loss": 0.0634, + "step": 29797 + }, + { + "epoch": 81.86263736263736, + "grad_norm": 12.31330680847168, + "learning_rate": 9.068681318681318e-06, + "loss": 0.1813, + "step": 29798 + }, + { + "epoch": 81.86538461538461, + "grad_norm": 3.9988083839416504, + "learning_rate": 9.067307692307693e-06, + "loss": 0.0636, + "step": 29799 + }, + { + "epoch": 81.86813186813187, + "grad_norm": 2.077177047729492, + "learning_rate": 9.065934065934067e-06, + "loss": 0.0211, + "step": 29800 + }, + { + "epoch": 81.87087912087912, + "grad_norm": 17.222990036010742, + "learning_rate": 9.06456043956044e-06, + "loss": 0.2828, + "step": 29801 + }, + { + "epoch": 81.87362637362638, + "grad_norm": 4.71472692489624, + "learning_rate": 9.063186813186813e-06, + "loss": 0.0626, + "step": 29802 + }, + { + "epoch": 81.87637362637362, + "grad_norm": 12.453418731689453, + "learning_rate": 9.061813186813187e-06, + "loss": 0.2213, + "step": 29803 + }, + { + "epoch": 81.87912087912088, + "grad_norm": 11.494091987609863, + "learning_rate": 9.060439560439562e-06, + "loss": 0.1211, + "step": 29804 + }, + { + "epoch": 81.88186813186813, + "grad_norm": 7.91851282119751, + "learning_rate": 9.059065934065934e-06, + "loss": 0.0737, + "step": 29805 + }, + { + "epoch": 81.88461538461539, + "grad_norm": 10.524758338928223, + "learning_rate": 9.057692307692309e-06, + "loss": 0.2832, + "step": 29806 + }, + { + "epoch": 81.88736263736264, + "grad_norm": 16.728622436523438, + "learning_rate": 9.056318681318682e-06, + "loss": 0.3863, + "step": 29807 + }, + { + "epoch": 81.89010989010988, + "grad_norm": 6.0505290031433105, + "learning_rate": 9.054945054945055e-06, + "loss": 0.0614, + "step": 29808 + }, + { + "epoch": 81.89285714285714, + "grad_norm": 14.349604606628418, + "learning_rate": 9.053571428571429e-06, + "loss": 0.3367, + "step": 29809 + }, + { + "epoch": 81.8956043956044, + "grad_norm": 7.390890598297119, + "learning_rate": 9.052197802197802e-06, + "loss": 0.1443, + "step": 29810 + }, + { + "epoch": 81.89835164835165, + "grad_norm": 22.238378524780273, + "learning_rate": 9.050824175824177e-06, + "loss": 0.5003, + "step": 29811 + }, + { + "epoch": 81.9010989010989, + "grad_norm": 21.675729751586914, + "learning_rate": 9.049450549450549e-06, + "loss": 0.4179, + "step": 29812 + }, + { + "epoch": 81.90384615384616, + "grad_norm": 20.755844116210938, + "learning_rate": 9.048076923076923e-06, + "loss": 0.3632, + "step": 29813 + }, + { + "epoch": 81.9065934065934, + "grad_norm": 11.39157772064209, + "learning_rate": 9.046703296703298e-06, + "loss": 0.1099, + "step": 29814 + }, + { + "epoch": 81.90934065934066, + "grad_norm": 5.830648422241211, + "learning_rate": 9.045329670329671e-06, + "loss": 0.0469, + "step": 29815 + }, + { + "epoch": 81.91208791208791, + "grad_norm": 16.461286544799805, + "learning_rate": 9.043956043956044e-06, + "loss": 0.3289, + "step": 29816 + }, + { + "epoch": 81.91483516483517, + "grad_norm": 23.540870666503906, + "learning_rate": 9.042582417582418e-06, + "loss": 0.488, + "step": 29817 + }, + { + "epoch": 81.91758241758242, + "grad_norm": 10.45661449432373, + "learning_rate": 9.041208791208791e-06, + "loss": 0.2349, + "step": 29818 + }, + { + "epoch": 81.92032967032966, + "grad_norm": 11.588102340698242, + "learning_rate": 9.039835164835166e-06, + "loss": 0.2266, + "step": 29819 + }, + { + "epoch": 81.92307692307692, + "grad_norm": 8.523324012756348, + "learning_rate": 9.038461538461538e-06, + "loss": 0.1236, + "step": 29820 + }, + { + "epoch": 81.92582417582418, + "grad_norm": 10.303604125976562, + "learning_rate": 9.037087912087913e-06, + "loss": 0.1433, + "step": 29821 + }, + { + "epoch": 81.92857142857143, + "grad_norm": 14.89597225189209, + "learning_rate": 9.035714285714287e-06, + "loss": 0.189, + "step": 29822 + }, + { + "epoch": 81.93131868131869, + "grad_norm": 9.82104206085205, + "learning_rate": 9.03434065934066e-06, + "loss": 0.1495, + "step": 29823 + }, + { + "epoch": 81.93406593406593, + "grad_norm": 2.345597267150879, + "learning_rate": 9.032967032967033e-06, + "loss": 0.0387, + "step": 29824 + }, + { + "epoch": 81.93681318681318, + "grad_norm": 6.305716037750244, + "learning_rate": 9.031593406593407e-06, + "loss": 0.0595, + "step": 29825 + }, + { + "epoch": 81.93956043956044, + "grad_norm": 5.532804489135742, + "learning_rate": 9.030219780219782e-06, + "loss": 0.133, + "step": 29826 + }, + { + "epoch": 81.9423076923077, + "grad_norm": 12.860971450805664, + "learning_rate": 9.028846153846154e-06, + "loss": 0.1953, + "step": 29827 + }, + { + "epoch": 81.94505494505495, + "grad_norm": 2.514122247695923, + "learning_rate": 9.027472527472527e-06, + "loss": 0.034, + "step": 29828 + }, + { + "epoch": 81.9478021978022, + "grad_norm": 3.9507710933685303, + "learning_rate": 9.026098901098902e-06, + "loss": 0.0552, + "step": 29829 + }, + { + "epoch": 81.95054945054945, + "grad_norm": 11.536864280700684, + "learning_rate": 9.024725274725276e-06, + "loss": 0.1606, + "step": 29830 + }, + { + "epoch": 81.9532967032967, + "grad_norm": 21.479299545288086, + "learning_rate": 9.023351648351649e-06, + "loss": 0.6704, + "step": 29831 + }, + { + "epoch": 81.95604395604396, + "grad_norm": 20.955175399780273, + "learning_rate": 9.021978021978022e-06, + "loss": 0.5763, + "step": 29832 + }, + { + "epoch": 81.95879120879121, + "grad_norm": 5.489163398742676, + "learning_rate": 9.020604395604396e-06, + "loss": 0.0553, + "step": 29833 + }, + { + "epoch": 81.96153846153847, + "grad_norm": 1.9294767379760742, + "learning_rate": 9.01923076923077e-06, + "loss": 0.0186, + "step": 29834 + }, + { + "epoch": 81.96428571428571, + "grad_norm": 10.700244903564453, + "learning_rate": 9.017857142857143e-06, + "loss": 0.1113, + "step": 29835 + }, + { + "epoch": 81.96703296703296, + "grad_norm": 21.65104103088379, + "learning_rate": 9.016483516483516e-06, + "loss": 0.551, + "step": 29836 + }, + { + "epoch": 81.96978021978022, + "grad_norm": 10.488011360168457, + "learning_rate": 9.015109890109891e-06, + "loss": 0.1648, + "step": 29837 + }, + { + "epoch": 81.97252747252747, + "grad_norm": 4.991575241088867, + "learning_rate": 9.013736263736264e-06, + "loss": 0.0968, + "step": 29838 + }, + { + "epoch": 81.97527472527473, + "grad_norm": 11.999127388000488, + "learning_rate": 9.012362637362638e-06, + "loss": 0.1067, + "step": 29839 + }, + { + "epoch": 81.97802197802197, + "grad_norm": 2.140695571899414, + "learning_rate": 9.010989010989011e-06, + "loss": 0.0298, + "step": 29840 + }, + { + "epoch": 81.98076923076923, + "grad_norm": 18.094966888427734, + "learning_rate": 9.009615384615385e-06, + "loss": 0.4991, + "step": 29841 + }, + { + "epoch": 81.98351648351648, + "grad_norm": 13.476950645446777, + "learning_rate": 9.008241758241758e-06, + "loss": 0.3173, + "step": 29842 + }, + { + "epoch": 81.98626373626374, + "grad_norm": 9.664420127868652, + "learning_rate": 9.006868131868131e-06, + "loss": 0.1451, + "step": 29843 + }, + { + "epoch": 81.98901098901099, + "grad_norm": 7.862970352172852, + "learning_rate": 9.005494505494507e-06, + "loss": 0.0532, + "step": 29844 + }, + { + "epoch": 81.99175824175825, + "grad_norm": 10.26554012298584, + "learning_rate": 9.00412087912088e-06, + "loss": 0.0893, + "step": 29845 + }, + { + "epoch": 81.99450549450549, + "grad_norm": 6.317856311798096, + "learning_rate": 9.002747252747252e-06, + "loss": 0.0782, + "step": 29846 + }, + { + "epoch": 81.99725274725274, + "grad_norm": 6.124960899353027, + "learning_rate": 9.001373626373627e-06, + "loss": 0.0456, + "step": 29847 + }, + { + "epoch": 82.0, + "grad_norm": 15.216706275939941, + "learning_rate": 9e-06, + "loss": 0.1106, + "step": 29848 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.8168044077134986, + "eval_f1": 0.8178866453842571, + "eval_f1_DuraRiadoRio_64x64": 0.7467811158798283, + "eval_f1_Mole_64x64": 0.8343558282208589, + "eval_f1_Quebrado_64x64": 0.8368794326241135, + "eval_f1_RiadoRio_64x64": 0.7108433734939759, + "eval_f1_RioFechado_64x64": 0.9605734767025089, + "eval_loss": 0.6860358119010925, + "eval_precision": 0.8427022092490162, + "eval_precision_DuraRiadoRio_64x64": 0.9775280898876404, + "eval_precision_Mole_64x64": 0.7472527472527473, + "eval_precision_Quebrado_64x64": 0.855072463768116, + "eval_precision_RiadoRio_64x64": 0.6555555555555556, + "eval_precision_RioFechado_64x64": 0.9781021897810219, + "eval_recall": 0.8176066633720451, + "eval_recall_DuraRiadoRio_64x64": 0.6041666666666666, + "eval_recall_Mole_64x64": 0.9444444444444444, + "eval_recall_Quebrado_64x64": 0.8194444444444444, + "eval_recall_RiadoRio_64x64": 0.7763157894736842, + "eval_recall_RioFechado_64x64": 0.9436619718309859, + "eval_runtime": 1.8604, + "eval_samples_per_second": 390.245, + "eval_steps_per_second": 24.726, + "step": 29848 + }, + { + "epoch": 82.00274725274726, + "grad_norm": 15.009257316589355, + "learning_rate": 8.998626373626375e-06, + "loss": 0.2204, + "step": 29849 + }, + { + "epoch": 82.00549450549451, + "grad_norm": 25.085491180419922, + "learning_rate": 8.997252747252747e-06, + "loss": 0.4, + "step": 29850 + }, + { + "epoch": 82.00824175824175, + "grad_norm": 5.601482391357422, + "learning_rate": 8.99587912087912e-06, + "loss": 0.0516, + "step": 29851 + }, + { + "epoch": 82.01098901098901, + "grad_norm": 9.331530570983887, + "learning_rate": 8.994505494505496e-06, + "loss": 0.1313, + "step": 29852 + }, + { + "epoch": 82.01373626373626, + "grad_norm": 3.1074297428131104, + "learning_rate": 8.993131868131869e-06, + "loss": 0.045, + "step": 29853 + }, + { + "epoch": 82.01648351648352, + "grad_norm": 7.442678928375244, + "learning_rate": 8.991758241758242e-06, + "loss": 0.084, + "step": 29854 + }, + { + "epoch": 82.01923076923077, + "grad_norm": 17.223440170288086, + "learning_rate": 8.990384615384616e-06, + "loss": 0.4362, + "step": 29855 + }, + { + "epoch": 82.02197802197803, + "grad_norm": 5.460489273071289, + "learning_rate": 8.98901098901099e-06, + "loss": 0.0596, + "step": 29856 + }, + { + "epoch": 82.02472527472527, + "grad_norm": 5.422791481018066, + "learning_rate": 8.987637362637363e-06, + "loss": 0.0314, + "step": 29857 + }, + { + "epoch": 82.02747252747253, + "grad_norm": 3.192227363586426, + "learning_rate": 8.986263736263736e-06, + "loss": 0.0539, + "step": 29858 + }, + { + "epoch": 82.03021978021978, + "grad_norm": 10.062862396240234, + "learning_rate": 8.984890109890111e-06, + "loss": 0.2206, + "step": 29859 + }, + { + "epoch": 82.03296703296704, + "grad_norm": 17.295663833618164, + "learning_rate": 8.983516483516484e-06, + "loss": 0.4311, + "step": 29860 + }, + { + "epoch": 82.03571428571429, + "grad_norm": 12.287435531616211, + "learning_rate": 8.982142857142856e-06, + "loss": 0.2866, + "step": 29861 + }, + { + "epoch": 82.03846153846153, + "grad_norm": 5.152690410614014, + "learning_rate": 8.980769230769231e-06, + "loss": 0.072, + "step": 29862 + }, + { + "epoch": 82.04120879120879, + "grad_norm": 14.451058387756348, + "learning_rate": 8.979395604395605e-06, + "loss": 0.253, + "step": 29863 + }, + { + "epoch": 82.04395604395604, + "grad_norm": 1.879493236541748, + "learning_rate": 8.97802197802198e-06, + "loss": 0.0232, + "step": 29864 + }, + { + "epoch": 82.0467032967033, + "grad_norm": 3.620572090148926, + "learning_rate": 8.976648351648352e-06, + "loss": 0.0382, + "step": 29865 + }, + { + "epoch": 82.04945054945055, + "grad_norm": 11.969992637634277, + "learning_rate": 8.975274725274725e-06, + "loss": 0.1442, + "step": 29866 + }, + { + "epoch": 82.0521978021978, + "grad_norm": 13.05932903289795, + "learning_rate": 8.9739010989011e-06, + "loss": 0.1414, + "step": 29867 + }, + { + "epoch": 82.05494505494505, + "grad_norm": 12.963563919067383, + "learning_rate": 8.972527472527473e-06, + "loss": 0.3525, + "step": 29868 + }, + { + "epoch": 82.0576923076923, + "grad_norm": 2.3298003673553467, + "learning_rate": 8.971153846153847e-06, + "loss": 0.0296, + "step": 29869 + }, + { + "epoch": 82.06043956043956, + "grad_norm": 9.999619483947754, + "learning_rate": 8.96978021978022e-06, + "loss": 0.1551, + "step": 29870 + }, + { + "epoch": 82.06318681318682, + "grad_norm": 16.227506637573242, + "learning_rate": 8.968406593406594e-06, + "loss": 0.2405, + "step": 29871 + }, + { + "epoch": 82.06593406593407, + "grad_norm": 12.262764930725098, + "learning_rate": 8.967032967032967e-06, + "loss": 0.3439, + "step": 29872 + }, + { + "epoch": 82.06868131868131, + "grad_norm": 12.014537811279297, + "learning_rate": 8.96565934065934e-06, + "loss": 0.1128, + "step": 29873 + }, + { + "epoch": 82.07142857142857, + "grad_norm": 9.12710189819336, + "learning_rate": 8.964285714285716e-06, + "loss": 0.1764, + "step": 29874 + }, + { + "epoch": 82.07417582417582, + "grad_norm": 14.967883110046387, + "learning_rate": 8.962912087912089e-06, + "loss": 0.1748, + "step": 29875 + }, + { + "epoch": 82.07692307692308, + "grad_norm": 8.763068199157715, + "learning_rate": 8.96153846153846e-06, + "loss": 0.1042, + "step": 29876 + }, + { + "epoch": 82.07967032967034, + "grad_norm": 3.320774793624878, + "learning_rate": 8.960164835164836e-06, + "loss": 0.0415, + "step": 29877 + }, + { + "epoch": 82.08241758241758, + "grad_norm": 11.147309303283691, + "learning_rate": 8.95879120879121e-06, + "loss": 0.186, + "step": 29878 + }, + { + "epoch": 82.08516483516483, + "grad_norm": 14.370576858520508, + "learning_rate": 8.957417582417584e-06, + "loss": 0.2877, + "step": 29879 + }, + { + "epoch": 82.08791208791209, + "grad_norm": 21.271726608276367, + "learning_rate": 8.956043956043956e-06, + "loss": 0.4235, + "step": 29880 + }, + { + "epoch": 82.09065934065934, + "grad_norm": 5.356967449188232, + "learning_rate": 8.95467032967033e-06, + "loss": 0.0698, + "step": 29881 + }, + { + "epoch": 82.0934065934066, + "grad_norm": 2.3525259494781494, + "learning_rate": 8.953296703296705e-06, + "loss": 0.0415, + "step": 29882 + }, + { + "epoch": 82.09615384615384, + "grad_norm": 4.051823139190674, + "learning_rate": 8.951923076923078e-06, + "loss": 0.0673, + "step": 29883 + }, + { + "epoch": 82.0989010989011, + "grad_norm": 11.522649765014648, + "learning_rate": 8.950549450549451e-06, + "loss": 0.1552, + "step": 29884 + }, + { + "epoch": 82.10164835164835, + "grad_norm": 6.737415790557861, + "learning_rate": 8.949175824175825e-06, + "loss": 0.1068, + "step": 29885 + }, + { + "epoch": 82.1043956043956, + "grad_norm": 5.165663242340088, + "learning_rate": 8.947802197802198e-06, + "loss": 0.1292, + "step": 29886 + }, + { + "epoch": 82.10714285714286, + "grad_norm": 6.3637189865112305, + "learning_rate": 8.946428571428572e-06, + "loss": 0.085, + "step": 29887 + }, + { + "epoch": 82.10989010989012, + "grad_norm": 4.610636234283447, + "learning_rate": 8.945054945054945e-06, + "loss": 0.0558, + "step": 29888 + }, + { + "epoch": 82.11263736263736, + "grad_norm": 9.160126686096191, + "learning_rate": 8.943681318681318e-06, + "loss": 0.2464, + "step": 29889 + }, + { + "epoch": 82.11538461538461, + "grad_norm": 5.756378173828125, + "learning_rate": 8.942307692307693e-06, + "loss": 0.0952, + "step": 29890 + }, + { + "epoch": 82.11813186813187, + "grad_norm": 18.339845657348633, + "learning_rate": 8.940934065934065e-06, + "loss": 0.7167, + "step": 29891 + }, + { + "epoch": 82.12087912087912, + "grad_norm": 19.132102966308594, + "learning_rate": 8.93956043956044e-06, + "loss": 0.2985, + "step": 29892 + }, + { + "epoch": 82.12362637362638, + "grad_norm": 14.293560028076172, + "learning_rate": 8.938186813186814e-06, + "loss": 0.253, + "step": 29893 + }, + { + "epoch": 82.12637362637362, + "grad_norm": 6.178803443908691, + "learning_rate": 8.936813186813187e-06, + "loss": 0.0527, + "step": 29894 + }, + { + "epoch": 82.12912087912088, + "grad_norm": 11.565093994140625, + "learning_rate": 8.93543956043956e-06, + "loss": 0.1128, + "step": 29895 + }, + { + "epoch": 82.13186813186813, + "grad_norm": 17.26473045349121, + "learning_rate": 8.934065934065934e-06, + "loss": 0.2776, + "step": 29896 + }, + { + "epoch": 82.13461538461539, + "grad_norm": 3.458721876144409, + "learning_rate": 8.932692307692309e-06, + "loss": 0.0356, + "step": 29897 + }, + { + "epoch": 82.13736263736264, + "grad_norm": 7.243395805358887, + "learning_rate": 8.931318681318682e-06, + "loss": 0.0937, + "step": 29898 + }, + { + "epoch": 82.14010989010988, + "grad_norm": 12.34082317352295, + "learning_rate": 8.929945054945054e-06, + "loss": 0.1693, + "step": 29899 + }, + { + "epoch": 82.14285714285714, + "grad_norm": 10.295059204101562, + "learning_rate": 8.92857142857143e-06, + "loss": 0.2236, + "step": 29900 + }, + { + "epoch": 82.1456043956044, + "grad_norm": 12.67436408996582, + "learning_rate": 8.927197802197803e-06, + "loss": 0.094, + "step": 29901 + }, + { + "epoch": 82.14835164835165, + "grad_norm": 1.2654846906661987, + "learning_rate": 8.925824175824176e-06, + "loss": 0.0223, + "step": 29902 + }, + { + "epoch": 82.1510989010989, + "grad_norm": 6.200390815734863, + "learning_rate": 8.92445054945055e-06, + "loss": 0.1461, + "step": 29903 + }, + { + "epoch": 82.15384615384616, + "grad_norm": 5.449524402618408, + "learning_rate": 8.923076923076923e-06, + "loss": 0.1334, + "step": 29904 + }, + { + "epoch": 82.1565934065934, + "grad_norm": 3.0397636890411377, + "learning_rate": 8.921703296703298e-06, + "loss": 0.0285, + "step": 29905 + }, + { + "epoch": 82.15934065934066, + "grad_norm": 12.983854293823242, + "learning_rate": 8.92032967032967e-06, + "loss": 0.1913, + "step": 29906 + }, + { + "epoch": 82.16208791208791, + "grad_norm": 10.942386627197266, + "learning_rate": 8.918956043956045e-06, + "loss": 0.3262, + "step": 29907 + }, + { + "epoch": 82.16483516483517, + "grad_norm": 8.562314987182617, + "learning_rate": 8.917582417582418e-06, + "loss": 0.096, + "step": 29908 + }, + { + "epoch": 82.16758241758242, + "grad_norm": 5.4234089851379395, + "learning_rate": 8.916208791208792e-06, + "loss": 0.0662, + "step": 29909 + }, + { + "epoch": 82.17032967032966, + "grad_norm": 10.743616104125977, + "learning_rate": 8.914835164835165e-06, + "loss": 0.291, + "step": 29910 + }, + { + "epoch": 82.17307692307692, + "grad_norm": 3.7040979862213135, + "learning_rate": 8.913461538461538e-06, + "loss": 0.0314, + "step": 29911 + }, + { + "epoch": 82.17582417582418, + "grad_norm": 9.283127784729004, + "learning_rate": 8.912087912087914e-06, + "loss": 0.0862, + "step": 29912 + }, + { + "epoch": 82.17857142857143, + "grad_norm": 13.06384563446045, + "learning_rate": 8.910714285714287e-06, + "loss": 0.248, + "step": 29913 + }, + { + "epoch": 82.18131868131869, + "grad_norm": 28.575016021728516, + "learning_rate": 8.909340659340659e-06, + "loss": 0.8308, + "step": 29914 + }, + { + "epoch": 82.18406593406593, + "grad_norm": 17.286867141723633, + "learning_rate": 8.907967032967034e-06, + "loss": 0.5483, + "step": 29915 + }, + { + "epoch": 82.18681318681318, + "grad_norm": 6.685302257537842, + "learning_rate": 8.906593406593407e-06, + "loss": 0.0973, + "step": 29916 + }, + { + "epoch": 82.18956043956044, + "grad_norm": 6.469482421875, + "learning_rate": 8.90521978021978e-06, + "loss": 0.0807, + "step": 29917 + }, + { + "epoch": 82.1923076923077, + "grad_norm": 15.414097785949707, + "learning_rate": 8.903846153846154e-06, + "loss": 0.3912, + "step": 29918 + }, + { + "epoch": 82.19505494505495, + "grad_norm": 13.060667991638184, + "learning_rate": 8.902472527472527e-06, + "loss": 0.2224, + "step": 29919 + }, + { + "epoch": 82.1978021978022, + "grad_norm": 14.660971641540527, + "learning_rate": 8.901098901098902e-06, + "loss": 0.3138, + "step": 29920 + }, + { + "epoch": 82.20054945054945, + "grad_norm": 5.402413845062256, + "learning_rate": 8.899725274725274e-06, + "loss": 0.0733, + "step": 29921 + }, + { + "epoch": 82.2032967032967, + "grad_norm": 7.863491535186768, + "learning_rate": 8.89835164835165e-06, + "loss": 0.1269, + "step": 29922 + }, + { + "epoch": 82.20604395604396, + "grad_norm": 2.227013349533081, + "learning_rate": 8.896978021978023e-06, + "loss": 0.0299, + "step": 29923 + }, + { + "epoch": 82.20879120879121, + "grad_norm": 13.274423599243164, + "learning_rate": 8.895604395604396e-06, + "loss": 0.2829, + "step": 29924 + }, + { + "epoch": 82.21153846153847, + "grad_norm": 1.950449824333191, + "learning_rate": 8.89423076923077e-06, + "loss": 0.0276, + "step": 29925 + }, + { + "epoch": 82.21428571428571, + "grad_norm": 7.171482086181641, + "learning_rate": 8.892857142857143e-06, + "loss": 0.0527, + "step": 29926 + }, + { + "epoch": 82.21703296703296, + "grad_norm": 16.544939041137695, + "learning_rate": 8.891483516483518e-06, + "loss": 0.3226, + "step": 29927 + }, + { + "epoch": 82.21978021978022, + "grad_norm": 2.2058889865875244, + "learning_rate": 8.890109890109891e-06, + "loss": 0.0239, + "step": 29928 + }, + { + "epoch": 82.22252747252747, + "grad_norm": 8.87052059173584, + "learning_rate": 8.888736263736263e-06, + "loss": 0.1938, + "step": 29929 + }, + { + "epoch": 82.22527472527473, + "grad_norm": 7.180139064788818, + "learning_rate": 8.887362637362638e-06, + "loss": 0.0666, + "step": 29930 + }, + { + "epoch": 82.22802197802197, + "grad_norm": 7.265781879425049, + "learning_rate": 8.885989010989012e-06, + "loss": 0.1088, + "step": 29931 + }, + { + "epoch": 82.23076923076923, + "grad_norm": 13.811797142028809, + "learning_rate": 8.884615384615385e-06, + "loss": 0.2204, + "step": 29932 + }, + { + "epoch": 82.23351648351648, + "grad_norm": 17.67646026611328, + "learning_rate": 8.883241758241758e-06, + "loss": 0.5687, + "step": 29933 + }, + { + "epoch": 82.23626373626374, + "grad_norm": 4.184723854064941, + "learning_rate": 8.881868131868132e-06, + "loss": 0.0613, + "step": 29934 + }, + { + "epoch": 82.23901098901099, + "grad_norm": 5.433104515075684, + "learning_rate": 8.880494505494507e-06, + "loss": 0.0696, + "step": 29935 + }, + { + "epoch": 82.24175824175825, + "grad_norm": 22.353103637695312, + "learning_rate": 8.879120879120879e-06, + "loss": 0.4568, + "step": 29936 + }, + { + "epoch": 82.24450549450549, + "grad_norm": 18.307754516601562, + "learning_rate": 8.877747252747254e-06, + "loss": 0.4289, + "step": 29937 + }, + { + "epoch": 82.24725274725274, + "grad_norm": 4.8349456787109375, + "learning_rate": 8.876373626373627e-06, + "loss": 0.1011, + "step": 29938 + }, + { + "epoch": 82.25, + "grad_norm": 5.824193954467773, + "learning_rate": 8.875e-06, + "loss": 0.086, + "step": 29939 + }, + { + "epoch": 82.25274725274726, + "grad_norm": 16.802764892578125, + "learning_rate": 8.873626373626374e-06, + "loss": 0.2125, + "step": 29940 + }, + { + "epoch": 82.25549450549451, + "grad_norm": 15.49583625793457, + "learning_rate": 8.872252747252747e-06, + "loss": 0.2699, + "step": 29941 + }, + { + "epoch": 82.25824175824175, + "grad_norm": 6.416767597198486, + "learning_rate": 8.870879120879122e-06, + "loss": 0.1052, + "step": 29942 + }, + { + "epoch": 82.26098901098901, + "grad_norm": 9.276190757751465, + "learning_rate": 8.869505494505496e-06, + "loss": 0.2022, + "step": 29943 + }, + { + "epoch": 82.26373626373626, + "grad_norm": 6.489510536193848, + "learning_rate": 8.868131868131868e-06, + "loss": 0.0974, + "step": 29944 + }, + { + "epoch": 82.26648351648352, + "grad_norm": 12.603837013244629, + "learning_rate": 8.866758241758243e-06, + "loss": 0.167, + "step": 29945 + }, + { + "epoch": 82.26923076923077, + "grad_norm": 6.0156707763671875, + "learning_rate": 8.865384615384616e-06, + "loss": 0.0511, + "step": 29946 + }, + { + "epoch": 82.27197802197803, + "grad_norm": 8.202390670776367, + "learning_rate": 8.86401098901099e-06, + "loss": 0.1223, + "step": 29947 + }, + { + "epoch": 82.27472527472527, + "grad_norm": 12.388444900512695, + "learning_rate": 8.862637362637363e-06, + "loss": 0.3136, + "step": 29948 + }, + { + "epoch": 82.27747252747253, + "grad_norm": 21.513582229614258, + "learning_rate": 8.861263736263736e-06, + "loss": 0.2518, + "step": 29949 + }, + { + "epoch": 82.28021978021978, + "grad_norm": 23.29563331604004, + "learning_rate": 8.859890109890111e-06, + "loss": 0.5435, + "step": 29950 + }, + { + "epoch": 82.28296703296704, + "grad_norm": 26.448902130126953, + "learning_rate": 8.858516483516483e-06, + "loss": 0.7058, + "step": 29951 + }, + { + "epoch": 82.28571428571429, + "grad_norm": 5.088770389556885, + "learning_rate": 8.857142857142857e-06, + "loss": 0.0581, + "step": 29952 + }, + { + "epoch": 82.28846153846153, + "grad_norm": 8.216267585754395, + "learning_rate": 8.855769230769232e-06, + "loss": 0.1911, + "step": 29953 + }, + { + "epoch": 82.29120879120879, + "grad_norm": 10.248016357421875, + "learning_rate": 8.854395604395605e-06, + "loss": 0.0857, + "step": 29954 + }, + { + "epoch": 82.29395604395604, + "grad_norm": 12.639389038085938, + "learning_rate": 8.853021978021978e-06, + "loss": 0.3423, + "step": 29955 + }, + { + "epoch": 82.2967032967033, + "grad_norm": 14.52686595916748, + "learning_rate": 8.851648351648352e-06, + "loss": 0.1985, + "step": 29956 + }, + { + "epoch": 82.29945054945055, + "grad_norm": 16.299949645996094, + "learning_rate": 8.850274725274725e-06, + "loss": 0.309, + "step": 29957 + }, + { + "epoch": 82.3021978021978, + "grad_norm": 4.337110996246338, + "learning_rate": 8.848901098901099e-06, + "loss": 0.0601, + "step": 29958 + }, + { + "epoch": 82.30494505494505, + "grad_norm": 12.563031196594238, + "learning_rate": 8.847527472527472e-06, + "loss": 0.1141, + "step": 29959 + }, + { + "epoch": 82.3076923076923, + "grad_norm": 1.7036617994308472, + "learning_rate": 8.846153846153847e-06, + "loss": 0.0153, + "step": 29960 + }, + { + "epoch": 82.31043956043956, + "grad_norm": 14.461648941040039, + "learning_rate": 8.84478021978022e-06, + "loss": 0.2701, + "step": 29961 + }, + { + "epoch": 82.31318681318682, + "grad_norm": 5.798123836517334, + "learning_rate": 8.843406593406594e-06, + "loss": 0.083, + "step": 29962 + }, + { + "epoch": 82.31593406593407, + "grad_norm": 14.866530418395996, + "learning_rate": 8.842032967032967e-06, + "loss": 0.223, + "step": 29963 + }, + { + "epoch": 82.31868131868131, + "grad_norm": 3.2224650382995605, + "learning_rate": 8.84065934065934e-06, + "loss": 0.0412, + "step": 29964 + }, + { + "epoch": 82.32142857142857, + "grad_norm": 16.917585372924805, + "learning_rate": 8.839285714285716e-06, + "loss": 0.2195, + "step": 29965 + }, + { + "epoch": 82.32417582417582, + "grad_norm": 7.062230587005615, + "learning_rate": 8.837912087912088e-06, + "loss": 0.13, + "step": 29966 + }, + { + "epoch": 82.32692307692308, + "grad_norm": 9.320395469665527, + "learning_rate": 8.836538461538461e-06, + "loss": 0.061, + "step": 29967 + }, + { + "epoch": 82.32967032967034, + "grad_norm": 14.941950798034668, + "learning_rate": 8.835164835164836e-06, + "loss": 0.5256, + "step": 29968 + }, + { + "epoch": 82.33241758241758, + "grad_norm": 15.685619354248047, + "learning_rate": 8.83379120879121e-06, + "loss": 0.2094, + "step": 29969 + }, + { + "epoch": 82.33516483516483, + "grad_norm": 8.4431734085083, + "learning_rate": 8.832417582417583e-06, + "loss": 0.0497, + "step": 29970 + }, + { + "epoch": 82.33791208791209, + "grad_norm": 15.982802391052246, + "learning_rate": 8.831043956043956e-06, + "loss": 0.1839, + "step": 29971 + }, + { + "epoch": 82.34065934065934, + "grad_norm": 10.254640579223633, + "learning_rate": 8.82967032967033e-06, + "loss": 0.2347, + "step": 29972 + }, + { + "epoch": 82.3434065934066, + "grad_norm": 17.414464950561523, + "learning_rate": 8.828296703296703e-06, + "loss": 0.3895, + "step": 29973 + }, + { + "epoch": 82.34615384615384, + "grad_norm": 5.247625827789307, + "learning_rate": 8.826923076923077e-06, + "loss": 0.0486, + "step": 29974 + }, + { + "epoch": 82.3489010989011, + "grad_norm": 12.527205467224121, + "learning_rate": 8.825549450549452e-06, + "loss": 0.1055, + "step": 29975 + }, + { + "epoch": 82.35164835164835, + "grad_norm": 23.261131286621094, + "learning_rate": 8.824175824175825e-06, + "loss": 0.3613, + "step": 29976 + }, + { + "epoch": 82.3543956043956, + "grad_norm": 11.18459415435791, + "learning_rate": 8.822802197802199e-06, + "loss": 0.1175, + "step": 29977 + }, + { + "epoch": 82.35714285714286, + "grad_norm": 29.128053665161133, + "learning_rate": 8.821428571428572e-06, + "loss": 1.3124, + "step": 29978 + }, + { + "epoch": 82.35989010989012, + "grad_norm": 10.353178024291992, + "learning_rate": 8.820054945054945e-06, + "loss": 0.1313, + "step": 29979 + }, + { + "epoch": 82.36263736263736, + "grad_norm": 7.249992847442627, + "learning_rate": 8.81868131868132e-06, + "loss": 0.0863, + "step": 29980 + }, + { + "epoch": 82.36538461538461, + "grad_norm": 11.613896369934082, + "learning_rate": 8.817307692307692e-06, + "loss": 0.3043, + "step": 29981 + }, + { + "epoch": 82.36813186813187, + "grad_norm": 2.9201321601867676, + "learning_rate": 8.815934065934066e-06, + "loss": 0.0441, + "step": 29982 + }, + { + "epoch": 82.37087912087912, + "grad_norm": 4.122747421264648, + "learning_rate": 8.81456043956044e-06, + "loss": 0.0473, + "step": 29983 + }, + { + "epoch": 82.37362637362638, + "grad_norm": 13.376686096191406, + "learning_rate": 8.813186813186814e-06, + "loss": 0.4425, + "step": 29984 + }, + { + "epoch": 82.37637362637362, + "grad_norm": 14.576947212219238, + "learning_rate": 8.811813186813187e-06, + "loss": 0.1087, + "step": 29985 + }, + { + "epoch": 82.37912087912088, + "grad_norm": 17.73980712890625, + "learning_rate": 8.810439560439561e-06, + "loss": 0.4455, + "step": 29986 + }, + { + "epoch": 82.38186813186813, + "grad_norm": 2.5096967220306396, + "learning_rate": 8.809065934065934e-06, + "loss": 0.0256, + "step": 29987 + }, + { + "epoch": 82.38461538461539, + "grad_norm": 8.086953163146973, + "learning_rate": 8.807692307692308e-06, + "loss": 0.1033, + "step": 29988 + }, + { + "epoch": 82.38736263736264, + "grad_norm": 7.512203693389893, + "learning_rate": 8.806318681318681e-06, + "loss": 0.0962, + "step": 29989 + }, + { + "epoch": 82.39010989010988, + "grad_norm": 4.831345558166504, + "learning_rate": 8.804945054945056e-06, + "loss": 0.0836, + "step": 29990 + }, + { + "epoch": 82.39285714285714, + "grad_norm": 28.12449836730957, + "learning_rate": 8.80357142857143e-06, + "loss": 0.7195, + "step": 29991 + }, + { + "epoch": 82.3956043956044, + "grad_norm": 4.95561408996582, + "learning_rate": 8.802197802197803e-06, + "loss": 0.0968, + "step": 29992 + }, + { + "epoch": 82.39835164835165, + "grad_norm": 7.191010475158691, + "learning_rate": 8.800824175824176e-06, + "loss": 0.0633, + "step": 29993 + }, + { + "epoch": 82.4010989010989, + "grad_norm": 12.062260627746582, + "learning_rate": 8.79945054945055e-06, + "loss": 0.2374, + "step": 29994 + }, + { + "epoch": 82.40384615384616, + "grad_norm": 12.735127449035645, + "learning_rate": 8.798076923076925e-06, + "loss": 0.1366, + "step": 29995 + }, + { + "epoch": 82.4065934065934, + "grad_norm": 15.306105613708496, + "learning_rate": 8.796703296703297e-06, + "loss": 0.3101, + "step": 29996 + }, + { + "epoch": 82.40934065934066, + "grad_norm": 10.493590354919434, + "learning_rate": 8.79532967032967e-06, + "loss": 0.2909, + "step": 29997 + }, + { + "epoch": 82.41208791208791, + "grad_norm": 6.487281799316406, + "learning_rate": 8.793956043956045e-06, + "loss": 0.1318, + "step": 29998 + }, + { + "epoch": 82.41483516483517, + "grad_norm": 10.14142894744873, + "learning_rate": 8.792582417582419e-06, + "loss": 0.122, + "step": 29999 + }, + { + "epoch": 82.41758241758242, + "grad_norm": 15.775590896606445, + "learning_rate": 8.791208791208792e-06, + "loss": 0.3486, + "step": 30000 + }, + { + "epoch": 82.42032967032966, + "grad_norm": 6.232901573181152, + "learning_rate": 8.789835164835165e-06, + "loss": 0.0833, + "step": 30001 + }, + { + "epoch": 82.42307692307692, + "grad_norm": 8.520196914672852, + "learning_rate": 8.788461538461539e-06, + "loss": 0.1812, + "step": 30002 + }, + { + "epoch": 82.42582417582418, + "grad_norm": 19.3250675201416, + "learning_rate": 8.787087912087912e-06, + "loss": 0.2416, + "step": 30003 + }, + { + "epoch": 82.42857142857143, + "grad_norm": 16.92448616027832, + "learning_rate": 8.785714285714286e-06, + "loss": 0.2362, + "step": 30004 + }, + { + "epoch": 82.43131868131869, + "grad_norm": 11.827007293701172, + "learning_rate": 8.784340659340659e-06, + "loss": 0.1325, + "step": 30005 + }, + { + "epoch": 82.43406593406593, + "grad_norm": 6.377560615539551, + "learning_rate": 8.782967032967034e-06, + "loss": 0.075, + "step": 30006 + }, + { + "epoch": 82.43681318681318, + "grad_norm": 10.306145668029785, + "learning_rate": 8.781593406593406e-06, + "loss": 0.2029, + "step": 30007 + }, + { + "epoch": 82.43956043956044, + "grad_norm": 19.236318588256836, + "learning_rate": 8.780219780219781e-06, + "loss": 0.4688, + "step": 30008 + }, + { + "epoch": 82.4423076923077, + "grad_norm": 16.568218231201172, + "learning_rate": 8.778846153846154e-06, + "loss": 0.3857, + "step": 30009 + }, + { + "epoch": 82.44505494505495, + "grad_norm": 11.931924819946289, + "learning_rate": 8.777472527472528e-06, + "loss": 0.302, + "step": 30010 + }, + { + "epoch": 82.4478021978022, + "grad_norm": 13.457197189331055, + "learning_rate": 8.776098901098901e-06, + "loss": 0.5474, + "step": 30011 + }, + { + "epoch": 82.45054945054945, + "grad_norm": 11.841567993164062, + "learning_rate": 8.774725274725275e-06, + "loss": 0.0832, + "step": 30012 + }, + { + "epoch": 82.4532967032967, + "grad_norm": 8.560807228088379, + "learning_rate": 8.77335164835165e-06, + "loss": 0.0851, + "step": 30013 + }, + { + "epoch": 82.45604395604396, + "grad_norm": 14.454998016357422, + "learning_rate": 8.771978021978023e-06, + "loss": 0.5222, + "step": 30014 + }, + { + "epoch": 82.45879120879121, + "grad_norm": 2.3141067028045654, + "learning_rate": 8.770604395604395e-06, + "loss": 0.0298, + "step": 30015 + }, + { + "epoch": 82.46153846153847, + "grad_norm": 6.275355815887451, + "learning_rate": 8.76923076923077e-06, + "loss": 0.1224, + "step": 30016 + }, + { + "epoch": 82.46428571428571, + "grad_norm": 12.10385799407959, + "learning_rate": 8.767857142857143e-06, + "loss": 0.1195, + "step": 30017 + }, + { + "epoch": 82.46703296703296, + "grad_norm": 20.29051971435547, + "learning_rate": 8.766483516483517e-06, + "loss": 0.3823, + "step": 30018 + }, + { + "epoch": 82.46978021978022, + "grad_norm": 4.625065326690674, + "learning_rate": 8.76510989010989e-06, + "loss": 0.0533, + "step": 30019 + }, + { + "epoch": 82.47252747252747, + "grad_norm": 9.76402473449707, + "learning_rate": 8.763736263736263e-06, + "loss": 0.108, + "step": 30020 + }, + { + "epoch": 82.47527472527473, + "grad_norm": 9.106142044067383, + "learning_rate": 8.762362637362639e-06, + "loss": 0.1608, + "step": 30021 + }, + { + "epoch": 82.47802197802197, + "grad_norm": 8.92497444152832, + "learning_rate": 8.76098901098901e-06, + "loss": 0.1663, + "step": 30022 + }, + { + "epoch": 82.48076923076923, + "grad_norm": 1.886873483657837, + "learning_rate": 8.759615384615385e-06, + "loss": 0.0249, + "step": 30023 + }, + { + "epoch": 82.48351648351648, + "grad_norm": 13.131539344787598, + "learning_rate": 8.758241758241759e-06, + "loss": 0.1186, + "step": 30024 + }, + { + "epoch": 82.48626373626374, + "grad_norm": 11.350215911865234, + "learning_rate": 8.756868131868132e-06, + "loss": 0.1763, + "step": 30025 + }, + { + "epoch": 82.48901098901099, + "grad_norm": 19.785236358642578, + "learning_rate": 8.755494505494506e-06, + "loss": 0.3726, + "step": 30026 + }, + { + "epoch": 82.49175824175825, + "grad_norm": 5.110353469848633, + "learning_rate": 8.754120879120879e-06, + "loss": 0.0789, + "step": 30027 + }, + { + "epoch": 82.49450549450549, + "grad_norm": 12.419079780578613, + "learning_rate": 8.752747252747254e-06, + "loss": 0.225, + "step": 30028 + }, + { + "epoch": 82.49725274725274, + "grad_norm": 5.243100166320801, + "learning_rate": 8.751373626373628e-06, + "loss": 0.0498, + "step": 30029 + }, + { + "epoch": 82.5, + "grad_norm": 12.191681861877441, + "learning_rate": 8.75e-06, + "loss": 0.4201, + "step": 30030 + }, + { + "epoch": 82.50274725274726, + "grad_norm": 10.9288969039917, + "learning_rate": 8.748626373626374e-06, + "loss": 0.1891, + "step": 30031 + }, + { + "epoch": 82.50549450549451, + "grad_norm": 4.7720489501953125, + "learning_rate": 8.747252747252748e-06, + "loss": 0.058, + "step": 30032 + }, + { + "epoch": 82.50824175824175, + "grad_norm": 13.4181547164917, + "learning_rate": 8.745879120879121e-06, + "loss": 0.1368, + "step": 30033 + }, + { + "epoch": 82.51098901098901, + "grad_norm": 7.92188835144043, + "learning_rate": 8.744505494505495e-06, + "loss": 0.2548, + "step": 30034 + }, + { + "epoch": 82.51373626373626, + "grad_norm": 17.398731231689453, + "learning_rate": 8.743131868131868e-06, + "loss": 0.2978, + "step": 30035 + }, + { + "epoch": 82.51648351648352, + "grad_norm": 13.542616844177246, + "learning_rate": 8.741758241758243e-06, + "loss": 0.1748, + "step": 30036 + }, + { + "epoch": 82.51923076923077, + "grad_norm": 15.419675827026367, + "learning_rate": 8.740384615384615e-06, + "loss": 0.2262, + "step": 30037 + }, + { + "epoch": 82.52197802197803, + "grad_norm": 9.446454048156738, + "learning_rate": 8.73901098901099e-06, + "loss": 0.1548, + "step": 30038 + }, + { + "epoch": 82.52472527472527, + "grad_norm": 15.746785163879395, + "learning_rate": 8.737637362637363e-06, + "loss": 0.2708, + "step": 30039 + }, + { + "epoch": 82.52747252747253, + "grad_norm": 10.165399551391602, + "learning_rate": 8.736263736263737e-06, + "loss": 0.1798, + "step": 30040 + }, + { + "epoch": 82.53021978021978, + "grad_norm": 3.353044271469116, + "learning_rate": 8.73489010989011e-06, + "loss": 0.0361, + "step": 30041 + }, + { + "epoch": 82.53296703296704, + "grad_norm": 11.967862129211426, + "learning_rate": 8.733516483516484e-06, + "loss": 0.133, + "step": 30042 + }, + { + "epoch": 82.53571428571429, + "grad_norm": 1.4594112634658813, + "learning_rate": 8.732142857142859e-06, + "loss": 0.0138, + "step": 30043 + }, + { + "epoch": 82.53846153846153, + "grad_norm": 8.285561561584473, + "learning_rate": 8.730769230769232e-06, + "loss": 0.1578, + "step": 30044 + }, + { + "epoch": 82.54120879120879, + "grad_norm": 19.02237892150879, + "learning_rate": 8.729395604395604e-06, + "loss": 0.6449, + "step": 30045 + }, + { + "epoch": 82.54395604395604, + "grad_norm": 4.854092597961426, + "learning_rate": 8.728021978021979e-06, + "loss": 0.0352, + "step": 30046 + }, + { + "epoch": 82.5467032967033, + "grad_norm": 8.480178833007812, + "learning_rate": 8.726648351648352e-06, + "loss": 0.27, + "step": 30047 + }, + { + "epoch": 82.54945054945055, + "grad_norm": 32.219017028808594, + "learning_rate": 8.725274725274726e-06, + "loss": 0.5964, + "step": 30048 + }, + { + "epoch": 82.5521978021978, + "grad_norm": 12.247965812683105, + "learning_rate": 8.723901098901099e-06, + "loss": 0.1689, + "step": 30049 + }, + { + "epoch": 82.55494505494505, + "grad_norm": 0.510995090007782, + "learning_rate": 8.722527472527472e-06, + "loss": 0.0066, + "step": 30050 + }, + { + "epoch": 82.5576923076923, + "grad_norm": 5.357217311859131, + "learning_rate": 8.721153846153848e-06, + "loss": 0.0579, + "step": 30051 + }, + { + "epoch": 82.56043956043956, + "grad_norm": 8.412233352661133, + "learning_rate": 8.71978021978022e-06, + "loss": 0.1016, + "step": 30052 + }, + { + "epoch": 82.56318681318682, + "grad_norm": 11.039977073669434, + "learning_rate": 8.718406593406594e-06, + "loss": 0.1196, + "step": 30053 + }, + { + "epoch": 82.56593406593407, + "grad_norm": 2.917062759399414, + "learning_rate": 8.717032967032968e-06, + "loss": 0.0528, + "step": 30054 + }, + { + "epoch": 82.56868131868131, + "grad_norm": 15.86021900177002, + "learning_rate": 8.715659340659341e-06, + "loss": 0.1891, + "step": 30055 + }, + { + "epoch": 82.57142857142857, + "grad_norm": 11.96358585357666, + "learning_rate": 8.714285714285715e-06, + "loss": 0.3348, + "step": 30056 + }, + { + "epoch": 82.57417582417582, + "grad_norm": 15.099940299987793, + "learning_rate": 8.712912087912088e-06, + "loss": 0.3056, + "step": 30057 + }, + { + "epoch": 82.57692307692308, + "grad_norm": 12.552669525146484, + "learning_rate": 8.711538461538463e-06, + "loss": 0.3945, + "step": 30058 + }, + { + "epoch": 82.57967032967034, + "grad_norm": 7.301403522491455, + "learning_rate": 8.710164835164837e-06, + "loss": 0.1328, + "step": 30059 + }, + { + "epoch": 82.58241758241758, + "grad_norm": 13.489410400390625, + "learning_rate": 8.708791208791208e-06, + "loss": 0.1423, + "step": 30060 + }, + { + "epoch": 82.58516483516483, + "grad_norm": 12.568549156188965, + "learning_rate": 8.707417582417583e-06, + "loss": 0.3081, + "step": 30061 + }, + { + "epoch": 82.58791208791209, + "grad_norm": 8.337625503540039, + "learning_rate": 8.706043956043957e-06, + "loss": 0.102, + "step": 30062 + }, + { + "epoch": 82.59065934065934, + "grad_norm": 2.288759469985962, + "learning_rate": 8.70467032967033e-06, + "loss": 0.0262, + "step": 30063 + }, + { + "epoch": 82.5934065934066, + "grad_norm": 15.837352752685547, + "learning_rate": 8.703296703296704e-06, + "loss": 0.2158, + "step": 30064 + }, + { + "epoch": 82.59615384615384, + "grad_norm": 16.414283752441406, + "learning_rate": 8.701923076923077e-06, + "loss": 0.2032, + "step": 30065 + }, + { + "epoch": 82.5989010989011, + "grad_norm": 7.699711799621582, + "learning_rate": 8.700549450549452e-06, + "loss": 0.0784, + "step": 30066 + }, + { + "epoch": 82.60164835164835, + "grad_norm": 11.60639476776123, + "learning_rate": 8.699175824175824e-06, + "loss": 0.2782, + "step": 30067 + }, + { + "epoch": 82.6043956043956, + "grad_norm": 12.41304874420166, + "learning_rate": 8.697802197802197e-06, + "loss": 0.2011, + "step": 30068 + }, + { + "epoch": 82.60714285714286, + "grad_norm": 9.076258659362793, + "learning_rate": 8.696428571428572e-06, + "loss": 0.1933, + "step": 30069 + }, + { + "epoch": 82.60989010989012, + "grad_norm": 10.691758155822754, + "learning_rate": 8.695054945054946e-06, + "loss": 0.2329, + "step": 30070 + }, + { + "epoch": 82.61263736263736, + "grad_norm": 20.48081398010254, + "learning_rate": 8.693681318681319e-06, + "loss": 0.5058, + "step": 30071 + }, + { + "epoch": 82.61538461538461, + "grad_norm": 14.156404495239258, + "learning_rate": 8.692307692307692e-06, + "loss": 0.2751, + "step": 30072 + }, + { + "epoch": 82.61813186813187, + "grad_norm": 3.8990705013275146, + "learning_rate": 8.690934065934066e-06, + "loss": 0.0522, + "step": 30073 + }, + { + "epoch": 82.62087912087912, + "grad_norm": 11.230382919311523, + "learning_rate": 8.689560439560441e-06, + "loss": 0.1232, + "step": 30074 + }, + { + "epoch": 82.62362637362638, + "grad_norm": 7.2541961669921875, + "learning_rate": 8.688186813186813e-06, + "loss": 0.0824, + "step": 30075 + }, + { + "epoch": 82.62637362637362, + "grad_norm": 23.473766326904297, + "learning_rate": 8.686813186813188e-06, + "loss": 0.3312, + "step": 30076 + }, + { + "epoch": 82.62912087912088, + "grad_norm": 13.305466651916504, + "learning_rate": 8.685439560439561e-06, + "loss": 0.1883, + "step": 30077 + }, + { + "epoch": 82.63186813186813, + "grad_norm": 11.922685623168945, + "learning_rate": 8.684065934065935e-06, + "loss": 0.2587, + "step": 30078 + }, + { + "epoch": 82.63461538461539, + "grad_norm": 7.889672756195068, + "learning_rate": 8.682692307692308e-06, + "loss": 0.1612, + "step": 30079 + }, + { + "epoch": 82.63736263736264, + "grad_norm": 18.795974731445312, + "learning_rate": 8.681318681318681e-06, + "loss": 0.5123, + "step": 30080 + }, + { + "epoch": 82.64010989010988, + "grad_norm": 20.680932998657227, + "learning_rate": 8.679945054945057e-06, + "loss": 0.5115, + "step": 30081 + }, + { + "epoch": 82.64285714285714, + "grad_norm": 12.345986366271973, + "learning_rate": 8.678571428571428e-06, + "loss": 0.2395, + "step": 30082 + }, + { + "epoch": 82.6456043956044, + "grad_norm": 21.529756546020508, + "learning_rate": 8.677197802197802e-06, + "loss": 0.3626, + "step": 30083 + }, + { + "epoch": 82.64835164835165, + "grad_norm": 10.76805305480957, + "learning_rate": 8.675824175824177e-06, + "loss": 0.2116, + "step": 30084 + }, + { + "epoch": 82.6510989010989, + "grad_norm": 6.476062297821045, + "learning_rate": 8.67445054945055e-06, + "loss": 0.0689, + "step": 30085 + }, + { + "epoch": 82.65384615384616, + "grad_norm": 5.658257961273193, + "learning_rate": 8.673076923076924e-06, + "loss": 0.0789, + "step": 30086 + }, + { + "epoch": 82.6565934065934, + "grad_norm": 12.630901336669922, + "learning_rate": 8.671703296703297e-06, + "loss": 0.1975, + "step": 30087 + }, + { + "epoch": 82.65934065934066, + "grad_norm": 17.314739227294922, + "learning_rate": 8.67032967032967e-06, + "loss": 0.3606, + "step": 30088 + }, + { + "epoch": 82.66208791208791, + "grad_norm": 11.46932315826416, + "learning_rate": 8.668956043956045e-06, + "loss": 0.1387, + "step": 30089 + }, + { + "epoch": 82.66483516483517, + "grad_norm": 2.59971022605896, + "learning_rate": 8.667582417582417e-06, + "loss": 0.025, + "step": 30090 + }, + { + "epoch": 82.66758241758242, + "grad_norm": 3.4102838039398193, + "learning_rate": 8.666208791208792e-06, + "loss": 0.0481, + "step": 30091 + }, + { + "epoch": 82.67032967032966, + "grad_norm": 1.9119036197662354, + "learning_rate": 8.664835164835166e-06, + "loss": 0.028, + "step": 30092 + }, + { + "epoch": 82.67307692307692, + "grad_norm": 24.335481643676758, + "learning_rate": 8.663461538461539e-06, + "loss": 0.1724, + "step": 30093 + }, + { + "epoch": 82.67582417582418, + "grad_norm": 19.196626663208008, + "learning_rate": 8.662087912087913e-06, + "loss": 0.3426, + "step": 30094 + }, + { + "epoch": 82.67857142857143, + "grad_norm": 15.91320514678955, + "learning_rate": 8.660714285714286e-06, + "loss": 0.1715, + "step": 30095 + }, + { + "epoch": 82.68131868131869, + "grad_norm": 3.1865036487579346, + "learning_rate": 8.659340659340661e-06, + "loss": 0.0536, + "step": 30096 + }, + { + "epoch": 82.68406593406593, + "grad_norm": 6.696000099182129, + "learning_rate": 8.657967032967033e-06, + "loss": 0.113, + "step": 30097 + }, + { + "epoch": 82.68681318681318, + "grad_norm": 0.5630325078964233, + "learning_rate": 8.656593406593406e-06, + "loss": 0.0093, + "step": 30098 + }, + { + "epoch": 82.68956043956044, + "grad_norm": 6.525702476501465, + "learning_rate": 8.655219780219781e-06, + "loss": 0.0845, + "step": 30099 + }, + { + "epoch": 82.6923076923077, + "grad_norm": 2.908820867538452, + "learning_rate": 8.653846153846155e-06, + "loss": 0.0296, + "step": 30100 + }, + { + "epoch": 82.69505494505495, + "grad_norm": 11.731971740722656, + "learning_rate": 8.652472527472528e-06, + "loss": 0.2431, + "step": 30101 + }, + { + "epoch": 82.6978021978022, + "grad_norm": 1.5768104791641235, + "learning_rate": 8.651098901098901e-06, + "loss": 0.0087, + "step": 30102 + }, + { + "epoch": 82.70054945054945, + "grad_norm": 20.422147750854492, + "learning_rate": 8.649725274725275e-06, + "loss": 0.3073, + "step": 30103 + }, + { + "epoch": 82.7032967032967, + "grad_norm": 25.24747657775879, + "learning_rate": 8.64835164835165e-06, + "loss": 0.3247, + "step": 30104 + }, + { + "epoch": 82.70604395604396, + "grad_norm": 8.40810775756836, + "learning_rate": 8.646978021978022e-06, + "loss": 0.1029, + "step": 30105 + }, + { + "epoch": 82.70879120879121, + "grad_norm": 16.51624870300293, + "learning_rate": 8.645604395604397e-06, + "loss": 0.3006, + "step": 30106 + }, + { + "epoch": 82.71153846153847, + "grad_norm": 7.009608268737793, + "learning_rate": 8.64423076923077e-06, + "loss": 0.0775, + "step": 30107 + }, + { + "epoch": 82.71428571428571, + "grad_norm": 5.743194103240967, + "learning_rate": 8.642857142857144e-06, + "loss": 0.0491, + "step": 30108 + }, + { + "epoch": 82.71703296703296, + "grad_norm": 21.783992767333984, + "learning_rate": 8.641483516483517e-06, + "loss": 0.3165, + "step": 30109 + }, + { + "epoch": 82.71978021978022, + "grad_norm": 6.95573091506958, + "learning_rate": 8.64010989010989e-06, + "loss": 0.1094, + "step": 30110 + }, + { + "epoch": 82.72252747252747, + "grad_norm": 4.986480712890625, + "learning_rate": 8.638736263736266e-06, + "loss": 0.0765, + "step": 30111 + }, + { + "epoch": 82.72527472527473, + "grad_norm": 5.905483722686768, + "learning_rate": 8.637362637362637e-06, + "loss": 0.0508, + "step": 30112 + }, + { + "epoch": 82.72802197802197, + "grad_norm": 5.279075622558594, + "learning_rate": 8.63598901098901e-06, + "loss": 0.0586, + "step": 30113 + }, + { + "epoch": 82.73076923076923, + "grad_norm": 20.2650203704834, + "learning_rate": 8.634615384615386e-06, + "loss": 0.2967, + "step": 30114 + }, + { + "epoch": 82.73351648351648, + "grad_norm": 24.300630569458008, + "learning_rate": 8.633241758241759e-06, + "loss": 0.8364, + "step": 30115 + }, + { + "epoch": 82.73626373626374, + "grad_norm": 22.75442886352539, + "learning_rate": 8.631868131868131e-06, + "loss": 0.7812, + "step": 30116 + }, + { + "epoch": 82.73901098901099, + "grad_norm": 2.7069272994995117, + "learning_rate": 8.630494505494506e-06, + "loss": 0.0364, + "step": 30117 + }, + { + "epoch": 82.74175824175825, + "grad_norm": 15.213176727294922, + "learning_rate": 8.62912087912088e-06, + "loss": 0.6594, + "step": 30118 + }, + { + "epoch": 82.74450549450549, + "grad_norm": 9.890832901000977, + "learning_rate": 8.627747252747253e-06, + "loss": 0.1828, + "step": 30119 + }, + { + "epoch": 82.74725274725274, + "grad_norm": 15.593326568603516, + "learning_rate": 8.626373626373626e-06, + "loss": 0.5592, + "step": 30120 + }, + { + "epoch": 82.75, + "grad_norm": 6.154655456542969, + "learning_rate": 8.625e-06, + "loss": 0.0551, + "step": 30121 + }, + { + "epoch": 82.75274725274726, + "grad_norm": 9.590354919433594, + "learning_rate": 8.623626373626375e-06, + "loss": 0.2125, + "step": 30122 + }, + { + "epoch": 82.75549450549451, + "grad_norm": 8.339669227600098, + "learning_rate": 8.622252747252748e-06, + "loss": 0.2142, + "step": 30123 + }, + { + "epoch": 82.75824175824175, + "grad_norm": 16.641040802001953, + "learning_rate": 8.620879120879121e-06, + "loss": 0.3679, + "step": 30124 + }, + { + "epoch": 82.76098901098901, + "grad_norm": 28.533533096313477, + "learning_rate": 8.619505494505495e-06, + "loss": 0.9819, + "step": 30125 + }, + { + "epoch": 82.76373626373626, + "grad_norm": 18.366968154907227, + "learning_rate": 8.618131868131868e-06, + "loss": 0.5179, + "step": 30126 + }, + { + "epoch": 82.76648351648352, + "grad_norm": 7.444225788116455, + "learning_rate": 8.616758241758242e-06, + "loss": 0.087, + "step": 30127 + }, + { + "epoch": 82.76923076923077, + "grad_norm": 11.149794578552246, + "learning_rate": 8.615384615384615e-06, + "loss": 0.1164, + "step": 30128 + }, + { + "epoch": 82.77197802197803, + "grad_norm": 5.519325256347656, + "learning_rate": 8.61401098901099e-06, + "loss": 0.0459, + "step": 30129 + }, + { + "epoch": 82.77472527472527, + "grad_norm": 1.0470736026763916, + "learning_rate": 8.612637362637364e-06, + "loss": 0.0102, + "step": 30130 + }, + { + "epoch": 82.77747252747253, + "grad_norm": 13.694517135620117, + "learning_rate": 8.611263736263735e-06, + "loss": 0.2025, + "step": 30131 + }, + { + "epoch": 82.78021978021978, + "grad_norm": 9.330710411071777, + "learning_rate": 8.60989010989011e-06, + "loss": 0.1673, + "step": 30132 + }, + { + "epoch": 82.78296703296704, + "grad_norm": 13.0112886428833, + "learning_rate": 8.608516483516484e-06, + "loss": 0.2586, + "step": 30133 + }, + { + "epoch": 82.78571428571429, + "grad_norm": 14.344181060791016, + "learning_rate": 8.607142857142857e-06, + "loss": 0.2403, + "step": 30134 + }, + { + "epoch": 82.78846153846153, + "grad_norm": 1.8847588300704956, + "learning_rate": 8.60576923076923e-06, + "loss": 0.0309, + "step": 30135 + }, + { + "epoch": 82.79120879120879, + "grad_norm": 18.82662010192871, + "learning_rate": 8.604395604395604e-06, + "loss": 0.1635, + "step": 30136 + }, + { + "epoch": 82.79395604395604, + "grad_norm": 17.54734992980957, + "learning_rate": 8.60302197802198e-06, + "loss": 0.3164, + "step": 30137 + }, + { + "epoch": 82.7967032967033, + "grad_norm": 17.420677185058594, + "learning_rate": 8.601648351648353e-06, + "loss": 0.5121, + "step": 30138 + }, + { + "epoch": 82.79945054945055, + "grad_norm": 3.887636184692383, + "learning_rate": 8.600274725274726e-06, + "loss": 0.0402, + "step": 30139 + }, + { + "epoch": 82.8021978021978, + "grad_norm": 7.298720359802246, + "learning_rate": 8.5989010989011e-06, + "loss": 0.1628, + "step": 30140 + }, + { + "epoch": 82.80494505494505, + "grad_norm": 5.9192023277282715, + "learning_rate": 8.597527472527473e-06, + "loss": 0.0956, + "step": 30141 + }, + { + "epoch": 82.8076923076923, + "grad_norm": 14.011969566345215, + "learning_rate": 8.596153846153846e-06, + "loss": 0.1995, + "step": 30142 + }, + { + "epoch": 82.81043956043956, + "grad_norm": 13.366767883300781, + "learning_rate": 8.59478021978022e-06, + "loss": 0.2285, + "step": 30143 + }, + { + "epoch": 82.81318681318682, + "grad_norm": 8.444137573242188, + "learning_rate": 8.593406593406595e-06, + "loss": 0.0676, + "step": 30144 + }, + { + "epoch": 82.81593406593407, + "grad_norm": 8.194245338439941, + "learning_rate": 8.592032967032968e-06, + "loss": 0.225, + "step": 30145 + }, + { + "epoch": 82.81868131868131, + "grad_norm": 17.04312515258789, + "learning_rate": 8.59065934065934e-06, + "loss": 0.3005, + "step": 30146 + }, + { + "epoch": 82.82142857142857, + "grad_norm": 11.889622688293457, + "learning_rate": 8.589285714285715e-06, + "loss": 0.1645, + "step": 30147 + }, + { + "epoch": 82.82417582417582, + "grad_norm": 6.788101673126221, + "learning_rate": 8.587912087912088e-06, + "loss": 0.0659, + "step": 30148 + }, + { + "epoch": 82.82692307692308, + "grad_norm": 17.591968536376953, + "learning_rate": 8.586538461538462e-06, + "loss": 0.275, + "step": 30149 + }, + { + "epoch": 82.82967032967034, + "grad_norm": 0.8856992721557617, + "learning_rate": 8.585164835164835e-06, + "loss": 0.011, + "step": 30150 + }, + { + "epoch": 82.83241758241758, + "grad_norm": 14.342848777770996, + "learning_rate": 8.583791208791209e-06, + "loss": 0.2012, + "step": 30151 + }, + { + "epoch": 82.83516483516483, + "grad_norm": 7.344765663146973, + "learning_rate": 8.582417582417584e-06, + "loss": 0.0671, + "step": 30152 + }, + { + "epoch": 82.83791208791209, + "grad_norm": 1.3754583597183228, + "learning_rate": 8.581043956043955e-06, + "loss": 0.0125, + "step": 30153 + }, + { + "epoch": 82.84065934065934, + "grad_norm": 10.712766647338867, + "learning_rate": 8.57967032967033e-06, + "loss": 0.1626, + "step": 30154 + }, + { + "epoch": 82.8434065934066, + "grad_norm": 3.1683943271636963, + "learning_rate": 8.578296703296704e-06, + "loss": 0.0242, + "step": 30155 + }, + { + "epoch": 82.84615384615384, + "grad_norm": 10.225024223327637, + "learning_rate": 8.576923076923077e-06, + "loss": 0.0915, + "step": 30156 + }, + { + "epoch": 82.8489010989011, + "grad_norm": 11.262857437133789, + "learning_rate": 8.57554945054945e-06, + "loss": 0.1899, + "step": 30157 + }, + { + "epoch": 82.85164835164835, + "grad_norm": 10.344407081604004, + "learning_rate": 8.574175824175824e-06, + "loss": 0.1558, + "step": 30158 + }, + { + "epoch": 82.8543956043956, + "grad_norm": 20.818408966064453, + "learning_rate": 8.5728021978022e-06, + "loss": 0.5141, + "step": 30159 + }, + { + "epoch": 82.85714285714286, + "grad_norm": 4.64712381362915, + "learning_rate": 8.571428571428573e-06, + "loss": 0.0343, + "step": 30160 + }, + { + "epoch": 82.85989010989012, + "grad_norm": 11.318971633911133, + "learning_rate": 8.570054945054944e-06, + "loss": 0.134, + "step": 30161 + }, + { + "epoch": 82.86263736263736, + "grad_norm": 38.750587463378906, + "learning_rate": 8.56868131868132e-06, + "loss": 0.6231, + "step": 30162 + }, + { + "epoch": 82.86538461538461, + "grad_norm": 17.807584762573242, + "learning_rate": 8.567307692307693e-06, + "loss": 0.2936, + "step": 30163 + }, + { + "epoch": 82.86813186813187, + "grad_norm": 9.573585510253906, + "learning_rate": 8.565934065934066e-06, + "loss": 0.2113, + "step": 30164 + }, + { + "epoch": 82.87087912087912, + "grad_norm": 11.779452323913574, + "learning_rate": 8.56456043956044e-06, + "loss": 0.342, + "step": 30165 + }, + { + "epoch": 82.87362637362638, + "grad_norm": 10.851539611816406, + "learning_rate": 8.563186813186813e-06, + "loss": 0.1761, + "step": 30166 + }, + { + "epoch": 82.87637362637362, + "grad_norm": 24.513675689697266, + "learning_rate": 8.561813186813188e-06, + "loss": 0.4408, + "step": 30167 + }, + { + "epoch": 82.87912087912088, + "grad_norm": 16.229970932006836, + "learning_rate": 8.56043956043956e-06, + "loss": 0.1218, + "step": 30168 + }, + { + "epoch": 82.88186813186813, + "grad_norm": 4.774394989013672, + "learning_rate": 8.559065934065935e-06, + "loss": 0.1244, + "step": 30169 + }, + { + "epoch": 82.88461538461539, + "grad_norm": 2.6939871311187744, + "learning_rate": 8.557692307692308e-06, + "loss": 0.0373, + "step": 30170 + }, + { + "epoch": 82.88736263736264, + "grad_norm": 11.085694313049316, + "learning_rate": 8.556318681318682e-06, + "loss": 0.147, + "step": 30171 + }, + { + "epoch": 82.89010989010988, + "grad_norm": 9.312578201293945, + "learning_rate": 8.554945054945055e-06, + "loss": 0.1659, + "step": 30172 + }, + { + "epoch": 82.89285714285714, + "grad_norm": 6.531888961791992, + "learning_rate": 8.553571428571429e-06, + "loss": 0.1029, + "step": 30173 + }, + { + "epoch": 82.8956043956044, + "grad_norm": 9.943748474121094, + "learning_rate": 8.552197802197802e-06, + "loss": 0.18, + "step": 30174 + }, + { + "epoch": 82.89835164835165, + "grad_norm": 6.954440116882324, + "learning_rate": 8.550824175824177e-06, + "loss": 0.171, + "step": 30175 + }, + { + "epoch": 82.9010989010989, + "grad_norm": 3.863384485244751, + "learning_rate": 8.549450549450549e-06, + "loss": 0.0559, + "step": 30176 + }, + { + "epoch": 82.90384615384616, + "grad_norm": 14.964766502380371, + "learning_rate": 8.548076923076924e-06, + "loss": 0.1994, + "step": 30177 + }, + { + "epoch": 82.9065934065934, + "grad_norm": 10.030292510986328, + "learning_rate": 8.546703296703297e-06, + "loss": 0.1557, + "step": 30178 + }, + { + "epoch": 82.90934065934066, + "grad_norm": 13.676414489746094, + "learning_rate": 8.54532967032967e-06, + "loss": 0.2731, + "step": 30179 + }, + { + "epoch": 82.91208791208791, + "grad_norm": 8.851713180541992, + "learning_rate": 8.543956043956044e-06, + "loss": 0.1485, + "step": 30180 + }, + { + "epoch": 82.91483516483517, + "grad_norm": 9.344416618347168, + "learning_rate": 8.542582417582418e-06, + "loss": 0.1408, + "step": 30181 + }, + { + "epoch": 82.91758241758242, + "grad_norm": 17.307716369628906, + "learning_rate": 8.541208791208793e-06, + "loss": 0.2807, + "step": 30182 + }, + { + "epoch": 82.92032967032966, + "grad_norm": 5.882152557373047, + "learning_rate": 8.539835164835164e-06, + "loss": 0.0634, + "step": 30183 + }, + { + "epoch": 82.92307692307692, + "grad_norm": 11.260261535644531, + "learning_rate": 8.538461538461538e-06, + "loss": 0.2869, + "step": 30184 + }, + { + "epoch": 82.92582417582418, + "grad_norm": 11.779809951782227, + "learning_rate": 8.537087912087913e-06, + "loss": 0.0918, + "step": 30185 + }, + { + "epoch": 82.92857142857143, + "grad_norm": 3.561213493347168, + "learning_rate": 8.535714285714286e-06, + "loss": 0.0541, + "step": 30186 + }, + { + "epoch": 82.93131868131869, + "grad_norm": 16.705656051635742, + "learning_rate": 8.53434065934066e-06, + "loss": 0.4512, + "step": 30187 + }, + { + "epoch": 82.93406593406593, + "grad_norm": 7.3776679039001465, + "learning_rate": 8.532967032967033e-06, + "loss": 0.1441, + "step": 30188 + }, + { + "epoch": 82.93681318681318, + "grad_norm": 11.298013687133789, + "learning_rate": 8.531593406593406e-06, + "loss": 0.1012, + "step": 30189 + }, + { + "epoch": 82.93956043956044, + "grad_norm": 14.088425636291504, + "learning_rate": 8.530219780219782e-06, + "loss": 0.1525, + "step": 30190 + }, + { + "epoch": 82.9423076923077, + "grad_norm": 13.962623596191406, + "learning_rate": 8.528846153846153e-06, + "loss": 0.3029, + "step": 30191 + }, + { + "epoch": 82.94505494505495, + "grad_norm": 12.480956077575684, + "learning_rate": 8.527472527472528e-06, + "loss": 0.2411, + "step": 30192 + }, + { + "epoch": 82.9478021978022, + "grad_norm": 9.066052436828613, + "learning_rate": 8.526098901098902e-06, + "loss": 0.1316, + "step": 30193 + }, + { + "epoch": 82.95054945054945, + "grad_norm": 12.766170501708984, + "learning_rate": 8.524725274725275e-06, + "loss": 0.2119, + "step": 30194 + }, + { + "epoch": 82.9532967032967, + "grad_norm": 12.376811981201172, + "learning_rate": 8.523351648351649e-06, + "loss": 0.179, + "step": 30195 + }, + { + "epoch": 82.95604395604396, + "grad_norm": 7.138250827789307, + "learning_rate": 8.521978021978022e-06, + "loss": 0.0566, + "step": 30196 + }, + { + "epoch": 82.95879120879121, + "grad_norm": 9.691824913024902, + "learning_rate": 8.520604395604397e-06, + "loss": 0.1057, + "step": 30197 + }, + { + "epoch": 82.96153846153847, + "grad_norm": 6.084129333496094, + "learning_rate": 8.519230769230769e-06, + "loss": 0.0732, + "step": 30198 + }, + { + "epoch": 82.96428571428571, + "grad_norm": 14.916682243347168, + "learning_rate": 8.517857142857142e-06, + "loss": 0.2956, + "step": 30199 + }, + { + "epoch": 82.96703296703296, + "grad_norm": 2.492440700531006, + "learning_rate": 8.516483516483517e-06, + "loss": 0.0332, + "step": 30200 + }, + { + "epoch": 82.96978021978022, + "grad_norm": 17.6180419921875, + "learning_rate": 8.51510989010989e-06, + "loss": 0.3292, + "step": 30201 + }, + { + "epoch": 82.97252747252747, + "grad_norm": 7.979526042938232, + "learning_rate": 8.513736263736264e-06, + "loss": 0.1054, + "step": 30202 + }, + { + "epoch": 82.97527472527473, + "grad_norm": 2.640082836151123, + "learning_rate": 8.512362637362638e-06, + "loss": 0.0226, + "step": 30203 + }, + { + "epoch": 82.97802197802197, + "grad_norm": 9.929168701171875, + "learning_rate": 8.510989010989011e-06, + "loss": 0.1404, + "step": 30204 + }, + { + "epoch": 82.98076923076923, + "grad_norm": 5.854581356048584, + "learning_rate": 8.509615384615386e-06, + "loss": 0.0866, + "step": 30205 + }, + { + "epoch": 82.98351648351648, + "grad_norm": 19.483182907104492, + "learning_rate": 8.508241758241758e-06, + "loss": 0.3686, + "step": 30206 + }, + { + "epoch": 82.98626373626374, + "grad_norm": 4.707534313201904, + "learning_rate": 8.506868131868133e-06, + "loss": 0.0619, + "step": 30207 + }, + { + "epoch": 82.98901098901099, + "grad_norm": 10.800585746765137, + "learning_rate": 8.505494505494506e-06, + "loss": 0.2027, + "step": 30208 + }, + { + "epoch": 82.99175824175825, + "grad_norm": 27.163108825683594, + "learning_rate": 8.50412087912088e-06, + "loss": 0.5666, + "step": 30209 + }, + { + "epoch": 82.99450549450549, + "grad_norm": 14.439018249511719, + "learning_rate": 8.502747252747253e-06, + "loss": 0.1571, + "step": 30210 + }, + { + "epoch": 82.99725274725274, + "grad_norm": 10.716148376464844, + "learning_rate": 8.501373626373627e-06, + "loss": 0.3522, + "step": 30211 + }, + { + "epoch": 83.0, + "grad_norm": 1.6327191591262817, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0101, + "step": 30212 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.4903581267217631, + "eval_f1": 0.449599622558285, + "eval_f1_DuraRiadoRio_64x64": 0.1518987341772152, + "eval_f1_Mole_64x64": 0.49746192893401014, + "eval_f1_Quebrado_64x64": 0.4711111111111111, + "eval_f1_RiadoRio_64x64": 0.6074498567335244, + "eval_f1_RioFechado_64x64": 0.5200764818355641, + "eval_loss": 6.192848205566406, + "eval_precision": 0.6662037186502492, + "eval_precision_DuraRiadoRio_64x64": 0.8571428571428571, + "eval_precision_Mole_64x64": 0.9245283018867925, + "eval_precision_Quebrado_64x64": 0.654320987654321, + "eval_precision_RiadoRio_64x64": 0.5380710659898477, + "eval_precision_RioFechado_64x64": 0.3569553805774278, + "eval_recall": 0.4893563133185076, + "eval_recall_DuraRiadoRio_64x64": 0.08333333333333333, + "eval_recall_Mole_64x64": 0.3402777777777778, + "eval_recall_Quebrado_64x64": 0.3680555555555556, + "eval_recall_RiadoRio_64x64": 0.6973684210526315, + "eval_recall_RioFechado_64x64": 0.9577464788732394, + "eval_runtime": 1.736, + "eval_samples_per_second": 418.207, + "eval_steps_per_second": 26.498, + "step": 30212 + }, + { + "epoch": 83.00274725274726, + "grad_norm": 13.505027770996094, + "learning_rate": 8.498626373626373e-06, + "loss": 0.193, + "step": 30213 + }, + { + "epoch": 83.00549450549451, + "grad_norm": 10.56850528717041, + "learning_rate": 8.497252747252747e-06, + "loss": 0.1981, + "step": 30214 + }, + { + "epoch": 83.00824175824175, + "grad_norm": 24.052730560302734, + "learning_rate": 8.495879120879122e-06, + "loss": 0.4236, + "step": 30215 + }, + { + "epoch": 83.01098901098901, + "grad_norm": 18.811450958251953, + "learning_rate": 8.494505494505495e-06, + "loss": 0.3001, + "step": 30216 + }, + { + "epoch": 83.01373626373626, + "grad_norm": 5.703061580657959, + "learning_rate": 8.493131868131869e-06, + "loss": 0.0996, + "step": 30217 + }, + { + "epoch": 83.01648351648352, + "grad_norm": 14.964632987976074, + "learning_rate": 8.491758241758242e-06, + "loss": 0.3027, + "step": 30218 + }, + { + "epoch": 83.01923076923077, + "grad_norm": 11.232535362243652, + "learning_rate": 8.490384615384615e-06, + "loss": 0.1876, + "step": 30219 + }, + { + "epoch": 83.02197802197803, + "grad_norm": 8.083891868591309, + "learning_rate": 8.48901098901099e-06, + "loss": 0.1558, + "step": 30220 + }, + { + "epoch": 83.02472527472527, + "grad_norm": 2.3332862854003906, + "learning_rate": 8.487637362637362e-06, + "loss": 0.0178, + "step": 30221 + }, + { + "epoch": 83.02747252747253, + "grad_norm": 12.59077262878418, + "learning_rate": 8.486263736263737e-06, + "loss": 0.367, + "step": 30222 + }, + { + "epoch": 83.03021978021978, + "grad_norm": 12.139653205871582, + "learning_rate": 8.48489010989011e-06, + "loss": 0.2612, + "step": 30223 + }, + { + "epoch": 83.03296703296704, + "grad_norm": 12.60628604888916, + "learning_rate": 8.483516483516484e-06, + "loss": 0.1403, + "step": 30224 + }, + { + "epoch": 83.03571428571429, + "grad_norm": 9.557559967041016, + "learning_rate": 8.482142857142858e-06, + "loss": 0.1281, + "step": 30225 + }, + { + "epoch": 83.03846153846153, + "grad_norm": 28.535781860351562, + "learning_rate": 8.480769230769231e-06, + "loss": 0.3454, + "step": 30226 + }, + { + "epoch": 83.04120879120879, + "grad_norm": 22.4425048828125, + "learning_rate": 8.479395604395606e-06, + "loss": 0.4012, + "step": 30227 + }, + { + "epoch": 83.04395604395604, + "grad_norm": 7.9626336097717285, + "learning_rate": 8.478021978021978e-06, + "loss": 0.0828, + "step": 30228 + }, + { + "epoch": 83.0467032967033, + "grad_norm": 11.749751091003418, + "learning_rate": 8.476648351648351e-06, + "loss": 0.1809, + "step": 30229 + }, + { + "epoch": 83.04945054945055, + "grad_norm": 18.483449935913086, + "learning_rate": 8.475274725274726e-06, + "loss": 0.4404, + "step": 30230 + }, + { + "epoch": 83.0521978021978, + "grad_norm": 6.796433448791504, + "learning_rate": 8.4739010989011e-06, + "loss": 0.1832, + "step": 30231 + }, + { + "epoch": 83.05494505494505, + "grad_norm": 11.090100288391113, + "learning_rate": 8.472527472527471e-06, + "loss": 0.2544, + "step": 30232 + }, + { + "epoch": 83.0576923076923, + "grad_norm": 14.797469139099121, + "learning_rate": 8.471153846153847e-06, + "loss": 0.3916, + "step": 30233 + }, + { + "epoch": 83.06043956043956, + "grad_norm": 6.266848087310791, + "learning_rate": 8.46978021978022e-06, + "loss": 0.0902, + "step": 30234 + }, + { + "epoch": 83.06318681318682, + "grad_norm": 23.386428833007812, + "learning_rate": 8.468406593406595e-06, + "loss": 0.3517, + "step": 30235 + }, + { + "epoch": 83.06593406593407, + "grad_norm": 16.420785903930664, + "learning_rate": 8.467032967032967e-06, + "loss": 0.4087, + "step": 30236 + }, + { + "epoch": 83.06868131868131, + "grad_norm": 24.96907615661621, + "learning_rate": 8.46565934065934e-06, + "loss": 0.6573, + "step": 30237 + }, + { + "epoch": 83.07142857142857, + "grad_norm": 7.464832305908203, + "learning_rate": 8.464285714285715e-06, + "loss": 0.1542, + "step": 30238 + }, + { + "epoch": 83.07417582417582, + "grad_norm": 6.679478645324707, + "learning_rate": 8.462912087912089e-06, + "loss": 0.0668, + "step": 30239 + }, + { + "epoch": 83.07692307692308, + "grad_norm": 12.115127563476562, + "learning_rate": 8.461538461538462e-06, + "loss": 0.147, + "step": 30240 + }, + { + "epoch": 83.07967032967034, + "grad_norm": 13.490381240844727, + "learning_rate": 8.460164835164836e-06, + "loss": 0.2205, + "step": 30241 + }, + { + "epoch": 83.08241758241758, + "grad_norm": 2.4175500869750977, + "learning_rate": 8.458791208791209e-06, + "loss": 0.022, + "step": 30242 + }, + { + "epoch": 83.08516483516483, + "grad_norm": 7.359704971313477, + "learning_rate": 8.457417582417582e-06, + "loss": 0.2262, + "step": 30243 + }, + { + "epoch": 83.08791208791209, + "grad_norm": 9.977738380432129, + "learning_rate": 8.456043956043956e-06, + "loss": 0.1705, + "step": 30244 + }, + { + "epoch": 83.09065934065934, + "grad_norm": 13.773571968078613, + "learning_rate": 8.45467032967033e-06, + "loss": 0.1854, + "step": 30245 + }, + { + "epoch": 83.0934065934066, + "grad_norm": 12.572183609008789, + "learning_rate": 8.453296703296704e-06, + "loss": 0.2847, + "step": 30246 + }, + { + "epoch": 83.09615384615384, + "grad_norm": 10.673041343688965, + "learning_rate": 8.451923076923076e-06, + "loss": 0.1342, + "step": 30247 + }, + { + "epoch": 83.0989010989011, + "grad_norm": 3.2774946689605713, + "learning_rate": 8.450549450549451e-06, + "loss": 0.0407, + "step": 30248 + }, + { + "epoch": 83.10164835164835, + "grad_norm": 3.7723147869110107, + "learning_rate": 8.449175824175824e-06, + "loss": 0.0368, + "step": 30249 + }, + { + "epoch": 83.1043956043956, + "grad_norm": 11.086543083190918, + "learning_rate": 8.4478021978022e-06, + "loss": 0.2952, + "step": 30250 + }, + { + "epoch": 83.10714285714286, + "grad_norm": 6.316198348999023, + "learning_rate": 8.446428571428571e-06, + "loss": 0.1318, + "step": 30251 + }, + { + "epoch": 83.10989010989012, + "grad_norm": 14.82681655883789, + "learning_rate": 8.445054945054945e-06, + "loss": 0.1609, + "step": 30252 + }, + { + "epoch": 83.11263736263736, + "grad_norm": 3.0207583904266357, + "learning_rate": 8.44368131868132e-06, + "loss": 0.0477, + "step": 30253 + }, + { + "epoch": 83.11538461538461, + "grad_norm": 22.915843963623047, + "learning_rate": 8.442307692307693e-06, + "loss": 0.3647, + "step": 30254 + }, + { + "epoch": 83.11813186813187, + "grad_norm": 0.2910562753677368, + "learning_rate": 8.440934065934067e-06, + "loss": 0.0034, + "step": 30255 + }, + { + "epoch": 83.12087912087912, + "grad_norm": 4.395040512084961, + "learning_rate": 8.43956043956044e-06, + "loss": 0.0857, + "step": 30256 + }, + { + "epoch": 83.12362637362638, + "grad_norm": 7.964730739593506, + "learning_rate": 8.438186813186813e-06, + "loss": 0.0952, + "step": 30257 + }, + { + "epoch": 83.12637362637362, + "grad_norm": 7.5201592445373535, + "learning_rate": 8.436813186813187e-06, + "loss": 0.1318, + "step": 30258 + }, + { + "epoch": 83.12912087912088, + "grad_norm": 16.33588218688965, + "learning_rate": 8.43543956043956e-06, + "loss": 0.2757, + "step": 30259 + }, + { + "epoch": 83.13186813186813, + "grad_norm": 17.8004207611084, + "learning_rate": 8.434065934065935e-06, + "loss": 0.3395, + "step": 30260 + }, + { + "epoch": 83.13461538461539, + "grad_norm": 17.41916847229004, + "learning_rate": 8.432692307692309e-06, + "loss": 0.2113, + "step": 30261 + }, + { + "epoch": 83.13736263736264, + "grad_norm": 7.363930702209473, + "learning_rate": 8.43131868131868e-06, + "loss": 0.1347, + "step": 30262 + }, + { + "epoch": 83.14010989010988, + "grad_norm": 21.86408805847168, + "learning_rate": 8.429945054945056e-06, + "loss": 0.6021, + "step": 30263 + }, + { + "epoch": 83.14285714285714, + "grad_norm": 13.42162036895752, + "learning_rate": 8.428571428571429e-06, + "loss": 0.1886, + "step": 30264 + }, + { + "epoch": 83.1456043956044, + "grad_norm": 11.755255699157715, + "learning_rate": 8.427197802197802e-06, + "loss": 0.2362, + "step": 30265 + }, + { + "epoch": 83.14835164835165, + "grad_norm": 5.367508888244629, + "learning_rate": 8.425824175824176e-06, + "loss": 0.0619, + "step": 30266 + }, + { + "epoch": 83.1510989010989, + "grad_norm": 3.1268601417541504, + "learning_rate": 8.42445054945055e-06, + "loss": 0.0419, + "step": 30267 + }, + { + "epoch": 83.15384615384616, + "grad_norm": 10.284136772155762, + "learning_rate": 8.423076923076924e-06, + "loss": 0.2235, + "step": 30268 + }, + { + "epoch": 83.1565934065934, + "grad_norm": 10.25168228149414, + "learning_rate": 8.421703296703298e-06, + "loss": 0.0971, + "step": 30269 + }, + { + "epoch": 83.15934065934066, + "grad_norm": 17.90514373779297, + "learning_rate": 8.420329670329671e-06, + "loss": 0.2788, + "step": 30270 + }, + { + "epoch": 83.16208791208791, + "grad_norm": 11.249763488769531, + "learning_rate": 8.418956043956044e-06, + "loss": 0.1974, + "step": 30271 + }, + { + "epoch": 83.16483516483517, + "grad_norm": 17.292335510253906, + "learning_rate": 8.417582417582418e-06, + "loss": 0.4348, + "step": 30272 + }, + { + "epoch": 83.16758241758242, + "grad_norm": 16.587797164916992, + "learning_rate": 8.416208791208791e-06, + "loss": 0.211, + "step": 30273 + }, + { + "epoch": 83.17032967032966, + "grad_norm": 5.0238518714904785, + "learning_rate": 8.414835164835165e-06, + "loss": 0.0357, + "step": 30274 + }, + { + "epoch": 83.17307692307692, + "grad_norm": 12.551255226135254, + "learning_rate": 8.41346153846154e-06, + "loss": 0.1225, + "step": 30275 + }, + { + "epoch": 83.17582417582418, + "grad_norm": 8.320490837097168, + "learning_rate": 8.412087912087913e-06, + "loss": 0.1203, + "step": 30276 + }, + { + "epoch": 83.17857142857143, + "grad_norm": 8.17495346069336, + "learning_rate": 8.410714285714285e-06, + "loss": 0.1464, + "step": 30277 + }, + { + "epoch": 83.18131868131869, + "grad_norm": 12.968184471130371, + "learning_rate": 8.40934065934066e-06, + "loss": 0.2197, + "step": 30278 + }, + { + "epoch": 83.18406593406593, + "grad_norm": 6.340452671051025, + "learning_rate": 8.407967032967033e-06, + "loss": 0.1265, + "step": 30279 + }, + { + "epoch": 83.18681318681318, + "grad_norm": 2.025272846221924, + "learning_rate": 8.406593406593407e-06, + "loss": 0.0299, + "step": 30280 + }, + { + "epoch": 83.18956043956044, + "grad_norm": 12.845227241516113, + "learning_rate": 8.40521978021978e-06, + "loss": 0.2182, + "step": 30281 + }, + { + "epoch": 83.1923076923077, + "grad_norm": 12.18667984008789, + "learning_rate": 8.403846153846154e-06, + "loss": 0.1589, + "step": 30282 + }, + { + "epoch": 83.19505494505495, + "grad_norm": 6.772008419036865, + "learning_rate": 8.402472527472529e-06, + "loss": 0.1522, + "step": 30283 + }, + { + "epoch": 83.1978021978022, + "grad_norm": 19.865041732788086, + "learning_rate": 8.401098901098902e-06, + "loss": 0.2772, + "step": 30284 + }, + { + "epoch": 83.20054945054945, + "grad_norm": 9.789783477783203, + "learning_rate": 8.399725274725276e-06, + "loss": 0.1828, + "step": 30285 + }, + { + "epoch": 83.2032967032967, + "grad_norm": 22.201820373535156, + "learning_rate": 8.398351648351649e-06, + "loss": 0.4333, + "step": 30286 + }, + { + "epoch": 83.20604395604396, + "grad_norm": 1.7338290214538574, + "learning_rate": 8.396978021978022e-06, + "loss": 0.0181, + "step": 30287 + }, + { + "epoch": 83.20879120879121, + "grad_norm": 9.926384925842285, + "learning_rate": 8.395604395604396e-06, + "loss": 0.1228, + "step": 30288 + }, + { + "epoch": 83.21153846153847, + "grad_norm": 5.098001956939697, + "learning_rate": 8.39423076923077e-06, + "loss": 0.0267, + "step": 30289 + }, + { + "epoch": 83.21428571428571, + "grad_norm": 12.079442977905273, + "learning_rate": 8.392857142857143e-06, + "loss": 0.3236, + "step": 30290 + }, + { + "epoch": 83.21703296703296, + "grad_norm": 7.803143501281738, + "learning_rate": 8.391483516483518e-06, + "loss": 0.31, + "step": 30291 + }, + { + "epoch": 83.21978021978022, + "grad_norm": 7.081927299499512, + "learning_rate": 8.39010989010989e-06, + "loss": 0.0961, + "step": 30292 + }, + { + "epoch": 83.22252747252747, + "grad_norm": 17.71411895751953, + "learning_rate": 8.388736263736265e-06, + "loss": 0.439, + "step": 30293 + }, + { + "epoch": 83.22527472527473, + "grad_norm": 12.056441307067871, + "learning_rate": 8.387362637362638e-06, + "loss": 0.1584, + "step": 30294 + }, + { + "epoch": 83.22802197802197, + "grad_norm": 4.910620212554932, + "learning_rate": 8.385989010989011e-06, + "loss": 0.0918, + "step": 30295 + }, + { + "epoch": 83.23076923076923, + "grad_norm": 8.802323341369629, + "learning_rate": 8.384615384615385e-06, + "loss": 0.066, + "step": 30296 + }, + { + "epoch": 83.23351648351648, + "grad_norm": 26.80423927307129, + "learning_rate": 8.383241758241758e-06, + "loss": 0.625, + "step": 30297 + }, + { + "epoch": 83.23626373626374, + "grad_norm": 4.090753078460693, + "learning_rate": 8.381868131868133e-06, + "loss": 0.0429, + "step": 30298 + }, + { + "epoch": 83.23901098901099, + "grad_norm": 12.50908374786377, + "learning_rate": 8.380494505494507e-06, + "loss": 0.2735, + "step": 30299 + }, + { + "epoch": 83.24175824175825, + "grad_norm": 10.713125228881836, + "learning_rate": 8.379120879120878e-06, + "loss": 0.269, + "step": 30300 + }, + { + "epoch": 83.24450549450549, + "grad_norm": 17.572641372680664, + "learning_rate": 8.377747252747253e-06, + "loss": 0.2608, + "step": 30301 + }, + { + "epoch": 83.24725274725274, + "grad_norm": 2.799806594848633, + "learning_rate": 8.376373626373627e-06, + "loss": 0.0291, + "step": 30302 + }, + { + "epoch": 83.25, + "grad_norm": 12.428641319274902, + "learning_rate": 8.375e-06, + "loss": 0.277, + "step": 30303 + }, + { + "epoch": 83.25274725274726, + "grad_norm": 2.194518566131592, + "learning_rate": 8.373626373626374e-06, + "loss": 0.0225, + "step": 30304 + }, + { + "epoch": 83.25549450549451, + "grad_norm": 5.511595726013184, + "learning_rate": 8.372252747252747e-06, + "loss": 0.1002, + "step": 30305 + }, + { + "epoch": 83.25824175824175, + "grad_norm": 7.053444862365723, + "learning_rate": 8.370879120879122e-06, + "loss": 0.1453, + "step": 30306 + }, + { + "epoch": 83.26098901098901, + "grad_norm": 14.07149887084961, + "learning_rate": 8.369505494505494e-06, + "loss": 0.3981, + "step": 30307 + }, + { + "epoch": 83.26373626373626, + "grad_norm": 12.905762672424316, + "learning_rate": 8.368131868131869e-06, + "loss": 0.2976, + "step": 30308 + }, + { + "epoch": 83.26648351648352, + "grad_norm": 16.863252639770508, + "learning_rate": 8.366758241758242e-06, + "loss": 0.4409, + "step": 30309 + }, + { + "epoch": 83.26923076923077, + "grad_norm": 5.993769645690918, + "learning_rate": 8.365384615384616e-06, + "loss": 0.1016, + "step": 30310 + }, + { + "epoch": 83.27197802197803, + "grad_norm": 23.807464599609375, + "learning_rate": 8.36401098901099e-06, + "loss": 0.3209, + "step": 30311 + }, + { + "epoch": 83.27472527472527, + "grad_norm": 3.8325650691986084, + "learning_rate": 8.362637362637363e-06, + "loss": 0.0293, + "step": 30312 + }, + { + "epoch": 83.27747252747253, + "grad_norm": 5.42618989944458, + "learning_rate": 8.361263736263738e-06, + "loss": 0.087, + "step": 30313 + }, + { + "epoch": 83.28021978021978, + "grad_norm": 18.872159957885742, + "learning_rate": 8.35989010989011e-06, + "loss": 0.5506, + "step": 30314 + }, + { + "epoch": 83.28296703296704, + "grad_norm": 10.801225662231445, + "learning_rate": 8.358516483516483e-06, + "loss": 0.1013, + "step": 30315 + }, + { + "epoch": 83.28571428571429, + "grad_norm": 19.337162017822266, + "learning_rate": 8.357142857142858e-06, + "loss": 0.299, + "step": 30316 + }, + { + "epoch": 83.28846153846153, + "grad_norm": 12.336113929748535, + "learning_rate": 8.355769230769231e-06, + "loss": 0.3116, + "step": 30317 + }, + { + "epoch": 83.29120879120879, + "grad_norm": 13.486396789550781, + "learning_rate": 8.354395604395605e-06, + "loss": 0.3008, + "step": 30318 + }, + { + "epoch": 83.29395604395604, + "grad_norm": 19.029756546020508, + "learning_rate": 8.353021978021978e-06, + "loss": 0.3316, + "step": 30319 + }, + { + "epoch": 83.2967032967033, + "grad_norm": 7.091092109680176, + "learning_rate": 8.351648351648352e-06, + "loss": 0.0679, + "step": 30320 + }, + { + "epoch": 83.29945054945055, + "grad_norm": 5.347369194030762, + "learning_rate": 8.350274725274727e-06, + "loss": 0.0807, + "step": 30321 + }, + { + "epoch": 83.3021978021978, + "grad_norm": 4.091264247894287, + "learning_rate": 8.348901098901098e-06, + "loss": 0.0452, + "step": 30322 + }, + { + "epoch": 83.30494505494505, + "grad_norm": 11.624934196472168, + "learning_rate": 8.347527472527474e-06, + "loss": 0.4195, + "step": 30323 + }, + { + "epoch": 83.3076923076923, + "grad_norm": 5.179755210876465, + "learning_rate": 8.346153846153847e-06, + "loss": 0.0465, + "step": 30324 + }, + { + "epoch": 83.31043956043956, + "grad_norm": 4.682059288024902, + "learning_rate": 8.34478021978022e-06, + "loss": 0.0855, + "step": 30325 + }, + { + "epoch": 83.31318681318682, + "grad_norm": 9.520981788635254, + "learning_rate": 8.343406593406594e-06, + "loss": 0.166, + "step": 30326 + }, + { + "epoch": 83.31593406593407, + "grad_norm": 21.02186393737793, + "learning_rate": 8.342032967032967e-06, + "loss": 0.7812, + "step": 30327 + }, + { + "epoch": 83.31868131868131, + "grad_norm": 2.4285171031951904, + "learning_rate": 8.340659340659342e-06, + "loss": 0.0255, + "step": 30328 + }, + { + "epoch": 83.32142857142857, + "grad_norm": 18.26232147216797, + "learning_rate": 8.339285714285714e-06, + "loss": 0.4714, + "step": 30329 + }, + { + "epoch": 83.32417582417582, + "grad_norm": 1.4440820217132568, + "learning_rate": 8.337912087912087e-06, + "loss": 0.0195, + "step": 30330 + }, + { + "epoch": 83.32692307692308, + "grad_norm": 11.089868545532227, + "learning_rate": 8.336538461538462e-06, + "loss": 0.1458, + "step": 30331 + }, + { + "epoch": 83.32967032967034, + "grad_norm": 24.0394287109375, + "learning_rate": 8.335164835164836e-06, + "loss": 0.5142, + "step": 30332 + }, + { + "epoch": 83.33241758241758, + "grad_norm": 16.218910217285156, + "learning_rate": 8.33379120879121e-06, + "loss": 0.2611, + "step": 30333 + }, + { + "epoch": 83.33516483516483, + "grad_norm": 20.949655532836914, + "learning_rate": 8.332417582417583e-06, + "loss": 0.6877, + "step": 30334 + }, + { + "epoch": 83.33791208791209, + "grad_norm": 8.36728572845459, + "learning_rate": 8.331043956043956e-06, + "loss": 0.086, + "step": 30335 + }, + { + "epoch": 83.34065934065934, + "grad_norm": 6.293044090270996, + "learning_rate": 8.329670329670331e-06, + "loss": 0.0778, + "step": 30336 + }, + { + "epoch": 83.3434065934066, + "grad_norm": 15.762443542480469, + "learning_rate": 8.328296703296703e-06, + "loss": 0.2877, + "step": 30337 + }, + { + "epoch": 83.34615384615384, + "grad_norm": 13.243583679199219, + "learning_rate": 8.326923076923078e-06, + "loss": 0.1863, + "step": 30338 + }, + { + "epoch": 83.3489010989011, + "grad_norm": 11.790013313293457, + "learning_rate": 8.325549450549451e-06, + "loss": 0.2931, + "step": 30339 + }, + { + "epoch": 83.35164835164835, + "grad_norm": 14.766942977905273, + "learning_rate": 8.324175824175825e-06, + "loss": 0.3903, + "step": 30340 + }, + { + "epoch": 83.3543956043956, + "grad_norm": 9.934752464294434, + "learning_rate": 8.322802197802198e-06, + "loss": 0.2746, + "step": 30341 + }, + { + "epoch": 83.35714285714286, + "grad_norm": 5.493930816650391, + "learning_rate": 8.321428571428572e-06, + "loss": 0.0709, + "step": 30342 + }, + { + "epoch": 83.35989010989012, + "grad_norm": 13.094517707824707, + "learning_rate": 8.320054945054945e-06, + "loss": 0.2118, + "step": 30343 + }, + { + "epoch": 83.36263736263736, + "grad_norm": 16.707202911376953, + "learning_rate": 8.318681318681318e-06, + "loss": 0.192, + "step": 30344 + }, + { + "epoch": 83.36538461538461, + "grad_norm": 2.3291141986846924, + "learning_rate": 8.317307692307692e-06, + "loss": 0.0273, + "step": 30345 + }, + { + "epoch": 83.36813186813187, + "grad_norm": 2.3174972534179688, + "learning_rate": 8.315934065934067e-06, + "loss": 0.0369, + "step": 30346 + }, + { + "epoch": 83.37087912087912, + "grad_norm": 12.420795440673828, + "learning_rate": 8.31456043956044e-06, + "loss": 0.2066, + "step": 30347 + }, + { + "epoch": 83.37362637362638, + "grad_norm": 7.362749099731445, + "learning_rate": 8.313186813186814e-06, + "loss": 0.1933, + "step": 30348 + }, + { + "epoch": 83.37637362637362, + "grad_norm": 6.589777946472168, + "learning_rate": 8.311813186813187e-06, + "loss": 0.0642, + "step": 30349 + }, + { + "epoch": 83.37912087912088, + "grad_norm": 14.209745407104492, + "learning_rate": 8.31043956043956e-06, + "loss": 0.3142, + "step": 30350 + }, + { + "epoch": 83.38186813186813, + "grad_norm": 20.265869140625, + "learning_rate": 8.309065934065936e-06, + "loss": 0.5443, + "step": 30351 + }, + { + "epoch": 83.38461538461539, + "grad_norm": 14.854166030883789, + "learning_rate": 8.307692307692307e-06, + "loss": 0.3921, + "step": 30352 + }, + { + "epoch": 83.38736263736264, + "grad_norm": 9.825576782226562, + "learning_rate": 8.30631868131868e-06, + "loss": 0.1615, + "step": 30353 + }, + { + "epoch": 83.39010989010988, + "grad_norm": 8.659550666809082, + "learning_rate": 8.304945054945056e-06, + "loss": 0.1303, + "step": 30354 + }, + { + "epoch": 83.39285714285714, + "grad_norm": 11.797205924987793, + "learning_rate": 8.30357142857143e-06, + "loss": 0.2153, + "step": 30355 + }, + { + "epoch": 83.3956043956044, + "grad_norm": 10.755520820617676, + "learning_rate": 8.302197802197803e-06, + "loss": 0.2342, + "step": 30356 + }, + { + "epoch": 83.39835164835165, + "grad_norm": 15.803224563598633, + "learning_rate": 8.300824175824176e-06, + "loss": 0.2443, + "step": 30357 + }, + { + "epoch": 83.4010989010989, + "grad_norm": 4.545196056365967, + "learning_rate": 8.29945054945055e-06, + "loss": 0.0603, + "step": 30358 + }, + { + "epoch": 83.40384615384616, + "grad_norm": 4.969666481018066, + "learning_rate": 8.298076923076923e-06, + "loss": 0.0917, + "step": 30359 + }, + { + "epoch": 83.4065934065934, + "grad_norm": 3.1232893466949463, + "learning_rate": 8.296703296703296e-06, + "loss": 0.023, + "step": 30360 + }, + { + "epoch": 83.40934065934066, + "grad_norm": 4.081723213195801, + "learning_rate": 8.295329670329671e-06, + "loss": 0.06, + "step": 30361 + }, + { + "epoch": 83.41208791208791, + "grad_norm": 19.593002319335938, + "learning_rate": 8.293956043956045e-06, + "loss": 0.6276, + "step": 30362 + }, + { + "epoch": 83.41483516483517, + "grad_norm": 10.539220809936523, + "learning_rate": 8.292582417582417e-06, + "loss": 0.3208, + "step": 30363 + }, + { + "epoch": 83.41758241758242, + "grad_norm": 4.0671772956848145, + "learning_rate": 8.291208791208792e-06, + "loss": 0.0465, + "step": 30364 + }, + { + "epoch": 83.42032967032966, + "grad_norm": 7.921205997467041, + "learning_rate": 8.289835164835165e-06, + "loss": 0.1337, + "step": 30365 + }, + { + "epoch": 83.42307692307692, + "grad_norm": 5.30382776260376, + "learning_rate": 8.28846153846154e-06, + "loss": 0.0781, + "step": 30366 + }, + { + "epoch": 83.42582417582418, + "grad_norm": 3.95306658744812, + "learning_rate": 8.287087912087912e-06, + "loss": 0.0801, + "step": 30367 + }, + { + "epoch": 83.42857142857143, + "grad_norm": 17.99030876159668, + "learning_rate": 8.285714285714285e-06, + "loss": 0.3703, + "step": 30368 + }, + { + "epoch": 83.43131868131869, + "grad_norm": 5.352978229522705, + "learning_rate": 8.28434065934066e-06, + "loss": 0.0906, + "step": 30369 + }, + { + "epoch": 83.43406593406593, + "grad_norm": 15.705277442932129, + "learning_rate": 8.282967032967034e-06, + "loss": 0.257, + "step": 30370 + }, + { + "epoch": 83.43681318681318, + "grad_norm": 22.607982635498047, + "learning_rate": 8.281593406593407e-06, + "loss": 0.6907, + "step": 30371 + }, + { + "epoch": 83.43956043956044, + "grad_norm": 7.742849349975586, + "learning_rate": 8.28021978021978e-06, + "loss": 0.107, + "step": 30372 + }, + { + "epoch": 83.4423076923077, + "grad_norm": 10.887022972106934, + "learning_rate": 8.278846153846154e-06, + "loss": 0.2909, + "step": 30373 + }, + { + "epoch": 83.44505494505495, + "grad_norm": 10.684916496276855, + "learning_rate": 8.277472527472527e-06, + "loss": 0.0908, + "step": 30374 + }, + { + "epoch": 83.4478021978022, + "grad_norm": 12.240548133850098, + "learning_rate": 8.2760989010989e-06, + "loss": 0.19, + "step": 30375 + }, + { + "epoch": 83.45054945054945, + "grad_norm": 17.349040985107422, + "learning_rate": 8.274725274725276e-06, + "loss": 0.2873, + "step": 30376 + }, + { + "epoch": 83.4532967032967, + "grad_norm": 23.256229400634766, + "learning_rate": 8.27335164835165e-06, + "loss": 0.55, + "step": 30377 + }, + { + "epoch": 83.45604395604396, + "grad_norm": 26.692577362060547, + "learning_rate": 8.271978021978021e-06, + "loss": 0.7761, + "step": 30378 + }, + { + "epoch": 83.45879120879121, + "grad_norm": 11.802988052368164, + "learning_rate": 8.270604395604396e-06, + "loss": 0.109, + "step": 30379 + }, + { + "epoch": 83.46153846153847, + "grad_norm": 10.388458251953125, + "learning_rate": 8.26923076923077e-06, + "loss": 0.1595, + "step": 30380 + }, + { + "epoch": 83.46428571428571, + "grad_norm": 18.854354858398438, + "learning_rate": 8.267857142857145e-06, + "loss": 0.5101, + "step": 30381 + }, + { + "epoch": 83.46703296703296, + "grad_norm": 20.185897827148438, + "learning_rate": 8.266483516483516e-06, + "loss": 0.1335, + "step": 30382 + }, + { + "epoch": 83.46978021978022, + "grad_norm": 18.000667572021484, + "learning_rate": 8.26510989010989e-06, + "loss": 0.4579, + "step": 30383 + }, + { + "epoch": 83.47252747252747, + "grad_norm": 11.925871849060059, + "learning_rate": 8.263736263736265e-06, + "loss": 0.1512, + "step": 30384 + }, + { + "epoch": 83.47527472527473, + "grad_norm": 8.654518127441406, + "learning_rate": 8.262362637362638e-06, + "loss": 0.2133, + "step": 30385 + }, + { + "epoch": 83.47802197802197, + "grad_norm": 8.546448707580566, + "learning_rate": 8.260989010989012e-06, + "loss": 0.1486, + "step": 30386 + }, + { + "epoch": 83.48076923076923, + "grad_norm": 6.8797407150268555, + "learning_rate": 8.259615384615385e-06, + "loss": 0.0894, + "step": 30387 + }, + { + "epoch": 83.48351648351648, + "grad_norm": 12.873988151550293, + "learning_rate": 8.258241758241758e-06, + "loss": 0.1905, + "step": 30388 + }, + { + "epoch": 83.48626373626374, + "grad_norm": 18.456504821777344, + "learning_rate": 8.256868131868132e-06, + "loss": 0.239, + "step": 30389 + }, + { + "epoch": 83.48901098901099, + "grad_norm": 11.324017524719238, + "learning_rate": 8.255494505494505e-06, + "loss": 0.148, + "step": 30390 + }, + { + "epoch": 83.49175824175825, + "grad_norm": 29.06413459777832, + "learning_rate": 8.25412087912088e-06, + "loss": 0.5254, + "step": 30391 + }, + { + "epoch": 83.49450549450549, + "grad_norm": 10.80764102935791, + "learning_rate": 8.252747252747254e-06, + "loss": 0.1806, + "step": 30392 + }, + { + "epoch": 83.49725274725274, + "grad_norm": 3.7302772998809814, + "learning_rate": 8.251373626373626e-06, + "loss": 0.0249, + "step": 30393 + }, + { + "epoch": 83.5, + "grad_norm": 23.6727352142334, + "learning_rate": 8.25e-06, + "loss": 0.8748, + "step": 30394 + }, + { + "epoch": 83.50274725274726, + "grad_norm": 12.908187866210938, + "learning_rate": 8.248626373626374e-06, + "loss": 0.2666, + "step": 30395 + }, + { + "epoch": 83.50549450549451, + "grad_norm": 4.613491058349609, + "learning_rate": 8.247252747252749e-06, + "loss": 0.0366, + "step": 30396 + }, + { + "epoch": 83.50824175824175, + "grad_norm": 4.174905300140381, + "learning_rate": 8.245879120879121e-06, + "loss": 0.0704, + "step": 30397 + }, + { + "epoch": 83.51098901098901, + "grad_norm": 25.96551513671875, + "learning_rate": 8.244505494505494e-06, + "loss": 0.3623, + "step": 30398 + }, + { + "epoch": 83.51373626373626, + "grad_norm": 18.434431076049805, + "learning_rate": 8.24313186813187e-06, + "loss": 0.3627, + "step": 30399 + }, + { + "epoch": 83.51648351648352, + "grad_norm": 5.316224575042725, + "learning_rate": 8.241758241758243e-06, + "loss": 0.1043, + "step": 30400 + }, + { + "epoch": 83.51923076923077, + "grad_norm": 7.717904090881348, + "learning_rate": 8.240384615384614e-06, + "loss": 0.1396, + "step": 30401 + }, + { + "epoch": 83.52197802197803, + "grad_norm": 7.859055519104004, + "learning_rate": 8.23901098901099e-06, + "loss": 0.0876, + "step": 30402 + }, + { + "epoch": 83.52472527472527, + "grad_norm": 14.657646179199219, + "learning_rate": 8.237637362637363e-06, + "loss": 0.5146, + "step": 30403 + }, + { + "epoch": 83.52747252747253, + "grad_norm": 33.65507507324219, + "learning_rate": 8.236263736263736e-06, + "loss": 0.7429, + "step": 30404 + }, + { + "epoch": 83.53021978021978, + "grad_norm": 19.880788803100586, + "learning_rate": 8.23489010989011e-06, + "loss": 0.2507, + "step": 30405 + }, + { + "epoch": 83.53296703296704, + "grad_norm": 9.537957191467285, + "learning_rate": 8.233516483516483e-06, + "loss": 0.1367, + "step": 30406 + }, + { + "epoch": 83.53571428571429, + "grad_norm": 19.052446365356445, + "learning_rate": 8.232142857142858e-06, + "loss": 0.574, + "step": 30407 + }, + { + "epoch": 83.53846153846153, + "grad_norm": 7.390403747558594, + "learning_rate": 8.23076923076923e-06, + "loss": 0.1906, + "step": 30408 + }, + { + "epoch": 83.54120879120879, + "grad_norm": 12.720178604125977, + "learning_rate": 8.229395604395605e-06, + "loss": 0.3507, + "step": 30409 + }, + { + "epoch": 83.54395604395604, + "grad_norm": 26.826919555664062, + "learning_rate": 8.228021978021979e-06, + "loss": 0.5049, + "step": 30410 + }, + { + "epoch": 83.5467032967033, + "grad_norm": 14.486474990844727, + "learning_rate": 8.226648351648352e-06, + "loss": 0.2136, + "step": 30411 + }, + { + "epoch": 83.54945054945055, + "grad_norm": 3.778454542160034, + "learning_rate": 8.225274725274725e-06, + "loss": 0.0501, + "step": 30412 + }, + { + "epoch": 83.5521978021978, + "grad_norm": 6.560481071472168, + "learning_rate": 8.223901098901099e-06, + "loss": 0.066, + "step": 30413 + }, + { + "epoch": 83.55494505494505, + "grad_norm": 9.976716995239258, + "learning_rate": 8.222527472527474e-06, + "loss": 0.0905, + "step": 30414 + }, + { + "epoch": 83.5576923076923, + "grad_norm": 8.478879928588867, + "learning_rate": 8.221153846153847e-06, + "loss": 0.1046, + "step": 30415 + }, + { + "epoch": 83.56043956043956, + "grad_norm": 20.928436279296875, + "learning_rate": 8.219780219780219e-06, + "loss": 0.4733, + "step": 30416 + }, + { + "epoch": 83.56318681318682, + "grad_norm": 13.214483261108398, + "learning_rate": 8.218406593406594e-06, + "loss": 0.1784, + "step": 30417 + }, + { + "epoch": 83.56593406593407, + "grad_norm": 21.498552322387695, + "learning_rate": 8.217032967032967e-06, + "loss": 0.3697, + "step": 30418 + }, + { + "epoch": 83.56868131868131, + "grad_norm": 10.296160697937012, + "learning_rate": 8.215659340659341e-06, + "loss": 0.2097, + "step": 30419 + }, + { + "epoch": 83.57142857142857, + "grad_norm": 1.4700770378112793, + "learning_rate": 8.214285714285714e-06, + "loss": 0.0173, + "step": 30420 + }, + { + "epoch": 83.57417582417582, + "grad_norm": 2.6900980472564697, + "learning_rate": 8.212912087912088e-06, + "loss": 0.0391, + "step": 30421 + }, + { + "epoch": 83.57692307692308, + "grad_norm": 7.188537120819092, + "learning_rate": 8.211538461538463e-06, + "loss": 0.1268, + "step": 30422 + }, + { + "epoch": 83.57967032967034, + "grad_norm": 15.731220245361328, + "learning_rate": 8.210164835164835e-06, + "loss": 0.3163, + "step": 30423 + }, + { + "epoch": 83.58241758241758, + "grad_norm": 10.616083145141602, + "learning_rate": 8.20879120879121e-06, + "loss": 0.1619, + "step": 30424 + }, + { + "epoch": 83.58516483516483, + "grad_norm": 5.705341815948486, + "learning_rate": 8.207417582417583e-06, + "loss": 0.1157, + "step": 30425 + }, + { + "epoch": 83.58791208791209, + "grad_norm": 16.078611373901367, + "learning_rate": 8.206043956043956e-06, + "loss": 0.295, + "step": 30426 + }, + { + "epoch": 83.59065934065934, + "grad_norm": 21.538240432739258, + "learning_rate": 8.20467032967033e-06, + "loss": 0.7799, + "step": 30427 + }, + { + "epoch": 83.5934065934066, + "grad_norm": 5.420512676239014, + "learning_rate": 8.203296703296703e-06, + "loss": 0.089, + "step": 30428 + }, + { + "epoch": 83.59615384615384, + "grad_norm": 5.088050842285156, + "learning_rate": 8.201923076923078e-06, + "loss": 0.0602, + "step": 30429 + }, + { + "epoch": 83.5989010989011, + "grad_norm": 16.088241577148438, + "learning_rate": 8.200549450549452e-06, + "loss": 0.3988, + "step": 30430 + }, + { + "epoch": 83.60164835164835, + "grad_norm": 19.804956436157227, + "learning_rate": 8.199175824175823e-06, + "loss": 0.3613, + "step": 30431 + }, + { + "epoch": 83.6043956043956, + "grad_norm": 4.929948806762695, + "learning_rate": 8.197802197802199e-06, + "loss": 0.0548, + "step": 30432 + }, + { + "epoch": 83.60714285714286, + "grad_norm": 8.210124015808105, + "learning_rate": 8.196428571428572e-06, + "loss": 0.1652, + "step": 30433 + }, + { + "epoch": 83.60989010989012, + "grad_norm": 12.428197860717773, + "learning_rate": 8.195054945054945e-06, + "loss": 0.3324, + "step": 30434 + }, + { + "epoch": 83.61263736263736, + "grad_norm": 12.202526092529297, + "learning_rate": 8.193681318681319e-06, + "loss": 0.2793, + "step": 30435 + }, + { + "epoch": 83.61538461538461, + "grad_norm": 30.403980255126953, + "learning_rate": 8.192307692307692e-06, + "loss": 0.6189, + "step": 30436 + }, + { + "epoch": 83.61813186813187, + "grad_norm": 5.074399471282959, + "learning_rate": 8.190934065934067e-06, + "loss": 0.0494, + "step": 30437 + }, + { + "epoch": 83.62087912087912, + "grad_norm": 4.461077690124512, + "learning_rate": 8.189560439560439e-06, + "loss": 0.0531, + "step": 30438 + }, + { + "epoch": 83.62362637362638, + "grad_norm": 8.764530181884766, + "learning_rate": 8.188186813186814e-06, + "loss": 0.1012, + "step": 30439 + }, + { + "epoch": 83.62637362637362, + "grad_norm": 8.311690330505371, + "learning_rate": 8.186813186813188e-06, + "loss": 0.1898, + "step": 30440 + }, + { + "epoch": 83.62912087912088, + "grad_norm": 2.6986567974090576, + "learning_rate": 8.185439560439561e-06, + "loss": 0.0291, + "step": 30441 + }, + { + "epoch": 83.63186813186813, + "grad_norm": 7.481016635894775, + "learning_rate": 8.184065934065934e-06, + "loss": 0.0877, + "step": 30442 + }, + { + "epoch": 83.63461538461539, + "grad_norm": 19.96282196044922, + "learning_rate": 8.182692307692308e-06, + "loss": 0.3222, + "step": 30443 + }, + { + "epoch": 83.63736263736264, + "grad_norm": 11.553305625915527, + "learning_rate": 8.181318681318683e-06, + "loss": 0.132, + "step": 30444 + }, + { + "epoch": 83.64010989010988, + "grad_norm": 6.061967849731445, + "learning_rate": 8.179945054945056e-06, + "loss": 0.1268, + "step": 30445 + }, + { + "epoch": 83.64285714285714, + "grad_norm": 6.021245002746582, + "learning_rate": 8.178571428571428e-06, + "loss": 0.0737, + "step": 30446 + }, + { + "epoch": 83.6456043956044, + "grad_norm": 13.939529418945312, + "learning_rate": 8.177197802197803e-06, + "loss": 0.3899, + "step": 30447 + }, + { + "epoch": 83.64835164835165, + "grad_norm": 10.89007568359375, + "learning_rate": 8.175824175824176e-06, + "loss": 0.2368, + "step": 30448 + }, + { + "epoch": 83.6510989010989, + "grad_norm": 8.381791114807129, + "learning_rate": 8.17445054945055e-06, + "loss": 0.1138, + "step": 30449 + }, + { + "epoch": 83.65384615384616, + "grad_norm": 5.6637678146362305, + "learning_rate": 8.173076923076923e-06, + "loss": 0.06, + "step": 30450 + }, + { + "epoch": 83.6565934065934, + "grad_norm": 17.013944625854492, + "learning_rate": 8.171703296703297e-06, + "loss": 0.1796, + "step": 30451 + }, + { + "epoch": 83.65934065934066, + "grad_norm": 12.607446670532227, + "learning_rate": 8.170329670329672e-06, + "loss": 0.1517, + "step": 30452 + }, + { + "epoch": 83.66208791208791, + "grad_norm": 5.332396030426025, + "learning_rate": 8.168956043956043e-06, + "loss": 0.0838, + "step": 30453 + }, + { + "epoch": 83.66483516483517, + "grad_norm": 16.56182861328125, + "learning_rate": 8.167582417582419e-06, + "loss": 0.275, + "step": 30454 + }, + { + "epoch": 83.66758241758242, + "grad_norm": 1.4008547067642212, + "learning_rate": 8.166208791208792e-06, + "loss": 0.0222, + "step": 30455 + }, + { + "epoch": 83.67032967032966, + "grad_norm": 10.272804260253906, + "learning_rate": 8.164835164835165e-06, + "loss": 0.2165, + "step": 30456 + }, + { + "epoch": 83.67307692307692, + "grad_norm": 10.108524322509766, + "learning_rate": 8.163461538461539e-06, + "loss": 0.2213, + "step": 30457 + }, + { + "epoch": 83.67582417582418, + "grad_norm": 18.64845848083496, + "learning_rate": 8.162087912087912e-06, + "loss": 0.3901, + "step": 30458 + }, + { + "epoch": 83.67857142857143, + "grad_norm": 16.243581771850586, + "learning_rate": 8.160714285714286e-06, + "loss": 0.4019, + "step": 30459 + }, + { + "epoch": 83.68131868131869, + "grad_norm": 2.023240566253662, + "learning_rate": 8.15934065934066e-06, + "loss": 0.0157, + "step": 30460 + }, + { + "epoch": 83.68406593406593, + "grad_norm": 7.17013692855835, + "learning_rate": 8.157967032967032e-06, + "loss": 0.1816, + "step": 30461 + }, + { + "epoch": 83.68681318681318, + "grad_norm": 11.042661666870117, + "learning_rate": 8.156593406593408e-06, + "loss": 0.1653, + "step": 30462 + }, + { + "epoch": 83.68956043956044, + "grad_norm": 11.937037467956543, + "learning_rate": 8.155219780219781e-06, + "loss": 0.2128, + "step": 30463 + }, + { + "epoch": 83.6923076923077, + "grad_norm": 5.110751152038574, + "learning_rate": 8.153846153846154e-06, + "loss": 0.0705, + "step": 30464 + }, + { + "epoch": 83.69505494505495, + "grad_norm": 22.883378982543945, + "learning_rate": 8.152472527472528e-06, + "loss": 0.1693, + "step": 30465 + }, + { + "epoch": 83.6978021978022, + "grad_norm": 11.631401062011719, + "learning_rate": 8.151098901098901e-06, + "loss": 0.1235, + "step": 30466 + }, + { + "epoch": 83.70054945054945, + "grad_norm": 7.7293572425842285, + "learning_rate": 8.149725274725276e-06, + "loss": 0.1457, + "step": 30467 + }, + { + "epoch": 83.7032967032967, + "grad_norm": 12.869868278503418, + "learning_rate": 8.148351648351648e-06, + "loss": 0.3777, + "step": 30468 + }, + { + "epoch": 83.70604395604396, + "grad_norm": 13.920562744140625, + "learning_rate": 8.146978021978021e-06, + "loss": 0.2792, + "step": 30469 + }, + { + "epoch": 83.70879120879121, + "grad_norm": 21.9499568939209, + "learning_rate": 8.145604395604396e-06, + "loss": 0.2277, + "step": 30470 + }, + { + "epoch": 83.71153846153847, + "grad_norm": 15.801729202270508, + "learning_rate": 8.14423076923077e-06, + "loss": 0.2406, + "step": 30471 + }, + { + "epoch": 83.71428571428571, + "grad_norm": 0.8180060386657715, + "learning_rate": 8.142857142857143e-06, + "loss": 0.0105, + "step": 30472 + }, + { + "epoch": 83.71703296703296, + "grad_norm": 4.378969192504883, + "learning_rate": 8.141483516483517e-06, + "loss": 0.0584, + "step": 30473 + }, + { + "epoch": 83.71978021978022, + "grad_norm": 21.13927459716797, + "learning_rate": 8.14010989010989e-06, + "loss": 0.2528, + "step": 30474 + }, + { + "epoch": 83.72252747252747, + "grad_norm": 11.514106750488281, + "learning_rate": 8.138736263736264e-06, + "loss": 0.1215, + "step": 30475 + }, + { + "epoch": 83.72527472527473, + "grad_norm": 23.257415771484375, + "learning_rate": 8.137362637362637e-06, + "loss": 0.4159, + "step": 30476 + }, + { + "epoch": 83.72802197802197, + "grad_norm": 22.604297637939453, + "learning_rate": 8.135989010989012e-06, + "loss": 1.023, + "step": 30477 + }, + { + "epoch": 83.73076923076923, + "grad_norm": 14.748148918151855, + "learning_rate": 8.134615384615385e-06, + "loss": 0.2336, + "step": 30478 + }, + { + "epoch": 83.73351648351648, + "grad_norm": 9.000767707824707, + "learning_rate": 8.133241758241759e-06, + "loss": 0.0732, + "step": 30479 + }, + { + "epoch": 83.73626373626374, + "grad_norm": 11.917257308959961, + "learning_rate": 8.131868131868132e-06, + "loss": 0.3534, + "step": 30480 + }, + { + "epoch": 83.73901098901099, + "grad_norm": 8.17487907409668, + "learning_rate": 8.130494505494506e-06, + "loss": 0.1479, + "step": 30481 + }, + { + "epoch": 83.74175824175825, + "grad_norm": 20.039100646972656, + "learning_rate": 8.12912087912088e-06, + "loss": 0.3066, + "step": 30482 + }, + { + "epoch": 83.74450549450549, + "grad_norm": 15.111213684082031, + "learning_rate": 8.127747252747252e-06, + "loss": 0.3237, + "step": 30483 + }, + { + "epoch": 83.74725274725274, + "grad_norm": 23.748455047607422, + "learning_rate": 8.126373626373626e-06, + "loss": 0.3197, + "step": 30484 + }, + { + "epoch": 83.75, + "grad_norm": 4.885295391082764, + "learning_rate": 8.125000000000001e-06, + "loss": 0.0606, + "step": 30485 + }, + { + "epoch": 83.75274725274726, + "grad_norm": 8.174153327941895, + "learning_rate": 8.123626373626374e-06, + "loss": 0.1305, + "step": 30486 + }, + { + "epoch": 83.75549450549451, + "grad_norm": 15.02299976348877, + "learning_rate": 8.122252747252748e-06, + "loss": 0.3303, + "step": 30487 + }, + { + "epoch": 83.75824175824175, + "grad_norm": 14.475114822387695, + "learning_rate": 8.120879120879121e-06, + "loss": 0.2313, + "step": 30488 + }, + { + "epoch": 83.76098901098901, + "grad_norm": 8.579594612121582, + "learning_rate": 8.119505494505495e-06, + "loss": 0.1513, + "step": 30489 + }, + { + "epoch": 83.76373626373626, + "grad_norm": 11.976189613342285, + "learning_rate": 8.118131868131868e-06, + "loss": 0.1185, + "step": 30490 + }, + { + "epoch": 83.76648351648352, + "grad_norm": 17.1970157623291, + "learning_rate": 8.116758241758241e-06, + "loss": 0.222, + "step": 30491 + }, + { + "epoch": 83.76923076923077, + "grad_norm": 15.016107559204102, + "learning_rate": 8.115384615384617e-06, + "loss": 0.3516, + "step": 30492 + }, + { + "epoch": 83.77197802197803, + "grad_norm": 8.64374828338623, + "learning_rate": 8.11401098901099e-06, + "loss": 0.1769, + "step": 30493 + }, + { + "epoch": 83.77472527472527, + "grad_norm": 8.426535606384277, + "learning_rate": 8.112637362637363e-06, + "loss": 0.1204, + "step": 30494 + }, + { + "epoch": 83.77747252747253, + "grad_norm": 10.251175880432129, + "learning_rate": 8.111263736263737e-06, + "loss": 0.1484, + "step": 30495 + }, + { + "epoch": 83.78021978021978, + "grad_norm": 11.890650749206543, + "learning_rate": 8.10989010989011e-06, + "loss": 0.1385, + "step": 30496 + }, + { + "epoch": 83.78296703296704, + "grad_norm": 24.307720184326172, + "learning_rate": 8.108516483516485e-06, + "loss": 0.3722, + "step": 30497 + }, + { + "epoch": 83.78571428571429, + "grad_norm": 5.1894989013671875, + "learning_rate": 8.107142857142857e-06, + "loss": 0.0584, + "step": 30498 + }, + { + "epoch": 83.78846153846153, + "grad_norm": 12.26231575012207, + "learning_rate": 8.10576923076923e-06, + "loss": 0.139, + "step": 30499 + }, + { + "epoch": 83.79120879120879, + "grad_norm": 23.459209442138672, + "learning_rate": 8.104395604395605e-06, + "loss": 0.3543, + "step": 30500 + }, + { + "epoch": 83.79395604395604, + "grad_norm": 3.1571075916290283, + "learning_rate": 8.103021978021979e-06, + "loss": 0.0346, + "step": 30501 + }, + { + "epoch": 83.7967032967033, + "grad_norm": 15.707809448242188, + "learning_rate": 8.101648351648352e-06, + "loss": 0.305, + "step": 30502 + }, + { + "epoch": 83.79945054945055, + "grad_norm": 16.369184494018555, + "learning_rate": 8.100274725274726e-06, + "loss": 0.4045, + "step": 30503 + }, + { + "epoch": 83.8021978021978, + "grad_norm": 25.591110229492188, + "learning_rate": 8.098901098901099e-06, + "loss": 0.0865, + "step": 30504 + }, + { + "epoch": 83.80494505494505, + "grad_norm": 4.023438930511475, + "learning_rate": 8.097527472527473e-06, + "loss": 0.0843, + "step": 30505 + }, + { + "epoch": 83.8076923076923, + "grad_norm": 4.541772365570068, + "learning_rate": 8.096153846153846e-06, + "loss": 0.0876, + "step": 30506 + }, + { + "epoch": 83.81043956043956, + "grad_norm": 16.17568588256836, + "learning_rate": 8.094780219780221e-06, + "loss": 0.3524, + "step": 30507 + }, + { + "epoch": 83.81318681318682, + "grad_norm": 6.618261337280273, + "learning_rate": 8.093406593406594e-06, + "loss": 0.0681, + "step": 30508 + }, + { + "epoch": 83.81593406593407, + "grad_norm": 20.06870460510254, + "learning_rate": 8.092032967032968e-06, + "loss": 0.2563, + "step": 30509 + }, + { + "epoch": 83.81868131868131, + "grad_norm": 7.311895847320557, + "learning_rate": 8.090659340659341e-06, + "loss": 0.1651, + "step": 30510 + }, + { + "epoch": 83.82142857142857, + "grad_norm": 6.672245025634766, + "learning_rate": 8.089285714285715e-06, + "loss": 0.064, + "step": 30511 + }, + { + "epoch": 83.82417582417582, + "grad_norm": 9.658774375915527, + "learning_rate": 8.08791208791209e-06, + "loss": 0.2063, + "step": 30512 + }, + { + "epoch": 83.82692307692308, + "grad_norm": 12.079970359802246, + "learning_rate": 8.086538461538461e-06, + "loss": 0.2321, + "step": 30513 + }, + { + "epoch": 83.82967032967034, + "grad_norm": 11.232253074645996, + "learning_rate": 8.085164835164835e-06, + "loss": 0.2714, + "step": 30514 + }, + { + "epoch": 83.83241758241758, + "grad_norm": 9.617834091186523, + "learning_rate": 8.08379120879121e-06, + "loss": 0.2563, + "step": 30515 + }, + { + "epoch": 83.83516483516483, + "grad_norm": 17.87826156616211, + "learning_rate": 8.082417582417583e-06, + "loss": 0.3332, + "step": 30516 + }, + { + "epoch": 83.83791208791209, + "grad_norm": 14.979874610900879, + "learning_rate": 8.081043956043955e-06, + "loss": 0.1581, + "step": 30517 + }, + { + "epoch": 83.84065934065934, + "grad_norm": 16.495777130126953, + "learning_rate": 8.07967032967033e-06, + "loss": 0.3234, + "step": 30518 + }, + { + "epoch": 83.8434065934066, + "grad_norm": 3.7739758491516113, + "learning_rate": 8.078296703296704e-06, + "loss": 0.0395, + "step": 30519 + }, + { + "epoch": 83.84615384615384, + "grad_norm": 4.726994037628174, + "learning_rate": 8.076923076923077e-06, + "loss": 0.0511, + "step": 30520 + }, + { + "epoch": 83.8489010989011, + "grad_norm": 10.037801742553711, + "learning_rate": 8.07554945054945e-06, + "loss": 0.1221, + "step": 30521 + }, + { + "epoch": 83.85164835164835, + "grad_norm": 9.952190399169922, + "learning_rate": 8.074175824175824e-06, + "loss": 0.138, + "step": 30522 + }, + { + "epoch": 83.8543956043956, + "grad_norm": 2.6858537197113037, + "learning_rate": 8.072802197802199e-06, + "loss": 0.0323, + "step": 30523 + }, + { + "epoch": 83.85714285714286, + "grad_norm": 0.47746938467025757, + "learning_rate": 8.07142857142857e-06, + "loss": 0.0068, + "step": 30524 + }, + { + "epoch": 83.85989010989012, + "grad_norm": 1.5827420949935913, + "learning_rate": 8.070054945054946e-06, + "loss": 0.0205, + "step": 30525 + }, + { + "epoch": 83.86263736263736, + "grad_norm": 20.570615768432617, + "learning_rate": 8.068681318681319e-06, + "loss": 0.4746, + "step": 30526 + }, + { + "epoch": 83.86538461538461, + "grad_norm": 12.761488914489746, + "learning_rate": 8.067307692307693e-06, + "loss": 0.1885, + "step": 30527 + }, + { + "epoch": 83.86813186813187, + "grad_norm": 19.263105392456055, + "learning_rate": 8.065934065934066e-06, + "loss": 0.4797, + "step": 30528 + }, + { + "epoch": 83.87087912087912, + "grad_norm": 14.367401123046875, + "learning_rate": 8.06456043956044e-06, + "loss": 0.2166, + "step": 30529 + }, + { + "epoch": 83.87362637362638, + "grad_norm": 21.558561325073242, + "learning_rate": 8.063186813186814e-06, + "loss": 0.5265, + "step": 30530 + }, + { + "epoch": 83.87637362637362, + "grad_norm": 6.969512939453125, + "learning_rate": 8.061813186813188e-06, + "loss": 0.1478, + "step": 30531 + }, + { + "epoch": 83.87912087912088, + "grad_norm": 21.536827087402344, + "learning_rate": 8.06043956043956e-06, + "loss": 0.4082, + "step": 30532 + }, + { + "epoch": 83.88186813186813, + "grad_norm": 17.54279136657715, + "learning_rate": 8.059065934065935e-06, + "loss": 0.2811, + "step": 30533 + }, + { + "epoch": 83.88461538461539, + "grad_norm": 7.337064743041992, + "learning_rate": 8.057692307692308e-06, + "loss": 0.1237, + "step": 30534 + }, + { + "epoch": 83.88736263736264, + "grad_norm": 3.8262791633605957, + "learning_rate": 8.056318681318681e-06, + "loss": 0.0543, + "step": 30535 + }, + { + "epoch": 83.89010989010988, + "grad_norm": 20.383602142333984, + "learning_rate": 8.054945054945055e-06, + "loss": 0.3254, + "step": 30536 + }, + { + "epoch": 83.89285714285714, + "grad_norm": 20.382959365844727, + "learning_rate": 8.053571428571428e-06, + "loss": 0.5222, + "step": 30537 + }, + { + "epoch": 83.8956043956044, + "grad_norm": 4.7379069328308105, + "learning_rate": 8.052197802197803e-06, + "loss": 0.0486, + "step": 30538 + }, + { + "epoch": 83.89835164835165, + "grad_norm": 11.566662788391113, + "learning_rate": 8.050824175824175e-06, + "loss": 0.1678, + "step": 30539 + }, + { + "epoch": 83.9010989010989, + "grad_norm": 14.409049034118652, + "learning_rate": 8.04945054945055e-06, + "loss": 0.3794, + "step": 30540 + }, + { + "epoch": 83.90384615384616, + "grad_norm": 4.011183261871338, + "learning_rate": 8.048076923076924e-06, + "loss": 0.052, + "step": 30541 + }, + { + "epoch": 83.9065934065934, + "grad_norm": 9.099388122558594, + "learning_rate": 8.046703296703297e-06, + "loss": 0.1433, + "step": 30542 + }, + { + "epoch": 83.90934065934066, + "grad_norm": 17.351442337036133, + "learning_rate": 8.04532967032967e-06, + "loss": 0.3783, + "step": 30543 + }, + { + "epoch": 83.91208791208791, + "grad_norm": 9.54476547241211, + "learning_rate": 8.043956043956044e-06, + "loss": 0.1551, + "step": 30544 + }, + { + "epoch": 83.91483516483517, + "grad_norm": 5.445123672485352, + "learning_rate": 8.042582417582419e-06, + "loss": 0.0496, + "step": 30545 + }, + { + "epoch": 83.91758241758242, + "grad_norm": 5.5602264404296875, + "learning_rate": 8.041208791208792e-06, + "loss": 0.1268, + "step": 30546 + }, + { + "epoch": 83.92032967032966, + "grad_norm": 2.570251941680908, + "learning_rate": 8.039835164835164e-06, + "loss": 0.0313, + "step": 30547 + }, + { + "epoch": 83.92307692307692, + "grad_norm": 21.72480010986328, + "learning_rate": 8.03846153846154e-06, + "loss": 0.4002, + "step": 30548 + }, + { + "epoch": 83.92582417582418, + "grad_norm": 10.189997673034668, + "learning_rate": 8.037087912087913e-06, + "loss": 0.347, + "step": 30549 + }, + { + "epoch": 83.92857142857143, + "grad_norm": 14.542516708374023, + "learning_rate": 8.035714285714286e-06, + "loss": 0.3039, + "step": 30550 + }, + { + "epoch": 83.93131868131869, + "grad_norm": 12.034804344177246, + "learning_rate": 8.03434065934066e-06, + "loss": 0.1691, + "step": 30551 + }, + { + "epoch": 83.93406593406593, + "grad_norm": 9.015939712524414, + "learning_rate": 8.032967032967033e-06, + "loss": 0.0822, + "step": 30552 + }, + { + "epoch": 83.93681318681318, + "grad_norm": 10.501336097717285, + "learning_rate": 8.031593406593408e-06, + "loss": 0.118, + "step": 30553 + }, + { + "epoch": 83.93956043956044, + "grad_norm": 11.978273391723633, + "learning_rate": 8.03021978021978e-06, + "loss": 0.1708, + "step": 30554 + }, + { + "epoch": 83.9423076923077, + "grad_norm": 20.34105110168457, + "learning_rate": 8.028846153846155e-06, + "loss": 0.2469, + "step": 30555 + }, + { + "epoch": 83.94505494505495, + "grad_norm": 6.27665376663208, + "learning_rate": 8.027472527472528e-06, + "loss": 0.0762, + "step": 30556 + }, + { + "epoch": 83.9478021978022, + "grad_norm": 18.454816818237305, + "learning_rate": 8.026098901098902e-06, + "loss": 0.4615, + "step": 30557 + }, + { + "epoch": 83.95054945054945, + "grad_norm": 13.053455352783203, + "learning_rate": 8.024725274725275e-06, + "loss": 0.1159, + "step": 30558 + }, + { + "epoch": 83.9532967032967, + "grad_norm": 24.345945358276367, + "learning_rate": 8.023351648351648e-06, + "loss": 0.4545, + "step": 30559 + }, + { + "epoch": 83.95604395604396, + "grad_norm": 5.6994147300720215, + "learning_rate": 8.021978021978023e-06, + "loss": 0.0725, + "step": 30560 + }, + { + "epoch": 83.95879120879121, + "grad_norm": 9.668274879455566, + "learning_rate": 8.020604395604397e-06, + "loss": 0.193, + "step": 30561 + }, + { + "epoch": 83.96153846153847, + "grad_norm": 7.750007152557373, + "learning_rate": 8.019230769230769e-06, + "loss": 0.0883, + "step": 30562 + }, + { + "epoch": 83.96428571428571, + "grad_norm": 17.68947982788086, + "learning_rate": 8.017857142857144e-06, + "loss": 0.2972, + "step": 30563 + }, + { + "epoch": 83.96703296703296, + "grad_norm": 13.700601577758789, + "learning_rate": 8.016483516483517e-06, + "loss": 0.1284, + "step": 30564 + }, + { + "epoch": 83.96978021978022, + "grad_norm": 16.413997650146484, + "learning_rate": 8.01510989010989e-06, + "loss": 0.3253, + "step": 30565 + }, + { + "epoch": 83.97252747252747, + "grad_norm": 2.9651834964752197, + "learning_rate": 8.013736263736264e-06, + "loss": 0.0346, + "step": 30566 + }, + { + "epoch": 83.97527472527473, + "grad_norm": 8.189332962036133, + "learning_rate": 8.012362637362637e-06, + "loss": 0.1936, + "step": 30567 + }, + { + "epoch": 83.97802197802197, + "grad_norm": 4.740403652191162, + "learning_rate": 8.010989010989012e-06, + "loss": 0.0425, + "step": 30568 + }, + { + "epoch": 83.98076923076923, + "grad_norm": 15.826990127563477, + "learning_rate": 8.009615384615384e-06, + "loss": 0.2369, + "step": 30569 + }, + { + "epoch": 83.98351648351648, + "grad_norm": 2.4957525730133057, + "learning_rate": 8.00824175824176e-06, + "loss": 0.0346, + "step": 30570 + }, + { + "epoch": 83.98626373626374, + "grad_norm": 3.190398693084717, + "learning_rate": 8.006868131868133e-06, + "loss": 0.0207, + "step": 30571 + }, + { + "epoch": 83.98901098901099, + "grad_norm": 24.919029235839844, + "learning_rate": 8.005494505494506e-06, + "loss": 0.2111, + "step": 30572 + }, + { + "epoch": 83.99175824175825, + "grad_norm": 17.023988723754883, + "learning_rate": 8.00412087912088e-06, + "loss": 0.4383, + "step": 30573 + }, + { + "epoch": 83.99450549450549, + "grad_norm": 18.183841705322266, + "learning_rate": 8.002747252747253e-06, + "loss": 0.2176, + "step": 30574 + }, + { + "epoch": 83.99725274725274, + "grad_norm": 5.475189208984375, + "learning_rate": 8.001373626373626e-06, + "loss": 0.0487, + "step": 30575 + }, + { + "epoch": 84.0, + "grad_norm": 67.40560913085938, + "learning_rate": 8.000000000000001e-06, + "loss": 0.9885, + "step": 30576 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.7479338842975206, + "eval_f1": 0.7438985089848155, + "eval_f1_DuraRiadoRio_64x64": 0.7228915662650602, + "eval_f1_Mole_64x64": 0.8774193548387097, + "eval_f1_Quebrado_64x64": 0.8075471698113208, + "eval_f1_RiadoRio_64x64": 0.6042553191489362, + "eval_f1_RioFechado_64x64": 0.7073791348600509, + "eval_loss": 1.1666656732559204, + "eval_precision": 0.7939848067085011, + "eval_precision_DuraRiadoRio_64x64": 0.8571428571428571, + "eval_precision_Mole_64x64": 0.8192771084337349, + "eval_precision_Quebrado_64x64": 0.8842975206611571, + "eval_precision_RiadoRio_64x64": 0.8554216867469879, + "eval_precision_RioFechado_64x64": 0.5537848605577689, + "eval_recall": 0.7516957005189029, + "eval_recall_DuraRiadoRio_64x64": 0.625, + "eval_recall_Mole_64x64": 0.9444444444444444, + "eval_recall_Quebrado_64x64": 0.7430555555555556, + "eval_recall_RiadoRio_64x64": 0.46710526315789475, + "eval_recall_RioFechado_64x64": 0.9788732394366197, + "eval_runtime": 1.7417, + "eval_samples_per_second": 416.841, + "eval_steps_per_second": 26.411, + "step": 30576 + }, + { + "epoch": 84.00274725274726, + "grad_norm": 9.828186988830566, + "learning_rate": 7.998626373626373e-06, + "loss": 0.1754, + "step": 30577 + }, + { + "epoch": 84.00549450549451, + "grad_norm": 11.138172149658203, + "learning_rate": 7.997252747252748e-06, + "loss": 0.1742, + "step": 30578 + }, + { + "epoch": 84.00824175824175, + "grad_norm": 0.5241766571998596, + "learning_rate": 7.995879120879122e-06, + "loss": 0.0075, + "step": 30579 + }, + { + "epoch": 84.01098901098901, + "grad_norm": 1.2532365322113037, + "learning_rate": 7.994505494505495e-06, + "loss": 0.0141, + "step": 30580 + }, + { + "epoch": 84.01373626373626, + "grad_norm": 15.830872535705566, + "learning_rate": 7.993131868131868e-06, + "loss": 0.4211, + "step": 30581 + }, + { + "epoch": 84.01648351648352, + "grad_norm": 4.1185221672058105, + "learning_rate": 7.991758241758242e-06, + "loss": 0.0443, + "step": 30582 + }, + { + "epoch": 84.01923076923077, + "grad_norm": 17.142803192138672, + "learning_rate": 7.990384615384617e-06, + "loss": 0.178, + "step": 30583 + }, + { + "epoch": 84.02197802197803, + "grad_norm": 7.5708417892456055, + "learning_rate": 7.989010989010989e-06, + "loss": 0.1696, + "step": 30584 + }, + { + "epoch": 84.02472527472527, + "grad_norm": 1.3840031623840332, + "learning_rate": 7.987637362637362e-06, + "loss": 0.0154, + "step": 30585 + }, + { + "epoch": 84.02747252747253, + "grad_norm": 15.313626289367676, + "learning_rate": 7.986263736263737e-06, + "loss": 0.5058, + "step": 30586 + }, + { + "epoch": 84.03021978021978, + "grad_norm": 12.051358222961426, + "learning_rate": 7.98489010989011e-06, + "loss": 0.2022, + "step": 30587 + }, + { + "epoch": 84.03296703296704, + "grad_norm": 6.164010047912598, + "learning_rate": 7.983516483516484e-06, + "loss": 0.0642, + "step": 30588 + }, + { + "epoch": 84.03571428571429, + "grad_norm": 12.958213806152344, + "learning_rate": 7.982142857142857e-06, + "loss": 0.1651, + "step": 30589 + }, + { + "epoch": 84.03846153846153, + "grad_norm": 11.639537811279297, + "learning_rate": 7.98076923076923e-06, + "loss": 0.1549, + "step": 30590 + }, + { + "epoch": 84.04120879120879, + "grad_norm": 6.223114967346191, + "learning_rate": 7.979395604395606e-06, + "loss": 0.0909, + "step": 30591 + }, + { + "epoch": 84.04395604395604, + "grad_norm": 12.2889986038208, + "learning_rate": 7.978021978021978e-06, + "loss": 0.1933, + "step": 30592 + }, + { + "epoch": 84.0467032967033, + "grad_norm": 14.957803726196289, + "learning_rate": 7.976648351648353e-06, + "loss": 0.2548, + "step": 30593 + }, + { + "epoch": 84.04945054945055, + "grad_norm": 9.735612869262695, + "learning_rate": 7.975274725274726e-06, + "loss": 0.2093, + "step": 30594 + }, + { + "epoch": 84.0521978021978, + "grad_norm": 18.837249755859375, + "learning_rate": 7.9739010989011e-06, + "loss": 0.4012, + "step": 30595 + }, + { + "epoch": 84.05494505494505, + "grad_norm": 13.371662139892578, + "learning_rate": 7.972527472527473e-06, + "loss": 0.2392, + "step": 30596 + }, + { + "epoch": 84.0576923076923, + "grad_norm": 18.43938446044922, + "learning_rate": 7.971153846153846e-06, + "loss": 0.3603, + "step": 30597 + }, + { + "epoch": 84.06043956043956, + "grad_norm": 10.138603210449219, + "learning_rate": 7.969780219780221e-06, + "loss": 0.196, + "step": 30598 + }, + { + "epoch": 84.06318681318682, + "grad_norm": 16.797853469848633, + "learning_rate": 7.968406593406593e-06, + "loss": 0.2066, + "step": 30599 + }, + { + "epoch": 84.06593406593407, + "grad_norm": 5.5175065994262695, + "learning_rate": 7.967032967032966e-06, + "loss": 0.0652, + "step": 30600 + }, + { + "epoch": 84.06868131868131, + "grad_norm": 5.895052433013916, + "learning_rate": 7.965659340659342e-06, + "loss": 0.0834, + "step": 30601 + }, + { + "epoch": 84.07142857142857, + "grad_norm": 10.297722816467285, + "learning_rate": 7.964285714285715e-06, + "loss": 0.0937, + "step": 30602 + }, + { + "epoch": 84.07417582417582, + "grad_norm": 11.720650672912598, + "learning_rate": 7.962912087912088e-06, + "loss": 0.1292, + "step": 30603 + }, + { + "epoch": 84.07692307692308, + "grad_norm": 10.108895301818848, + "learning_rate": 7.961538461538462e-06, + "loss": 0.1687, + "step": 30604 + }, + { + "epoch": 84.07967032967034, + "grad_norm": 13.121746063232422, + "learning_rate": 7.960164835164835e-06, + "loss": 0.1811, + "step": 30605 + }, + { + "epoch": 84.08241758241758, + "grad_norm": 10.618415832519531, + "learning_rate": 7.95879120879121e-06, + "loss": 0.1395, + "step": 30606 + }, + { + "epoch": 84.08516483516483, + "grad_norm": 9.221817016601562, + "learning_rate": 7.957417582417582e-06, + "loss": 0.1023, + "step": 30607 + }, + { + "epoch": 84.08791208791209, + "grad_norm": 12.497551918029785, + "learning_rate": 7.956043956043957e-06, + "loss": 0.1706, + "step": 30608 + }, + { + "epoch": 84.09065934065934, + "grad_norm": 12.52110481262207, + "learning_rate": 7.95467032967033e-06, + "loss": 0.2199, + "step": 30609 + }, + { + "epoch": 84.0934065934066, + "grad_norm": 11.849417686462402, + "learning_rate": 7.953296703296704e-06, + "loss": 0.4292, + "step": 30610 + }, + { + "epoch": 84.09615384615384, + "grad_norm": 11.447319984436035, + "learning_rate": 7.951923076923077e-06, + "loss": 0.2751, + "step": 30611 + }, + { + "epoch": 84.0989010989011, + "grad_norm": 17.102336883544922, + "learning_rate": 7.95054945054945e-06, + "loss": 0.3816, + "step": 30612 + }, + { + "epoch": 84.10164835164835, + "grad_norm": 9.944889068603516, + "learning_rate": 7.949175824175826e-06, + "loss": 0.1544, + "step": 30613 + }, + { + "epoch": 84.1043956043956, + "grad_norm": 8.213456153869629, + "learning_rate": 7.947802197802198e-06, + "loss": 0.1391, + "step": 30614 + }, + { + "epoch": 84.10714285714286, + "grad_norm": 20.343351364135742, + "learning_rate": 7.946428571428571e-06, + "loss": 0.4213, + "step": 30615 + }, + { + "epoch": 84.10989010989012, + "grad_norm": 16.56984519958496, + "learning_rate": 7.945054945054946e-06, + "loss": 0.4239, + "step": 30616 + }, + { + "epoch": 84.11263736263736, + "grad_norm": 4.295269012451172, + "learning_rate": 7.94368131868132e-06, + "loss": 0.0576, + "step": 30617 + }, + { + "epoch": 84.11538461538461, + "grad_norm": 11.803218841552734, + "learning_rate": 7.942307692307693e-06, + "loss": 0.2124, + "step": 30618 + }, + { + "epoch": 84.11813186813187, + "grad_norm": 15.527894020080566, + "learning_rate": 7.940934065934066e-06, + "loss": 0.2822, + "step": 30619 + }, + { + "epoch": 84.12087912087912, + "grad_norm": 16.22474479675293, + "learning_rate": 7.93956043956044e-06, + "loss": 0.3356, + "step": 30620 + }, + { + "epoch": 84.12362637362638, + "grad_norm": 29.787715911865234, + "learning_rate": 7.938186813186813e-06, + "loss": 1.1575, + "step": 30621 + }, + { + "epoch": 84.12637362637362, + "grad_norm": 8.330312728881836, + "learning_rate": 7.936813186813187e-06, + "loss": 0.1226, + "step": 30622 + }, + { + "epoch": 84.12912087912088, + "grad_norm": 6.995142936706543, + "learning_rate": 7.935439560439562e-06, + "loss": 0.0483, + "step": 30623 + }, + { + "epoch": 84.13186813186813, + "grad_norm": 12.331052780151367, + "learning_rate": 7.934065934065935e-06, + "loss": 0.1566, + "step": 30624 + }, + { + "epoch": 84.13461538461539, + "grad_norm": 4.487680435180664, + "learning_rate": 7.932692307692308e-06, + "loss": 0.0694, + "step": 30625 + }, + { + "epoch": 84.13736263736264, + "grad_norm": 12.170831680297852, + "learning_rate": 7.931318681318682e-06, + "loss": 0.1546, + "step": 30626 + }, + { + "epoch": 84.14010989010988, + "grad_norm": 12.004631042480469, + "learning_rate": 7.929945054945055e-06, + "loss": 0.1253, + "step": 30627 + }, + { + "epoch": 84.14285714285714, + "grad_norm": 11.250330924987793, + "learning_rate": 7.928571428571429e-06, + "loss": 0.1522, + "step": 30628 + }, + { + "epoch": 84.1456043956044, + "grad_norm": 27.21653938293457, + "learning_rate": 7.927197802197802e-06, + "loss": 0.6059, + "step": 30629 + }, + { + "epoch": 84.14835164835165, + "grad_norm": 12.492403984069824, + "learning_rate": 7.925824175824175e-06, + "loss": 0.114, + "step": 30630 + }, + { + "epoch": 84.1510989010989, + "grad_norm": 13.738545417785645, + "learning_rate": 7.92445054945055e-06, + "loss": 0.1197, + "step": 30631 + }, + { + "epoch": 84.15384615384616, + "grad_norm": 4.771180629730225, + "learning_rate": 7.923076923076924e-06, + "loss": 0.0759, + "step": 30632 + }, + { + "epoch": 84.1565934065934, + "grad_norm": 20.68065643310547, + "learning_rate": 7.921703296703296e-06, + "loss": 0.3245, + "step": 30633 + }, + { + "epoch": 84.15934065934066, + "grad_norm": 11.099339485168457, + "learning_rate": 7.92032967032967e-06, + "loss": 0.3308, + "step": 30634 + }, + { + "epoch": 84.16208791208791, + "grad_norm": 6.910579681396484, + "learning_rate": 7.918956043956044e-06, + "loss": 0.1642, + "step": 30635 + }, + { + "epoch": 84.16483516483517, + "grad_norm": 15.997809410095215, + "learning_rate": 7.917582417582418e-06, + "loss": 0.107, + "step": 30636 + }, + { + "epoch": 84.16758241758242, + "grad_norm": 5.525823593139648, + "learning_rate": 7.916208791208791e-06, + "loss": 0.0805, + "step": 30637 + }, + { + "epoch": 84.17032967032966, + "grad_norm": 7.3818464279174805, + "learning_rate": 7.914835164835164e-06, + "loss": 0.1038, + "step": 30638 + }, + { + "epoch": 84.17307692307692, + "grad_norm": 11.74558162689209, + "learning_rate": 7.91346153846154e-06, + "loss": 0.1666, + "step": 30639 + }, + { + "epoch": 84.17582417582418, + "grad_norm": 11.678659439086914, + "learning_rate": 7.912087912087913e-06, + "loss": 0.2266, + "step": 30640 + }, + { + "epoch": 84.17857142857143, + "grad_norm": 6.202332973480225, + "learning_rate": 7.910714285714286e-06, + "loss": 0.119, + "step": 30641 + }, + { + "epoch": 84.18131868131869, + "grad_norm": 9.574509620666504, + "learning_rate": 7.90934065934066e-06, + "loss": 0.1623, + "step": 30642 + }, + { + "epoch": 84.18406593406593, + "grad_norm": 7.107246398925781, + "learning_rate": 7.907967032967033e-06, + "loss": 0.1222, + "step": 30643 + }, + { + "epoch": 84.18681318681318, + "grad_norm": 1.5065462589263916, + "learning_rate": 7.906593406593407e-06, + "loss": 0.02, + "step": 30644 + }, + { + "epoch": 84.18956043956044, + "grad_norm": 15.192276954650879, + "learning_rate": 7.90521978021978e-06, + "loss": 0.1849, + "step": 30645 + }, + { + "epoch": 84.1923076923077, + "grad_norm": 14.784573554992676, + "learning_rate": 7.903846153846155e-06, + "loss": 0.2782, + "step": 30646 + }, + { + "epoch": 84.19505494505495, + "grad_norm": 4.564891338348389, + "learning_rate": 7.902472527472528e-06, + "loss": 0.057, + "step": 30647 + }, + { + "epoch": 84.1978021978022, + "grad_norm": 10.63476848602295, + "learning_rate": 7.9010989010989e-06, + "loss": 0.2053, + "step": 30648 + }, + { + "epoch": 84.20054945054945, + "grad_norm": 10.175996780395508, + "learning_rate": 7.899725274725275e-06, + "loss": 0.1727, + "step": 30649 + }, + { + "epoch": 84.2032967032967, + "grad_norm": 5.481226444244385, + "learning_rate": 7.898351648351649e-06, + "loss": 0.0752, + "step": 30650 + }, + { + "epoch": 84.20604395604396, + "grad_norm": 14.503080368041992, + "learning_rate": 7.896978021978022e-06, + "loss": 0.2367, + "step": 30651 + }, + { + "epoch": 84.20879120879121, + "grad_norm": 9.139126777648926, + "learning_rate": 7.895604395604395e-06, + "loss": 0.1926, + "step": 30652 + }, + { + "epoch": 84.21153846153847, + "grad_norm": 9.128458023071289, + "learning_rate": 7.894230769230769e-06, + "loss": 0.0633, + "step": 30653 + }, + { + "epoch": 84.21428571428571, + "grad_norm": 11.05483627319336, + "learning_rate": 7.892857142857144e-06, + "loss": 0.2421, + "step": 30654 + }, + { + "epoch": 84.21703296703296, + "grad_norm": 14.225518226623535, + "learning_rate": 7.891483516483517e-06, + "loss": 0.3696, + "step": 30655 + }, + { + "epoch": 84.21978021978022, + "grad_norm": 6.534529209136963, + "learning_rate": 7.89010989010989e-06, + "loss": 0.1651, + "step": 30656 + }, + { + "epoch": 84.22252747252747, + "grad_norm": 9.69595718383789, + "learning_rate": 7.888736263736264e-06, + "loss": 0.15, + "step": 30657 + }, + { + "epoch": 84.22527472527473, + "grad_norm": 5.618826866149902, + "learning_rate": 7.887362637362638e-06, + "loss": 0.0886, + "step": 30658 + }, + { + "epoch": 84.22802197802197, + "grad_norm": 10.106867790222168, + "learning_rate": 7.885989010989011e-06, + "loss": 0.1775, + "step": 30659 + }, + { + "epoch": 84.23076923076923, + "grad_norm": 14.073084831237793, + "learning_rate": 7.884615384615384e-06, + "loss": 0.1648, + "step": 30660 + }, + { + "epoch": 84.23351648351648, + "grad_norm": 20.417579650878906, + "learning_rate": 7.88324175824176e-06, + "loss": 0.4805, + "step": 30661 + }, + { + "epoch": 84.23626373626374, + "grad_norm": 19.450471878051758, + "learning_rate": 7.881868131868133e-06, + "loss": 0.4277, + "step": 30662 + }, + { + "epoch": 84.23901098901099, + "grad_norm": 15.272272109985352, + "learning_rate": 7.880494505494505e-06, + "loss": 0.125, + "step": 30663 + }, + { + "epoch": 84.24175824175825, + "grad_norm": 6.68079137802124, + "learning_rate": 7.87912087912088e-06, + "loss": 0.1086, + "step": 30664 + }, + { + "epoch": 84.24450549450549, + "grad_norm": 14.615662574768066, + "learning_rate": 7.877747252747253e-06, + "loss": 0.2963, + "step": 30665 + }, + { + "epoch": 84.24725274725274, + "grad_norm": 5.682408809661865, + "learning_rate": 7.876373626373627e-06, + "loss": 0.0863, + "step": 30666 + }, + { + "epoch": 84.25, + "grad_norm": 8.070147514343262, + "learning_rate": 7.875e-06, + "loss": 0.1532, + "step": 30667 + }, + { + "epoch": 84.25274725274726, + "grad_norm": 14.026437759399414, + "learning_rate": 7.873626373626373e-06, + "loss": 0.2364, + "step": 30668 + }, + { + "epoch": 84.25549450549451, + "grad_norm": 10.563615798950195, + "learning_rate": 7.872252747252748e-06, + "loss": 0.1456, + "step": 30669 + }, + { + "epoch": 84.25824175824175, + "grad_norm": 2.517927646636963, + "learning_rate": 7.87087912087912e-06, + "loss": 0.0231, + "step": 30670 + }, + { + "epoch": 84.26098901098901, + "grad_norm": 7.368809700012207, + "learning_rate": 7.869505494505495e-06, + "loss": 0.1856, + "step": 30671 + }, + { + "epoch": 84.26373626373626, + "grad_norm": 2.2393624782562256, + "learning_rate": 7.868131868131869e-06, + "loss": 0.0227, + "step": 30672 + }, + { + "epoch": 84.26648351648352, + "grad_norm": 13.454924583435059, + "learning_rate": 7.866758241758242e-06, + "loss": 0.2592, + "step": 30673 + }, + { + "epoch": 84.26923076923077, + "grad_norm": 14.19200611114502, + "learning_rate": 7.865384615384616e-06, + "loss": 0.2103, + "step": 30674 + }, + { + "epoch": 84.27197802197803, + "grad_norm": 14.992414474487305, + "learning_rate": 7.864010989010989e-06, + "loss": 0.2702, + "step": 30675 + }, + { + "epoch": 84.27472527472527, + "grad_norm": 22.62413215637207, + "learning_rate": 7.862637362637364e-06, + "loss": 0.4862, + "step": 30676 + }, + { + "epoch": 84.27747252747253, + "grad_norm": 10.751245498657227, + "learning_rate": 7.861263736263737e-06, + "loss": 0.1295, + "step": 30677 + }, + { + "epoch": 84.28021978021978, + "grad_norm": 1.5157147645950317, + "learning_rate": 7.85989010989011e-06, + "loss": 0.0239, + "step": 30678 + }, + { + "epoch": 84.28296703296704, + "grad_norm": 16.613540649414062, + "learning_rate": 7.858516483516484e-06, + "loss": 0.2767, + "step": 30679 + }, + { + "epoch": 84.28571428571429, + "grad_norm": 18.643491744995117, + "learning_rate": 7.857142857142858e-06, + "loss": 0.1113, + "step": 30680 + }, + { + "epoch": 84.28846153846153, + "grad_norm": 16.672636032104492, + "learning_rate": 7.855769230769231e-06, + "loss": 0.1951, + "step": 30681 + }, + { + "epoch": 84.29120879120879, + "grad_norm": 20.05303955078125, + "learning_rate": 7.854395604395604e-06, + "loss": 0.3856, + "step": 30682 + }, + { + "epoch": 84.29395604395604, + "grad_norm": 22.93172264099121, + "learning_rate": 7.853021978021978e-06, + "loss": 0.6211, + "step": 30683 + }, + { + "epoch": 84.2967032967033, + "grad_norm": 10.199562072753906, + "learning_rate": 7.851648351648353e-06, + "loss": 0.1729, + "step": 30684 + }, + { + "epoch": 84.29945054945055, + "grad_norm": 8.883909225463867, + "learning_rate": 7.850274725274725e-06, + "loss": 0.1015, + "step": 30685 + }, + { + "epoch": 84.3021978021978, + "grad_norm": 11.391935348510742, + "learning_rate": 7.848901098901098e-06, + "loss": 0.2224, + "step": 30686 + }, + { + "epoch": 84.30494505494505, + "grad_norm": 8.000443458557129, + "learning_rate": 7.847527472527473e-06, + "loss": 0.187, + "step": 30687 + }, + { + "epoch": 84.3076923076923, + "grad_norm": 6.759692192077637, + "learning_rate": 7.846153846153847e-06, + "loss": 0.0671, + "step": 30688 + }, + { + "epoch": 84.31043956043956, + "grad_norm": 2.4756698608398438, + "learning_rate": 7.84478021978022e-06, + "loss": 0.036, + "step": 30689 + }, + { + "epoch": 84.31318681318682, + "grad_norm": 19.360774993896484, + "learning_rate": 7.843406593406593e-06, + "loss": 0.2326, + "step": 30690 + }, + { + "epoch": 84.31593406593407, + "grad_norm": 8.020294189453125, + "learning_rate": 7.842032967032967e-06, + "loss": 0.1751, + "step": 30691 + }, + { + "epoch": 84.31868131868131, + "grad_norm": 16.284513473510742, + "learning_rate": 7.840659340659342e-06, + "loss": 0.3741, + "step": 30692 + }, + { + "epoch": 84.32142857142857, + "grad_norm": 10.60474681854248, + "learning_rate": 7.839285714285714e-06, + "loss": 0.233, + "step": 30693 + }, + { + "epoch": 84.32417582417582, + "grad_norm": 12.082298278808594, + "learning_rate": 7.837912087912089e-06, + "loss": 0.1856, + "step": 30694 + }, + { + "epoch": 84.32692307692308, + "grad_norm": 7.9698896408081055, + "learning_rate": 7.836538461538462e-06, + "loss": 0.1329, + "step": 30695 + }, + { + "epoch": 84.32967032967034, + "grad_norm": 22.94881820678711, + "learning_rate": 7.835164835164836e-06, + "loss": 0.6312, + "step": 30696 + }, + { + "epoch": 84.33241758241758, + "grad_norm": 5.615443229675293, + "learning_rate": 7.833791208791209e-06, + "loss": 0.0671, + "step": 30697 + }, + { + "epoch": 84.33516483516483, + "grad_norm": 22.553010940551758, + "learning_rate": 7.832417582417582e-06, + "loss": 0.5359, + "step": 30698 + }, + { + "epoch": 84.33791208791209, + "grad_norm": 8.135905265808105, + "learning_rate": 7.831043956043957e-06, + "loss": 0.1416, + "step": 30699 + }, + { + "epoch": 84.34065934065934, + "grad_norm": 14.738398551940918, + "learning_rate": 7.82967032967033e-06, + "loss": 0.1919, + "step": 30700 + }, + { + "epoch": 84.3434065934066, + "grad_norm": 14.401601791381836, + "learning_rate": 7.828296703296703e-06, + "loss": 0.1882, + "step": 30701 + }, + { + "epoch": 84.34615384615384, + "grad_norm": 11.419172286987305, + "learning_rate": 7.826923076923078e-06, + "loss": 0.2522, + "step": 30702 + }, + { + "epoch": 84.3489010989011, + "grad_norm": 19.0529727935791, + "learning_rate": 7.825549450549451e-06, + "loss": 0.5951, + "step": 30703 + }, + { + "epoch": 84.35164835164835, + "grad_norm": 8.528290748596191, + "learning_rate": 7.824175824175825e-06, + "loss": 0.1844, + "step": 30704 + }, + { + "epoch": 84.3543956043956, + "grad_norm": 7.637157917022705, + "learning_rate": 7.822802197802198e-06, + "loss": 0.084, + "step": 30705 + }, + { + "epoch": 84.35714285714286, + "grad_norm": 12.57361888885498, + "learning_rate": 7.821428571428571e-06, + "loss": 0.3756, + "step": 30706 + }, + { + "epoch": 84.35989010989012, + "grad_norm": 10.614816665649414, + "learning_rate": 7.820054945054946e-06, + "loss": 0.0816, + "step": 30707 + }, + { + "epoch": 84.36263736263736, + "grad_norm": 18.098508834838867, + "learning_rate": 7.818681318681318e-06, + "loss": 0.4204, + "step": 30708 + }, + { + "epoch": 84.36538461538461, + "grad_norm": 11.618606567382812, + "learning_rate": 7.817307692307693e-06, + "loss": 0.1507, + "step": 30709 + }, + { + "epoch": 84.36813186813187, + "grad_norm": 12.289417266845703, + "learning_rate": 7.815934065934067e-06, + "loss": 0.1451, + "step": 30710 + }, + { + "epoch": 84.37087912087912, + "grad_norm": 18.742576599121094, + "learning_rate": 7.81456043956044e-06, + "loss": 0.5846, + "step": 30711 + }, + { + "epoch": 84.37362637362638, + "grad_norm": 14.1908540725708, + "learning_rate": 7.813186813186813e-06, + "loss": 0.1778, + "step": 30712 + }, + { + "epoch": 84.37637362637362, + "grad_norm": 16.42441177368164, + "learning_rate": 7.811813186813187e-06, + "loss": 0.1902, + "step": 30713 + }, + { + "epoch": 84.37912087912088, + "grad_norm": 5.888668537139893, + "learning_rate": 7.810439560439562e-06, + "loss": 0.0752, + "step": 30714 + }, + { + "epoch": 84.38186813186813, + "grad_norm": 6.09412956237793, + "learning_rate": 7.809065934065934e-06, + "loss": 0.1326, + "step": 30715 + }, + { + "epoch": 84.38461538461539, + "grad_norm": 11.552068710327148, + "learning_rate": 7.807692307692307e-06, + "loss": 0.1688, + "step": 30716 + }, + { + "epoch": 84.38736263736264, + "grad_norm": 7.421144962310791, + "learning_rate": 7.806318681318682e-06, + "loss": 0.1703, + "step": 30717 + }, + { + "epoch": 84.39010989010988, + "grad_norm": 3.2650535106658936, + "learning_rate": 7.804945054945056e-06, + "loss": 0.0231, + "step": 30718 + }, + { + "epoch": 84.39285714285714, + "grad_norm": 20.214406967163086, + "learning_rate": 7.803571428571429e-06, + "loss": 0.3527, + "step": 30719 + }, + { + "epoch": 84.3956043956044, + "grad_norm": 12.007543563842773, + "learning_rate": 7.802197802197802e-06, + "loss": 0.1503, + "step": 30720 + }, + { + "epoch": 84.39835164835165, + "grad_norm": 17.2860050201416, + "learning_rate": 7.800824175824176e-06, + "loss": 0.4804, + "step": 30721 + }, + { + "epoch": 84.4010989010989, + "grad_norm": 12.927129745483398, + "learning_rate": 7.799450549450551e-06, + "loss": 0.2316, + "step": 30722 + }, + { + "epoch": 84.40384615384616, + "grad_norm": 7.19585657119751, + "learning_rate": 7.798076923076923e-06, + "loss": 0.1003, + "step": 30723 + }, + { + "epoch": 84.4065934065934, + "grad_norm": 17.66222381591797, + "learning_rate": 7.796703296703298e-06, + "loss": 0.1897, + "step": 30724 + }, + { + "epoch": 84.40934065934066, + "grad_norm": 17.333003997802734, + "learning_rate": 7.795329670329671e-06, + "loss": 0.2303, + "step": 30725 + }, + { + "epoch": 84.41208791208791, + "grad_norm": 8.962200164794922, + "learning_rate": 7.793956043956045e-06, + "loss": 0.0848, + "step": 30726 + }, + { + "epoch": 84.41483516483517, + "grad_norm": 13.412325859069824, + "learning_rate": 7.792582417582418e-06, + "loss": 0.2816, + "step": 30727 + }, + { + "epoch": 84.41758241758242, + "grad_norm": 10.717061042785645, + "learning_rate": 7.791208791208791e-06, + "loss": 0.0936, + "step": 30728 + }, + { + "epoch": 84.42032967032966, + "grad_norm": 19.658288955688477, + "learning_rate": 7.789835164835166e-06, + "loss": 0.3681, + "step": 30729 + }, + { + "epoch": 84.42307692307692, + "grad_norm": 16.65410614013672, + "learning_rate": 7.788461538461538e-06, + "loss": 0.3926, + "step": 30730 + }, + { + "epoch": 84.42582417582418, + "grad_norm": 10.303770065307617, + "learning_rate": 7.787087912087912e-06, + "loss": 0.1683, + "step": 30731 + }, + { + "epoch": 84.42857142857143, + "grad_norm": 12.48897933959961, + "learning_rate": 7.785714285714287e-06, + "loss": 0.2319, + "step": 30732 + }, + { + "epoch": 84.43131868131869, + "grad_norm": 8.397119522094727, + "learning_rate": 7.78434065934066e-06, + "loss": 0.1342, + "step": 30733 + }, + { + "epoch": 84.43406593406593, + "grad_norm": 13.465130805969238, + "learning_rate": 7.782967032967033e-06, + "loss": 0.1537, + "step": 30734 + }, + { + "epoch": 84.43681318681318, + "grad_norm": 22.095191955566406, + "learning_rate": 7.781593406593407e-06, + "loss": 0.3109, + "step": 30735 + }, + { + "epoch": 84.43956043956044, + "grad_norm": 11.798057556152344, + "learning_rate": 7.78021978021978e-06, + "loss": 0.212, + "step": 30736 + }, + { + "epoch": 84.4423076923077, + "grad_norm": 3.385671377182007, + "learning_rate": 7.778846153846155e-06, + "loss": 0.0556, + "step": 30737 + }, + { + "epoch": 84.44505494505495, + "grad_norm": 10.32751750946045, + "learning_rate": 7.777472527472527e-06, + "loss": 0.1484, + "step": 30738 + }, + { + "epoch": 84.4478021978022, + "grad_norm": 14.777750968933105, + "learning_rate": 7.776098901098902e-06, + "loss": 0.2101, + "step": 30739 + }, + { + "epoch": 84.45054945054945, + "grad_norm": 3.7556097507476807, + "learning_rate": 7.774725274725276e-06, + "loss": 0.0649, + "step": 30740 + }, + { + "epoch": 84.4532967032967, + "grad_norm": 14.855480194091797, + "learning_rate": 7.773351648351649e-06, + "loss": 0.3248, + "step": 30741 + }, + { + "epoch": 84.45604395604396, + "grad_norm": 5.704315185546875, + "learning_rate": 7.771978021978022e-06, + "loss": 0.1151, + "step": 30742 + }, + { + "epoch": 84.45879120879121, + "grad_norm": 1.9402952194213867, + "learning_rate": 7.770604395604396e-06, + "loss": 0.0221, + "step": 30743 + }, + { + "epoch": 84.46153846153847, + "grad_norm": 11.367935180664062, + "learning_rate": 7.76923076923077e-06, + "loss": 0.2141, + "step": 30744 + }, + { + "epoch": 84.46428571428571, + "grad_norm": 9.495636940002441, + "learning_rate": 7.767857142857143e-06, + "loss": 0.0828, + "step": 30745 + }, + { + "epoch": 84.46703296703296, + "grad_norm": 11.716848373413086, + "learning_rate": 7.766483516483516e-06, + "loss": 0.1682, + "step": 30746 + }, + { + "epoch": 84.46978021978022, + "grad_norm": 26.697399139404297, + "learning_rate": 7.765109890109891e-06, + "loss": 0.8213, + "step": 30747 + }, + { + "epoch": 84.47252747252747, + "grad_norm": 12.839447021484375, + "learning_rate": 7.763736263736265e-06, + "loss": 0.3513, + "step": 30748 + }, + { + "epoch": 84.47527472527473, + "grad_norm": 1.7997628450393677, + "learning_rate": 7.762362637362636e-06, + "loss": 0.0287, + "step": 30749 + }, + { + "epoch": 84.47802197802197, + "grad_norm": 10.050826072692871, + "learning_rate": 7.760989010989011e-06, + "loss": 0.1156, + "step": 30750 + }, + { + "epoch": 84.48076923076923, + "grad_norm": 15.186286926269531, + "learning_rate": 7.759615384615385e-06, + "loss": 0.2573, + "step": 30751 + }, + { + "epoch": 84.48351648351648, + "grad_norm": 7.202692031860352, + "learning_rate": 7.75824175824176e-06, + "loss": 0.0818, + "step": 30752 + }, + { + "epoch": 84.48626373626374, + "grad_norm": 13.526716232299805, + "learning_rate": 7.756868131868132e-06, + "loss": 0.1983, + "step": 30753 + }, + { + "epoch": 84.48901098901099, + "grad_norm": 10.145196914672852, + "learning_rate": 7.755494505494505e-06, + "loss": 0.1321, + "step": 30754 + }, + { + "epoch": 84.49175824175825, + "grad_norm": 8.450714111328125, + "learning_rate": 7.75412087912088e-06, + "loss": 0.1347, + "step": 30755 + }, + { + "epoch": 84.49450549450549, + "grad_norm": 3.63207745552063, + "learning_rate": 7.752747252747254e-06, + "loss": 0.0633, + "step": 30756 + }, + { + "epoch": 84.49725274725274, + "grad_norm": 13.943483352661133, + "learning_rate": 7.751373626373627e-06, + "loss": 0.2764, + "step": 30757 + }, + { + "epoch": 84.5, + "grad_norm": 2.4518065452575684, + "learning_rate": 7.75e-06, + "loss": 0.0258, + "step": 30758 + }, + { + "epoch": 84.50274725274726, + "grad_norm": 14.406180381774902, + "learning_rate": 7.748626373626374e-06, + "loss": 0.1552, + "step": 30759 + }, + { + "epoch": 84.50549450549451, + "grad_norm": 11.777557373046875, + "learning_rate": 7.747252747252747e-06, + "loss": 0.2095, + "step": 30760 + }, + { + "epoch": 84.50824175824175, + "grad_norm": 11.261466979980469, + "learning_rate": 7.74587912087912e-06, + "loss": 0.1972, + "step": 30761 + }, + { + "epoch": 84.51098901098901, + "grad_norm": 18.45989418029785, + "learning_rate": 7.744505494505496e-06, + "loss": 0.4691, + "step": 30762 + }, + { + "epoch": 84.51373626373626, + "grad_norm": 8.742199897766113, + "learning_rate": 7.743131868131869e-06, + "loss": 0.2565, + "step": 30763 + }, + { + "epoch": 84.51648351648352, + "grad_norm": 15.644341468811035, + "learning_rate": 7.74175824175824e-06, + "loss": 0.1764, + "step": 30764 + }, + { + "epoch": 84.51923076923077, + "grad_norm": 14.13792896270752, + "learning_rate": 7.740384615384616e-06, + "loss": 0.1589, + "step": 30765 + }, + { + "epoch": 84.52197802197803, + "grad_norm": 12.3716459274292, + "learning_rate": 7.73901098901099e-06, + "loss": 0.1582, + "step": 30766 + }, + { + "epoch": 84.52472527472527, + "grad_norm": 13.006449699401855, + "learning_rate": 7.737637362637364e-06, + "loss": 0.2075, + "step": 30767 + }, + { + "epoch": 84.52747252747253, + "grad_norm": 23.459692001342773, + "learning_rate": 7.736263736263736e-06, + "loss": 0.3068, + "step": 30768 + }, + { + "epoch": 84.53021978021978, + "grad_norm": 13.327310562133789, + "learning_rate": 7.73489010989011e-06, + "loss": 0.2191, + "step": 30769 + }, + { + "epoch": 84.53296703296704, + "grad_norm": 2.601837635040283, + "learning_rate": 7.733516483516485e-06, + "loss": 0.0329, + "step": 30770 + }, + { + "epoch": 84.53571428571429, + "grad_norm": 8.505660057067871, + "learning_rate": 7.732142857142858e-06, + "loss": 0.1291, + "step": 30771 + }, + { + "epoch": 84.53846153846153, + "grad_norm": 16.648082733154297, + "learning_rate": 7.730769230769231e-06, + "loss": 0.4377, + "step": 30772 + }, + { + "epoch": 84.54120879120879, + "grad_norm": 16.383670806884766, + "learning_rate": 7.729395604395605e-06, + "loss": 0.1711, + "step": 30773 + }, + { + "epoch": 84.54395604395604, + "grad_norm": 13.190779685974121, + "learning_rate": 7.728021978021978e-06, + "loss": 0.37, + "step": 30774 + }, + { + "epoch": 84.5467032967033, + "grad_norm": 3.8328986167907715, + "learning_rate": 7.726648351648352e-06, + "loss": 0.0502, + "step": 30775 + }, + { + "epoch": 84.54945054945055, + "grad_norm": 18.751407623291016, + "learning_rate": 7.725274725274725e-06, + "loss": 0.3208, + "step": 30776 + }, + { + "epoch": 84.5521978021978, + "grad_norm": 11.326162338256836, + "learning_rate": 7.7239010989011e-06, + "loss": 0.1614, + "step": 30777 + }, + { + "epoch": 84.55494505494505, + "grad_norm": 14.089339256286621, + "learning_rate": 7.722527472527474e-06, + "loss": 0.2647, + "step": 30778 + }, + { + "epoch": 84.5576923076923, + "grad_norm": 17.089698791503906, + "learning_rate": 7.721153846153845e-06, + "loss": 0.368, + "step": 30779 + }, + { + "epoch": 84.56043956043956, + "grad_norm": 9.875986099243164, + "learning_rate": 7.71978021978022e-06, + "loss": 0.2506, + "step": 30780 + }, + { + "epoch": 84.56318681318682, + "grad_norm": 21.194910049438477, + "learning_rate": 7.718406593406594e-06, + "loss": 0.2635, + "step": 30781 + }, + { + "epoch": 84.56593406593407, + "grad_norm": 10.994802474975586, + "learning_rate": 7.717032967032967e-06, + "loss": 0.1566, + "step": 30782 + }, + { + "epoch": 84.56868131868131, + "grad_norm": 6.276133060455322, + "learning_rate": 7.71565934065934e-06, + "loss": 0.1048, + "step": 30783 + }, + { + "epoch": 84.57142857142857, + "grad_norm": 7.567822456359863, + "learning_rate": 7.714285714285714e-06, + "loss": 0.1334, + "step": 30784 + }, + { + "epoch": 84.57417582417582, + "grad_norm": 26.906877517700195, + "learning_rate": 7.712912087912089e-06, + "loss": 0.5598, + "step": 30785 + }, + { + "epoch": 84.57692307692308, + "grad_norm": 10.921150207519531, + "learning_rate": 7.711538461538463e-06, + "loss": 0.3065, + "step": 30786 + }, + { + "epoch": 84.57967032967034, + "grad_norm": 11.550838470458984, + "learning_rate": 7.710164835164836e-06, + "loss": 0.3154, + "step": 30787 + }, + { + "epoch": 84.58241758241758, + "grad_norm": 8.95567512512207, + "learning_rate": 7.70879120879121e-06, + "loss": 0.1268, + "step": 30788 + }, + { + "epoch": 84.58516483516483, + "grad_norm": 7.82436990737915, + "learning_rate": 7.707417582417583e-06, + "loss": 0.0774, + "step": 30789 + }, + { + "epoch": 84.58791208791209, + "grad_norm": 5.337648868560791, + "learning_rate": 7.706043956043956e-06, + "loss": 0.1247, + "step": 30790 + }, + { + "epoch": 84.59065934065934, + "grad_norm": 11.800040245056152, + "learning_rate": 7.70467032967033e-06, + "loss": 0.3131, + "step": 30791 + }, + { + "epoch": 84.5934065934066, + "grad_norm": 5.083837509155273, + "learning_rate": 7.703296703296705e-06, + "loss": 0.0361, + "step": 30792 + }, + { + "epoch": 84.59615384615384, + "grad_norm": 8.670869827270508, + "learning_rate": 7.701923076923078e-06, + "loss": 0.1836, + "step": 30793 + }, + { + "epoch": 84.5989010989011, + "grad_norm": 11.15681266784668, + "learning_rate": 7.70054945054945e-06, + "loss": 0.2177, + "step": 30794 + }, + { + "epoch": 84.60164835164835, + "grad_norm": 10.088628768920898, + "learning_rate": 7.699175824175825e-06, + "loss": 0.152, + "step": 30795 + }, + { + "epoch": 84.6043956043956, + "grad_norm": 12.48965835571289, + "learning_rate": 7.697802197802198e-06, + "loss": 0.1747, + "step": 30796 + }, + { + "epoch": 84.60714285714286, + "grad_norm": 3.971881866455078, + "learning_rate": 7.696428571428572e-06, + "loss": 0.0466, + "step": 30797 + }, + { + "epoch": 84.60989010989012, + "grad_norm": 12.5021333694458, + "learning_rate": 7.695054945054945e-06, + "loss": 0.1517, + "step": 30798 + }, + { + "epoch": 84.61263736263736, + "grad_norm": 23.48150062561035, + "learning_rate": 7.693681318681318e-06, + "loss": 0.2142, + "step": 30799 + }, + { + "epoch": 84.61538461538461, + "grad_norm": 13.76872730255127, + "learning_rate": 7.692307692307694e-06, + "loss": 0.2623, + "step": 30800 + }, + { + "epoch": 84.61813186813187, + "grad_norm": 23.15199851989746, + "learning_rate": 7.690934065934067e-06, + "loss": 0.6026, + "step": 30801 + }, + { + "epoch": 84.62087912087912, + "grad_norm": 10.555676460266113, + "learning_rate": 7.689560439560439e-06, + "loss": 0.2158, + "step": 30802 + }, + { + "epoch": 84.62362637362638, + "grad_norm": 7.014981746673584, + "learning_rate": 7.688186813186814e-06, + "loss": 0.0804, + "step": 30803 + }, + { + "epoch": 84.62637362637362, + "grad_norm": 18.295597076416016, + "learning_rate": 7.686813186813187e-06, + "loss": 0.3643, + "step": 30804 + }, + { + "epoch": 84.62912087912088, + "grad_norm": 13.035470962524414, + "learning_rate": 7.68543956043956e-06, + "loss": 0.1423, + "step": 30805 + }, + { + "epoch": 84.63186813186813, + "grad_norm": 4.114686012268066, + "learning_rate": 7.684065934065934e-06, + "loss": 0.1014, + "step": 30806 + }, + { + "epoch": 84.63461538461539, + "grad_norm": 13.655718803405762, + "learning_rate": 7.682692307692307e-06, + "loss": 0.3414, + "step": 30807 + }, + { + "epoch": 84.63736263736264, + "grad_norm": 19.077056884765625, + "learning_rate": 7.681318681318683e-06, + "loss": 0.5165, + "step": 30808 + }, + { + "epoch": 84.64010989010988, + "grad_norm": 20.517099380493164, + "learning_rate": 7.679945054945054e-06, + "loss": 0.4423, + "step": 30809 + }, + { + "epoch": 84.64285714285714, + "grad_norm": 5.044934272766113, + "learning_rate": 7.67857142857143e-06, + "loss": 0.0779, + "step": 30810 + }, + { + "epoch": 84.6456043956044, + "grad_norm": 18.19824981689453, + "learning_rate": 7.677197802197803e-06, + "loss": 0.1913, + "step": 30811 + }, + { + "epoch": 84.64835164835165, + "grad_norm": 9.521405220031738, + "learning_rate": 7.675824175824176e-06, + "loss": 0.1468, + "step": 30812 + }, + { + "epoch": 84.6510989010989, + "grad_norm": 17.2200984954834, + "learning_rate": 7.67445054945055e-06, + "loss": 0.2775, + "step": 30813 + }, + { + "epoch": 84.65384615384616, + "grad_norm": 23.732341766357422, + "learning_rate": 7.673076923076923e-06, + "loss": 0.5639, + "step": 30814 + }, + { + "epoch": 84.6565934065934, + "grad_norm": 4.407583236694336, + "learning_rate": 7.671703296703298e-06, + "loss": 0.0483, + "step": 30815 + }, + { + "epoch": 84.65934065934066, + "grad_norm": 15.38931655883789, + "learning_rate": 7.670329670329671e-06, + "loss": 0.1836, + "step": 30816 + }, + { + "epoch": 84.66208791208791, + "grad_norm": 1.6441571712493896, + "learning_rate": 7.668956043956043e-06, + "loss": 0.0232, + "step": 30817 + }, + { + "epoch": 84.66483516483517, + "grad_norm": 13.518695831298828, + "learning_rate": 7.667582417582418e-06, + "loss": 0.1855, + "step": 30818 + }, + { + "epoch": 84.66758241758242, + "grad_norm": 4.48642110824585, + "learning_rate": 7.666208791208792e-06, + "loss": 0.0872, + "step": 30819 + }, + { + "epoch": 84.67032967032966, + "grad_norm": 20.11379623413086, + "learning_rate": 7.664835164835165e-06, + "loss": 0.3346, + "step": 30820 + }, + { + "epoch": 84.67307692307692, + "grad_norm": 12.045082092285156, + "learning_rate": 7.663461538461539e-06, + "loss": 0.423, + "step": 30821 + }, + { + "epoch": 84.67582417582418, + "grad_norm": 6.272195816040039, + "learning_rate": 7.662087912087912e-06, + "loss": 0.1479, + "step": 30822 + }, + { + "epoch": 84.67857142857143, + "grad_norm": 9.422297477722168, + "learning_rate": 7.660714285714287e-06, + "loss": 0.1011, + "step": 30823 + }, + { + "epoch": 84.68131868131869, + "grad_norm": 21.406557083129883, + "learning_rate": 7.659340659340659e-06, + "loss": 0.6737, + "step": 30824 + }, + { + "epoch": 84.68406593406593, + "grad_norm": 14.89964485168457, + "learning_rate": 7.657967032967034e-06, + "loss": 0.1754, + "step": 30825 + }, + { + "epoch": 84.68681318681318, + "grad_norm": 7.7508673667907715, + "learning_rate": 7.656593406593407e-06, + "loss": 0.0976, + "step": 30826 + }, + { + "epoch": 84.68956043956044, + "grad_norm": 16.373958587646484, + "learning_rate": 7.65521978021978e-06, + "loss": 0.2913, + "step": 30827 + }, + { + "epoch": 84.6923076923077, + "grad_norm": 9.370509147644043, + "learning_rate": 7.653846153846154e-06, + "loss": 0.1549, + "step": 30828 + }, + { + "epoch": 84.69505494505495, + "grad_norm": 11.665502548217773, + "learning_rate": 7.652472527472527e-06, + "loss": 0.212, + "step": 30829 + }, + { + "epoch": 84.6978021978022, + "grad_norm": 11.865687370300293, + "learning_rate": 7.651098901098903e-06, + "loss": 0.1278, + "step": 30830 + }, + { + "epoch": 84.70054945054945, + "grad_norm": 7.794724941253662, + "learning_rate": 7.649725274725274e-06, + "loss": 0.1015, + "step": 30831 + }, + { + "epoch": 84.7032967032967, + "grad_norm": 7.12809944152832, + "learning_rate": 7.648351648351648e-06, + "loss": 0.1324, + "step": 30832 + }, + { + "epoch": 84.70604395604396, + "grad_norm": 8.284425735473633, + "learning_rate": 7.646978021978023e-06, + "loss": 0.0948, + "step": 30833 + }, + { + "epoch": 84.70879120879121, + "grad_norm": 13.742965698242188, + "learning_rate": 7.645604395604396e-06, + "loss": 0.1782, + "step": 30834 + }, + { + "epoch": 84.71153846153847, + "grad_norm": 2.7679083347320557, + "learning_rate": 7.64423076923077e-06, + "loss": 0.0314, + "step": 30835 + }, + { + "epoch": 84.71428571428571, + "grad_norm": 12.594100952148438, + "learning_rate": 7.642857142857143e-06, + "loss": 0.2713, + "step": 30836 + }, + { + "epoch": 84.71703296703296, + "grad_norm": 18.64402198791504, + "learning_rate": 7.641483516483516e-06, + "loss": 0.3663, + "step": 30837 + }, + { + "epoch": 84.71978021978022, + "grad_norm": 8.461752891540527, + "learning_rate": 7.640109890109892e-06, + "loss": 0.1228, + "step": 30838 + }, + { + "epoch": 84.72252747252747, + "grad_norm": 10.000123977661133, + "learning_rate": 7.638736263736263e-06, + "loss": 0.189, + "step": 30839 + }, + { + "epoch": 84.72527472527473, + "grad_norm": 26.62257957458496, + "learning_rate": 7.637362637362638e-06, + "loss": 0.8601, + "step": 30840 + }, + { + "epoch": 84.72802197802197, + "grad_norm": 20.2979679107666, + "learning_rate": 7.635989010989012e-06, + "loss": 0.3301, + "step": 30841 + }, + { + "epoch": 84.73076923076923, + "grad_norm": 3.6404523849487305, + "learning_rate": 7.634615384615385e-06, + "loss": 0.0458, + "step": 30842 + }, + { + "epoch": 84.73351648351648, + "grad_norm": 24.840560913085938, + "learning_rate": 7.633241758241759e-06, + "loss": 0.6928, + "step": 30843 + }, + { + "epoch": 84.73626373626374, + "grad_norm": 7.25695276260376, + "learning_rate": 7.631868131868132e-06, + "loss": 0.1345, + "step": 30844 + }, + { + "epoch": 84.73901098901099, + "grad_norm": 8.293636322021484, + "learning_rate": 7.630494505494507e-06, + "loss": 0.1335, + "step": 30845 + }, + { + "epoch": 84.74175824175825, + "grad_norm": 17.912593841552734, + "learning_rate": 7.62912087912088e-06, + "loss": 0.4094, + "step": 30846 + }, + { + "epoch": 84.74450549450549, + "grad_norm": 21.041868209838867, + "learning_rate": 7.627747252747253e-06, + "loss": 0.4, + "step": 30847 + }, + { + "epoch": 84.74725274725274, + "grad_norm": 5.961265563964844, + "learning_rate": 7.626373626373627e-06, + "loss": 0.0676, + "step": 30848 + }, + { + "epoch": 84.75, + "grad_norm": 15.415297508239746, + "learning_rate": 7.625e-06, + "loss": 0.4602, + "step": 30849 + }, + { + "epoch": 84.75274725274726, + "grad_norm": 12.172992706298828, + "learning_rate": 7.623626373626375e-06, + "loss": 0.2724, + "step": 30850 + }, + { + "epoch": 84.75549450549451, + "grad_norm": 4.928464412689209, + "learning_rate": 7.6222527472527475e-06, + "loss": 0.1135, + "step": 30851 + }, + { + "epoch": 84.75824175824175, + "grad_norm": 0.6764230132102966, + "learning_rate": 7.620879120879121e-06, + "loss": 0.0095, + "step": 30852 + }, + { + "epoch": 84.76098901098901, + "grad_norm": 7.521433353424072, + "learning_rate": 7.619505494505495e-06, + "loss": 0.0948, + "step": 30853 + }, + { + "epoch": 84.76373626373626, + "grad_norm": 9.2261381149292, + "learning_rate": 7.6181318681318686e-06, + "loss": 0.2823, + "step": 30854 + }, + { + "epoch": 84.76648351648352, + "grad_norm": 14.82968807220459, + "learning_rate": 7.616758241758241e-06, + "loss": 0.2256, + "step": 30855 + }, + { + "epoch": 84.76923076923077, + "grad_norm": 10.769336700439453, + "learning_rate": 7.615384615384616e-06, + "loss": 0.1286, + "step": 30856 + }, + { + "epoch": 84.77197802197803, + "grad_norm": 10.702552795410156, + "learning_rate": 7.614010989010989e-06, + "loss": 0.2094, + "step": 30857 + }, + { + "epoch": 84.77472527472527, + "grad_norm": 7.235081672668457, + "learning_rate": 7.612637362637363e-06, + "loss": 0.1651, + "step": 30858 + }, + { + "epoch": 84.77747252747253, + "grad_norm": 15.706636428833008, + "learning_rate": 7.6112637362637364e-06, + "loss": 0.1522, + "step": 30859 + }, + { + "epoch": 84.78021978021978, + "grad_norm": 4.4316229820251465, + "learning_rate": 7.60989010989011e-06, + "loss": 0.0541, + "step": 30860 + }, + { + "epoch": 84.78296703296704, + "grad_norm": 14.729170799255371, + "learning_rate": 7.608516483516484e-06, + "loss": 0.2342, + "step": 30861 + }, + { + "epoch": 84.78571428571429, + "grad_norm": 10.451584815979004, + "learning_rate": 7.6071428571428575e-06, + "loss": 0.1186, + "step": 30862 + }, + { + "epoch": 84.78846153846153, + "grad_norm": 6.587635517120361, + "learning_rate": 7.605769230769232e-06, + "loss": 0.0503, + "step": 30863 + }, + { + "epoch": 84.79120879120879, + "grad_norm": 16.62714385986328, + "learning_rate": 7.604395604395604e-06, + "loss": 0.2705, + "step": 30864 + }, + { + "epoch": 84.79395604395604, + "grad_norm": 9.555344581604004, + "learning_rate": 7.603021978021978e-06, + "loss": 0.0765, + "step": 30865 + }, + { + "epoch": 84.7967032967033, + "grad_norm": 5.869124889373779, + "learning_rate": 7.601648351648352e-06, + "loss": 0.0975, + "step": 30866 + }, + { + "epoch": 84.79945054945055, + "grad_norm": 4.293396472930908, + "learning_rate": 7.600274725274725e-06, + "loss": 0.0424, + "step": 30867 + }, + { + "epoch": 84.8021978021978, + "grad_norm": 12.729278564453125, + "learning_rate": 7.5989010989011e-06, + "loss": 0.1231, + "step": 30868 + }, + { + "epoch": 84.80494505494505, + "grad_norm": 12.202448844909668, + "learning_rate": 7.597527472527473e-06, + "loss": 0.1036, + "step": 30869 + }, + { + "epoch": 84.8076923076923, + "grad_norm": 13.689677238464355, + "learning_rate": 7.596153846153846e-06, + "loss": 0.1306, + "step": 30870 + }, + { + "epoch": 84.81043956043956, + "grad_norm": 5.94962215423584, + "learning_rate": 7.594780219780221e-06, + "loss": 0.115, + "step": 30871 + }, + { + "epoch": 84.81318681318682, + "grad_norm": 6.556886196136475, + "learning_rate": 7.593406593406593e-06, + "loss": 0.1436, + "step": 30872 + }, + { + "epoch": 84.81593406593407, + "grad_norm": 7.072794437408447, + "learning_rate": 7.5920329670329675e-06, + "loss": 0.0826, + "step": 30873 + }, + { + "epoch": 84.81868131868131, + "grad_norm": 20.706050872802734, + "learning_rate": 7.590659340659341e-06, + "loss": 0.6506, + "step": 30874 + }, + { + "epoch": 84.82142857142857, + "grad_norm": 10.125067710876465, + "learning_rate": 7.589285714285714e-06, + "loss": 0.16, + "step": 30875 + }, + { + "epoch": 84.82417582417582, + "grad_norm": 13.311976432800293, + "learning_rate": 7.587912087912089e-06, + "loss": 0.2829, + "step": 30876 + }, + { + "epoch": 84.82692307692308, + "grad_norm": 8.632969856262207, + "learning_rate": 7.586538461538462e-06, + "loss": 0.1167, + "step": 30877 + }, + { + "epoch": 84.82967032967034, + "grad_norm": 16.48076057434082, + "learning_rate": 7.585164835164836e-06, + "loss": 0.2992, + "step": 30878 + }, + { + "epoch": 84.83241758241758, + "grad_norm": 10.910340309143066, + "learning_rate": 7.583791208791209e-06, + "loss": 0.128, + "step": 30879 + }, + { + "epoch": 84.83516483516483, + "grad_norm": 9.502823829650879, + "learning_rate": 7.582417582417582e-06, + "loss": 0.1835, + "step": 30880 + }, + { + "epoch": 84.83791208791209, + "grad_norm": 16.4053955078125, + "learning_rate": 7.5810439560439565e-06, + "loss": 0.238, + "step": 30881 + }, + { + "epoch": 84.84065934065934, + "grad_norm": 15.39922046661377, + "learning_rate": 7.57967032967033e-06, + "loss": 0.2197, + "step": 30882 + }, + { + "epoch": 84.8434065934066, + "grad_norm": 18.33024024963379, + "learning_rate": 7.578296703296704e-06, + "loss": 0.3726, + "step": 30883 + }, + { + "epoch": 84.84615384615384, + "grad_norm": 7.7941975593566895, + "learning_rate": 7.5769230769230775e-06, + "loss": 0.125, + "step": 30884 + }, + { + "epoch": 84.8489010989011, + "grad_norm": 18.279802322387695, + "learning_rate": 7.57554945054945e-06, + "loss": 0.2408, + "step": 30885 + }, + { + "epoch": 84.85164835164835, + "grad_norm": 14.079917907714844, + "learning_rate": 7.574175824175825e-06, + "loss": 0.1903, + "step": 30886 + }, + { + "epoch": 84.8543956043956, + "grad_norm": 25.927087783813477, + "learning_rate": 7.572802197802198e-06, + "loss": 0.8061, + "step": 30887 + }, + { + "epoch": 84.85714285714286, + "grad_norm": 7.008087158203125, + "learning_rate": 7.571428571428572e-06, + "loss": 0.1417, + "step": 30888 + }, + { + "epoch": 84.85989010989012, + "grad_norm": 12.16731071472168, + "learning_rate": 7.5700549450549454e-06, + "loss": 0.1983, + "step": 30889 + }, + { + "epoch": 84.86263736263736, + "grad_norm": 5.18988561630249, + "learning_rate": 7.568681318681319e-06, + "loss": 0.0601, + "step": 30890 + }, + { + "epoch": 84.86538461538461, + "grad_norm": 19.723974227905273, + "learning_rate": 7.567307692307693e-06, + "loss": 0.2661, + "step": 30891 + }, + { + "epoch": 84.86813186813187, + "grad_norm": 3.221160650253296, + "learning_rate": 7.565934065934066e-06, + "loss": 0.0363, + "step": 30892 + }, + { + "epoch": 84.87087912087912, + "grad_norm": 11.036055564880371, + "learning_rate": 7.564560439560441e-06, + "loss": 0.2644, + "step": 30893 + }, + { + "epoch": 84.87362637362638, + "grad_norm": 5.971521377563477, + "learning_rate": 7.563186813186813e-06, + "loss": 0.0573, + "step": 30894 + }, + { + "epoch": 84.87637362637362, + "grad_norm": 10.805593490600586, + "learning_rate": 7.561813186813187e-06, + "loss": 0.1291, + "step": 30895 + }, + { + "epoch": 84.87912087912088, + "grad_norm": 7.275394916534424, + "learning_rate": 7.560439560439561e-06, + "loss": 0.0771, + "step": 30896 + }, + { + "epoch": 84.88186813186813, + "grad_norm": 14.760066032409668, + "learning_rate": 7.559065934065934e-06, + "loss": 0.1167, + "step": 30897 + }, + { + "epoch": 84.88461538461539, + "grad_norm": 21.0438175201416, + "learning_rate": 7.557692307692309e-06, + "loss": 0.5587, + "step": 30898 + }, + { + "epoch": 84.88736263736264, + "grad_norm": 7.111395835876465, + "learning_rate": 7.556318681318682e-06, + "loss": 0.1131, + "step": 30899 + }, + { + "epoch": 84.89010989010988, + "grad_norm": 16.897680282592773, + "learning_rate": 7.554945054945055e-06, + "loss": 0.2266, + "step": 30900 + }, + { + "epoch": 84.89285714285714, + "grad_norm": 5.378513813018799, + "learning_rate": 7.55357142857143e-06, + "loss": 0.0836, + "step": 30901 + }, + { + "epoch": 84.8956043956044, + "grad_norm": 18.51667022705078, + "learning_rate": 7.552197802197802e-06, + "loss": 0.3417, + "step": 30902 + }, + { + "epoch": 84.89835164835165, + "grad_norm": 25.187641143798828, + "learning_rate": 7.5508241758241765e-06, + "loss": 1.1408, + "step": 30903 + }, + { + "epoch": 84.9010989010989, + "grad_norm": 19.651626586914062, + "learning_rate": 7.54945054945055e-06, + "loss": 0.4554, + "step": 30904 + }, + { + "epoch": 84.90384615384616, + "grad_norm": 8.127303123474121, + "learning_rate": 7.548076923076923e-06, + "loss": 0.1166, + "step": 30905 + }, + { + "epoch": 84.9065934065934, + "grad_norm": 20.430931091308594, + "learning_rate": 7.546703296703298e-06, + "loss": 0.3355, + "step": 30906 + }, + { + "epoch": 84.90934065934066, + "grad_norm": 2.9524316787719727, + "learning_rate": 7.54532967032967e-06, + "loss": 0.0457, + "step": 30907 + }, + { + "epoch": 84.91208791208791, + "grad_norm": 17.736722946166992, + "learning_rate": 7.543956043956045e-06, + "loss": 0.3889, + "step": 30908 + }, + { + "epoch": 84.91483516483517, + "grad_norm": 13.056394577026367, + "learning_rate": 7.542582417582418e-06, + "loss": 0.1589, + "step": 30909 + }, + { + "epoch": 84.91758241758242, + "grad_norm": 3.6713171005249023, + "learning_rate": 7.541208791208791e-06, + "loss": 0.0586, + "step": 30910 + }, + { + "epoch": 84.92032967032966, + "grad_norm": 11.716235160827637, + "learning_rate": 7.5398351648351655e-06, + "loss": 0.4122, + "step": 30911 + }, + { + "epoch": 84.92307692307692, + "grad_norm": 4.641379356384277, + "learning_rate": 7.538461538461539e-06, + "loss": 0.07, + "step": 30912 + }, + { + "epoch": 84.92582417582418, + "grad_norm": 13.808326721191406, + "learning_rate": 7.5370879120879114e-06, + "loss": 0.1735, + "step": 30913 + }, + { + "epoch": 84.92857142857143, + "grad_norm": 8.693558692932129, + "learning_rate": 7.5357142857142865e-06, + "loss": 0.1026, + "step": 30914 + }, + { + "epoch": 84.93131868131869, + "grad_norm": 6.464432239532471, + "learning_rate": 7.534340659340659e-06, + "loss": 0.0796, + "step": 30915 + }, + { + "epoch": 84.93406593406593, + "grad_norm": 13.753905296325684, + "learning_rate": 7.532967032967033e-06, + "loss": 0.1643, + "step": 30916 + }, + { + "epoch": 84.93681318681318, + "grad_norm": 12.535261154174805, + "learning_rate": 7.531593406593407e-06, + "loss": 0.1858, + "step": 30917 + }, + { + "epoch": 84.93956043956044, + "grad_norm": 14.249481201171875, + "learning_rate": 7.53021978021978e-06, + "loss": 0.2166, + "step": 30918 + }, + { + "epoch": 84.9423076923077, + "grad_norm": 18.056848526000977, + "learning_rate": 7.528846153846154e-06, + "loss": 0.5213, + "step": 30919 + }, + { + "epoch": 84.94505494505495, + "grad_norm": 22.981834411621094, + "learning_rate": 7.527472527472528e-06, + "loss": 0.6837, + "step": 30920 + }, + { + "epoch": 84.9478021978022, + "grad_norm": 9.387578010559082, + "learning_rate": 7.526098901098902e-06, + "loss": 0.2442, + "step": 30921 + }, + { + "epoch": 84.95054945054945, + "grad_norm": 9.564162254333496, + "learning_rate": 7.524725274725275e-06, + "loss": 0.1477, + "step": 30922 + }, + { + "epoch": 84.9532967032967, + "grad_norm": 14.775440216064453, + "learning_rate": 7.523351648351648e-06, + "loss": 0.4381, + "step": 30923 + }, + { + "epoch": 84.95604395604396, + "grad_norm": 6.2975311279296875, + "learning_rate": 7.521978021978022e-06, + "loss": 0.0512, + "step": 30924 + }, + { + "epoch": 84.95879120879121, + "grad_norm": 14.211748123168945, + "learning_rate": 7.520604395604396e-06, + "loss": 0.2825, + "step": 30925 + }, + { + "epoch": 84.96153846153847, + "grad_norm": 18.098472595214844, + "learning_rate": 7.51923076923077e-06, + "loss": 0.1456, + "step": 30926 + }, + { + "epoch": 84.96428571428571, + "grad_norm": 12.107600212097168, + "learning_rate": 7.517857142857143e-06, + "loss": 0.1146, + "step": 30927 + }, + { + "epoch": 84.96703296703296, + "grad_norm": 8.461718559265137, + "learning_rate": 7.516483516483516e-06, + "loss": 0.1708, + "step": 30928 + }, + { + "epoch": 84.96978021978022, + "grad_norm": 12.009315490722656, + "learning_rate": 7.515109890109891e-06, + "loss": 0.1861, + "step": 30929 + }, + { + "epoch": 84.97252747252747, + "grad_norm": 16.346853256225586, + "learning_rate": 7.513736263736264e-06, + "loss": 0.2916, + "step": 30930 + }, + { + "epoch": 84.97527472527473, + "grad_norm": 3.5062203407287598, + "learning_rate": 7.512362637362638e-06, + "loss": 0.0326, + "step": 30931 + }, + { + "epoch": 84.97802197802197, + "grad_norm": 17.91937255859375, + "learning_rate": 7.510989010989011e-06, + "loss": 0.3667, + "step": 30932 + }, + { + "epoch": 84.98076923076923, + "grad_norm": 7.351161479949951, + "learning_rate": 7.509615384615385e-06, + "loss": 0.1156, + "step": 30933 + }, + { + "epoch": 84.98351648351648, + "grad_norm": 12.866798400878906, + "learning_rate": 7.508241758241759e-06, + "loss": 0.3672, + "step": 30934 + }, + { + "epoch": 84.98626373626374, + "grad_norm": 22.076068878173828, + "learning_rate": 7.506868131868132e-06, + "loss": 0.6885, + "step": 30935 + }, + { + "epoch": 84.98901098901099, + "grad_norm": 29.02618408203125, + "learning_rate": 7.5054945054945066e-06, + "loss": 0.2358, + "step": 30936 + }, + { + "epoch": 84.99175824175825, + "grad_norm": 11.91859245300293, + "learning_rate": 7.504120879120879e-06, + "loss": 0.2146, + "step": 30937 + }, + { + "epoch": 84.99450549450549, + "grad_norm": 27.61764907836914, + "learning_rate": 7.5027472527472525e-06, + "loss": 0.2653, + "step": 30938 + }, + { + "epoch": 84.99725274725274, + "grad_norm": 1.2638407945632935, + "learning_rate": 7.501373626373627e-06, + "loss": 0.0236, + "step": 30939 + }, + { + "epoch": 85.0, + "grad_norm": 12.209104537963867, + "learning_rate": 7.5e-06, + "loss": 0.078, + "step": 30940 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.8168044077134986, + "eval_f1": 0.8224092447377009, + "eval_f1_DuraRiadoRio_64x64": 0.8353413654618473, + "eval_f1_Mole_64x64": 0.8873720136518771, + "eval_f1_Quebrado_64x64": 0.8129032258064516, + "eval_f1_RiadoRio_64x64": 0.7215909090909091, + "eval_f1_RioFechado_64x64": 0.8548387096774194, + "eval_loss": 0.7272677421569824, + "eval_precision": 0.8513991113062028, + "eval_precision_DuraRiadoRio_64x64": 0.9904761904761905, + "eval_precision_Mole_64x64": 0.87248322147651, + "eval_precision_Quebrado_64x64": 0.7590361445783133, + "eval_precision_RiadoRio_64x64": 0.635, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.8164010378057821, + "eval_recall_DuraRiadoRio_64x64": 0.7222222222222222, + "eval_recall_Mole_64x64": 0.9027777777777778, + "eval_recall_Quebrado_64x64": 0.875, + "eval_recall_RiadoRio_64x64": 0.8355263157894737, + "eval_recall_RioFechado_64x64": 0.7464788732394366, + "eval_runtime": 1.7768, + "eval_samples_per_second": 408.609, + "eval_steps_per_second": 25.89, + "step": 30940 + }, + { + "epoch": 85.00274725274726, + "grad_norm": 24.107938766479492, + "learning_rate": 7.4986263736263744e-06, + "loss": 0.4978, + "step": 30941 + }, + { + "epoch": 85.00549450549451, + "grad_norm": 1.4269956350326538, + "learning_rate": 7.497252747252748e-06, + "loss": 0.0233, + "step": 30942 + }, + { + "epoch": 85.00824175824175, + "grad_norm": 14.801066398620605, + "learning_rate": 7.49587912087912e-06, + "loss": 0.1965, + "step": 30943 + }, + { + "epoch": 85.01098901098901, + "grad_norm": 7.37425422668457, + "learning_rate": 7.4945054945054955e-06, + "loss": 0.081, + "step": 30944 + }, + { + "epoch": 85.01373626373626, + "grad_norm": 7.587636947631836, + "learning_rate": 7.493131868131868e-06, + "loss": 0.1738, + "step": 30945 + }, + { + "epoch": 85.01648351648352, + "grad_norm": 12.649291038513184, + "learning_rate": 7.491758241758242e-06, + "loss": 0.2421, + "step": 30946 + }, + { + "epoch": 85.01923076923077, + "grad_norm": 18.974475860595703, + "learning_rate": 7.490384615384616e-06, + "loss": 0.3039, + "step": 30947 + }, + { + "epoch": 85.02197802197803, + "grad_norm": 12.328811645507812, + "learning_rate": 7.489010989010989e-06, + "loss": 0.1419, + "step": 30948 + }, + { + "epoch": 85.02472527472527, + "grad_norm": 12.912386894226074, + "learning_rate": 7.487637362637363e-06, + "loss": 0.2161, + "step": 30949 + }, + { + "epoch": 85.02747252747253, + "grad_norm": 0.9656123518943787, + "learning_rate": 7.486263736263737e-06, + "loss": 0.0119, + "step": 30950 + }, + { + "epoch": 85.03021978021978, + "grad_norm": 6.968303203582764, + "learning_rate": 7.484890109890111e-06, + "loss": 0.053, + "step": 30951 + }, + { + "epoch": 85.03296703296704, + "grad_norm": 5.777513027191162, + "learning_rate": 7.483516483516484e-06, + "loss": 0.1272, + "step": 30952 + }, + { + "epoch": 85.03571428571429, + "grad_norm": 14.072510719299316, + "learning_rate": 7.482142857142857e-06, + "loss": 0.3132, + "step": 30953 + }, + { + "epoch": 85.03846153846153, + "grad_norm": 27.517961502075195, + "learning_rate": 7.480769230769231e-06, + "loss": 0.797, + "step": 30954 + }, + { + "epoch": 85.04120879120879, + "grad_norm": 8.068087577819824, + "learning_rate": 7.479395604395605e-06, + "loss": 0.1684, + "step": 30955 + }, + { + "epoch": 85.04395604395604, + "grad_norm": 3.457249402999878, + "learning_rate": 7.478021978021979e-06, + "loss": 0.0507, + "step": 30956 + }, + { + "epoch": 85.0467032967033, + "grad_norm": 13.680705070495605, + "learning_rate": 7.476648351648352e-06, + "loss": 0.2608, + "step": 30957 + }, + { + "epoch": 85.04945054945055, + "grad_norm": 9.742050170898438, + "learning_rate": 7.475274725274725e-06, + "loss": 0.1749, + "step": 30958 + }, + { + "epoch": 85.0521978021978, + "grad_norm": 3.6951749324798584, + "learning_rate": 7.4739010989011e-06, + "loss": 0.0256, + "step": 30959 + }, + { + "epoch": 85.05494505494505, + "grad_norm": 21.28278923034668, + "learning_rate": 7.4725274725274726e-06, + "loss": 0.2952, + "step": 30960 + }, + { + "epoch": 85.0576923076923, + "grad_norm": 23.690744400024414, + "learning_rate": 7.471153846153847e-06, + "loss": 0.8022, + "step": 30961 + }, + { + "epoch": 85.06043956043956, + "grad_norm": 14.017969131469727, + "learning_rate": 7.46978021978022e-06, + "loss": 0.1427, + "step": 30962 + }, + { + "epoch": 85.06318681318682, + "grad_norm": 19.43339729309082, + "learning_rate": 7.468406593406594e-06, + "loss": 0.5176, + "step": 30963 + }, + { + "epoch": 85.06593406593407, + "grad_norm": 21.91963005065918, + "learning_rate": 7.467032967032968e-06, + "loss": 0.5343, + "step": 30964 + }, + { + "epoch": 85.06868131868131, + "grad_norm": 2.3173105716705322, + "learning_rate": 7.4656593406593404e-06, + "loss": 0.0249, + "step": 30965 + }, + { + "epoch": 85.07142857142857, + "grad_norm": 12.048624992370605, + "learning_rate": 7.4642857142857155e-06, + "loss": 0.1763, + "step": 30966 + }, + { + "epoch": 85.07417582417582, + "grad_norm": 15.2771577835083, + "learning_rate": 7.462912087912088e-06, + "loss": 0.274, + "step": 30967 + }, + { + "epoch": 85.07692307692308, + "grad_norm": 5.386993408203125, + "learning_rate": 7.4615384615384615e-06, + "loss": 0.0954, + "step": 30968 + }, + { + "epoch": 85.07967032967034, + "grad_norm": 15.956768989562988, + "learning_rate": 7.460164835164836e-06, + "loss": 0.3617, + "step": 30969 + }, + { + "epoch": 85.08241758241758, + "grad_norm": 20.73865509033203, + "learning_rate": 7.458791208791209e-06, + "loss": 0.4966, + "step": 30970 + }, + { + "epoch": 85.08516483516483, + "grad_norm": 2.8686821460723877, + "learning_rate": 7.457417582417582e-06, + "loss": 0.0506, + "step": 30971 + }, + { + "epoch": 85.08791208791209, + "grad_norm": 13.553569793701172, + "learning_rate": 7.456043956043957e-06, + "loss": 0.2498, + "step": 30972 + }, + { + "epoch": 85.09065934065934, + "grad_norm": 3.702697992324829, + "learning_rate": 7.454670329670329e-06, + "loss": 0.0285, + "step": 30973 + }, + { + "epoch": 85.0934065934066, + "grad_norm": 11.281698226928711, + "learning_rate": 7.4532967032967045e-06, + "loss": 0.1871, + "step": 30974 + }, + { + "epoch": 85.09615384615384, + "grad_norm": 12.087308883666992, + "learning_rate": 7.451923076923077e-06, + "loss": 0.1775, + "step": 30975 + }, + { + "epoch": 85.0989010989011, + "grad_norm": 17.95400047302246, + "learning_rate": 7.4505494505494505e-06, + "loss": 0.3277, + "step": 30976 + }, + { + "epoch": 85.10164835164835, + "grad_norm": 7.129536151885986, + "learning_rate": 7.449175824175825e-06, + "loss": 0.1234, + "step": 30977 + }, + { + "epoch": 85.1043956043956, + "grad_norm": 3.7235970497131348, + "learning_rate": 7.447802197802198e-06, + "loss": 0.0363, + "step": 30978 + }, + { + "epoch": 85.10714285714286, + "grad_norm": 10.338408470153809, + "learning_rate": 7.446428571428572e-06, + "loss": 0.3372, + "step": 30979 + }, + { + "epoch": 85.10989010989012, + "grad_norm": 16.72315216064453, + "learning_rate": 7.445054945054945e-06, + "loss": 0.3065, + "step": 30980 + }, + { + "epoch": 85.11263736263736, + "grad_norm": 14.5601224899292, + "learning_rate": 7.443681318681318e-06, + "loss": 0.1627, + "step": 30981 + }, + { + "epoch": 85.11538461538461, + "grad_norm": 12.708580017089844, + "learning_rate": 7.442307692307693e-06, + "loss": 0.125, + "step": 30982 + }, + { + "epoch": 85.11813186813187, + "grad_norm": 7.575632572174072, + "learning_rate": 7.440934065934066e-06, + "loss": 0.115, + "step": 30983 + }, + { + "epoch": 85.12087912087912, + "grad_norm": 9.239612579345703, + "learning_rate": 7.43956043956044e-06, + "loss": 0.1573, + "step": 30984 + }, + { + "epoch": 85.12362637362638, + "grad_norm": 15.492772102355957, + "learning_rate": 7.438186813186814e-06, + "loss": 0.1963, + "step": 30985 + }, + { + "epoch": 85.12637362637362, + "grad_norm": 5.305481910705566, + "learning_rate": 7.436813186813186e-06, + "loss": 0.0412, + "step": 30986 + }, + { + "epoch": 85.12912087912088, + "grad_norm": 15.128521919250488, + "learning_rate": 7.435439560439561e-06, + "loss": 0.2426, + "step": 30987 + }, + { + "epoch": 85.13186813186813, + "grad_norm": 11.263812065124512, + "learning_rate": 7.434065934065934e-06, + "loss": 0.1968, + "step": 30988 + }, + { + "epoch": 85.13461538461539, + "grad_norm": 18.560365676879883, + "learning_rate": 7.432692307692309e-06, + "loss": 0.2924, + "step": 30989 + }, + { + "epoch": 85.13736263736264, + "grad_norm": 12.358314514160156, + "learning_rate": 7.4313186813186815e-06, + "loss": 0.0769, + "step": 30990 + }, + { + "epoch": 85.14010989010988, + "grad_norm": 19.351930618286133, + "learning_rate": 7.429945054945055e-06, + "loss": 0.3624, + "step": 30991 + }, + { + "epoch": 85.14285714285714, + "grad_norm": 17.30816650390625, + "learning_rate": 7.428571428571429e-06, + "loss": 0.4914, + "step": 30992 + }, + { + "epoch": 85.1456043956044, + "grad_norm": 13.441940307617188, + "learning_rate": 7.427197802197803e-06, + "loss": 0.2183, + "step": 30993 + }, + { + "epoch": 85.14835164835165, + "grad_norm": 2.617647409439087, + "learning_rate": 7.425824175824177e-06, + "loss": 0.0438, + "step": 30994 + }, + { + "epoch": 85.1510989010989, + "grad_norm": 14.997417449951172, + "learning_rate": 7.4244505494505494e-06, + "loss": 0.3107, + "step": 30995 + }, + { + "epoch": 85.15384615384616, + "grad_norm": 17.09375, + "learning_rate": 7.423076923076923e-06, + "loss": 0.2172, + "step": 30996 + }, + { + "epoch": 85.1565934065934, + "grad_norm": 2.22310733795166, + "learning_rate": 7.421703296703297e-06, + "loss": 0.0305, + "step": 30997 + }, + { + "epoch": 85.15934065934066, + "grad_norm": 19.744949340820312, + "learning_rate": 7.4203296703296705e-06, + "loss": 0.433, + "step": 30998 + }, + { + "epoch": 85.16208791208791, + "grad_norm": 9.030510902404785, + "learning_rate": 7.418956043956045e-06, + "loss": 0.1286, + "step": 30999 + }, + { + "epoch": 85.16483516483517, + "grad_norm": 8.44575309753418, + "learning_rate": 7.417582417582418e-06, + "loss": 0.0913, + "step": 31000 + }, + { + "epoch": 85.16758241758242, + "grad_norm": 4.036081790924072, + "learning_rate": 7.416208791208791e-06, + "loss": 0.0599, + "step": 31001 + }, + { + "epoch": 85.17032967032966, + "grad_norm": 15.31649398803711, + "learning_rate": 7.414835164835166e-06, + "loss": 0.4469, + "step": 31002 + }, + { + "epoch": 85.17307692307692, + "grad_norm": 27.677953720092773, + "learning_rate": 7.413461538461538e-06, + "loss": 0.4545, + "step": 31003 + }, + { + "epoch": 85.17582417582418, + "grad_norm": 15.300450325012207, + "learning_rate": 7.412087912087913e-06, + "loss": 0.1036, + "step": 31004 + }, + { + "epoch": 85.17857142857143, + "grad_norm": 2.6860973834991455, + "learning_rate": 7.410714285714286e-06, + "loss": 0.0249, + "step": 31005 + }, + { + "epoch": 85.18131868131869, + "grad_norm": 17.159839630126953, + "learning_rate": 7.4093406593406594e-06, + "loss": 0.2777, + "step": 31006 + }, + { + "epoch": 85.18406593406593, + "grad_norm": 6.634059906005859, + "learning_rate": 7.407967032967034e-06, + "loss": 0.0513, + "step": 31007 + }, + { + "epoch": 85.18681318681318, + "grad_norm": 12.82214641571045, + "learning_rate": 7.406593406593407e-06, + "loss": 0.2346, + "step": 31008 + }, + { + "epoch": 85.18956043956044, + "grad_norm": 12.254769325256348, + "learning_rate": 7.405219780219781e-06, + "loss": 0.1438, + "step": 31009 + }, + { + "epoch": 85.1923076923077, + "grad_norm": 2.887767791748047, + "learning_rate": 7.403846153846154e-06, + "loss": 0.0396, + "step": 31010 + }, + { + "epoch": 85.19505494505495, + "grad_norm": 4.305617809295654, + "learning_rate": 7.402472527472527e-06, + "loss": 0.0874, + "step": 31011 + }, + { + "epoch": 85.1978021978022, + "grad_norm": 7.844841957092285, + "learning_rate": 7.4010989010989016e-06, + "loss": 0.1352, + "step": 31012 + }, + { + "epoch": 85.20054945054945, + "grad_norm": 11.037705421447754, + "learning_rate": 7.399725274725275e-06, + "loss": 0.1486, + "step": 31013 + }, + { + "epoch": 85.2032967032967, + "grad_norm": 2.4459476470947266, + "learning_rate": 7.398351648351649e-06, + "loss": 0.0467, + "step": 31014 + }, + { + "epoch": 85.20604395604396, + "grad_norm": 8.028668403625488, + "learning_rate": 7.396978021978023e-06, + "loss": 0.0984, + "step": 31015 + }, + { + "epoch": 85.20879120879121, + "grad_norm": 12.481684684753418, + "learning_rate": 7.395604395604395e-06, + "loss": 0.3023, + "step": 31016 + }, + { + "epoch": 85.21153846153847, + "grad_norm": 3.597289562225342, + "learning_rate": 7.39423076923077e-06, + "loss": 0.0505, + "step": 31017 + }, + { + "epoch": 85.21428571428571, + "grad_norm": 9.247457504272461, + "learning_rate": 7.392857142857143e-06, + "loss": 0.0684, + "step": 31018 + }, + { + "epoch": 85.21703296703296, + "grad_norm": 7.6619439125061035, + "learning_rate": 7.391483516483517e-06, + "loss": 0.1019, + "step": 31019 + }, + { + "epoch": 85.21978021978022, + "grad_norm": 14.143543243408203, + "learning_rate": 7.3901098901098905e-06, + "loss": 0.2489, + "step": 31020 + }, + { + "epoch": 85.22252747252747, + "grad_norm": 5.673798561096191, + "learning_rate": 7.388736263736264e-06, + "loss": 0.0571, + "step": 31021 + }, + { + "epoch": 85.22527472527473, + "grad_norm": 5.738351821899414, + "learning_rate": 7.387362637362638e-06, + "loss": 0.0629, + "step": 31022 + }, + { + "epoch": 85.22802197802197, + "grad_norm": 4.89368200302124, + "learning_rate": 7.385989010989012e-06, + "loss": 0.0742, + "step": 31023 + }, + { + "epoch": 85.23076923076923, + "grad_norm": 10.960958480834961, + "learning_rate": 7.384615384615386e-06, + "loss": 0.1719, + "step": 31024 + }, + { + "epoch": 85.23351648351648, + "grad_norm": 13.899470329284668, + "learning_rate": 7.383241758241758e-06, + "loss": 0.2407, + "step": 31025 + }, + { + "epoch": 85.23626373626374, + "grad_norm": 12.195881843566895, + "learning_rate": 7.381868131868132e-06, + "loss": 0.1553, + "step": 31026 + }, + { + "epoch": 85.23901098901099, + "grad_norm": 10.85757827758789, + "learning_rate": 7.380494505494506e-06, + "loss": 0.1548, + "step": 31027 + }, + { + "epoch": 85.24175824175825, + "grad_norm": 10.308747291564941, + "learning_rate": 7.3791208791208795e-06, + "loss": 0.2324, + "step": 31028 + }, + { + "epoch": 85.24450549450549, + "grad_norm": 7.600223541259766, + "learning_rate": 7.377747252747252e-06, + "loss": 0.1139, + "step": 31029 + }, + { + "epoch": 85.24725274725274, + "grad_norm": 10.706437110900879, + "learning_rate": 7.376373626373627e-06, + "loss": 0.2378, + "step": 31030 + }, + { + "epoch": 85.25, + "grad_norm": 13.412932395935059, + "learning_rate": 7.375e-06, + "loss": 0.2361, + "step": 31031 + }, + { + "epoch": 85.25274725274726, + "grad_norm": 12.357157707214355, + "learning_rate": 7.373626373626375e-06, + "loss": 0.1922, + "step": 31032 + }, + { + "epoch": 85.25549450549451, + "grad_norm": 4.325564384460449, + "learning_rate": 7.372252747252747e-06, + "loss": 0.0468, + "step": 31033 + }, + { + "epoch": 85.25824175824175, + "grad_norm": 5.909628391265869, + "learning_rate": 7.370879120879121e-06, + "loss": 0.0768, + "step": 31034 + }, + { + "epoch": 85.26098901098901, + "grad_norm": 8.922861099243164, + "learning_rate": 7.369505494505495e-06, + "loss": 0.2221, + "step": 31035 + }, + { + "epoch": 85.26373626373626, + "grad_norm": 28.64985466003418, + "learning_rate": 7.368131868131868e-06, + "loss": 0.459, + "step": 31036 + }, + { + "epoch": 85.26648351648352, + "grad_norm": 21.917421340942383, + "learning_rate": 7.366758241758243e-06, + "loss": 0.8007, + "step": 31037 + }, + { + "epoch": 85.26923076923077, + "grad_norm": 7.026601791381836, + "learning_rate": 7.365384615384616e-06, + "loss": 0.1686, + "step": 31038 + }, + { + "epoch": 85.27197802197803, + "grad_norm": 13.374053001403809, + "learning_rate": 7.364010989010989e-06, + "loss": 0.1624, + "step": 31039 + }, + { + "epoch": 85.27472527472527, + "grad_norm": 5.494925498962402, + "learning_rate": 7.362637362637363e-06, + "loss": 0.0663, + "step": 31040 + }, + { + "epoch": 85.27747252747253, + "grad_norm": 5.2855143547058105, + "learning_rate": 7.361263736263736e-06, + "loss": 0.0535, + "step": 31041 + }, + { + "epoch": 85.28021978021978, + "grad_norm": 9.302835464477539, + "learning_rate": 7.3598901098901106e-06, + "loss": 0.1512, + "step": 31042 + }, + { + "epoch": 85.28296703296704, + "grad_norm": 23.910781860351562, + "learning_rate": 7.358516483516484e-06, + "loss": 0.5696, + "step": 31043 + }, + { + "epoch": 85.28571428571429, + "grad_norm": 18.838045120239258, + "learning_rate": 7.3571428571428565e-06, + "loss": 0.5271, + "step": 31044 + }, + { + "epoch": 85.28846153846153, + "grad_norm": 16.463613510131836, + "learning_rate": 7.355769230769232e-06, + "loss": 0.4423, + "step": 31045 + }, + { + "epoch": 85.29120879120879, + "grad_norm": 4.402022838592529, + "learning_rate": 7.354395604395604e-06, + "loss": 0.0368, + "step": 31046 + }, + { + "epoch": 85.29395604395604, + "grad_norm": 9.475871086120605, + "learning_rate": 7.353021978021979e-06, + "loss": 0.1026, + "step": 31047 + }, + { + "epoch": 85.2967032967033, + "grad_norm": 7.9102253913879395, + "learning_rate": 7.351648351648352e-06, + "loss": 0.114, + "step": 31048 + }, + { + "epoch": 85.29945054945055, + "grad_norm": 14.497100830078125, + "learning_rate": 7.350274725274725e-06, + "loss": 0.3258, + "step": 31049 + }, + { + "epoch": 85.3021978021978, + "grad_norm": 11.640640258789062, + "learning_rate": 7.3489010989010995e-06, + "loss": 0.3652, + "step": 31050 + }, + { + "epoch": 85.30494505494505, + "grad_norm": 7.636074066162109, + "learning_rate": 7.347527472527473e-06, + "loss": 0.1039, + "step": 31051 + }, + { + "epoch": 85.3076923076923, + "grad_norm": 17.409242630004883, + "learning_rate": 7.346153846153847e-06, + "loss": 0.4161, + "step": 31052 + }, + { + "epoch": 85.31043956043956, + "grad_norm": 11.122281074523926, + "learning_rate": 7.34478021978022e-06, + "loss": 0.2774, + "step": 31053 + }, + { + "epoch": 85.31318681318682, + "grad_norm": 12.564681053161621, + "learning_rate": 7.343406593406593e-06, + "loss": 0.2746, + "step": 31054 + }, + { + "epoch": 85.31593406593407, + "grad_norm": 9.068469047546387, + "learning_rate": 7.342032967032967e-06, + "loss": 0.0692, + "step": 31055 + }, + { + "epoch": 85.31868131868131, + "grad_norm": 20.246971130371094, + "learning_rate": 7.340659340659341e-06, + "loss": 0.2462, + "step": 31056 + }, + { + "epoch": 85.32142857142857, + "grad_norm": 19.043434143066406, + "learning_rate": 7.339285714285715e-06, + "loss": 0.4115, + "step": 31057 + }, + { + "epoch": 85.32417582417582, + "grad_norm": 3.775956392288208, + "learning_rate": 7.3379120879120885e-06, + "loss": 0.0369, + "step": 31058 + }, + { + "epoch": 85.32692307692308, + "grad_norm": 12.421088218688965, + "learning_rate": 7.336538461538461e-06, + "loss": 0.1242, + "step": 31059 + }, + { + "epoch": 85.32967032967034, + "grad_norm": 13.553214073181152, + "learning_rate": 7.335164835164836e-06, + "loss": 0.2815, + "step": 31060 + }, + { + "epoch": 85.33241758241758, + "grad_norm": 9.811796188354492, + "learning_rate": 7.333791208791209e-06, + "loss": 0.1598, + "step": 31061 + }, + { + "epoch": 85.33516483516483, + "grad_norm": 18.717470169067383, + "learning_rate": 7.332417582417584e-06, + "loss": 0.4969, + "step": 31062 + }, + { + "epoch": 85.33791208791209, + "grad_norm": 24.609764099121094, + "learning_rate": 7.331043956043956e-06, + "loss": 0.6174, + "step": 31063 + }, + { + "epoch": 85.34065934065934, + "grad_norm": 18.862916946411133, + "learning_rate": 7.32967032967033e-06, + "loss": 0.3319, + "step": 31064 + }, + { + "epoch": 85.3434065934066, + "grad_norm": 12.901082038879395, + "learning_rate": 7.328296703296704e-06, + "loss": 0.257, + "step": 31065 + }, + { + "epoch": 85.34615384615384, + "grad_norm": 4.800753593444824, + "learning_rate": 7.326923076923077e-06, + "loss": 0.0643, + "step": 31066 + }, + { + "epoch": 85.3489010989011, + "grad_norm": 10.843572616577148, + "learning_rate": 7.325549450549452e-06, + "loss": 0.178, + "step": 31067 + }, + { + "epoch": 85.35164835164835, + "grad_norm": 18.685224533081055, + "learning_rate": 7.324175824175824e-06, + "loss": 0.6251, + "step": 31068 + }, + { + "epoch": 85.3543956043956, + "grad_norm": 17.21132469177246, + "learning_rate": 7.322802197802198e-06, + "loss": 0.3612, + "step": 31069 + }, + { + "epoch": 85.35714285714286, + "grad_norm": 9.335755348205566, + "learning_rate": 7.321428571428572e-06, + "loss": 0.1051, + "step": 31070 + }, + { + "epoch": 85.35989010989012, + "grad_norm": 19.260723114013672, + "learning_rate": 7.320054945054945e-06, + "loss": 0.3793, + "step": 31071 + }, + { + "epoch": 85.36263736263736, + "grad_norm": 14.595598220825195, + "learning_rate": 7.3186813186813195e-06, + "loss": 0.2957, + "step": 31072 + }, + { + "epoch": 85.36538461538461, + "grad_norm": 4.587841987609863, + "learning_rate": 7.317307692307693e-06, + "loss": 0.0947, + "step": 31073 + }, + { + "epoch": 85.36813186813187, + "grad_norm": 12.48622989654541, + "learning_rate": 7.3159340659340655e-06, + "loss": 0.328, + "step": 31074 + }, + { + "epoch": 85.37087912087912, + "grad_norm": 14.926000595092773, + "learning_rate": 7.314560439560441e-06, + "loss": 0.2788, + "step": 31075 + }, + { + "epoch": 85.37362637362638, + "grad_norm": 6.036189556121826, + "learning_rate": 7.313186813186813e-06, + "loss": 0.0674, + "step": 31076 + }, + { + "epoch": 85.37637362637362, + "grad_norm": 13.543415069580078, + "learning_rate": 7.311813186813187e-06, + "loss": 0.4086, + "step": 31077 + }, + { + "epoch": 85.37912087912088, + "grad_norm": 7.525248050689697, + "learning_rate": 7.310439560439561e-06, + "loss": 0.0894, + "step": 31078 + }, + { + "epoch": 85.38186813186813, + "grad_norm": 13.090681076049805, + "learning_rate": 7.309065934065934e-06, + "loss": 0.3559, + "step": 31079 + }, + { + "epoch": 85.38461538461539, + "grad_norm": 8.690019607543945, + "learning_rate": 7.3076923076923085e-06, + "loss": 0.1186, + "step": 31080 + }, + { + "epoch": 85.38736263736264, + "grad_norm": 13.170004844665527, + "learning_rate": 7.306318681318682e-06, + "loss": 0.3054, + "step": 31081 + }, + { + "epoch": 85.39010989010988, + "grad_norm": 18.18992805480957, + "learning_rate": 7.304945054945056e-06, + "loss": 0.2501, + "step": 31082 + }, + { + "epoch": 85.39285714285714, + "grad_norm": 10.960814476013184, + "learning_rate": 7.303571428571429e-06, + "loss": 0.1378, + "step": 31083 + }, + { + "epoch": 85.3956043956044, + "grad_norm": 1.8241347074508667, + "learning_rate": 7.302197802197802e-06, + "loss": 0.0192, + "step": 31084 + }, + { + "epoch": 85.39835164835165, + "grad_norm": 21.082242965698242, + "learning_rate": 7.300824175824176e-06, + "loss": 0.4124, + "step": 31085 + }, + { + "epoch": 85.4010989010989, + "grad_norm": 8.475459098815918, + "learning_rate": 7.29945054945055e-06, + "loss": 0.125, + "step": 31086 + }, + { + "epoch": 85.40384615384616, + "grad_norm": 9.170197486877441, + "learning_rate": 7.298076923076922e-06, + "loss": 0.0822, + "step": 31087 + }, + { + "epoch": 85.4065934065934, + "grad_norm": 2.4199297428131104, + "learning_rate": 7.2967032967032974e-06, + "loss": 0.0329, + "step": 31088 + }, + { + "epoch": 85.40934065934066, + "grad_norm": 17.752779006958008, + "learning_rate": 7.29532967032967e-06, + "loss": 0.6677, + "step": 31089 + }, + { + "epoch": 85.41208791208791, + "grad_norm": 13.637478828430176, + "learning_rate": 7.293956043956045e-06, + "loss": 0.15, + "step": 31090 + }, + { + "epoch": 85.41483516483517, + "grad_norm": 14.833532333374023, + "learning_rate": 7.292582417582418e-06, + "loss": 0.3104, + "step": 31091 + }, + { + "epoch": 85.41758241758242, + "grad_norm": 13.353572845458984, + "learning_rate": 7.291208791208791e-06, + "loss": 0.2017, + "step": 31092 + }, + { + "epoch": 85.42032967032966, + "grad_norm": 8.363927841186523, + "learning_rate": 7.289835164835165e-06, + "loss": 0.1071, + "step": 31093 + }, + { + "epoch": 85.42307692307692, + "grad_norm": 12.965042114257812, + "learning_rate": 7.288461538461539e-06, + "loss": 0.3613, + "step": 31094 + }, + { + "epoch": 85.42582417582418, + "grad_norm": 1.8712387084960938, + "learning_rate": 7.287087912087913e-06, + "loss": 0.02, + "step": 31095 + }, + { + "epoch": 85.42857142857143, + "grad_norm": 20.261125564575195, + "learning_rate": 7.285714285714286e-06, + "loss": 0.4253, + "step": 31096 + }, + { + "epoch": 85.43131868131869, + "grad_norm": 5.264920711517334, + "learning_rate": 7.284340659340659e-06, + "loss": 0.0775, + "step": 31097 + }, + { + "epoch": 85.43406593406593, + "grad_norm": 8.472332954406738, + "learning_rate": 7.282967032967033e-06, + "loss": 0.0833, + "step": 31098 + }, + { + "epoch": 85.43681318681318, + "grad_norm": 17.252748489379883, + "learning_rate": 7.281593406593407e-06, + "loss": 0.212, + "step": 31099 + }, + { + "epoch": 85.43956043956044, + "grad_norm": 12.511497497558594, + "learning_rate": 7.280219780219781e-06, + "loss": 0.2787, + "step": 31100 + }, + { + "epoch": 85.4423076923077, + "grad_norm": 20.904996871948242, + "learning_rate": 7.278846153846154e-06, + "loss": 0.3202, + "step": 31101 + }, + { + "epoch": 85.44505494505495, + "grad_norm": 34.58855056762695, + "learning_rate": 7.277472527472527e-06, + "loss": 0.343, + "step": 31102 + }, + { + "epoch": 85.4478021978022, + "grad_norm": 24.272125244140625, + "learning_rate": 7.276098901098902e-06, + "loss": 0.7306, + "step": 31103 + }, + { + "epoch": 85.45054945054945, + "grad_norm": 8.936418533325195, + "learning_rate": 7.2747252747252745e-06, + "loss": 0.1519, + "step": 31104 + }, + { + "epoch": 85.4532967032967, + "grad_norm": 8.755861282348633, + "learning_rate": 7.27335164835165e-06, + "loss": 0.1211, + "step": 31105 + }, + { + "epoch": 85.45604395604396, + "grad_norm": 2.024092435836792, + "learning_rate": 7.271978021978022e-06, + "loss": 0.0265, + "step": 31106 + }, + { + "epoch": 85.45879120879121, + "grad_norm": 8.641826629638672, + "learning_rate": 7.2706043956043956e-06, + "loss": 0.194, + "step": 31107 + }, + { + "epoch": 85.46153846153847, + "grad_norm": 13.498180389404297, + "learning_rate": 7.26923076923077e-06, + "loss": 0.2882, + "step": 31108 + }, + { + "epoch": 85.46428571428571, + "grad_norm": 10.94201946258545, + "learning_rate": 7.267857142857143e-06, + "loss": 0.0922, + "step": 31109 + }, + { + "epoch": 85.46703296703296, + "grad_norm": 5.981549263000488, + "learning_rate": 7.2664835164835175e-06, + "loss": 0.063, + "step": 31110 + }, + { + "epoch": 85.46978021978022, + "grad_norm": 17.688011169433594, + "learning_rate": 7.265109890109891e-06, + "loss": 0.3301, + "step": 31111 + }, + { + "epoch": 85.47252747252747, + "grad_norm": 11.021952629089355, + "learning_rate": 7.2637362637362634e-06, + "loss": 0.2392, + "step": 31112 + }, + { + "epoch": 85.47527472527473, + "grad_norm": 7.272143363952637, + "learning_rate": 7.262362637362638e-06, + "loss": 0.1028, + "step": 31113 + }, + { + "epoch": 85.47802197802197, + "grad_norm": 7.44155740737915, + "learning_rate": 7.260989010989011e-06, + "loss": 0.0682, + "step": 31114 + }, + { + "epoch": 85.48076923076923, + "grad_norm": 18.410768508911133, + "learning_rate": 7.259615384615385e-06, + "loss": 0.5611, + "step": 31115 + }, + { + "epoch": 85.48351648351648, + "grad_norm": 28.568885803222656, + "learning_rate": 7.258241758241759e-06, + "loss": 0.2919, + "step": 31116 + }, + { + "epoch": 85.48626373626374, + "grad_norm": 10.756808280944824, + "learning_rate": 7.256868131868131e-06, + "loss": 0.2433, + "step": 31117 + }, + { + "epoch": 85.48901098901099, + "grad_norm": 12.813970565795898, + "learning_rate": 7.255494505494506e-06, + "loss": 0.1962, + "step": 31118 + }, + { + "epoch": 85.49175824175825, + "grad_norm": 12.569188117980957, + "learning_rate": 7.254120879120879e-06, + "loss": 0.1265, + "step": 31119 + }, + { + "epoch": 85.49450549450549, + "grad_norm": 2.5332489013671875, + "learning_rate": 7.252747252747254e-06, + "loss": 0.0372, + "step": 31120 + }, + { + "epoch": 85.49725274725274, + "grad_norm": 7.66208553314209, + "learning_rate": 7.251373626373627e-06, + "loss": 0.1351, + "step": 31121 + }, + { + "epoch": 85.5, + "grad_norm": 9.762004852294922, + "learning_rate": 7.25e-06, + "loss": 0.3187, + "step": 31122 + }, + { + "epoch": 85.50274725274726, + "grad_norm": 11.124347686767578, + "learning_rate": 7.248626373626374e-06, + "loss": 0.2478, + "step": 31123 + }, + { + "epoch": 85.50549450549451, + "grad_norm": 7.528088569641113, + "learning_rate": 7.247252747252748e-06, + "loss": 0.1427, + "step": 31124 + }, + { + "epoch": 85.50824175824175, + "grad_norm": 8.409334182739258, + "learning_rate": 7.245879120879122e-06, + "loss": 0.1314, + "step": 31125 + }, + { + "epoch": 85.51098901098901, + "grad_norm": 12.148805618286133, + "learning_rate": 7.2445054945054945e-06, + "loss": 0.1833, + "step": 31126 + }, + { + "epoch": 85.51373626373626, + "grad_norm": 12.092231750488281, + "learning_rate": 7.243131868131868e-06, + "loss": 0.2401, + "step": 31127 + }, + { + "epoch": 85.51648351648352, + "grad_norm": 12.871950149536133, + "learning_rate": 7.241758241758242e-06, + "loss": 0.2631, + "step": 31128 + }, + { + "epoch": 85.51923076923077, + "grad_norm": 6.647465229034424, + "learning_rate": 7.240384615384616e-06, + "loss": 0.0799, + "step": 31129 + }, + { + "epoch": 85.52197802197803, + "grad_norm": 19.386516571044922, + "learning_rate": 7.23901098901099e-06, + "loss": 0.6526, + "step": 31130 + }, + { + "epoch": 85.52472527472527, + "grad_norm": 15.62226390838623, + "learning_rate": 7.237637362637363e-06, + "loss": 0.3371, + "step": 31131 + }, + { + "epoch": 85.52747252747253, + "grad_norm": 9.263456344604492, + "learning_rate": 7.236263736263736e-06, + "loss": 0.1194, + "step": 31132 + }, + { + "epoch": 85.53021978021978, + "grad_norm": 3.7171850204467773, + "learning_rate": 7.234890109890111e-06, + "loss": 0.0508, + "step": 31133 + }, + { + "epoch": 85.53296703296704, + "grad_norm": 13.714614868164062, + "learning_rate": 7.2335164835164835e-06, + "loss": 0.2806, + "step": 31134 + }, + { + "epoch": 85.53571428571429, + "grad_norm": 9.985688209533691, + "learning_rate": 7.2321428571428586e-06, + "loss": 0.1725, + "step": 31135 + }, + { + "epoch": 85.53846153846153, + "grad_norm": 9.802057266235352, + "learning_rate": 7.230769230769231e-06, + "loss": 0.1887, + "step": 31136 + }, + { + "epoch": 85.54120879120879, + "grad_norm": 9.725156784057617, + "learning_rate": 7.2293956043956045e-06, + "loss": 0.0818, + "step": 31137 + }, + { + "epoch": 85.54395604395604, + "grad_norm": 5.0973052978515625, + "learning_rate": 7.228021978021979e-06, + "loss": 0.1104, + "step": 31138 + }, + { + "epoch": 85.5467032967033, + "grad_norm": 15.08609676361084, + "learning_rate": 7.226648351648352e-06, + "loss": 0.2018, + "step": 31139 + }, + { + "epoch": 85.54945054945055, + "grad_norm": 28.91327667236328, + "learning_rate": 7.225274725274725e-06, + "loss": 0.3368, + "step": 31140 + }, + { + "epoch": 85.5521978021978, + "grad_norm": 7.235317707061768, + "learning_rate": 7.223901098901099e-06, + "loss": 0.0671, + "step": 31141 + }, + { + "epoch": 85.55494505494505, + "grad_norm": 14.178893089294434, + "learning_rate": 7.222527472527472e-06, + "loss": 0.4137, + "step": 31142 + }, + { + "epoch": 85.5576923076923, + "grad_norm": 11.647294998168945, + "learning_rate": 7.221153846153847e-06, + "loss": 0.1313, + "step": 31143 + }, + { + "epoch": 85.56043956043956, + "grad_norm": 8.784507751464844, + "learning_rate": 7.21978021978022e-06, + "loss": 0.1111, + "step": 31144 + }, + { + "epoch": 85.56318681318682, + "grad_norm": 7.645271301269531, + "learning_rate": 7.2184065934065935e-06, + "loss": 0.2034, + "step": 31145 + }, + { + "epoch": 85.56593406593407, + "grad_norm": 20.254283905029297, + "learning_rate": 7.217032967032968e-06, + "loss": 0.4805, + "step": 31146 + }, + { + "epoch": 85.56868131868131, + "grad_norm": 6.987357139587402, + "learning_rate": 7.21565934065934e-06, + "loss": 0.0498, + "step": 31147 + }, + { + "epoch": 85.57142857142857, + "grad_norm": 21.720199584960938, + "learning_rate": 7.214285714285715e-06, + "loss": 0.43, + "step": 31148 + }, + { + "epoch": 85.57417582417582, + "grad_norm": 1.2008419036865234, + "learning_rate": 7.212912087912088e-06, + "loss": 0.0127, + "step": 31149 + }, + { + "epoch": 85.57692307692308, + "grad_norm": 7.287298202514648, + "learning_rate": 7.211538461538461e-06, + "loss": 0.1442, + "step": 31150 + }, + { + "epoch": 85.57967032967034, + "grad_norm": 19.907358169555664, + "learning_rate": 7.210164835164836e-06, + "loss": 0.4019, + "step": 31151 + }, + { + "epoch": 85.58241758241758, + "grad_norm": 4.373569488525391, + "learning_rate": 7.208791208791209e-06, + "loss": 0.0666, + "step": 31152 + }, + { + "epoch": 85.58516483516483, + "grad_norm": 12.788915634155273, + "learning_rate": 7.207417582417583e-06, + "loss": 0.1852, + "step": 31153 + }, + { + "epoch": 85.58791208791209, + "grad_norm": 6.818429470062256, + "learning_rate": 7.206043956043957e-06, + "loss": 0.0941, + "step": 31154 + }, + { + "epoch": 85.59065934065934, + "grad_norm": 6.99704647064209, + "learning_rate": 7.204670329670329e-06, + "loss": 0.0809, + "step": 31155 + }, + { + "epoch": 85.5934065934066, + "grad_norm": 9.549511909484863, + "learning_rate": 7.2032967032967035e-06, + "loss": 0.0777, + "step": 31156 + }, + { + "epoch": 85.59615384615384, + "grad_norm": 13.247453689575195, + "learning_rate": 7.201923076923077e-06, + "loss": 0.229, + "step": 31157 + }, + { + "epoch": 85.5989010989011, + "grad_norm": 8.847832679748535, + "learning_rate": 7.200549450549451e-06, + "loss": 0.0723, + "step": 31158 + }, + { + "epoch": 85.60164835164835, + "grad_norm": 14.048643112182617, + "learning_rate": 7.1991758241758246e-06, + "loss": 0.4034, + "step": 31159 + }, + { + "epoch": 85.6043956043956, + "grad_norm": 6.017612457275391, + "learning_rate": 7.197802197802198e-06, + "loss": 0.0697, + "step": 31160 + }, + { + "epoch": 85.60714285714286, + "grad_norm": 6.779435157775879, + "learning_rate": 7.196428571428572e-06, + "loss": 0.0654, + "step": 31161 + }, + { + "epoch": 85.60989010989012, + "grad_norm": 13.981644630432129, + "learning_rate": 7.195054945054945e-06, + "loss": 0.1367, + "step": 31162 + }, + { + "epoch": 85.61263736263736, + "grad_norm": 9.773475646972656, + "learning_rate": 7.19368131868132e-06, + "loss": 0.14, + "step": 31163 + }, + { + "epoch": 85.61538461538461, + "grad_norm": 4.942870140075684, + "learning_rate": 7.1923076923076924e-06, + "loss": 0.0836, + "step": 31164 + }, + { + "epoch": 85.61813186813187, + "grad_norm": 14.574320793151855, + "learning_rate": 7.190934065934066e-06, + "loss": 0.2053, + "step": 31165 + }, + { + "epoch": 85.62087912087912, + "grad_norm": 21.39827537536621, + "learning_rate": 7.18956043956044e-06, + "loss": 0.6905, + "step": 31166 + }, + { + "epoch": 85.62362637362638, + "grad_norm": 4.5641069412231445, + "learning_rate": 7.1881868131868135e-06, + "loss": 0.069, + "step": 31167 + }, + { + "epoch": 85.62637362637362, + "grad_norm": 8.18802261352539, + "learning_rate": 7.186813186813188e-06, + "loss": 0.3129, + "step": 31168 + }, + { + "epoch": 85.62912087912088, + "grad_norm": 19.38926124572754, + "learning_rate": 7.185439560439561e-06, + "loss": 0.3937, + "step": 31169 + }, + { + "epoch": 85.63186813186813, + "grad_norm": 2.432128429412842, + "learning_rate": 7.184065934065934e-06, + "loss": 0.0238, + "step": 31170 + }, + { + "epoch": 85.63461538461539, + "grad_norm": 26.704586029052734, + "learning_rate": 7.182692307692308e-06, + "loss": 0.5105, + "step": 31171 + }, + { + "epoch": 85.63736263736264, + "grad_norm": 17.601627349853516, + "learning_rate": 7.181318681318681e-06, + "loss": 0.2801, + "step": 31172 + }, + { + "epoch": 85.64010989010988, + "grad_norm": 9.231597900390625, + "learning_rate": 7.179945054945056e-06, + "loss": 0.0626, + "step": 31173 + }, + { + "epoch": 85.64285714285714, + "grad_norm": 16.864091873168945, + "learning_rate": 7.178571428571429e-06, + "loss": 0.3598, + "step": 31174 + }, + { + "epoch": 85.6456043956044, + "grad_norm": 9.472394943237305, + "learning_rate": 7.177197802197802e-06, + "loss": 0.1253, + "step": 31175 + }, + { + "epoch": 85.64835164835165, + "grad_norm": 16.326950073242188, + "learning_rate": 7.175824175824177e-06, + "loss": 0.3166, + "step": 31176 + }, + { + "epoch": 85.6510989010989, + "grad_norm": 10.935425758361816, + "learning_rate": 7.174450549450549e-06, + "loss": 0.251, + "step": 31177 + }, + { + "epoch": 85.65384615384616, + "grad_norm": 17.339292526245117, + "learning_rate": 7.173076923076924e-06, + "loss": 0.5511, + "step": 31178 + }, + { + "epoch": 85.6565934065934, + "grad_norm": 17.381784439086914, + "learning_rate": 7.171703296703297e-06, + "loss": 0.4198, + "step": 31179 + }, + { + "epoch": 85.65934065934066, + "grad_norm": 4.5246429443359375, + "learning_rate": 7.17032967032967e-06, + "loss": 0.0484, + "step": 31180 + }, + { + "epoch": 85.66208791208791, + "grad_norm": 14.543280601501465, + "learning_rate": 7.168956043956045e-06, + "loss": 0.4241, + "step": 31181 + }, + { + "epoch": 85.66483516483517, + "grad_norm": 7.3036723136901855, + "learning_rate": 7.167582417582418e-06, + "loss": 0.1171, + "step": 31182 + }, + { + "epoch": 85.66758241758242, + "grad_norm": 0.9719240665435791, + "learning_rate": 7.166208791208792e-06, + "loss": 0.0089, + "step": 31183 + }, + { + "epoch": 85.67032967032966, + "grad_norm": 12.774275779724121, + "learning_rate": 7.164835164835166e-06, + "loss": 0.2252, + "step": 31184 + }, + { + "epoch": 85.67307692307692, + "grad_norm": 1.5830937623977661, + "learning_rate": 7.163461538461538e-06, + "loss": 0.0144, + "step": 31185 + }, + { + "epoch": 85.67582417582418, + "grad_norm": 17.384578704833984, + "learning_rate": 7.1620879120879125e-06, + "loss": 0.5647, + "step": 31186 + }, + { + "epoch": 85.67857142857143, + "grad_norm": 2.907379150390625, + "learning_rate": 7.160714285714286e-06, + "loss": 0.0288, + "step": 31187 + }, + { + "epoch": 85.68131868131869, + "grad_norm": 9.030767440795898, + "learning_rate": 7.15934065934066e-06, + "loss": 0.2049, + "step": 31188 + }, + { + "epoch": 85.68406593406593, + "grad_norm": 15.828788757324219, + "learning_rate": 7.1579670329670335e-06, + "loss": 0.3339, + "step": 31189 + }, + { + "epoch": 85.68681318681318, + "grad_norm": 6.6315484046936035, + "learning_rate": 7.156593406593406e-06, + "loss": 0.0512, + "step": 31190 + }, + { + "epoch": 85.68956043956044, + "grad_norm": 22.116514205932617, + "learning_rate": 7.155219780219781e-06, + "loss": 0.6266, + "step": 31191 + }, + { + "epoch": 85.6923076923077, + "grad_norm": 18.10511589050293, + "learning_rate": 7.153846153846154e-06, + "loss": 0.3101, + "step": 31192 + }, + { + "epoch": 85.69505494505495, + "grad_norm": 5.861903190612793, + "learning_rate": 7.152472527472529e-06, + "loss": 0.0663, + "step": 31193 + }, + { + "epoch": 85.6978021978022, + "grad_norm": 13.827268600463867, + "learning_rate": 7.1510989010989014e-06, + "loss": 0.2375, + "step": 31194 + }, + { + "epoch": 85.70054945054945, + "grad_norm": 7.266690254211426, + "learning_rate": 7.149725274725275e-06, + "loss": 0.1353, + "step": 31195 + }, + { + "epoch": 85.7032967032967, + "grad_norm": 16.81715202331543, + "learning_rate": 7.148351648351649e-06, + "loss": 0.5222, + "step": 31196 + }, + { + "epoch": 85.70604395604396, + "grad_norm": 3.451096534729004, + "learning_rate": 7.1469780219780225e-06, + "loss": 0.0248, + "step": 31197 + }, + { + "epoch": 85.70879120879121, + "grad_norm": 5.248863697052002, + "learning_rate": 7.145604395604395e-06, + "loss": 0.0567, + "step": 31198 + }, + { + "epoch": 85.71153846153847, + "grad_norm": 18.04197120666504, + "learning_rate": 7.144230769230769e-06, + "loss": 0.2286, + "step": 31199 + }, + { + "epoch": 85.71428571428571, + "grad_norm": 13.458799362182617, + "learning_rate": 7.142857142857143e-06, + "loss": 0.4534, + "step": 31200 + }, + { + "epoch": 85.71703296703296, + "grad_norm": 8.73231315612793, + "learning_rate": 7.141483516483517e-06, + "loss": 0.1417, + "step": 31201 + }, + { + "epoch": 85.71978021978022, + "grad_norm": 6.300540924072266, + "learning_rate": 7.14010989010989e-06, + "loss": 0.0348, + "step": 31202 + }, + { + "epoch": 85.72252747252747, + "grad_norm": 19.145084381103516, + "learning_rate": 7.138736263736264e-06, + "loss": 0.376, + "step": 31203 + }, + { + "epoch": 85.72527472527473, + "grad_norm": 22.46304702758789, + "learning_rate": 7.137362637362638e-06, + "loss": 0.4319, + "step": 31204 + }, + { + "epoch": 85.72802197802197, + "grad_norm": 3.4186553955078125, + "learning_rate": 7.135989010989011e-06, + "loss": 0.0365, + "step": 31205 + }, + { + "epoch": 85.73076923076923, + "grad_norm": 11.554720878601074, + "learning_rate": 7.134615384615386e-06, + "loss": 0.1477, + "step": 31206 + }, + { + "epoch": 85.73351648351648, + "grad_norm": 23.676544189453125, + "learning_rate": 7.133241758241758e-06, + "loss": 0.5596, + "step": 31207 + }, + { + "epoch": 85.73626373626374, + "grad_norm": 13.107763290405273, + "learning_rate": 7.131868131868132e-06, + "loss": 0.2197, + "step": 31208 + }, + { + "epoch": 85.73901098901099, + "grad_norm": 11.547468185424805, + "learning_rate": 7.130494505494506e-06, + "loss": 0.2031, + "step": 31209 + }, + { + "epoch": 85.74175824175825, + "grad_norm": 10.535504341125488, + "learning_rate": 7.129120879120879e-06, + "loss": 0.1271, + "step": 31210 + }, + { + "epoch": 85.74450549450549, + "grad_norm": 4.697037696838379, + "learning_rate": 7.127747252747254e-06, + "loss": 0.0863, + "step": 31211 + }, + { + "epoch": 85.74725274725274, + "grad_norm": 22.22770881652832, + "learning_rate": 7.126373626373627e-06, + "loss": 0.6564, + "step": 31212 + }, + { + "epoch": 85.75, + "grad_norm": 10.35918140411377, + "learning_rate": 7.1249999999999995e-06, + "loss": 0.1838, + "step": 31213 + }, + { + "epoch": 85.75274725274726, + "grad_norm": 24.26083755493164, + "learning_rate": 7.123626373626374e-06, + "loss": 0.2425, + "step": 31214 + }, + { + "epoch": 85.75549450549451, + "grad_norm": 2.8544065952301025, + "learning_rate": 7.122252747252747e-06, + "loss": 0.0283, + "step": 31215 + }, + { + "epoch": 85.75824175824175, + "grad_norm": 7.494152069091797, + "learning_rate": 7.1208791208791215e-06, + "loss": 0.1354, + "step": 31216 + }, + { + "epoch": 85.76098901098901, + "grad_norm": 18.311767578125, + "learning_rate": 7.119505494505495e-06, + "loss": 0.331, + "step": 31217 + }, + { + "epoch": 85.76373626373626, + "grad_norm": 22.30048370361328, + "learning_rate": 7.118131868131868e-06, + "loss": 0.3589, + "step": 31218 + }, + { + "epoch": 85.76648351648352, + "grad_norm": 18.35077667236328, + "learning_rate": 7.1167582417582425e-06, + "loss": 0.2673, + "step": 31219 + }, + { + "epoch": 85.76923076923077, + "grad_norm": 28.766584396362305, + "learning_rate": 7.115384615384615e-06, + "loss": 0.4727, + "step": 31220 + }, + { + "epoch": 85.77197802197803, + "grad_norm": 23.040252685546875, + "learning_rate": 7.11401098901099e-06, + "loss": 0.4376, + "step": 31221 + }, + { + "epoch": 85.77472527472527, + "grad_norm": 17.753433227539062, + "learning_rate": 7.112637362637363e-06, + "loss": 0.2267, + "step": 31222 + }, + { + "epoch": 85.77747252747253, + "grad_norm": 12.709359169006348, + "learning_rate": 7.111263736263736e-06, + "loss": 0.2818, + "step": 31223 + }, + { + "epoch": 85.78021978021978, + "grad_norm": 7.818641185760498, + "learning_rate": 7.10989010989011e-06, + "loss": 0.1099, + "step": 31224 + }, + { + "epoch": 85.78296703296704, + "grad_norm": 13.344326972961426, + "learning_rate": 7.108516483516484e-06, + "loss": 0.1443, + "step": 31225 + }, + { + "epoch": 85.78571428571429, + "grad_norm": 1.7491437196731567, + "learning_rate": 7.107142857142858e-06, + "loss": 0.0165, + "step": 31226 + }, + { + "epoch": 85.78846153846153, + "grad_norm": 8.648824691772461, + "learning_rate": 7.1057692307692315e-06, + "loss": 0.1192, + "step": 31227 + }, + { + "epoch": 85.79120879120879, + "grad_norm": 4.073192119598389, + "learning_rate": 7.104395604395604e-06, + "loss": 0.0631, + "step": 31228 + }, + { + "epoch": 85.79395604395604, + "grad_norm": 11.167916297912598, + "learning_rate": 7.103021978021978e-06, + "loss": 0.3872, + "step": 31229 + }, + { + "epoch": 85.7967032967033, + "grad_norm": 2.4875998497009277, + "learning_rate": 7.101648351648352e-06, + "loss": 0.0144, + "step": 31230 + }, + { + "epoch": 85.79945054945055, + "grad_norm": 16.68060302734375, + "learning_rate": 7.100274725274726e-06, + "loss": 0.2669, + "step": 31231 + }, + { + "epoch": 85.8021978021978, + "grad_norm": 16.58868408203125, + "learning_rate": 7.098901098901099e-06, + "loss": 0.2701, + "step": 31232 + }, + { + "epoch": 85.80494505494505, + "grad_norm": 13.796075820922852, + "learning_rate": 7.097527472527473e-06, + "loss": 0.2067, + "step": 31233 + }, + { + "epoch": 85.8076923076923, + "grad_norm": 12.810294151306152, + "learning_rate": 7.096153846153847e-06, + "loss": 0.2905, + "step": 31234 + }, + { + "epoch": 85.81043956043956, + "grad_norm": 16.948575973510742, + "learning_rate": 7.09478021978022e-06, + "loss": 0.3457, + "step": 31235 + }, + { + "epoch": 85.81318681318682, + "grad_norm": 8.776568412780762, + "learning_rate": 7.093406593406595e-06, + "loss": 0.1893, + "step": 31236 + }, + { + "epoch": 85.81593406593407, + "grad_norm": 15.682251930236816, + "learning_rate": 7.092032967032967e-06, + "loss": 0.4061, + "step": 31237 + }, + { + "epoch": 85.81868131868131, + "grad_norm": 5.613182544708252, + "learning_rate": 7.090659340659341e-06, + "loss": 0.0667, + "step": 31238 + }, + { + "epoch": 85.82142857142857, + "grad_norm": 9.13897705078125, + "learning_rate": 7.089285714285715e-06, + "loss": 0.1632, + "step": 31239 + }, + { + "epoch": 85.82417582417582, + "grad_norm": 13.52238655090332, + "learning_rate": 7.087912087912088e-06, + "loss": 0.1541, + "step": 31240 + }, + { + "epoch": 85.82692307692308, + "grad_norm": 5.459969997406006, + "learning_rate": 7.0865384615384626e-06, + "loss": 0.0827, + "step": 31241 + }, + { + "epoch": 85.82967032967034, + "grad_norm": 8.365567207336426, + "learning_rate": 7.085164835164836e-06, + "loss": 0.1466, + "step": 31242 + }, + { + "epoch": 85.83241758241758, + "grad_norm": 8.366044998168945, + "learning_rate": 7.0837912087912085e-06, + "loss": 0.1045, + "step": 31243 + }, + { + "epoch": 85.83516483516483, + "grad_norm": 8.218879699707031, + "learning_rate": 7.082417582417583e-06, + "loss": 0.1098, + "step": 31244 + }, + { + "epoch": 85.83791208791209, + "grad_norm": 21.801193237304688, + "learning_rate": 7.081043956043956e-06, + "loss": 0.4581, + "step": 31245 + }, + { + "epoch": 85.84065934065934, + "grad_norm": 4.947696208953857, + "learning_rate": 7.0796703296703304e-06, + "loss": 0.0776, + "step": 31246 + }, + { + "epoch": 85.8434065934066, + "grad_norm": 8.315905570983887, + "learning_rate": 7.078296703296704e-06, + "loss": 0.1403, + "step": 31247 + }, + { + "epoch": 85.84615384615384, + "grad_norm": 16.6662540435791, + "learning_rate": 7.076923076923076e-06, + "loss": 0.2497, + "step": 31248 + }, + { + "epoch": 85.8489010989011, + "grad_norm": 9.9738187789917, + "learning_rate": 7.0755494505494515e-06, + "loss": 0.1707, + "step": 31249 + }, + { + "epoch": 85.85164835164835, + "grad_norm": 24.8499698638916, + "learning_rate": 7.074175824175824e-06, + "loss": 0.5897, + "step": 31250 + }, + { + "epoch": 85.8543956043956, + "grad_norm": 1.8136051893234253, + "learning_rate": 7.072802197802199e-06, + "loss": 0.0185, + "step": 31251 + }, + { + "epoch": 85.85714285714286, + "grad_norm": 11.029790878295898, + "learning_rate": 7.071428571428572e-06, + "loss": 0.1257, + "step": 31252 + }, + { + "epoch": 85.85989010989012, + "grad_norm": 18.64785385131836, + "learning_rate": 7.070054945054945e-06, + "loss": 0.5856, + "step": 31253 + }, + { + "epoch": 85.86263736263736, + "grad_norm": 2.543895959854126, + "learning_rate": 7.068681318681319e-06, + "loss": 0.0275, + "step": 31254 + }, + { + "epoch": 85.86538461538461, + "grad_norm": 28.009748458862305, + "learning_rate": 7.067307692307693e-06, + "loss": 0.5044, + "step": 31255 + }, + { + "epoch": 85.86813186813187, + "grad_norm": 12.063109397888184, + "learning_rate": 7.065934065934065e-06, + "loss": 0.1602, + "step": 31256 + }, + { + "epoch": 85.87087912087912, + "grad_norm": 22.396352767944336, + "learning_rate": 7.0645604395604405e-06, + "loss": 0.4267, + "step": 31257 + }, + { + "epoch": 85.87362637362638, + "grad_norm": 15.353316307067871, + "learning_rate": 7.063186813186813e-06, + "loss": 0.2648, + "step": 31258 + }, + { + "epoch": 85.87637362637362, + "grad_norm": 10.413509368896484, + "learning_rate": 7.061813186813187e-06, + "loss": 0.3482, + "step": 31259 + }, + { + "epoch": 85.87912087912088, + "grad_norm": 8.149500846862793, + "learning_rate": 7.060439560439561e-06, + "loss": 0.0851, + "step": 31260 + }, + { + "epoch": 85.88186813186813, + "grad_norm": 17.31516456604004, + "learning_rate": 7.059065934065934e-06, + "loss": 0.2187, + "step": 31261 + }, + { + "epoch": 85.88461538461539, + "grad_norm": 1.974906086921692, + "learning_rate": 7.057692307692308e-06, + "loss": 0.0182, + "step": 31262 + }, + { + "epoch": 85.88736263736264, + "grad_norm": 17.765317916870117, + "learning_rate": 7.056318681318681e-06, + "loss": 0.3286, + "step": 31263 + }, + { + "epoch": 85.89010989010988, + "grad_norm": 17.147926330566406, + "learning_rate": 7.054945054945056e-06, + "loss": 0.3923, + "step": 31264 + }, + { + "epoch": 85.89285714285714, + "grad_norm": 12.510430335998535, + "learning_rate": 7.0535714285714286e-06, + "loss": 0.2502, + "step": 31265 + }, + { + "epoch": 85.8956043956044, + "grad_norm": 11.518625259399414, + "learning_rate": 7.052197802197802e-06, + "loss": 0.2757, + "step": 31266 + }, + { + "epoch": 85.89835164835165, + "grad_norm": 21.40083885192871, + "learning_rate": 7.050824175824176e-06, + "loss": 0.2428, + "step": 31267 + }, + { + "epoch": 85.9010989010989, + "grad_norm": 6.205498218536377, + "learning_rate": 7.04945054945055e-06, + "loss": 0.0722, + "step": 31268 + }, + { + "epoch": 85.90384615384616, + "grad_norm": 10.22498607635498, + "learning_rate": 7.048076923076924e-06, + "loss": 0.1947, + "step": 31269 + }, + { + "epoch": 85.9065934065934, + "grad_norm": 2.2893006801605225, + "learning_rate": 7.046703296703297e-06, + "loss": 0.0144, + "step": 31270 + }, + { + "epoch": 85.90934065934066, + "grad_norm": 15.10107135772705, + "learning_rate": 7.04532967032967e-06, + "loss": 0.2528, + "step": 31271 + }, + { + "epoch": 85.91208791208791, + "grad_norm": 11.65420150756836, + "learning_rate": 7.043956043956045e-06, + "loss": 0.1678, + "step": 31272 + }, + { + "epoch": 85.91483516483517, + "grad_norm": 6.95720100402832, + "learning_rate": 7.0425824175824175e-06, + "loss": 0.1017, + "step": 31273 + }, + { + "epoch": 85.91758241758242, + "grad_norm": 1.8735262155532837, + "learning_rate": 7.041208791208792e-06, + "loss": 0.0123, + "step": 31274 + }, + { + "epoch": 85.92032967032966, + "grad_norm": 10.720043182373047, + "learning_rate": 7.039835164835165e-06, + "loss": 0.2182, + "step": 31275 + }, + { + "epoch": 85.92307692307692, + "grad_norm": 16.435678482055664, + "learning_rate": 7.0384615384615386e-06, + "loss": 0.4939, + "step": 31276 + }, + { + "epoch": 85.92582417582418, + "grad_norm": 21.552019119262695, + "learning_rate": 7.037087912087913e-06, + "loss": 0.3441, + "step": 31277 + }, + { + "epoch": 85.92857142857143, + "grad_norm": 6.160330772399902, + "learning_rate": 7.035714285714285e-06, + "loss": 0.0891, + "step": 31278 + }, + { + "epoch": 85.93131868131869, + "grad_norm": 6.278586387634277, + "learning_rate": 7.0343406593406605e-06, + "loss": 0.0684, + "step": 31279 + }, + { + "epoch": 85.93406593406593, + "grad_norm": 16.159730911254883, + "learning_rate": 7.032967032967033e-06, + "loss": 0.1908, + "step": 31280 + }, + { + "epoch": 85.93681318681318, + "grad_norm": 18.276700973510742, + "learning_rate": 7.0315934065934065e-06, + "loss": 0.2543, + "step": 31281 + }, + { + "epoch": 85.93956043956044, + "grad_norm": 9.044010162353516, + "learning_rate": 7.030219780219781e-06, + "loss": 0.0943, + "step": 31282 + }, + { + "epoch": 85.9423076923077, + "grad_norm": 21.142871856689453, + "learning_rate": 7.028846153846154e-06, + "loss": 0.8176, + "step": 31283 + }, + { + "epoch": 85.94505494505495, + "grad_norm": 3.949991226196289, + "learning_rate": 7.027472527472528e-06, + "loss": 0.0357, + "step": 31284 + }, + { + "epoch": 85.9478021978022, + "grad_norm": 7.049394130706787, + "learning_rate": 7.026098901098902e-06, + "loss": 0.0838, + "step": 31285 + }, + { + "epoch": 85.95054945054945, + "grad_norm": 14.084059715270996, + "learning_rate": 7.024725274725274e-06, + "loss": 0.1512, + "step": 31286 + }, + { + "epoch": 85.9532967032967, + "grad_norm": 5.949623107910156, + "learning_rate": 7.023351648351649e-06, + "loss": 0.065, + "step": 31287 + }, + { + "epoch": 85.95604395604396, + "grad_norm": 12.129267692565918, + "learning_rate": 7.021978021978022e-06, + "loss": 0.4352, + "step": 31288 + }, + { + "epoch": 85.95879120879121, + "grad_norm": 11.945014953613281, + "learning_rate": 7.020604395604396e-06, + "loss": 0.1633, + "step": 31289 + }, + { + "epoch": 85.96153846153847, + "grad_norm": 17.015426635742188, + "learning_rate": 7.01923076923077e-06, + "loss": 0.3512, + "step": 31290 + }, + { + "epoch": 85.96428571428571, + "grad_norm": 12.32835865020752, + "learning_rate": 7.017857142857143e-06, + "loss": 0.2467, + "step": 31291 + }, + { + "epoch": 85.96703296703296, + "grad_norm": 14.79916763305664, + "learning_rate": 7.016483516483517e-06, + "loss": 0.3343, + "step": 31292 + }, + { + "epoch": 85.96978021978022, + "grad_norm": 7.970034599304199, + "learning_rate": 7.01510989010989e-06, + "loss": 0.2578, + "step": 31293 + }, + { + "epoch": 85.97252747252747, + "grad_norm": 13.380620002746582, + "learning_rate": 7.013736263736265e-06, + "loss": 0.2272, + "step": 31294 + }, + { + "epoch": 85.97527472527473, + "grad_norm": 5.841172695159912, + "learning_rate": 7.0123626373626375e-06, + "loss": 0.103, + "step": 31295 + }, + { + "epoch": 85.97802197802197, + "grad_norm": 11.300597190856934, + "learning_rate": 7.010989010989011e-06, + "loss": 0.2458, + "step": 31296 + }, + { + "epoch": 85.98076923076923, + "grad_norm": 7.379159927368164, + "learning_rate": 7.009615384615385e-06, + "loss": 0.1026, + "step": 31297 + }, + { + "epoch": 85.98351648351648, + "grad_norm": 14.24374008178711, + "learning_rate": 7.008241758241759e-06, + "loss": 0.3279, + "step": 31298 + }, + { + "epoch": 85.98626373626374, + "grad_norm": 5.326721668243408, + "learning_rate": 7.006868131868133e-06, + "loss": 0.0285, + "step": 31299 + }, + { + "epoch": 85.98901098901099, + "grad_norm": 20.034210205078125, + "learning_rate": 7.005494505494506e-06, + "loss": 0.2402, + "step": 31300 + }, + { + "epoch": 85.99175824175825, + "grad_norm": 15.478482246398926, + "learning_rate": 7.004120879120879e-06, + "loss": 0.2044, + "step": 31301 + }, + { + "epoch": 85.99450549450549, + "grad_norm": 5.989533424377441, + "learning_rate": 7.002747252747253e-06, + "loss": 0.1013, + "step": 31302 + }, + { + "epoch": 85.99725274725274, + "grad_norm": 7.501388072967529, + "learning_rate": 7.0013736263736265e-06, + "loss": 0.0955, + "step": 31303 + }, + { + "epoch": 86.0, + "grad_norm": 69.03571319580078, + "learning_rate": 7.000000000000001e-06, + "loss": 1.4247, + "step": 31304 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.7768595041322314, + "eval_f1": 0.7732883218256641, + "eval_f1_DuraRiadoRio_64x64": 0.715, + "eval_f1_Mole_64x64": 0.8446215139442231, + "eval_f1_Quebrado_64x64": 0.8611111111111112, + "eval_f1_RiadoRio_64x64": 0.5560538116591929, + "eval_f1_RioFechado_64x64": 0.8896551724137931, + "eval_loss": 1.000455379486084, + "eval_precision": 0.8310440249919855, + "eval_precision_DuraRiadoRio_64x64": 0.55859375, + "eval_precision_Mole_64x64": 0.9906542056074766, + "eval_precision_Quebrado_64x64": 0.8611111111111112, + "eval_precision_RiadoRio_64x64": 0.8732394366197183, + "eval_precision_RioFechado_64x64": 0.8716216216216216, + "eval_recall": 0.781324643769047, + "eval_recall_DuraRiadoRio_64x64": 0.9930555555555556, + "eval_recall_Mole_64x64": 0.7361111111111112, + "eval_recall_Quebrado_64x64": 0.8611111111111112, + "eval_recall_RiadoRio_64x64": 0.40789473684210525, + "eval_recall_RioFechado_64x64": 0.9084507042253521, + "eval_runtime": 1.7632, + "eval_samples_per_second": 411.759, + "eval_steps_per_second": 26.089, + "step": 31304 + }, + { + "epoch": 86.00274725274726, + "grad_norm": 4.449486255645752, + "learning_rate": 6.998626373626374e-06, + "loss": 0.0571, + "step": 31305 + }, + { + "epoch": 86.00549450549451, + "grad_norm": 5.971493244171143, + "learning_rate": 6.9972527472527476e-06, + "loss": 0.078, + "step": 31306 + }, + { + "epoch": 86.00824175824175, + "grad_norm": 6.571935176849365, + "learning_rate": 6.995879120879122e-06, + "loss": 0.1239, + "step": 31307 + }, + { + "epoch": 86.01098901098901, + "grad_norm": 6.693199634552002, + "learning_rate": 6.994505494505494e-06, + "loss": 0.0405, + "step": 31308 + }, + { + "epoch": 86.01373626373626, + "grad_norm": 18.256704330444336, + "learning_rate": 6.9931318681318695e-06, + "loss": 0.4487, + "step": 31309 + }, + { + "epoch": 86.01648351648352, + "grad_norm": 27.87000274658203, + "learning_rate": 6.991758241758242e-06, + "loss": 0.7846, + "step": 31310 + }, + { + "epoch": 86.01923076923077, + "grad_norm": 10.443095207214355, + "learning_rate": 6.9903846153846154e-06, + "loss": 0.1959, + "step": 31311 + }, + { + "epoch": 86.02197802197803, + "grad_norm": 19.82872200012207, + "learning_rate": 6.98901098901099e-06, + "loss": 0.1928, + "step": 31312 + }, + { + "epoch": 86.02472527472527, + "grad_norm": 10.206469535827637, + "learning_rate": 6.987637362637363e-06, + "loss": 0.1915, + "step": 31313 + }, + { + "epoch": 86.02747252747253, + "grad_norm": 5.852241516113281, + "learning_rate": 6.986263736263736e-06, + "loss": 0.0704, + "step": 31314 + }, + { + "epoch": 86.03021978021978, + "grad_norm": 15.287398338317871, + "learning_rate": 6.984890109890111e-06, + "loss": 0.183, + "step": 31315 + }, + { + "epoch": 86.03296703296704, + "grad_norm": 19.966894149780273, + "learning_rate": 6.983516483516483e-06, + "loss": 0.3577, + "step": 31316 + }, + { + "epoch": 86.03571428571429, + "grad_norm": 8.289173126220703, + "learning_rate": 6.9821428571428576e-06, + "loss": 0.0689, + "step": 31317 + }, + { + "epoch": 86.03846153846153, + "grad_norm": 13.2860689163208, + "learning_rate": 6.980769230769231e-06, + "loss": 0.1975, + "step": 31318 + }, + { + "epoch": 86.04120879120879, + "grad_norm": 15.183987617492676, + "learning_rate": 6.979395604395604e-06, + "loss": 0.3813, + "step": 31319 + }, + { + "epoch": 86.04395604395604, + "grad_norm": 9.015361785888672, + "learning_rate": 6.978021978021979e-06, + "loss": 0.1542, + "step": 31320 + }, + { + "epoch": 86.0467032967033, + "grad_norm": 6.141110897064209, + "learning_rate": 6.976648351648351e-06, + "loss": 0.0819, + "step": 31321 + }, + { + "epoch": 86.04945054945055, + "grad_norm": 12.429741859436035, + "learning_rate": 6.975274725274726e-06, + "loss": 0.1736, + "step": 31322 + }, + { + "epoch": 86.0521978021978, + "grad_norm": 20.86835479736328, + "learning_rate": 6.973901098901099e-06, + "loss": 0.5343, + "step": 31323 + }, + { + "epoch": 86.05494505494505, + "grad_norm": 5.0592827796936035, + "learning_rate": 6.972527472527472e-06, + "loss": 0.0545, + "step": 31324 + }, + { + "epoch": 86.0576923076923, + "grad_norm": 12.639524459838867, + "learning_rate": 6.9711538461538465e-06, + "loss": 0.2178, + "step": 31325 + }, + { + "epoch": 86.06043956043956, + "grad_norm": 7.340926170349121, + "learning_rate": 6.96978021978022e-06, + "loss": 0.0899, + "step": 31326 + }, + { + "epoch": 86.06318681318682, + "grad_norm": 9.747600555419922, + "learning_rate": 6.968406593406594e-06, + "loss": 0.1647, + "step": 31327 + }, + { + "epoch": 86.06593406593407, + "grad_norm": 5.511858940124512, + "learning_rate": 6.967032967032968e-06, + "loss": 0.1084, + "step": 31328 + }, + { + "epoch": 86.06868131868131, + "grad_norm": 4.656818866729736, + "learning_rate": 6.96565934065934e-06, + "loss": 0.0973, + "step": 31329 + }, + { + "epoch": 86.07142857142857, + "grad_norm": 23.810710906982422, + "learning_rate": 6.964285714285715e-06, + "loss": 0.8096, + "step": 31330 + }, + { + "epoch": 86.07417582417582, + "grad_norm": 21.870939254760742, + "learning_rate": 6.962912087912088e-06, + "loss": 0.5572, + "step": 31331 + }, + { + "epoch": 86.07692307692308, + "grad_norm": 17.36553192138672, + "learning_rate": 6.961538461538462e-06, + "loss": 0.2407, + "step": 31332 + }, + { + "epoch": 86.07967032967034, + "grad_norm": 14.043854713439941, + "learning_rate": 6.9601648351648355e-06, + "loss": 0.1602, + "step": 31333 + }, + { + "epoch": 86.08241758241758, + "grad_norm": 17.9325008392334, + "learning_rate": 6.958791208791209e-06, + "loss": 0.4987, + "step": 31334 + }, + { + "epoch": 86.08516483516483, + "grad_norm": 8.595718383789062, + "learning_rate": 6.957417582417583e-06, + "loss": 0.1282, + "step": 31335 + }, + { + "epoch": 86.08791208791209, + "grad_norm": 15.011029243469238, + "learning_rate": 6.956043956043956e-06, + "loss": 0.1938, + "step": 31336 + }, + { + "epoch": 86.09065934065934, + "grad_norm": 7.907956123352051, + "learning_rate": 6.954670329670331e-06, + "loss": 0.0583, + "step": 31337 + }, + { + "epoch": 86.0934065934066, + "grad_norm": 8.962318420410156, + "learning_rate": 6.953296703296703e-06, + "loss": 0.1859, + "step": 31338 + }, + { + "epoch": 86.09615384615384, + "grad_norm": 10.412238121032715, + "learning_rate": 6.951923076923077e-06, + "loss": 0.2268, + "step": 31339 + }, + { + "epoch": 86.0989010989011, + "grad_norm": 0.6473767161369324, + "learning_rate": 6.950549450549451e-06, + "loss": 0.0079, + "step": 31340 + }, + { + "epoch": 86.10164835164835, + "grad_norm": 13.13156509399414, + "learning_rate": 6.949175824175824e-06, + "loss": 0.4014, + "step": 31341 + }, + { + "epoch": 86.1043956043956, + "grad_norm": 6.833922863006592, + "learning_rate": 6.947802197802199e-06, + "loss": 0.1125, + "step": 31342 + }, + { + "epoch": 86.10714285714286, + "grad_norm": 3.4215087890625, + "learning_rate": 6.946428571428572e-06, + "loss": 0.0424, + "step": 31343 + }, + { + "epoch": 86.10989010989012, + "grad_norm": 1.715976595878601, + "learning_rate": 6.945054945054945e-06, + "loss": 0.023, + "step": 31344 + }, + { + "epoch": 86.11263736263736, + "grad_norm": 3.391145944595337, + "learning_rate": 6.94368131868132e-06, + "loss": 0.0519, + "step": 31345 + }, + { + "epoch": 86.11538461538461, + "grad_norm": 9.165863990783691, + "learning_rate": 6.942307692307692e-06, + "loss": 0.0843, + "step": 31346 + }, + { + "epoch": 86.11813186813187, + "grad_norm": 9.518525123596191, + "learning_rate": 6.9409340659340666e-06, + "loss": 0.0875, + "step": 31347 + }, + { + "epoch": 86.12087912087912, + "grad_norm": 26.64307403564453, + "learning_rate": 6.93956043956044e-06, + "loss": 0.7614, + "step": 31348 + }, + { + "epoch": 86.12362637362638, + "grad_norm": 15.765008926391602, + "learning_rate": 6.938186813186813e-06, + "loss": 0.2012, + "step": 31349 + }, + { + "epoch": 86.12637362637362, + "grad_norm": 7.25197172164917, + "learning_rate": 6.936813186813188e-06, + "loss": 0.0944, + "step": 31350 + }, + { + "epoch": 86.12912087912088, + "grad_norm": 4.372137546539307, + "learning_rate": 6.93543956043956e-06, + "loss": 0.0586, + "step": 31351 + }, + { + "epoch": 86.13186813186813, + "grad_norm": 11.21092414855957, + "learning_rate": 6.934065934065935e-06, + "loss": 0.1029, + "step": 31352 + }, + { + "epoch": 86.13461538461539, + "grad_norm": 7.352342128753662, + "learning_rate": 6.932692307692308e-06, + "loss": 0.1337, + "step": 31353 + }, + { + "epoch": 86.13736263736264, + "grad_norm": 5.952825546264648, + "learning_rate": 6.931318681318681e-06, + "loss": 0.0606, + "step": 31354 + }, + { + "epoch": 86.14010989010988, + "grad_norm": 2.430114984512329, + "learning_rate": 6.9299450549450555e-06, + "loss": 0.0363, + "step": 31355 + }, + { + "epoch": 86.14285714285714, + "grad_norm": 11.37849235534668, + "learning_rate": 6.928571428571429e-06, + "loss": 0.1367, + "step": 31356 + }, + { + "epoch": 86.1456043956044, + "grad_norm": 11.908246040344238, + "learning_rate": 6.927197802197803e-06, + "loss": 0.1736, + "step": 31357 + }, + { + "epoch": 86.14835164835165, + "grad_norm": 17.49917984008789, + "learning_rate": 6.9258241758241766e-06, + "loss": 0.2821, + "step": 31358 + }, + { + "epoch": 86.1510989010989, + "grad_norm": 8.430681228637695, + "learning_rate": 6.924450549450549e-06, + "loss": 0.1847, + "step": 31359 + }, + { + "epoch": 86.15384615384616, + "grad_norm": 15.617571830749512, + "learning_rate": 6.923076923076923e-06, + "loss": 0.2202, + "step": 31360 + }, + { + "epoch": 86.1565934065934, + "grad_norm": 20.030529022216797, + "learning_rate": 6.921703296703297e-06, + "loss": 0.6038, + "step": 31361 + }, + { + "epoch": 86.15934065934066, + "grad_norm": 11.272894859313965, + "learning_rate": 6.920329670329671e-06, + "loss": 0.277, + "step": 31362 + }, + { + "epoch": 86.16208791208791, + "grad_norm": 4.2484211921691895, + "learning_rate": 6.9189560439560445e-06, + "loss": 0.0566, + "step": 31363 + }, + { + "epoch": 86.16483516483517, + "grad_norm": 4.516879081726074, + "learning_rate": 6.917582417582418e-06, + "loss": 0.0672, + "step": 31364 + }, + { + "epoch": 86.16758241758242, + "grad_norm": 5.472296714782715, + "learning_rate": 6.916208791208792e-06, + "loss": 0.0817, + "step": 31365 + }, + { + "epoch": 86.17032967032966, + "grad_norm": 25.983930587768555, + "learning_rate": 6.914835164835165e-06, + "loss": 0.4158, + "step": 31366 + }, + { + "epoch": 86.17307692307692, + "grad_norm": 12.528871536254883, + "learning_rate": 6.913461538461538e-06, + "loss": 0.1756, + "step": 31367 + }, + { + "epoch": 86.17582417582418, + "grad_norm": 6.120877742767334, + "learning_rate": 6.912087912087912e-06, + "loss": 0.0914, + "step": 31368 + }, + { + "epoch": 86.17857142857143, + "grad_norm": 10.626250267028809, + "learning_rate": 6.910714285714286e-06, + "loss": 0.1881, + "step": 31369 + }, + { + "epoch": 86.18131868131869, + "grad_norm": 11.552218437194824, + "learning_rate": 6.90934065934066e-06, + "loss": 0.1278, + "step": 31370 + }, + { + "epoch": 86.18406593406593, + "grad_norm": 10.525118827819824, + "learning_rate": 6.907967032967033e-06, + "loss": 0.2566, + "step": 31371 + }, + { + "epoch": 86.18681318681318, + "grad_norm": 10.283000946044922, + "learning_rate": 6.906593406593406e-06, + "loss": 0.1526, + "step": 31372 + }, + { + "epoch": 86.18956043956044, + "grad_norm": 28.818754196166992, + "learning_rate": 6.905219780219781e-06, + "loss": 0.719, + "step": 31373 + }, + { + "epoch": 86.1923076923077, + "grad_norm": 11.13818645477295, + "learning_rate": 6.903846153846154e-06, + "loss": 0.2517, + "step": 31374 + }, + { + "epoch": 86.19505494505495, + "grad_norm": 3.438903331756592, + "learning_rate": 6.902472527472528e-06, + "loss": 0.0333, + "step": 31375 + }, + { + "epoch": 86.1978021978022, + "grad_norm": 13.16648006439209, + "learning_rate": 6.901098901098901e-06, + "loss": 0.2467, + "step": 31376 + }, + { + "epoch": 86.20054945054945, + "grad_norm": 20.175798416137695, + "learning_rate": 6.899725274725275e-06, + "loss": 0.5165, + "step": 31377 + }, + { + "epoch": 86.2032967032967, + "grad_norm": 17.421022415161133, + "learning_rate": 6.898351648351649e-06, + "loss": 0.4691, + "step": 31378 + }, + { + "epoch": 86.20604395604396, + "grad_norm": 21.40746307373047, + "learning_rate": 6.896978021978022e-06, + "loss": 0.3875, + "step": 31379 + }, + { + "epoch": 86.20879120879121, + "grad_norm": 2.618748188018799, + "learning_rate": 6.895604395604397e-06, + "loss": 0.0278, + "step": 31380 + }, + { + "epoch": 86.21153846153847, + "grad_norm": 4.849564075469971, + "learning_rate": 6.894230769230769e-06, + "loss": 0.0308, + "step": 31381 + }, + { + "epoch": 86.21428571428571, + "grad_norm": 21.325504302978516, + "learning_rate": 6.8928571428571426e-06, + "loss": 0.4262, + "step": 31382 + }, + { + "epoch": 86.21703296703296, + "grad_norm": 10.947049140930176, + "learning_rate": 6.891483516483517e-06, + "loss": 0.1081, + "step": 31383 + }, + { + "epoch": 86.21978021978022, + "grad_norm": 8.911273956298828, + "learning_rate": 6.89010989010989e-06, + "loss": 0.0529, + "step": 31384 + }, + { + "epoch": 86.22252747252747, + "grad_norm": 6.783493995666504, + "learning_rate": 6.8887362637362645e-06, + "loss": 0.1456, + "step": 31385 + }, + { + "epoch": 86.22527472527473, + "grad_norm": 15.935685157775879, + "learning_rate": 6.887362637362638e-06, + "loss": 0.2296, + "step": 31386 + }, + { + "epoch": 86.22802197802197, + "grad_norm": 9.520424842834473, + "learning_rate": 6.8859890109890104e-06, + "loss": 0.2359, + "step": 31387 + }, + { + "epoch": 86.23076923076923, + "grad_norm": 9.871808052062988, + "learning_rate": 6.8846153846153855e-06, + "loss": 0.1277, + "step": 31388 + }, + { + "epoch": 86.23351648351648, + "grad_norm": 10.47801685333252, + "learning_rate": 6.883241758241758e-06, + "loss": 0.195, + "step": 31389 + }, + { + "epoch": 86.23626373626374, + "grad_norm": 3.8942837715148926, + "learning_rate": 6.881868131868132e-06, + "loss": 0.0617, + "step": 31390 + }, + { + "epoch": 86.23901098901099, + "grad_norm": 13.165595054626465, + "learning_rate": 6.880494505494506e-06, + "loss": 0.2162, + "step": 31391 + }, + { + "epoch": 86.24175824175825, + "grad_norm": 19.414216995239258, + "learning_rate": 6.879120879120879e-06, + "loss": 0.281, + "step": 31392 + }, + { + "epoch": 86.24450549450549, + "grad_norm": 10.866898536682129, + "learning_rate": 6.8777472527472534e-06, + "loss": 0.2402, + "step": 31393 + }, + { + "epoch": 86.24725274725274, + "grad_norm": 14.71216106414795, + "learning_rate": 6.876373626373627e-06, + "loss": 0.2993, + "step": 31394 + }, + { + "epoch": 86.25, + "grad_norm": 36.591796875, + "learning_rate": 6.875000000000001e-06, + "loss": 1.0977, + "step": 31395 + }, + { + "epoch": 86.25274725274726, + "grad_norm": 14.726387977600098, + "learning_rate": 6.873626373626374e-06, + "loss": 0.5771, + "step": 31396 + }, + { + "epoch": 86.25549450549451, + "grad_norm": 8.379290580749512, + "learning_rate": 6.872252747252747e-06, + "loss": 0.0983, + "step": 31397 + }, + { + "epoch": 86.25824175824175, + "grad_norm": 16.546430587768555, + "learning_rate": 6.870879120879121e-06, + "loss": 0.4968, + "step": 31398 + }, + { + "epoch": 86.26098901098901, + "grad_norm": 3.550901412963867, + "learning_rate": 6.869505494505495e-06, + "loss": 0.0391, + "step": 31399 + }, + { + "epoch": 86.26373626373626, + "grad_norm": 25.599651336669922, + "learning_rate": 6.868131868131869e-06, + "loss": 0.4808, + "step": 31400 + }, + { + "epoch": 86.26648351648352, + "grad_norm": 23.79475212097168, + "learning_rate": 6.866758241758242e-06, + "loss": 0.7105, + "step": 31401 + }, + { + "epoch": 86.26923076923077, + "grad_norm": 3.6011621952056885, + "learning_rate": 6.865384615384615e-06, + "loss": 0.0879, + "step": 31402 + }, + { + "epoch": 86.27197802197803, + "grad_norm": 20.459455490112305, + "learning_rate": 6.86401098901099e-06, + "loss": 0.3818, + "step": 31403 + }, + { + "epoch": 86.27472527472527, + "grad_norm": 8.033008575439453, + "learning_rate": 6.862637362637363e-06, + "loss": 0.1216, + "step": 31404 + }, + { + "epoch": 86.27747252747253, + "grad_norm": 10.552194595336914, + "learning_rate": 6.861263736263737e-06, + "loss": 0.0991, + "step": 31405 + }, + { + "epoch": 86.28021978021978, + "grad_norm": 3.4402782917022705, + "learning_rate": 6.85989010989011e-06, + "loss": 0.0423, + "step": 31406 + }, + { + "epoch": 86.28296703296704, + "grad_norm": 20.03033447265625, + "learning_rate": 6.858516483516484e-06, + "loss": 0.5731, + "step": 31407 + }, + { + "epoch": 86.28571428571429, + "grad_norm": 21.09762191772461, + "learning_rate": 6.857142857142858e-06, + "loss": 0.6218, + "step": 31408 + }, + { + "epoch": 86.28846153846153, + "grad_norm": 3.931015968322754, + "learning_rate": 6.8557692307692305e-06, + "loss": 0.0638, + "step": 31409 + }, + { + "epoch": 86.29120879120879, + "grad_norm": 7.812618255615234, + "learning_rate": 6.854395604395606e-06, + "loss": 0.0913, + "step": 31410 + }, + { + "epoch": 86.29395604395604, + "grad_norm": 8.252082824707031, + "learning_rate": 6.853021978021978e-06, + "loss": 0.08, + "step": 31411 + }, + { + "epoch": 86.2967032967033, + "grad_norm": 7.774262428283691, + "learning_rate": 6.8516483516483515e-06, + "loss": 0.1367, + "step": 31412 + }, + { + "epoch": 86.29945054945055, + "grad_norm": 19.565553665161133, + "learning_rate": 6.850274725274726e-06, + "loss": 0.2364, + "step": 31413 + }, + { + "epoch": 86.3021978021978, + "grad_norm": 3.633157253265381, + "learning_rate": 6.848901098901099e-06, + "loss": 0.038, + "step": 31414 + }, + { + "epoch": 86.30494505494505, + "grad_norm": 22.774080276489258, + "learning_rate": 6.8475274725274735e-06, + "loss": 0.465, + "step": 31415 + }, + { + "epoch": 86.3076923076923, + "grad_norm": 5.715364456176758, + "learning_rate": 6.846153846153847e-06, + "loss": 0.076, + "step": 31416 + }, + { + "epoch": 86.31043956043956, + "grad_norm": 17.326723098754883, + "learning_rate": 6.8447802197802194e-06, + "loss": 0.5328, + "step": 31417 + }, + { + "epoch": 86.31318681318682, + "grad_norm": 8.463623046875, + "learning_rate": 6.8434065934065945e-06, + "loss": 0.1349, + "step": 31418 + }, + { + "epoch": 86.31593406593407, + "grad_norm": 6.347837924957275, + "learning_rate": 6.842032967032967e-06, + "loss": 0.0491, + "step": 31419 + }, + { + "epoch": 86.31868131868131, + "grad_norm": 5.4098896980285645, + "learning_rate": 6.840659340659341e-06, + "loss": 0.07, + "step": 31420 + }, + { + "epoch": 86.32142857142857, + "grad_norm": 17.052852630615234, + "learning_rate": 6.839285714285715e-06, + "loss": 0.2411, + "step": 31421 + }, + { + "epoch": 86.32417582417582, + "grad_norm": 18.06740951538086, + "learning_rate": 6.837912087912088e-06, + "loss": 0.3431, + "step": 31422 + }, + { + "epoch": 86.32692307692308, + "grad_norm": 13.29776668548584, + "learning_rate": 6.836538461538462e-06, + "loss": 0.2694, + "step": 31423 + }, + { + "epoch": 86.32967032967034, + "grad_norm": 14.302735328674316, + "learning_rate": 6.835164835164835e-06, + "loss": 0.2608, + "step": 31424 + }, + { + "epoch": 86.33241758241758, + "grad_norm": 6.13353157043457, + "learning_rate": 6.833791208791208e-06, + "loss": 0.0652, + "step": 31425 + }, + { + "epoch": 86.33516483516483, + "grad_norm": 1.9093029499053955, + "learning_rate": 6.832417582417583e-06, + "loss": 0.03, + "step": 31426 + }, + { + "epoch": 86.33791208791209, + "grad_norm": 7.162989139556885, + "learning_rate": 6.831043956043956e-06, + "loss": 0.1134, + "step": 31427 + }, + { + "epoch": 86.34065934065934, + "grad_norm": 14.514780044555664, + "learning_rate": 6.82967032967033e-06, + "loss": 0.232, + "step": 31428 + }, + { + "epoch": 86.3434065934066, + "grad_norm": 9.015313148498535, + "learning_rate": 6.828296703296704e-06, + "loss": 0.0936, + "step": 31429 + }, + { + "epoch": 86.34615384615384, + "grad_norm": 17.945430755615234, + "learning_rate": 6.826923076923076e-06, + "loss": 0.2315, + "step": 31430 + }, + { + "epoch": 86.3489010989011, + "grad_norm": 13.677360534667969, + "learning_rate": 6.825549450549451e-06, + "loss": 0.4171, + "step": 31431 + }, + { + "epoch": 86.35164835164835, + "grad_norm": 3.683511972427368, + "learning_rate": 6.824175824175824e-06, + "loss": 0.0415, + "step": 31432 + }, + { + "epoch": 86.3543956043956, + "grad_norm": 13.415046691894531, + "learning_rate": 6.822802197802198e-06, + "loss": 0.1965, + "step": 31433 + }, + { + "epoch": 86.35714285714286, + "grad_norm": 1.860182762145996, + "learning_rate": 6.821428571428572e-06, + "loss": 0.0206, + "step": 31434 + }, + { + "epoch": 86.35989010989012, + "grad_norm": 8.692419052124023, + "learning_rate": 6.820054945054945e-06, + "loss": 0.1077, + "step": 31435 + }, + { + "epoch": 86.36263736263736, + "grad_norm": 16.61711883544922, + "learning_rate": 6.818681318681319e-06, + "loss": 0.2054, + "step": 31436 + }, + { + "epoch": 86.36538461538461, + "grad_norm": 5.069241046905518, + "learning_rate": 6.817307692307693e-06, + "loss": 0.0825, + "step": 31437 + }, + { + "epoch": 86.36813186813187, + "grad_norm": 14.47596263885498, + "learning_rate": 6.815934065934067e-06, + "loss": 0.1475, + "step": 31438 + }, + { + "epoch": 86.37087912087912, + "grad_norm": 3.3676772117614746, + "learning_rate": 6.8145604395604395e-06, + "loss": 0.0536, + "step": 31439 + }, + { + "epoch": 86.37362637362638, + "grad_norm": 9.704282760620117, + "learning_rate": 6.813186813186813e-06, + "loss": 0.1088, + "step": 31440 + }, + { + "epoch": 86.37637362637362, + "grad_norm": 11.794952392578125, + "learning_rate": 6.811813186813187e-06, + "loss": 0.09, + "step": 31441 + }, + { + "epoch": 86.37912087912088, + "grad_norm": 3.0370962619781494, + "learning_rate": 6.8104395604395605e-06, + "loss": 0.0488, + "step": 31442 + }, + { + "epoch": 86.38186813186813, + "grad_norm": 12.503342628479004, + "learning_rate": 6.809065934065935e-06, + "loss": 0.2735, + "step": 31443 + }, + { + "epoch": 86.38461538461539, + "grad_norm": 18.750621795654297, + "learning_rate": 6.807692307692308e-06, + "loss": 0.3634, + "step": 31444 + }, + { + "epoch": 86.38736263736264, + "grad_norm": 21.945911407470703, + "learning_rate": 6.806318681318681e-06, + "loss": 0.571, + "step": 31445 + }, + { + "epoch": 86.39010989010988, + "grad_norm": 2.321547269821167, + "learning_rate": 6.804945054945056e-06, + "loss": 0.0313, + "step": 31446 + }, + { + "epoch": 86.39285714285714, + "grad_norm": 24.216400146484375, + "learning_rate": 6.803571428571428e-06, + "loss": 0.6218, + "step": 31447 + }, + { + "epoch": 86.3956043956044, + "grad_norm": 7.2989115715026855, + "learning_rate": 6.802197802197803e-06, + "loss": 0.0965, + "step": 31448 + }, + { + "epoch": 86.39835164835165, + "grad_norm": 4.006138801574707, + "learning_rate": 6.800824175824176e-06, + "loss": 0.0504, + "step": 31449 + }, + { + "epoch": 86.4010989010989, + "grad_norm": 12.268082618713379, + "learning_rate": 6.7994505494505495e-06, + "loss": 0.2187, + "step": 31450 + }, + { + "epoch": 86.40384615384616, + "grad_norm": 18.167879104614258, + "learning_rate": 6.798076923076924e-06, + "loss": 0.2683, + "step": 31451 + }, + { + "epoch": 86.4065934065934, + "grad_norm": 7.6609930992126465, + "learning_rate": 6.796703296703297e-06, + "loss": 0.1144, + "step": 31452 + }, + { + "epoch": 86.40934065934066, + "grad_norm": 18.6064510345459, + "learning_rate": 6.795329670329671e-06, + "loss": 0.335, + "step": 31453 + }, + { + "epoch": 86.41208791208791, + "grad_norm": 10.99040699005127, + "learning_rate": 6.793956043956044e-06, + "loss": 0.1339, + "step": 31454 + }, + { + "epoch": 86.41483516483517, + "grad_norm": 8.634750366210938, + "learning_rate": 6.792582417582417e-06, + "loss": 0.1253, + "step": 31455 + }, + { + "epoch": 86.41758241758242, + "grad_norm": 14.383840560913086, + "learning_rate": 6.791208791208792e-06, + "loss": 0.3215, + "step": 31456 + }, + { + "epoch": 86.42032967032966, + "grad_norm": 17.711536407470703, + "learning_rate": 6.789835164835165e-06, + "loss": 0.534, + "step": 31457 + }, + { + "epoch": 86.42307692307692, + "grad_norm": 23.242900848388672, + "learning_rate": 6.788461538461539e-06, + "loss": 0.3036, + "step": 31458 + }, + { + "epoch": 86.42582417582418, + "grad_norm": 7.886355400085449, + "learning_rate": 6.787087912087913e-06, + "loss": 0.1921, + "step": 31459 + }, + { + "epoch": 86.42857142857143, + "grad_norm": 14.505584716796875, + "learning_rate": 6.785714285714285e-06, + "loss": 0.2788, + "step": 31460 + }, + { + "epoch": 86.43131868131869, + "grad_norm": 9.224406242370605, + "learning_rate": 6.78434065934066e-06, + "loss": 0.1775, + "step": 31461 + }, + { + "epoch": 86.43406593406593, + "grad_norm": 14.632430076599121, + "learning_rate": 6.782967032967033e-06, + "loss": 0.4432, + "step": 31462 + }, + { + "epoch": 86.43681318681318, + "grad_norm": 15.164831161499023, + "learning_rate": 6.781593406593407e-06, + "loss": 0.389, + "step": 31463 + }, + { + "epoch": 86.43956043956044, + "grad_norm": 14.477946281433105, + "learning_rate": 6.7802197802197806e-06, + "loss": 0.241, + "step": 31464 + }, + { + "epoch": 86.4423076923077, + "grad_norm": 10.587892532348633, + "learning_rate": 6.778846153846154e-06, + "loss": 0.1718, + "step": 31465 + }, + { + "epoch": 86.44505494505495, + "grad_norm": 5.054068088531494, + "learning_rate": 6.777472527472528e-06, + "loss": 0.0433, + "step": 31466 + }, + { + "epoch": 86.4478021978022, + "grad_norm": 20.510725021362305, + "learning_rate": 6.776098901098902e-06, + "loss": 0.4639, + "step": 31467 + }, + { + "epoch": 86.45054945054945, + "grad_norm": 18.34522247314453, + "learning_rate": 6.774725274725276e-06, + "loss": 0.6245, + "step": 31468 + }, + { + "epoch": 86.4532967032967, + "grad_norm": 7.391488552093506, + "learning_rate": 6.7733516483516484e-06, + "loss": 0.0396, + "step": 31469 + }, + { + "epoch": 86.45604395604396, + "grad_norm": 38.663963317871094, + "learning_rate": 6.771978021978022e-06, + "loss": 1.5547, + "step": 31470 + }, + { + "epoch": 86.45879120879121, + "grad_norm": 20.761653900146484, + "learning_rate": 6.770604395604396e-06, + "loss": 0.438, + "step": 31471 + }, + { + "epoch": 86.46153846153847, + "grad_norm": 15.57792854309082, + "learning_rate": 6.7692307692307695e-06, + "loss": 0.39, + "step": 31472 + }, + { + "epoch": 86.46428571428571, + "grad_norm": 7.398558139801025, + "learning_rate": 6.767857142857144e-06, + "loss": 0.1152, + "step": 31473 + }, + { + "epoch": 86.46703296703296, + "grad_norm": 16.878835678100586, + "learning_rate": 6.766483516483517e-06, + "loss": 0.1307, + "step": 31474 + }, + { + "epoch": 86.46978021978022, + "grad_norm": 3.5407564640045166, + "learning_rate": 6.76510989010989e-06, + "loss": 0.0316, + "step": 31475 + }, + { + "epoch": 86.47252747252747, + "grad_norm": 15.809937477111816, + "learning_rate": 6.763736263736265e-06, + "loss": 0.3911, + "step": 31476 + }, + { + "epoch": 86.47527472527473, + "grad_norm": 16.807353973388672, + "learning_rate": 6.762362637362637e-06, + "loss": 0.2155, + "step": 31477 + }, + { + "epoch": 86.47802197802197, + "grad_norm": 12.98111629486084, + "learning_rate": 6.760989010989012e-06, + "loss": 0.2727, + "step": 31478 + }, + { + "epoch": 86.48076923076923, + "grad_norm": 9.758408546447754, + "learning_rate": 6.759615384615385e-06, + "loss": 0.1383, + "step": 31479 + }, + { + "epoch": 86.48351648351648, + "grad_norm": 6.411943435668945, + "learning_rate": 6.7582417582417585e-06, + "loss": 0.074, + "step": 31480 + }, + { + "epoch": 86.48626373626374, + "grad_norm": 7.520305633544922, + "learning_rate": 6.756868131868133e-06, + "loss": 0.1373, + "step": 31481 + }, + { + "epoch": 86.48901098901099, + "grad_norm": 11.001904487609863, + "learning_rate": 6.755494505494505e-06, + "loss": 0.126, + "step": 31482 + }, + { + "epoch": 86.49175824175825, + "grad_norm": 6.135746479034424, + "learning_rate": 6.754120879120879e-06, + "loss": 0.1173, + "step": 31483 + }, + { + "epoch": 86.49450549450549, + "grad_norm": 13.614411354064941, + "learning_rate": 6.752747252747253e-06, + "loss": 0.2343, + "step": 31484 + }, + { + "epoch": 86.49725274725274, + "grad_norm": 9.836512565612793, + "learning_rate": 6.751373626373626e-06, + "loss": 0.1305, + "step": 31485 + }, + { + "epoch": 86.5, + "grad_norm": 2.6424295902252197, + "learning_rate": 6.750000000000001e-06, + "loss": 0.0368, + "step": 31486 + }, + { + "epoch": 86.50274725274726, + "grad_norm": 12.288312911987305, + "learning_rate": 6.748626373626374e-06, + "loss": 0.2606, + "step": 31487 + }, + { + "epoch": 86.50549450549451, + "grad_norm": 3.139927625656128, + "learning_rate": 6.7472527472527466e-06, + "loss": 0.0245, + "step": 31488 + }, + { + "epoch": 86.50824175824175, + "grad_norm": 18.21416473388672, + "learning_rate": 6.745879120879122e-06, + "loss": 0.2505, + "step": 31489 + }, + { + "epoch": 86.51098901098901, + "grad_norm": 14.616950988769531, + "learning_rate": 6.744505494505494e-06, + "loss": 0.2589, + "step": 31490 + }, + { + "epoch": 86.51373626373626, + "grad_norm": 5.4025397300720215, + "learning_rate": 6.743131868131869e-06, + "loss": 0.0611, + "step": 31491 + }, + { + "epoch": 86.51648351648352, + "grad_norm": 24.724323272705078, + "learning_rate": 6.741758241758242e-06, + "loss": 0.4409, + "step": 31492 + }, + { + "epoch": 86.51923076923077, + "grad_norm": 6.988461971282959, + "learning_rate": 6.740384615384615e-06, + "loss": 0.1736, + "step": 31493 + }, + { + "epoch": 86.52197802197803, + "grad_norm": 4.337882995605469, + "learning_rate": 6.7390109890109895e-06, + "loss": 0.0379, + "step": 31494 + }, + { + "epoch": 86.52472527472527, + "grad_norm": 19.683609008789062, + "learning_rate": 6.737637362637363e-06, + "loss": 0.2437, + "step": 31495 + }, + { + "epoch": 86.52747252747253, + "grad_norm": 16.315752029418945, + "learning_rate": 6.736263736263737e-06, + "loss": 0.3896, + "step": 31496 + }, + { + "epoch": 86.53021978021978, + "grad_norm": 7.52601432800293, + "learning_rate": 6.73489010989011e-06, + "loss": 0.1207, + "step": 31497 + }, + { + "epoch": 86.53296703296704, + "grad_norm": 11.651968955993652, + "learning_rate": 6.733516483516483e-06, + "loss": 0.1531, + "step": 31498 + }, + { + "epoch": 86.53571428571429, + "grad_norm": 18.352540969848633, + "learning_rate": 6.7321428571428574e-06, + "loss": 0.3236, + "step": 31499 + }, + { + "epoch": 86.53846153846153, + "grad_norm": 5.386667728424072, + "learning_rate": 6.730769230769231e-06, + "loss": 0.0857, + "step": 31500 + }, + { + "epoch": 86.54120879120879, + "grad_norm": 12.043158531188965, + "learning_rate": 6.729395604395605e-06, + "loss": 0.1714, + "step": 31501 + }, + { + "epoch": 86.54395604395604, + "grad_norm": 4.91750955581665, + "learning_rate": 6.7280219780219785e-06, + "loss": 0.0445, + "step": 31502 + }, + { + "epoch": 86.5467032967033, + "grad_norm": 1.7901155948638916, + "learning_rate": 6.726648351648351e-06, + "loss": 0.0295, + "step": 31503 + }, + { + "epoch": 86.54945054945055, + "grad_norm": 16.832828521728516, + "learning_rate": 6.725274725274726e-06, + "loss": 0.2379, + "step": 31504 + }, + { + "epoch": 86.5521978021978, + "grad_norm": 19.119014739990234, + "learning_rate": 6.723901098901099e-06, + "loss": 0.4627, + "step": 31505 + }, + { + "epoch": 86.55494505494505, + "grad_norm": 6.321493625640869, + "learning_rate": 6.722527472527474e-06, + "loss": 0.0664, + "step": 31506 + }, + { + "epoch": 86.5576923076923, + "grad_norm": 13.269061088562012, + "learning_rate": 6.721153846153846e-06, + "loss": 0.4093, + "step": 31507 + }, + { + "epoch": 86.56043956043956, + "grad_norm": 9.666115760803223, + "learning_rate": 6.71978021978022e-06, + "loss": 0.1308, + "step": 31508 + }, + { + "epoch": 86.56318681318682, + "grad_norm": 18.294267654418945, + "learning_rate": 6.718406593406594e-06, + "loss": 0.1092, + "step": 31509 + }, + { + "epoch": 86.56593406593407, + "grad_norm": 1.5765843391418457, + "learning_rate": 6.7170329670329674e-06, + "loss": 0.026, + "step": 31510 + }, + { + "epoch": 86.56868131868131, + "grad_norm": 5.420276165008545, + "learning_rate": 6.715659340659342e-06, + "loss": 0.1229, + "step": 31511 + }, + { + "epoch": 86.57142857142857, + "grad_norm": 18.454023361206055, + "learning_rate": 6.714285714285714e-06, + "loss": 0.2255, + "step": 31512 + }, + { + "epoch": 86.57417582417582, + "grad_norm": 2.3988149166107178, + "learning_rate": 6.712912087912088e-06, + "loss": 0.0195, + "step": 31513 + }, + { + "epoch": 86.57692307692308, + "grad_norm": 10.23214054107666, + "learning_rate": 6.711538461538462e-06, + "loss": 0.1569, + "step": 31514 + }, + { + "epoch": 86.57967032967034, + "grad_norm": 5.631913661956787, + "learning_rate": 6.710164835164835e-06, + "loss": 0.0993, + "step": 31515 + }, + { + "epoch": 86.58241758241758, + "grad_norm": 4.05538272857666, + "learning_rate": 6.7087912087912096e-06, + "loss": 0.0569, + "step": 31516 + }, + { + "epoch": 86.58516483516483, + "grad_norm": 21.05695343017578, + "learning_rate": 6.707417582417583e-06, + "loss": 0.5225, + "step": 31517 + }, + { + "epoch": 86.58791208791209, + "grad_norm": 5.697508811950684, + "learning_rate": 6.7060439560439555e-06, + "loss": 0.105, + "step": 31518 + }, + { + "epoch": 86.59065934065934, + "grad_norm": 16.992900848388672, + "learning_rate": 6.704670329670331e-06, + "loss": 0.323, + "step": 31519 + }, + { + "epoch": 86.5934065934066, + "grad_norm": 16.56432342529297, + "learning_rate": 6.703296703296703e-06, + "loss": 0.3187, + "step": 31520 + }, + { + "epoch": 86.59615384615384, + "grad_norm": 4.7019195556640625, + "learning_rate": 6.7019230769230775e-06, + "loss": 0.1153, + "step": 31521 + }, + { + "epoch": 86.5989010989011, + "grad_norm": 13.127593040466309, + "learning_rate": 6.700549450549451e-06, + "loss": 0.2267, + "step": 31522 + }, + { + "epoch": 86.60164835164835, + "grad_norm": 8.199396133422852, + "learning_rate": 6.699175824175824e-06, + "loss": 0.1167, + "step": 31523 + }, + { + "epoch": 86.6043956043956, + "grad_norm": 7.406313896179199, + "learning_rate": 6.6978021978021985e-06, + "loss": 0.1029, + "step": 31524 + }, + { + "epoch": 86.60714285714286, + "grad_norm": 9.23757266998291, + "learning_rate": 6.696428571428572e-06, + "loss": 0.2995, + "step": 31525 + }, + { + "epoch": 86.60989010989012, + "grad_norm": 38.559139251708984, + "learning_rate": 6.695054945054946e-06, + "loss": 1.0828, + "step": 31526 + }, + { + "epoch": 86.61263736263736, + "grad_norm": 20.348594665527344, + "learning_rate": 6.693681318681319e-06, + "loss": 0.4873, + "step": 31527 + }, + { + "epoch": 86.61538461538461, + "grad_norm": 12.24208927154541, + "learning_rate": 6.692307692307692e-06, + "loss": 0.3543, + "step": 31528 + }, + { + "epoch": 86.61813186813187, + "grad_norm": 4.892172813415527, + "learning_rate": 6.690934065934066e-06, + "loss": 0.1173, + "step": 31529 + }, + { + "epoch": 86.62087912087912, + "grad_norm": 21.257951736450195, + "learning_rate": 6.68956043956044e-06, + "loss": 0.5212, + "step": 31530 + }, + { + "epoch": 86.62362637362638, + "grad_norm": 9.198250770568848, + "learning_rate": 6.688186813186814e-06, + "loss": 0.088, + "step": 31531 + }, + { + "epoch": 86.62637362637362, + "grad_norm": 4.323869228363037, + "learning_rate": 6.6868131868131875e-06, + "loss": 0.0605, + "step": 31532 + }, + { + "epoch": 86.62912087912088, + "grad_norm": 15.7799072265625, + "learning_rate": 6.68543956043956e-06, + "loss": 0.2685, + "step": 31533 + }, + { + "epoch": 86.63186813186813, + "grad_norm": 14.151308059692383, + "learning_rate": 6.684065934065935e-06, + "loss": 0.2211, + "step": 31534 + }, + { + "epoch": 86.63461538461539, + "grad_norm": 2.311007261276245, + "learning_rate": 6.682692307692308e-06, + "loss": 0.0226, + "step": 31535 + }, + { + "epoch": 86.63736263736264, + "grad_norm": 19.077802658081055, + "learning_rate": 6.681318681318682e-06, + "loss": 0.3251, + "step": 31536 + }, + { + "epoch": 86.64010989010988, + "grad_norm": 21.313810348510742, + "learning_rate": 6.679945054945055e-06, + "loss": 0.6589, + "step": 31537 + }, + { + "epoch": 86.64285714285714, + "grad_norm": 6.022453784942627, + "learning_rate": 6.678571428571429e-06, + "loss": 0.0575, + "step": 31538 + }, + { + "epoch": 86.6456043956044, + "grad_norm": 9.844707489013672, + "learning_rate": 6.677197802197803e-06, + "loss": 0.1014, + "step": 31539 + }, + { + "epoch": 86.64835164835165, + "grad_norm": 22.386159896850586, + "learning_rate": 6.675824175824176e-06, + "loss": 0.595, + "step": 31540 + }, + { + "epoch": 86.6510989010989, + "grad_norm": 9.155553817749023, + "learning_rate": 6.674450549450549e-06, + "loss": 0.1802, + "step": 31541 + }, + { + "epoch": 86.65384615384616, + "grad_norm": 10.110336303710938, + "learning_rate": 6.673076923076923e-06, + "loss": 0.1205, + "step": 31542 + }, + { + "epoch": 86.6565934065934, + "grad_norm": 22.727020263671875, + "learning_rate": 6.671703296703297e-06, + "loss": 0.3698, + "step": 31543 + }, + { + "epoch": 86.65934065934066, + "grad_norm": 11.780826568603516, + "learning_rate": 6.670329670329671e-06, + "loss": 0.1994, + "step": 31544 + }, + { + "epoch": 86.66208791208791, + "grad_norm": 8.108965873718262, + "learning_rate": 6.668956043956044e-06, + "loss": 0.1504, + "step": 31545 + }, + { + "epoch": 86.66483516483517, + "grad_norm": 15.544882774353027, + "learning_rate": 6.667582417582417e-06, + "loss": 0.5009, + "step": 31546 + }, + { + "epoch": 86.66758241758242, + "grad_norm": 9.00240421295166, + "learning_rate": 6.666208791208792e-06, + "loss": 0.1553, + "step": 31547 + }, + { + "epoch": 86.67032967032966, + "grad_norm": 11.29401683807373, + "learning_rate": 6.6648351648351645e-06, + "loss": 0.1256, + "step": 31548 + }, + { + "epoch": 86.67307692307692, + "grad_norm": 11.72349739074707, + "learning_rate": 6.66346153846154e-06, + "loss": 0.1012, + "step": 31549 + }, + { + "epoch": 86.67582417582418, + "grad_norm": 20.03377914428711, + "learning_rate": 6.662087912087912e-06, + "loss": 0.1285, + "step": 31550 + }, + { + "epoch": 86.67857142857143, + "grad_norm": 12.617005348205566, + "learning_rate": 6.660714285714286e-06, + "loss": 0.2192, + "step": 31551 + }, + { + "epoch": 86.68131868131869, + "grad_norm": 17.320209503173828, + "learning_rate": 6.65934065934066e-06, + "loss": 0.1231, + "step": 31552 + }, + { + "epoch": 86.68406593406593, + "grad_norm": 10.777639389038086, + "learning_rate": 6.657967032967033e-06, + "loss": 0.1356, + "step": 31553 + }, + { + "epoch": 86.68681318681318, + "grad_norm": 10.23319149017334, + "learning_rate": 6.6565934065934075e-06, + "loss": 0.1792, + "step": 31554 + }, + { + "epoch": 86.68956043956044, + "grad_norm": 17.201766967773438, + "learning_rate": 6.65521978021978e-06, + "loss": 0.2772, + "step": 31555 + }, + { + "epoch": 86.6923076923077, + "grad_norm": 11.278255462646484, + "learning_rate": 6.6538461538461535e-06, + "loss": 0.1183, + "step": 31556 + }, + { + "epoch": 86.69505494505495, + "grad_norm": 16.057586669921875, + "learning_rate": 6.652472527472528e-06, + "loss": 0.4717, + "step": 31557 + }, + { + "epoch": 86.6978021978022, + "grad_norm": 28.414838790893555, + "learning_rate": 6.651098901098901e-06, + "loss": 0.7215, + "step": 31558 + }, + { + "epoch": 86.70054945054945, + "grad_norm": 17.874279022216797, + "learning_rate": 6.649725274725275e-06, + "loss": 0.3081, + "step": 31559 + }, + { + "epoch": 86.7032967032967, + "grad_norm": 2.9116368293762207, + "learning_rate": 6.648351648351649e-06, + "loss": 0.0392, + "step": 31560 + }, + { + "epoch": 86.70604395604396, + "grad_norm": 16.33013153076172, + "learning_rate": 6.646978021978021e-06, + "loss": 0.2963, + "step": 31561 + }, + { + "epoch": 86.70879120879121, + "grad_norm": 8.829864501953125, + "learning_rate": 6.6456043956043965e-06, + "loss": 0.1173, + "step": 31562 + }, + { + "epoch": 86.71153846153847, + "grad_norm": 16.75873374938965, + "learning_rate": 6.644230769230769e-06, + "loss": 0.2571, + "step": 31563 + }, + { + "epoch": 86.71428571428571, + "grad_norm": 14.642560005187988, + "learning_rate": 6.642857142857144e-06, + "loss": 0.3357, + "step": 31564 + }, + { + "epoch": 86.71703296703296, + "grad_norm": 8.129349708557129, + "learning_rate": 6.641483516483517e-06, + "loss": 0.1338, + "step": 31565 + }, + { + "epoch": 86.71978021978022, + "grad_norm": 2.617299795150757, + "learning_rate": 6.64010989010989e-06, + "loss": 0.0275, + "step": 31566 + }, + { + "epoch": 86.72252747252747, + "grad_norm": 4.442726135253906, + "learning_rate": 6.638736263736264e-06, + "loss": 0.0317, + "step": 31567 + }, + { + "epoch": 86.72527472527473, + "grad_norm": 9.881166458129883, + "learning_rate": 6.637362637362638e-06, + "loss": 0.2436, + "step": 31568 + }, + { + "epoch": 86.72802197802197, + "grad_norm": 12.983675956726074, + "learning_rate": 6.635989010989012e-06, + "loss": 0.15, + "step": 31569 + }, + { + "epoch": 86.73076923076923, + "grad_norm": 17.703027725219727, + "learning_rate": 6.6346153846153846e-06, + "loss": 0.2903, + "step": 31570 + }, + { + "epoch": 86.73351648351648, + "grad_norm": 12.682165145874023, + "learning_rate": 6.633241758241758e-06, + "loss": 0.26, + "step": 31571 + }, + { + "epoch": 86.73626373626374, + "grad_norm": 11.811923027038574, + "learning_rate": 6.631868131868132e-06, + "loss": 0.1253, + "step": 31572 + }, + { + "epoch": 86.73901098901099, + "grad_norm": 18.941755294799805, + "learning_rate": 6.630494505494506e-06, + "loss": 0.2372, + "step": 31573 + }, + { + "epoch": 86.74175824175825, + "grad_norm": 13.11767864227295, + "learning_rate": 6.62912087912088e-06, + "loss": 0.2827, + "step": 31574 + }, + { + "epoch": 86.74450549450549, + "grad_norm": 22.66853141784668, + "learning_rate": 6.627747252747253e-06, + "loss": 0.5889, + "step": 31575 + }, + { + "epoch": 86.74725274725274, + "grad_norm": 15.152265548706055, + "learning_rate": 6.626373626373626e-06, + "loss": 0.3296, + "step": 31576 + }, + { + "epoch": 86.75, + "grad_norm": 18.67995262145996, + "learning_rate": 6.625000000000001e-06, + "loss": 0.4685, + "step": 31577 + }, + { + "epoch": 86.75274725274726, + "grad_norm": 17.784164428710938, + "learning_rate": 6.6236263736263735e-06, + "loss": 0.1123, + "step": 31578 + }, + { + "epoch": 86.75549450549451, + "grad_norm": 9.44255256652832, + "learning_rate": 6.622252747252749e-06, + "loss": 0.1943, + "step": 31579 + }, + { + "epoch": 86.75824175824175, + "grad_norm": 13.086881637573242, + "learning_rate": 6.620879120879121e-06, + "loss": 0.2304, + "step": 31580 + }, + { + "epoch": 86.76098901098901, + "grad_norm": 5.164651870727539, + "learning_rate": 6.6195054945054946e-06, + "loss": 0.0721, + "step": 31581 + }, + { + "epoch": 86.76373626373626, + "grad_norm": 7.138267993927002, + "learning_rate": 6.618131868131869e-06, + "loss": 0.068, + "step": 31582 + }, + { + "epoch": 86.76648351648352, + "grad_norm": 14.743125915527344, + "learning_rate": 6.616758241758242e-06, + "loss": 0.1561, + "step": 31583 + }, + { + "epoch": 86.76923076923077, + "grad_norm": 7.630990028381348, + "learning_rate": 6.6153846153846165e-06, + "loss": 0.1224, + "step": 31584 + }, + { + "epoch": 86.77197802197803, + "grad_norm": 6.631220817565918, + "learning_rate": 6.614010989010989e-06, + "loss": 0.0793, + "step": 31585 + }, + { + "epoch": 86.77472527472527, + "grad_norm": 12.850422859191895, + "learning_rate": 6.6126373626373625e-06, + "loss": 0.1909, + "step": 31586 + }, + { + "epoch": 86.77747252747253, + "grad_norm": 11.553951263427734, + "learning_rate": 6.611263736263737e-06, + "loss": 0.0914, + "step": 31587 + }, + { + "epoch": 86.78021978021978, + "grad_norm": 19.069034576416016, + "learning_rate": 6.60989010989011e-06, + "loss": 0.3706, + "step": 31588 + }, + { + "epoch": 86.78296703296704, + "grad_norm": 15.93610668182373, + "learning_rate": 6.608516483516484e-06, + "loss": 0.2513, + "step": 31589 + }, + { + "epoch": 86.78571428571429, + "grad_norm": 1.7896661758422852, + "learning_rate": 6.607142857142858e-06, + "loss": 0.0213, + "step": 31590 + }, + { + "epoch": 86.78846153846153, + "grad_norm": 15.269867897033691, + "learning_rate": 6.60576923076923e-06, + "loss": 0.2271, + "step": 31591 + }, + { + "epoch": 86.79120879120879, + "grad_norm": 8.833882331848145, + "learning_rate": 6.6043956043956054e-06, + "loss": 0.0425, + "step": 31592 + }, + { + "epoch": 86.79395604395604, + "grad_norm": 16.43355941772461, + "learning_rate": 6.603021978021978e-06, + "loss": 0.3446, + "step": 31593 + }, + { + "epoch": 86.7967032967033, + "grad_norm": 7.15225076675415, + "learning_rate": 6.601648351648352e-06, + "loss": 0.1411, + "step": 31594 + }, + { + "epoch": 86.79945054945055, + "grad_norm": 9.751032829284668, + "learning_rate": 6.600274725274726e-06, + "loss": 0.1095, + "step": 31595 + }, + { + "epoch": 86.8021978021978, + "grad_norm": 19.850242614746094, + "learning_rate": 6.598901098901099e-06, + "loss": 0.3104, + "step": 31596 + }, + { + "epoch": 86.80494505494505, + "grad_norm": 10.595515251159668, + "learning_rate": 6.597527472527473e-06, + "loss": 0.3204, + "step": 31597 + }, + { + "epoch": 86.8076923076923, + "grad_norm": 8.412388801574707, + "learning_rate": 6.596153846153847e-06, + "loss": 0.232, + "step": 31598 + }, + { + "epoch": 86.81043956043956, + "grad_norm": 13.193495750427246, + "learning_rate": 6.594780219780219e-06, + "loss": 0.1814, + "step": 31599 + }, + { + "epoch": 86.81318681318682, + "grad_norm": 7.868647575378418, + "learning_rate": 6.5934065934065935e-06, + "loss": 0.0828, + "step": 31600 + }, + { + "epoch": 86.81593406593407, + "grad_norm": 16.25661849975586, + "learning_rate": 6.592032967032967e-06, + "loss": 0.3784, + "step": 31601 + }, + { + "epoch": 86.81868131868131, + "grad_norm": 8.396783828735352, + "learning_rate": 6.590659340659341e-06, + "loss": 0.1139, + "step": 31602 + }, + { + "epoch": 86.82142857142857, + "grad_norm": 25.2490291595459, + "learning_rate": 6.589285714285715e-06, + "loss": 0.7133, + "step": 31603 + }, + { + "epoch": 86.82417582417582, + "grad_norm": 8.634930610656738, + "learning_rate": 6.587912087912087e-06, + "loss": 0.1009, + "step": 31604 + }, + { + "epoch": 86.82692307692308, + "grad_norm": 7.456344127655029, + "learning_rate": 6.586538461538462e-06, + "loss": 0.0854, + "step": 31605 + }, + { + "epoch": 86.82967032967034, + "grad_norm": 14.036945343017578, + "learning_rate": 6.585164835164835e-06, + "loss": 0.2764, + "step": 31606 + }, + { + "epoch": 86.83241758241758, + "grad_norm": 4.599968433380127, + "learning_rate": 6.58379120879121e-06, + "loss": 0.0434, + "step": 31607 + }, + { + "epoch": 86.83516483516483, + "grad_norm": 7.645307540893555, + "learning_rate": 6.5824175824175825e-06, + "loss": 0.0684, + "step": 31608 + }, + { + "epoch": 86.83791208791209, + "grad_norm": 21.261737823486328, + "learning_rate": 6.581043956043956e-06, + "loss": 0.5318, + "step": 31609 + }, + { + "epoch": 86.84065934065934, + "grad_norm": 18.425580978393555, + "learning_rate": 6.57967032967033e-06, + "loss": 0.5741, + "step": 31610 + }, + { + "epoch": 86.8434065934066, + "grad_norm": 14.210731506347656, + "learning_rate": 6.5782967032967036e-06, + "loss": 0.2379, + "step": 31611 + }, + { + "epoch": 86.84615384615384, + "grad_norm": 16.609840393066406, + "learning_rate": 6.576923076923078e-06, + "loss": 0.3524, + "step": 31612 + }, + { + "epoch": 86.8489010989011, + "grad_norm": 14.675609588623047, + "learning_rate": 6.575549450549451e-06, + "loss": 0.3057, + "step": 31613 + }, + { + "epoch": 86.85164835164835, + "grad_norm": 7.9880852699279785, + "learning_rate": 6.574175824175824e-06, + "loss": 0.0813, + "step": 31614 + }, + { + "epoch": 86.8543956043956, + "grad_norm": 14.903548240661621, + "learning_rate": 6.572802197802198e-06, + "loss": 0.6943, + "step": 31615 + }, + { + "epoch": 86.85714285714286, + "grad_norm": 30.33832359313965, + "learning_rate": 6.5714285714285714e-06, + "loss": 0.4358, + "step": 31616 + }, + { + "epoch": 86.85989010989012, + "grad_norm": 13.087343215942383, + "learning_rate": 6.570054945054946e-06, + "loss": 0.173, + "step": 31617 + }, + { + "epoch": 86.86263736263736, + "grad_norm": 4.803091049194336, + "learning_rate": 6.568681318681319e-06, + "loss": 0.0605, + "step": 31618 + }, + { + "epoch": 86.86538461538461, + "grad_norm": 18.87532615661621, + "learning_rate": 6.567307692307692e-06, + "loss": 0.2248, + "step": 31619 + }, + { + "epoch": 86.86813186813187, + "grad_norm": 21.54852294921875, + "learning_rate": 6.565934065934067e-06, + "loss": 0.5235, + "step": 31620 + }, + { + "epoch": 86.87087912087912, + "grad_norm": 4.936305999755859, + "learning_rate": 6.564560439560439e-06, + "loss": 0.0732, + "step": 31621 + }, + { + "epoch": 86.87362637362638, + "grad_norm": 13.344381332397461, + "learning_rate": 6.563186813186814e-06, + "loss": 0.2317, + "step": 31622 + }, + { + "epoch": 86.87637362637362, + "grad_norm": 7.6887383460998535, + "learning_rate": 6.561813186813187e-06, + "loss": 0.0833, + "step": 31623 + }, + { + "epoch": 86.87912087912088, + "grad_norm": 7.600217819213867, + "learning_rate": 6.56043956043956e-06, + "loss": 0.1109, + "step": 31624 + }, + { + "epoch": 86.88186813186813, + "grad_norm": 4.443580150604248, + "learning_rate": 6.559065934065935e-06, + "loss": 0.0269, + "step": 31625 + }, + { + "epoch": 86.88461538461539, + "grad_norm": 9.370664596557617, + "learning_rate": 6.557692307692308e-06, + "loss": 0.1169, + "step": 31626 + }, + { + "epoch": 86.88736263736264, + "grad_norm": 5.579980373382568, + "learning_rate": 6.556318681318682e-06, + "loss": 0.0864, + "step": 31627 + }, + { + "epoch": 86.89010989010988, + "grad_norm": 9.991016387939453, + "learning_rate": 6.554945054945056e-06, + "loss": 0.1503, + "step": 31628 + }, + { + "epoch": 86.89285714285714, + "grad_norm": 0.9233697652816772, + "learning_rate": 6.553571428571428e-06, + "loss": 0.0127, + "step": 31629 + }, + { + "epoch": 86.8956043956044, + "grad_norm": 11.926163673400879, + "learning_rate": 6.5521978021978025e-06, + "loss": 0.2091, + "step": 31630 + }, + { + "epoch": 86.89835164835165, + "grad_norm": 13.232604026794434, + "learning_rate": 6.550824175824176e-06, + "loss": 0.2538, + "step": 31631 + }, + { + "epoch": 86.9010989010989, + "grad_norm": 2.7410576343536377, + "learning_rate": 6.54945054945055e-06, + "loss": 0.0231, + "step": 31632 + }, + { + "epoch": 86.90384615384616, + "grad_norm": 4.239765644073486, + "learning_rate": 6.548076923076924e-06, + "loss": 0.0404, + "step": 31633 + }, + { + "epoch": 86.9065934065934, + "grad_norm": 8.249621391296387, + "learning_rate": 6.546703296703296e-06, + "loss": 0.084, + "step": 31634 + }, + { + "epoch": 86.90934065934066, + "grad_norm": 8.679888725280762, + "learning_rate": 6.545329670329671e-06, + "loss": 0.0675, + "step": 31635 + }, + { + "epoch": 86.91208791208791, + "grad_norm": 27.37932014465332, + "learning_rate": 6.543956043956044e-06, + "loss": 0.5454, + "step": 31636 + }, + { + "epoch": 86.91483516483517, + "grad_norm": 6.295296669006348, + "learning_rate": 6.542582417582419e-06, + "loss": 0.0715, + "step": 31637 + }, + { + "epoch": 86.91758241758242, + "grad_norm": 10.407858848571777, + "learning_rate": 6.5412087912087915e-06, + "loss": 0.1563, + "step": 31638 + }, + { + "epoch": 86.92032967032966, + "grad_norm": 16.567462921142578, + "learning_rate": 6.539835164835165e-06, + "loss": 0.6666, + "step": 31639 + }, + { + "epoch": 86.92307692307692, + "grad_norm": 6.172588348388672, + "learning_rate": 6.538461538461539e-06, + "loss": 0.0977, + "step": 31640 + }, + { + "epoch": 86.92582417582418, + "grad_norm": 9.692968368530273, + "learning_rate": 6.5370879120879125e-06, + "loss": 0.2177, + "step": 31641 + }, + { + "epoch": 86.92857142857143, + "grad_norm": 12.84956169128418, + "learning_rate": 6.535714285714287e-06, + "loss": 0.2458, + "step": 31642 + }, + { + "epoch": 86.93131868131869, + "grad_norm": 16.222049713134766, + "learning_rate": 6.534340659340659e-06, + "loss": 0.5737, + "step": 31643 + }, + { + "epoch": 86.93406593406593, + "grad_norm": 15.58321475982666, + "learning_rate": 6.532967032967033e-06, + "loss": 0.2516, + "step": 31644 + }, + { + "epoch": 86.93681318681318, + "grad_norm": 3.762620687484741, + "learning_rate": 6.531593406593407e-06, + "loss": 0.0376, + "step": 31645 + }, + { + "epoch": 86.93956043956044, + "grad_norm": 22.178274154663086, + "learning_rate": 6.53021978021978e-06, + "loss": 0.3587, + "step": 31646 + }, + { + "epoch": 86.9423076923077, + "grad_norm": 5.27473783493042, + "learning_rate": 6.528846153846155e-06, + "loss": 0.0818, + "step": 31647 + }, + { + "epoch": 86.94505494505495, + "grad_norm": 17.882755279541016, + "learning_rate": 6.527472527472528e-06, + "loss": 0.2662, + "step": 31648 + }, + { + "epoch": 86.9478021978022, + "grad_norm": 13.01938533782959, + "learning_rate": 6.526098901098901e-06, + "loss": 0.2432, + "step": 31649 + }, + { + "epoch": 86.95054945054945, + "grad_norm": 2.4527692794799805, + "learning_rate": 6.524725274725276e-06, + "loss": 0.0212, + "step": 31650 + }, + { + "epoch": 86.9532967032967, + "grad_norm": 12.490797996520996, + "learning_rate": 6.523351648351648e-06, + "loss": 0.1021, + "step": 31651 + }, + { + "epoch": 86.95604395604396, + "grad_norm": 15.56146240234375, + "learning_rate": 6.521978021978022e-06, + "loss": 0.4473, + "step": 31652 + }, + { + "epoch": 86.95879120879121, + "grad_norm": 13.707338333129883, + "learning_rate": 6.520604395604396e-06, + "loss": 0.2851, + "step": 31653 + }, + { + "epoch": 86.96153846153847, + "grad_norm": 13.561751365661621, + "learning_rate": 6.519230769230769e-06, + "loss": 0.2729, + "step": 31654 + }, + { + "epoch": 86.96428571428571, + "grad_norm": 11.147806167602539, + "learning_rate": 6.517857142857144e-06, + "loss": 0.1849, + "step": 31655 + }, + { + "epoch": 86.96703296703296, + "grad_norm": 23.221830368041992, + "learning_rate": 6.516483516483517e-06, + "loss": 0.2791, + "step": 31656 + }, + { + "epoch": 86.96978021978022, + "grad_norm": 4.090696334838867, + "learning_rate": 6.51510989010989e-06, + "loss": 0.0811, + "step": 31657 + }, + { + "epoch": 86.97252747252747, + "grad_norm": 21.17226791381836, + "learning_rate": 6.513736263736264e-06, + "loss": 0.467, + "step": 31658 + }, + { + "epoch": 86.97527472527473, + "grad_norm": 11.969572067260742, + "learning_rate": 6.512362637362637e-06, + "loss": 0.1579, + "step": 31659 + }, + { + "epoch": 86.97802197802197, + "grad_norm": 12.151928901672363, + "learning_rate": 6.5109890109890115e-06, + "loss": 0.1193, + "step": 31660 + }, + { + "epoch": 86.98076923076923, + "grad_norm": 11.511199951171875, + "learning_rate": 6.509615384615385e-06, + "loss": 0.1443, + "step": 31661 + }, + { + "epoch": 86.98351648351648, + "grad_norm": 14.626116752624512, + "learning_rate": 6.508241758241758e-06, + "loss": 0.199, + "step": 31662 + }, + { + "epoch": 86.98626373626374, + "grad_norm": 7.605087757110596, + "learning_rate": 6.5068681318681326e-06, + "loss": 0.1106, + "step": 31663 + }, + { + "epoch": 86.98901098901099, + "grad_norm": 17.013792037963867, + "learning_rate": 6.505494505494505e-06, + "loss": 0.1799, + "step": 31664 + }, + { + "epoch": 86.99175824175825, + "grad_norm": 13.408101081848145, + "learning_rate": 6.50412087912088e-06, + "loss": 0.2152, + "step": 31665 + }, + { + "epoch": 86.99450549450549, + "grad_norm": 20.30648422241211, + "learning_rate": 6.502747252747253e-06, + "loss": 0.4723, + "step": 31666 + }, + { + "epoch": 86.99725274725274, + "grad_norm": 11.371184349060059, + "learning_rate": 6.501373626373626e-06, + "loss": 0.1672, + "step": 31667 + }, + { + "epoch": 87.0, + "grad_norm": 53.525718688964844, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.3335, + "step": 31668 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.7617079889807162, + "eval_f1": 0.7646237329137895, + "eval_f1_DuraRiadoRio_64x64": 0.7112970711297071, + "eval_f1_Mole_64x64": 0.8680555555555556, + "eval_f1_Quebrado_64x64": 0.7692307692307693, + "eval_f1_RiadoRio_64x64": 0.6498599439775911, + "eval_f1_RioFechado_64x64": 0.8246753246753247, + "eval_loss": 1.4127073287963867, + "eval_precision": 0.7911550525357003, + "eval_precision_DuraRiadoRio_64x64": 0.8947368421052632, + "eval_precision_Mole_64x64": 0.8680555555555556, + "eval_precision_Quebrado_64x64": 0.8620689655172413, + "eval_precision_RiadoRio_64x64": 0.5658536585365853, + "eval_precision_RioFechado_64x64": 0.7650602409638554, + "eval_recall": 0.7620603739395436, + "eval_recall_DuraRiadoRio_64x64": 0.5902777777777778, + "eval_recall_Mole_64x64": 0.8680555555555556, + "eval_recall_Quebrado_64x64": 0.6944444444444444, + "eval_recall_RiadoRio_64x64": 0.7631578947368421, + "eval_recall_RioFechado_64x64": 0.8943661971830986, + "eval_runtime": 1.7457, + "eval_samples_per_second": 415.877, + "eval_steps_per_second": 26.35, + "step": 31668 + }, + { + "epoch": 87.00274725274726, + "grad_norm": 12.719435691833496, + "learning_rate": 6.498626373626374e-06, + "loss": 0.3154, + "step": 31669 + }, + { + "epoch": 87.00549450549451, + "grad_norm": 16.240337371826172, + "learning_rate": 6.497252747252748e-06, + "loss": 0.1873, + "step": 31670 + }, + { + "epoch": 87.00824175824175, + "grad_norm": 4.3248491287231445, + "learning_rate": 6.4958791208791215e-06, + "loss": 0.0785, + "step": 31671 + }, + { + "epoch": 87.01098901098901, + "grad_norm": 15.898747444152832, + "learning_rate": 6.494505494505494e-06, + "loss": 0.2026, + "step": 31672 + }, + { + "epoch": 87.01373626373626, + "grad_norm": 7.03383731842041, + "learning_rate": 6.493131868131868e-06, + "loss": 0.0907, + "step": 31673 + }, + { + "epoch": 87.01648351648352, + "grad_norm": 15.285852432250977, + "learning_rate": 6.491758241758242e-06, + "loss": 0.2211, + "step": 31674 + }, + { + "epoch": 87.01923076923077, + "grad_norm": 17.140026092529297, + "learning_rate": 6.490384615384616e-06, + "loss": 0.3842, + "step": 31675 + }, + { + "epoch": 87.02197802197803, + "grad_norm": 15.382411003112793, + "learning_rate": 6.489010989010989e-06, + "loss": 0.1369, + "step": 31676 + }, + { + "epoch": 87.02472527472527, + "grad_norm": 11.916876792907715, + "learning_rate": 6.487637362637363e-06, + "loss": 0.2594, + "step": 31677 + }, + { + "epoch": 87.02747252747253, + "grad_norm": 10.310818672180176, + "learning_rate": 6.486263736263737e-06, + "loss": 0.2279, + "step": 31678 + }, + { + "epoch": 87.03021978021978, + "grad_norm": 9.177839279174805, + "learning_rate": 6.48489010989011e-06, + "loss": 0.1637, + "step": 31679 + }, + { + "epoch": 87.03296703296704, + "grad_norm": 1.8855267763137817, + "learning_rate": 6.483516483516485e-06, + "loss": 0.0263, + "step": 31680 + }, + { + "epoch": 87.03571428571429, + "grad_norm": 11.891692161560059, + "learning_rate": 6.482142857142857e-06, + "loss": 0.2626, + "step": 31681 + }, + { + "epoch": 87.03846153846153, + "grad_norm": 2.494624376296997, + "learning_rate": 6.480769230769231e-06, + "loss": 0.0243, + "step": 31682 + }, + { + "epoch": 87.04120879120879, + "grad_norm": 8.84831428527832, + "learning_rate": 6.479395604395605e-06, + "loss": 0.2804, + "step": 31683 + }, + { + "epoch": 87.04395604395604, + "grad_norm": 7.198566913604736, + "learning_rate": 6.478021978021978e-06, + "loss": 0.0628, + "step": 31684 + }, + { + "epoch": 87.0467032967033, + "grad_norm": 14.900979042053223, + "learning_rate": 6.476648351648353e-06, + "loss": 0.2779, + "step": 31685 + }, + { + "epoch": 87.04945054945055, + "grad_norm": 9.250798225402832, + "learning_rate": 6.475274725274726e-06, + "loss": 0.1632, + "step": 31686 + }, + { + "epoch": 87.0521978021978, + "grad_norm": 14.528993606567383, + "learning_rate": 6.4739010989010986e-06, + "loss": 0.2771, + "step": 31687 + }, + { + "epoch": 87.05494505494505, + "grad_norm": 13.586127281188965, + "learning_rate": 6.472527472527473e-06, + "loss": 0.2701, + "step": 31688 + }, + { + "epoch": 87.0576923076923, + "grad_norm": 12.368948936462402, + "learning_rate": 6.471153846153846e-06, + "loss": 0.1765, + "step": 31689 + }, + { + "epoch": 87.06043956043956, + "grad_norm": 2.802664279937744, + "learning_rate": 6.4697802197802205e-06, + "loss": 0.0402, + "step": 31690 + }, + { + "epoch": 87.06318681318682, + "grad_norm": 8.466164588928223, + "learning_rate": 6.468406593406594e-06, + "loss": 0.1203, + "step": 31691 + }, + { + "epoch": 87.06593406593407, + "grad_norm": 10.147294044494629, + "learning_rate": 6.4670329670329664e-06, + "loss": 0.1563, + "step": 31692 + }, + { + "epoch": 87.06868131868131, + "grad_norm": 20.413156509399414, + "learning_rate": 6.4656593406593415e-06, + "loss": 0.5375, + "step": 31693 + }, + { + "epoch": 87.07142857142857, + "grad_norm": 4.9050164222717285, + "learning_rate": 6.464285714285714e-06, + "loss": 0.0753, + "step": 31694 + }, + { + "epoch": 87.07417582417582, + "grad_norm": 1.9551453590393066, + "learning_rate": 6.462912087912089e-06, + "loss": 0.0229, + "step": 31695 + }, + { + "epoch": 87.07692307692308, + "grad_norm": 10.853601455688477, + "learning_rate": 6.461538461538462e-06, + "loss": 0.2747, + "step": 31696 + }, + { + "epoch": 87.07967032967034, + "grad_norm": 13.65526008605957, + "learning_rate": 6.460164835164835e-06, + "loss": 0.1831, + "step": 31697 + }, + { + "epoch": 87.08241758241758, + "grad_norm": 12.04360294342041, + "learning_rate": 6.4587912087912094e-06, + "loss": 0.2104, + "step": 31698 + }, + { + "epoch": 87.08516483516483, + "grad_norm": 1.8311563730239868, + "learning_rate": 6.457417582417583e-06, + "loss": 0.016, + "step": 31699 + }, + { + "epoch": 87.08791208791209, + "grad_norm": 12.944005966186523, + "learning_rate": 6.456043956043957e-06, + "loss": 0.4481, + "step": 31700 + }, + { + "epoch": 87.09065934065934, + "grad_norm": 10.80862808227539, + "learning_rate": 6.4546703296703305e-06, + "loss": 0.1152, + "step": 31701 + }, + { + "epoch": 87.0934065934066, + "grad_norm": 3.1698739528656006, + "learning_rate": 6.453296703296703e-06, + "loss": 0.0572, + "step": 31702 + }, + { + "epoch": 87.09615384615384, + "grad_norm": 4.753612995147705, + "learning_rate": 6.451923076923077e-06, + "loss": 0.0478, + "step": 31703 + }, + { + "epoch": 87.0989010989011, + "grad_norm": 7.483379364013672, + "learning_rate": 6.450549450549451e-06, + "loss": 0.0809, + "step": 31704 + }, + { + "epoch": 87.10164835164835, + "grad_norm": 8.494540214538574, + "learning_rate": 6.449175824175825e-06, + "loss": 0.2061, + "step": 31705 + }, + { + "epoch": 87.1043956043956, + "grad_norm": 20.059677124023438, + "learning_rate": 6.447802197802198e-06, + "loss": 0.4548, + "step": 31706 + }, + { + "epoch": 87.10714285714286, + "grad_norm": 12.99736213684082, + "learning_rate": 6.446428571428571e-06, + "loss": 0.1433, + "step": 31707 + }, + { + "epoch": 87.10989010989012, + "grad_norm": 9.851675033569336, + "learning_rate": 6.445054945054946e-06, + "loss": 0.1042, + "step": 31708 + }, + { + "epoch": 87.11263736263736, + "grad_norm": 11.200079917907715, + "learning_rate": 6.443681318681319e-06, + "loss": 0.207, + "step": 31709 + }, + { + "epoch": 87.11538461538461, + "grad_norm": 19.582212448120117, + "learning_rate": 6.442307692307692e-06, + "loss": 0.4671, + "step": 31710 + }, + { + "epoch": 87.11813186813187, + "grad_norm": 6.56359338760376, + "learning_rate": 6.440934065934066e-06, + "loss": 0.1097, + "step": 31711 + }, + { + "epoch": 87.12087912087912, + "grad_norm": 13.236543655395508, + "learning_rate": 6.43956043956044e-06, + "loss": 0.1186, + "step": 31712 + }, + { + "epoch": 87.12362637362638, + "grad_norm": 10.740250587463379, + "learning_rate": 6.438186813186814e-06, + "loss": 0.2029, + "step": 31713 + }, + { + "epoch": 87.12637362637362, + "grad_norm": 16.823083877563477, + "learning_rate": 6.436813186813187e-06, + "loss": 0.255, + "step": 31714 + }, + { + "epoch": 87.12912087912088, + "grad_norm": 20.252696990966797, + "learning_rate": 6.43543956043956e-06, + "loss": 0.3034, + "step": 31715 + }, + { + "epoch": 87.13186813186813, + "grad_norm": 5.5976972579956055, + "learning_rate": 6.434065934065934e-06, + "loss": 0.0997, + "step": 31716 + }, + { + "epoch": 87.13461538461539, + "grad_norm": 18.987707138061523, + "learning_rate": 6.4326923076923075e-06, + "loss": 0.2968, + "step": 31717 + }, + { + "epoch": 87.13736263736264, + "grad_norm": 11.618708610534668, + "learning_rate": 6.431318681318682e-06, + "loss": 0.2111, + "step": 31718 + }, + { + "epoch": 87.14010989010988, + "grad_norm": 17.528465270996094, + "learning_rate": 6.429945054945055e-06, + "loss": 0.3342, + "step": 31719 + }, + { + "epoch": 87.14285714285714, + "grad_norm": 14.00888442993164, + "learning_rate": 6.428571428571429e-06, + "loss": 0.0978, + "step": 31720 + }, + { + "epoch": 87.1456043956044, + "grad_norm": 4.571184158325195, + "learning_rate": 6.427197802197803e-06, + "loss": 0.0537, + "step": 31721 + }, + { + "epoch": 87.14835164835165, + "grad_norm": 9.840118408203125, + "learning_rate": 6.4258241758241754e-06, + "loss": 0.2217, + "step": 31722 + }, + { + "epoch": 87.1510989010989, + "grad_norm": 14.214498519897461, + "learning_rate": 6.4244505494505505e-06, + "loss": 0.1071, + "step": 31723 + }, + { + "epoch": 87.15384615384616, + "grad_norm": 12.46040153503418, + "learning_rate": 6.423076923076923e-06, + "loss": 0.1596, + "step": 31724 + }, + { + "epoch": 87.1565934065934, + "grad_norm": 20.413597106933594, + "learning_rate": 6.4217032967032965e-06, + "loss": 0.2112, + "step": 31725 + }, + { + "epoch": 87.15934065934066, + "grad_norm": 17.2680606842041, + "learning_rate": 6.420329670329671e-06, + "loss": 0.3503, + "step": 31726 + }, + { + "epoch": 87.16208791208791, + "grad_norm": 7.494027137756348, + "learning_rate": 6.418956043956044e-06, + "loss": 0.0709, + "step": 31727 + }, + { + "epoch": 87.16483516483517, + "grad_norm": 11.402517318725586, + "learning_rate": 6.417582417582418e-06, + "loss": 0.1208, + "step": 31728 + }, + { + "epoch": 87.16758241758242, + "grad_norm": 30.562883377075195, + "learning_rate": 6.416208791208792e-06, + "loss": 0.8554, + "step": 31729 + }, + { + "epoch": 87.17032967032966, + "grad_norm": 9.540858268737793, + "learning_rate": 6.414835164835164e-06, + "loss": 0.11, + "step": 31730 + }, + { + "epoch": 87.17307692307692, + "grad_norm": 12.704011917114258, + "learning_rate": 6.413461538461539e-06, + "loss": 0.1928, + "step": 31731 + }, + { + "epoch": 87.17582417582418, + "grad_norm": 17.044841766357422, + "learning_rate": 6.412087912087912e-06, + "loss": 0.5215, + "step": 31732 + }, + { + "epoch": 87.17857142857143, + "grad_norm": 15.951865196228027, + "learning_rate": 6.410714285714286e-06, + "loss": 0.1947, + "step": 31733 + }, + { + "epoch": 87.18131868131869, + "grad_norm": 12.989349365234375, + "learning_rate": 6.40934065934066e-06, + "loss": 0.4087, + "step": 31734 + }, + { + "epoch": 87.18406593406593, + "grad_norm": 15.351202011108398, + "learning_rate": 6.407967032967033e-06, + "loss": 0.1483, + "step": 31735 + }, + { + "epoch": 87.18681318681318, + "grad_norm": 6.869884490966797, + "learning_rate": 6.406593406593407e-06, + "loss": 0.1656, + "step": 31736 + }, + { + "epoch": 87.18956043956044, + "grad_norm": 26.2520694732666, + "learning_rate": 6.40521978021978e-06, + "loss": 0.7777, + "step": 31737 + }, + { + "epoch": 87.1923076923077, + "grad_norm": 3.1439499855041504, + "learning_rate": 6.403846153846155e-06, + "loss": 0.0408, + "step": 31738 + }, + { + "epoch": 87.19505494505495, + "grad_norm": 3.99401593208313, + "learning_rate": 6.402472527472528e-06, + "loss": 0.0464, + "step": 31739 + }, + { + "epoch": 87.1978021978022, + "grad_norm": 9.177007675170898, + "learning_rate": 6.401098901098901e-06, + "loss": 0.1605, + "step": 31740 + }, + { + "epoch": 87.20054945054945, + "grad_norm": 6.219668865203857, + "learning_rate": 6.399725274725275e-06, + "loss": 0.0641, + "step": 31741 + }, + { + "epoch": 87.2032967032967, + "grad_norm": 12.346137046813965, + "learning_rate": 6.398351648351649e-06, + "loss": 0.1721, + "step": 31742 + }, + { + "epoch": 87.20604395604396, + "grad_norm": 23.320674896240234, + "learning_rate": 6.396978021978023e-06, + "loss": 0.4982, + "step": 31743 + }, + { + "epoch": 87.20879120879121, + "grad_norm": 17.795549392700195, + "learning_rate": 6.395604395604396e-06, + "loss": 0.4124, + "step": 31744 + }, + { + "epoch": 87.21153846153847, + "grad_norm": 14.062711715698242, + "learning_rate": 6.394230769230769e-06, + "loss": 0.2754, + "step": 31745 + }, + { + "epoch": 87.21428571428571, + "grad_norm": 16.442546844482422, + "learning_rate": 6.392857142857143e-06, + "loss": 0.1355, + "step": 31746 + }, + { + "epoch": 87.21703296703296, + "grad_norm": 15.233247756958008, + "learning_rate": 6.3914835164835165e-06, + "loss": 0.3207, + "step": 31747 + }, + { + "epoch": 87.21978021978022, + "grad_norm": 2.236011028289795, + "learning_rate": 6.390109890109891e-06, + "loss": 0.0224, + "step": 31748 + }, + { + "epoch": 87.22252747252747, + "grad_norm": 16.862085342407227, + "learning_rate": 6.388736263736264e-06, + "loss": 0.3552, + "step": 31749 + }, + { + "epoch": 87.22527472527473, + "grad_norm": 10.661479949951172, + "learning_rate": 6.387362637362638e-06, + "loss": 0.4159, + "step": 31750 + }, + { + "epoch": 87.22802197802197, + "grad_norm": 5.3614959716796875, + "learning_rate": 6.385989010989012e-06, + "loss": 0.0635, + "step": 31751 + }, + { + "epoch": 87.23076923076923, + "grad_norm": 13.498771667480469, + "learning_rate": 6.384615384615384e-06, + "loss": 0.4309, + "step": 31752 + }, + { + "epoch": 87.23351648351648, + "grad_norm": 19.30484390258789, + "learning_rate": 6.3832417582417595e-06, + "loss": 0.4784, + "step": 31753 + }, + { + "epoch": 87.23626373626374, + "grad_norm": 1.3442460298538208, + "learning_rate": 6.381868131868132e-06, + "loss": 0.0103, + "step": 31754 + }, + { + "epoch": 87.23901098901099, + "grad_norm": 4.97052526473999, + "learning_rate": 6.3804945054945055e-06, + "loss": 0.0752, + "step": 31755 + }, + { + "epoch": 87.24175824175825, + "grad_norm": 7.322045803070068, + "learning_rate": 6.37912087912088e-06, + "loss": 0.0753, + "step": 31756 + }, + { + "epoch": 87.24450549450549, + "grad_norm": 11.764740943908691, + "learning_rate": 6.377747252747253e-06, + "loss": 0.2901, + "step": 31757 + }, + { + "epoch": 87.24725274725274, + "grad_norm": 15.06353759765625, + "learning_rate": 6.376373626373627e-06, + "loss": 0.2352, + "step": 31758 + }, + { + "epoch": 87.25, + "grad_norm": 13.981649398803711, + "learning_rate": 6.375000000000001e-06, + "loss": 0.2906, + "step": 31759 + }, + { + "epoch": 87.25274725274726, + "grad_norm": 15.758018493652344, + "learning_rate": 6.373626373626373e-06, + "loss": 0.2451, + "step": 31760 + }, + { + "epoch": 87.25549450549451, + "grad_norm": 7.390368938446045, + "learning_rate": 6.372252747252748e-06, + "loss": 0.1294, + "step": 31761 + }, + { + "epoch": 87.25824175824175, + "grad_norm": 7.466442584991455, + "learning_rate": 6.370879120879121e-06, + "loss": 0.2424, + "step": 31762 + }, + { + "epoch": 87.26098901098901, + "grad_norm": 1.7873598337173462, + "learning_rate": 6.369505494505495e-06, + "loss": 0.0128, + "step": 31763 + }, + { + "epoch": 87.26373626373626, + "grad_norm": 13.095489501953125, + "learning_rate": 6.368131868131869e-06, + "loss": 0.2271, + "step": 31764 + }, + { + "epoch": 87.26648351648352, + "grad_norm": 12.15710163116455, + "learning_rate": 6.366758241758241e-06, + "loss": 0.1719, + "step": 31765 + }, + { + "epoch": 87.26923076923077, + "grad_norm": 12.4071683883667, + "learning_rate": 6.365384615384616e-06, + "loss": 0.197, + "step": 31766 + }, + { + "epoch": 87.27197802197803, + "grad_norm": 11.630900382995605, + "learning_rate": 6.364010989010989e-06, + "loss": 0.1795, + "step": 31767 + }, + { + "epoch": 87.27472527472527, + "grad_norm": 15.714160919189453, + "learning_rate": 6.362637362637362e-06, + "loss": 0.3347, + "step": 31768 + }, + { + "epoch": 87.27747252747253, + "grad_norm": 2.5317089557647705, + "learning_rate": 6.3612637362637366e-06, + "loss": 0.0798, + "step": 31769 + }, + { + "epoch": 87.28021978021978, + "grad_norm": 20.697978973388672, + "learning_rate": 6.35989010989011e-06, + "loss": 0.4258, + "step": 31770 + }, + { + "epoch": 87.28296703296704, + "grad_norm": 18.341171264648438, + "learning_rate": 6.358516483516484e-06, + "loss": 0.2755, + "step": 31771 + }, + { + "epoch": 87.28571428571429, + "grad_norm": 8.993934631347656, + "learning_rate": 6.357142857142858e-06, + "loss": 0.0621, + "step": 31772 + }, + { + "epoch": 87.28846153846153, + "grad_norm": 17.798503875732422, + "learning_rate": 6.35576923076923e-06, + "loss": 0.3265, + "step": 31773 + }, + { + "epoch": 87.29120879120879, + "grad_norm": 16.0529842376709, + "learning_rate": 6.354395604395605e-06, + "loss": 0.319, + "step": 31774 + }, + { + "epoch": 87.29395604395604, + "grad_norm": 4.801563262939453, + "learning_rate": 6.353021978021978e-06, + "loss": 0.0487, + "step": 31775 + }, + { + "epoch": 87.2967032967033, + "grad_norm": 8.855267524719238, + "learning_rate": 6.351648351648352e-06, + "loss": 0.3008, + "step": 31776 + }, + { + "epoch": 87.29945054945055, + "grad_norm": 5.121334075927734, + "learning_rate": 6.3502747252747255e-06, + "loss": 0.0644, + "step": 31777 + }, + { + "epoch": 87.3021978021978, + "grad_norm": 13.920964241027832, + "learning_rate": 6.348901098901099e-06, + "loss": 0.3116, + "step": 31778 + }, + { + "epoch": 87.30494505494505, + "grad_norm": 8.388113975524902, + "learning_rate": 6.347527472527473e-06, + "loss": 0.0995, + "step": 31779 + }, + { + "epoch": 87.3076923076923, + "grad_norm": 9.881744384765625, + "learning_rate": 6.346153846153846e-06, + "loss": 0.2153, + "step": 31780 + }, + { + "epoch": 87.31043956043956, + "grad_norm": 27.619308471679688, + "learning_rate": 6.344780219780221e-06, + "loss": 0.4908, + "step": 31781 + }, + { + "epoch": 87.31318681318682, + "grad_norm": 4.02154541015625, + "learning_rate": 6.343406593406593e-06, + "loss": 0.0363, + "step": 31782 + }, + { + "epoch": 87.31593406593407, + "grad_norm": 9.942121505737305, + "learning_rate": 6.342032967032967e-06, + "loss": 0.0873, + "step": 31783 + }, + { + "epoch": 87.31868131868131, + "grad_norm": 6.089910507202148, + "learning_rate": 6.340659340659341e-06, + "loss": 0.1187, + "step": 31784 + }, + { + "epoch": 87.32142857142857, + "grad_norm": 22.23354148864746, + "learning_rate": 6.3392857142857145e-06, + "loss": 0.7989, + "step": 31785 + }, + { + "epoch": 87.32417582417582, + "grad_norm": 1.0696741342544556, + "learning_rate": 6.337912087912089e-06, + "loss": 0.0091, + "step": 31786 + }, + { + "epoch": 87.32692307692308, + "grad_norm": 18.86264419555664, + "learning_rate": 6.336538461538462e-06, + "loss": 0.3328, + "step": 31787 + }, + { + "epoch": 87.32967032967034, + "grad_norm": 2.9126217365264893, + "learning_rate": 6.335164835164835e-06, + "loss": 0.0524, + "step": 31788 + }, + { + "epoch": 87.33241758241758, + "grad_norm": 16.45516586303711, + "learning_rate": 6.333791208791209e-06, + "loss": 0.3706, + "step": 31789 + }, + { + "epoch": 87.33516483516483, + "grad_norm": 2.326528787612915, + "learning_rate": 6.332417582417582e-06, + "loss": 0.0201, + "step": 31790 + }, + { + "epoch": 87.33791208791209, + "grad_norm": 9.617610931396484, + "learning_rate": 6.331043956043957e-06, + "loss": 0.0449, + "step": 31791 + }, + { + "epoch": 87.34065934065934, + "grad_norm": 10.867253303527832, + "learning_rate": 6.32967032967033e-06, + "loss": 0.3782, + "step": 31792 + }, + { + "epoch": 87.3434065934066, + "grad_norm": 17.10717010498047, + "learning_rate": 6.328296703296703e-06, + "loss": 0.3912, + "step": 31793 + }, + { + "epoch": 87.34615384615384, + "grad_norm": 10.362569808959961, + "learning_rate": 6.326923076923078e-06, + "loss": 0.2106, + "step": 31794 + }, + { + "epoch": 87.3489010989011, + "grad_norm": 6.6248884201049805, + "learning_rate": 6.32554945054945e-06, + "loss": 0.089, + "step": 31795 + }, + { + "epoch": 87.35164835164835, + "grad_norm": 0.8595396876335144, + "learning_rate": 6.324175824175825e-06, + "loss": 0.0098, + "step": 31796 + }, + { + "epoch": 87.3543956043956, + "grad_norm": 18.76458168029785, + "learning_rate": 6.322802197802198e-06, + "loss": 0.3859, + "step": 31797 + }, + { + "epoch": 87.35714285714286, + "grad_norm": 12.994196891784668, + "learning_rate": 6.321428571428571e-06, + "loss": 0.162, + "step": 31798 + }, + { + "epoch": 87.35989010989012, + "grad_norm": 23.535734176635742, + "learning_rate": 6.3200549450549455e-06, + "loss": 0.5032, + "step": 31799 + }, + { + "epoch": 87.36263736263736, + "grad_norm": 16.322006225585938, + "learning_rate": 6.318681318681319e-06, + "loss": 0.3762, + "step": 31800 + }, + { + "epoch": 87.36538461538461, + "grad_norm": 17.266096115112305, + "learning_rate": 6.317307692307693e-06, + "loss": 0.2405, + "step": 31801 + }, + { + "epoch": 87.36813186813187, + "grad_norm": 14.669388771057129, + "learning_rate": 6.315934065934067e-06, + "loss": 0.3979, + "step": 31802 + }, + { + "epoch": 87.37087912087912, + "grad_norm": 9.014847755432129, + "learning_rate": 6.314560439560439e-06, + "loss": 0.1236, + "step": 31803 + }, + { + "epoch": 87.37362637362638, + "grad_norm": 18.71540069580078, + "learning_rate": 6.313186813186813e-06, + "loss": 0.438, + "step": 31804 + }, + { + "epoch": 87.37637362637362, + "grad_norm": 5.2317328453063965, + "learning_rate": 6.311813186813187e-06, + "loss": 0.1001, + "step": 31805 + }, + { + "epoch": 87.37912087912088, + "grad_norm": 13.534671783447266, + "learning_rate": 6.310439560439561e-06, + "loss": 0.2618, + "step": 31806 + }, + { + "epoch": 87.38186813186813, + "grad_norm": 6.169734001159668, + "learning_rate": 6.3090659340659345e-06, + "loss": 0.159, + "step": 31807 + }, + { + "epoch": 87.38461538461539, + "grad_norm": 17.989038467407227, + "learning_rate": 6.307692307692308e-06, + "loss": 0.2529, + "step": 31808 + }, + { + "epoch": 87.38736263736264, + "grad_norm": 14.838277816772461, + "learning_rate": 6.306318681318682e-06, + "loss": 0.1913, + "step": 31809 + }, + { + "epoch": 87.39010989010988, + "grad_norm": 10.720104217529297, + "learning_rate": 6.304945054945055e-06, + "loss": 0.2421, + "step": 31810 + }, + { + "epoch": 87.39285714285714, + "grad_norm": 8.435662269592285, + "learning_rate": 6.30357142857143e-06, + "loss": 0.1889, + "step": 31811 + }, + { + "epoch": 87.3956043956044, + "grad_norm": 15.17917251586914, + "learning_rate": 6.302197802197802e-06, + "loss": 0.2703, + "step": 31812 + }, + { + "epoch": 87.39835164835165, + "grad_norm": 10.959280014038086, + "learning_rate": 6.300824175824176e-06, + "loss": 0.1371, + "step": 31813 + }, + { + "epoch": 87.4010989010989, + "grad_norm": 4.442855358123779, + "learning_rate": 6.29945054945055e-06, + "loss": 0.073, + "step": 31814 + }, + { + "epoch": 87.40384615384616, + "grad_norm": 16.877103805541992, + "learning_rate": 6.2980769230769234e-06, + "loss": 0.1533, + "step": 31815 + }, + { + "epoch": 87.4065934065934, + "grad_norm": 7.0680341720581055, + "learning_rate": 6.296703296703298e-06, + "loss": 0.1452, + "step": 31816 + }, + { + "epoch": 87.40934065934066, + "grad_norm": 9.302166938781738, + "learning_rate": 6.295329670329671e-06, + "loss": 0.1764, + "step": 31817 + }, + { + "epoch": 87.41208791208791, + "grad_norm": 25.166934967041016, + "learning_rate": 6.293956043956044e-06, + "loss": 0.8684, + "step": 31818 + }, + { + "epoch": 87.41483516483517, + "grad_norm": 20.979129791259766, + "learning_rate": 6.292582417582418e-06, + "loss": 0.577, + "step": 31819 + }, + { + "epoch": 87.41758241758242, + "grad_norm": 11.14073657989502, + "learning_rate": 6.291208791208791e-06, + "loss": 0.1578, + "step": 31820 + }, + { + "epoch": 87.42032967032966, + "grad_norm": 14.586954116821289, + "learning_rate": 6.2898351648351656e-06, + "loss": 0.31, + "step": 31821 + }, + { + "epoch": 87.42307692307692, + "grad_norm": 11.725560188293457, + "learning_rate": 6.288461538461539e-06, + "loss": 0.1356, + "step": 31822 + }, + { + "epoch": 87.42582417582418, + "grad_norm": 9.688669204711914, + "learning_rate": 6.287087912087912e-06, + "loss": 0.2552, + "step": 31823 + }, + { + "epoch": 87.42857142857143, + "grad_norm": 1.426758050918579, + "learning_rate": 6.285714285714287e-06, + "loss": 0.0136, + "step": 31824 + }, + { + "epoch": 87.43131868131869, + "grad_norm": 18.19611167907715, + "learning_rate": 6.284340659340659e-06, + "loss": 0.3281, + "step": 31825 + }, + { + "epoch": 87.43406593406593, + "grad_norm": 7.715896129608154, + "learning_rate": 6.282967032967033e-06, + "loss": 0.0895, + "step": 31826 + }, + { + "epoch": 87.43681318681318, + "grad_norm": 5.458501815795898, + "learning_rate": 6.281593406593407e-06, + "loss": 0.1146, + "step": 31827 + }, + { + "epoch": 87.43956043956044, + "grad_norm": 10.930693626403809, + "learning_rate": 6.28021978021978e-06, + "loss": 0.1295, + "step": 31828 + }, + { + "epoch": 87.4423076923077, + "grad_norm": 25.71951675415039, + "learning_rate": 6.2788461538461545e-06, + "loss": 0.6098, + "step": 31829 + }, + { + "epoch": 87.44505494505495, + "grad_norm": 22.880285263061523, + "learning_rate": 6.277472527472528e-06, + "loss": 0.3299, + "step": 31830 + }, + { + "epoch": 87.4478021978022, + "grad_norm": 32.23745346069336, + "learning_rate": 6.2760989010989005e-06, + "loss": 0.3605, + "step": 31831 + }, + { + "epoch": 87.45054945054945, + "grad_norm": 5.215386867523193, + "learning_rate": 6.274725274725276e-06, + "loss": 0.0698, + "step": 31832 + }, + { + "epoch": 87.4532967032967, + "grad_norm": 9.43686580657959, + "learning_rate": 6.273351648351648e-06, + "loss": 0.0757, + "step": 31833 + }, + { + "epoch": 87.45604395604396, + "grad_norm": 13.638328552246094, + "learning_rate": 6.271978021978022e-06, + "loss": 0.2034, + "step": 31834 + }, + { + "epoch": 87.45879120879121, + "grad_norm": 11.298017501831055, + "learning_rate": 6.270604395604396e-06, + "loss": 0.0853, + "step": 31835 + }, + { + "epoch": 87.46153846153847, + "grad_norm": 9.242692947387695, + "learning_rate": 6.269230769230769e-06, + "loss": 0.2608, + "step": 31836 + }, + { + "epoch": 87.46428571428571, + "grad_norm": 6.128836631774902, + "learning_rate": 6.2678571428571435e-06, + "loss": 0.0874, + "step": 31837 + }, + { + "epoch": 87.46703296703296, + "grad_norm": 12.360992431640625, + "learning_rate": 6.266483516483516e-06, + "loss": 0.1375, + "step": 31838 + }, + { + "epoch": 87.46978021978022, + "grad_norm": 2.883779287338257, + "learning_rate": 6.265109890109891e-06, + "loss": 0.0378, + "step": 31839 + }, + { + "epoch": 87.47252747252747, + "grad_norm": 14.17093563079834, + "learning_rate": 6.263736263736264e-06, + "loss": 0.3167, + "step": 31840 + }, + { + "epoch": 87.47527472527473, + "grad_norm": 20.707332611083984, + "learning_rate": 6.262362637362637e-06, + "loss": 0.8414, + "step": 31841 + }, + { + "epoch": 87.47802197802197, + "grad_norm": 15.123366355895996, + "learning_rate": 6.260989010989011e-06, + "loss": 0.2069, + "step": 31842 + }, + { + "epoch": 87.48076923076923, + "grad_norm": 19.17835807800293, + "learning_rate": 6.259615384615385e-06, + "loss": 0.5126, + "step": 31843 + }, + { + "epoch": 87.48351648351648, + "grad_norm": 15.349908828735352, + "learning_rate": 6.258241758241759e-06, + "loss": 0.3892, + "step": 31844 + }, + { + "epoch": 87.48626373626374, + "grad_norm": 2.774977207183838, + "learning_rate": 6.256868131868132e-06, + "loss": 0.0179, + "step": 31845 + }, + { + "epoch": 87.48901098901099, + "grad_norm": 14.362054824829102, + "learning_rate": 6.255494505494505e-06, + "loss": 0.337, + "step": 31846 + }, + { + "epoch": 87.49175824175825, + "grad_norm": 1.9991672039031982, + "learning_rate": 6.25412087912088e-06, + "loss": 0.0149, + "step": 31847 + }, + { + "epoch": 87.49450549450549, + "grad_norm": 9.073328971862793, + "learning_rate": 6.252747252747253e-06, + "loss": 0.1851, + "step": 31848 + }, + { + "epoch": 87.49725274725274, + "grad_norm": 10.029207229614258, + "learning_rate": 6.251373626373627e-06, + "loss": 0.1782, + "step": 31849 + }, + { + "epoch": 87.5, + "grad_norm": 4.195258617401123, + "learning_rate": 6.25e-06, + "loss": 0.0901, + "step": 31850 + }, + { + "epoch": 87.50274725274726, + "grad_norm": 20.87466049194336, + "learning_rate": 6.248626373626374e-06, + "loss": 0.4243, + "step": 31851 + }, + { + "epoch": 87.50549450549451, + "grad_norm": 8.953353881835938, + "learning_rate": 6.247252747252748e-06, + "loss": 0.1817, + "step": 31852 + }, + { + "epoch": 87.50824175824175, + "grad_norm": 40.84303283691406, + "learning_rate": 6.2458791208791205e-06, + "loss": 0.3227, + "step": 31853 + }, + { + "epoch": 87.51098901098901, + "grad_norm": 2.351888656616211, + "learning_rate": 6.244505494505495e-06, + "loss": 0.0269, + "step": 31854 + }, + { + "epoch": 87.51373626373626, + "grad_norm": 9.6820707321167, + "learning_rate": 6.243131868131868e-06, + "loss": 0.1553, + "step": 31855 + }, + { + "epoch": 87.51648351648352, + "grad_norm": 15.990718841552734, + "learning_rate": 6.2417582417582424e-06, + "loss": 0.2219, + "step": 31856 + }, + { + "epoch": 87.51923076923077, + "grad_norm": 9.661367416381836, + "learning_rate": 6.240384615384616e-06, + "loss": 0.065, + "step": 31857 + }, + { + "epoch": 87.52197802197803, + "grad_norm": 7.970487594604492, + "learning_rate": 6.239010989010989e-06, + "loss": 0.0989, + "step": 31858 + }, + { + "epoch": 87.52472527472527, + "grad_norm": 5.0420145988464355, + "learning_rate": 6.237637362637363e-06, + "loss": 0.0667, + "step": 31859 + }, + { + "epoch": 87.52747252747253, + "grad_norm": 10.332449913024902, + "learning_rate": 6.236263736263737e-06, + "loss": 0.1672, + "step": 31860 + }, + { + "epoch": 87.53021978021978, + "grad_norm": 10.235325813293457, + "learning_rate": 6.23489010989011e-06, + "loss": 0.196, + "step": 31861 + }, + { + "epoch": 87.53296703296704, + "grad_norm": 15.664353370666504, + "learning_rate": 6.2335164835164846e-06, + "loss": 0.3973, + "step": 31862 + }, + { + "epoch": 87.53571428571429, + "grad_norm": 6.709669589996338, + "learning_rate": 6.232142857142857e-06, + "loss": 0.0794, + "step": 31863 + }, + { + "epoch": 87.53846153846153, + "grad_norm": 3.2409770488739014, + "learning_rate": 6.230769230769231e-06, + "loss": 0.0379, + "step": 31864 + }, + { + "epoch": 87.54120879120879, + "grad_norm": 10.50250244140625, + "learning_rate": 6.229395604395605e-06, + "loss": 0.1524, + "step": 31865 + }, + { + "epoch": 87.54395604395604, + "grad_norm": 18.52947425842285, + "learning_rate": 6.228021978021978e-06, + "loss": 0.3092, + "step": 31866 + }, + { + "epoch": 87.5467032967033, + "grad_norm": 26.38578224182129, + "learning_rate": 6.226648351648352e-06, + "loss": 0.7887, + "step": 31867 + }, + { + "epoch": 87.54945054945055, + "grad_norm": 5.985176086425781, + "learning_rate": 6.225274725274725e-06, + "loss": 0.0478, + "step": 31868 + }, + { + "epoch": 87.5521978021978, + "grad_norm": 9.826523780822754, + "learning_rate": 6.223901098901099e-06, + "loss": 0.0803, + "step": 31869 + }, + { + "epoch": 87.55494505494505, + "grad_norm": 8.00125789642334, + "learning_rate": 6.222527472527473e-06, + "loss": 0.1437, + "step": 31870 + }, + { + "epoch": 87.5576923076923, + "grad_norm": 8.77349853515625, + "learning_rate": 6.221153846153847e-06, + "loss": 0.1295, + "step": 31871 + }, + { + "epoch": 87.56043956043956, + "grad_norm": 2.7603702545166016, + "learning_rate": 6.2197802197802195e-06, + "loss": 0.0375, + "step": 31872 + }, + { + "epoch": 87.56318681318682, + "grad_norm": 4.467413425445557, + "learning_rate": 6.218406593406594e-06, + "loss": 0.0738, + "step": 31873 + }, + { + "epoch": 87.56593406593407, + "grad_norm": 22.991910934448242, + "learning_rate": 6.217032967032967e-06, + "loss": 0.4869, + "step": 31874 + }, + { + "epoch": 87.56868131868131, + "grad_norm": 16.40529441833496, + "learning_rate": 6.215659340659341e-06, + "loss": 0.622, + "step": 31875 + }, + { + "epoch": 87.57142857142857, + "grad_norm": 8.5021390914917, + "learning_rate": 6.214285714285715e-06, + "loss": 0.1201, + "step": 31876 + }, + { + "epoch": 87.57417582417582, + "grad_norm": 3.2907915115356445, + "learning_rate": 6.212912087912088e-06, + "loss": 0.0331, + "step": 31877 + }, + { + "epoch": 87.57692307692308, + "grad_norm": 7.694129943847656, + "learning_rate": 6.211538461538462e-06, + "loss": 0.0919, + "step": 31878 + }, + { + "epoch": 87.57967032967034, + "grad_norm": 4.606136798858643, + "learning_rate": 6.210164835164836e-06, + "loss": 0.0891, + "step": 31879 + }, + { + "epoch": 87.58241758241758, + "grad_norm": 21.44423484802246, + "learning_rate": 6.208791208791209e-06, + "loss": 0.5013, + "step": 31880 + }, + { + "epoch": 87.58516483516483, + "grad_norm": 3.1013431549072266, + "learning_rate": 6.207417582417583e-06, + "loss": 0.0281, + "step": 31881 + }, + { + "epoch": 87.58791208791209, + "grad_norm": 6.926217555999756, + "learning_rate": 6.206043956043956e-06, + "loss": 0.1455, + "step": 31882 + }, + { + "epoch": 87.59065934065934, + "grad_norm": 5.12960958480835, + "learning_rate": 6.2046703296703295e-06, + "loss": 0.0592, + "step": 31883 + }, + { + "epoch": 87.5934065934066, + "grad_norm": 5.718238353729248, + "learning_rate": 6.203296703296704e-06, + "loss": 0.0615, + "step": 31884 + }, + { + "epoch": 87.59615384615384, + "grad_norm": 22.159404754638672, + "learning_rate": 6.201923076923077e-06, + "loss": 0.4958, + "step": 31885 + }, + { + "epoch": 87.5989010989011, + "grad_norm": 8.273213386535645, + "learning_rate": 6.200549450549451e-06, + "loss": 0.1596, + "step": 31886 + }, + { + "epoch": 87.60164835164835, + "grad_norm": 1.2572681903839111, + "learning_rate": 6.199175824175824e-06, + "loss": 0.0156, + "step": 31887 + }, + { + "epoch": 87.6043956043956, + "grad_norm": 10.382932662963867, + "learning_rate": 6.197802197802198e-06, + "loss": 0.1534, + "step": 31888 + }, + { + "epoch": 87.60714285714286, + "grad_norm": 7.700159072875977, + "learning_rate": 6.196428571428572e-06, + "loss": 0.0858, + "step": 31889 + }, + { + "epoch": 87.60989010989012, + "grad_norm": 6.966777801513672, + "learning_rate": 6.195054945054946e-06, + "loss": 0.0562, + "step": 31890 + }, + { + "epoch": 87.61263736263736, + "grad_norm": 9.76952075958252, + "learning_rate": 6.193681318681319e-06, + "loss": 0.2924, + "step": 31891 + }, + { + "epoch": 87.61538461538461, + "grad_norm": 13.89553451538086, + "learning_rate": 6.192307692307693e-06, + "loss": 0.275, + "step": 31892 + }, + { + "epoch": 87.61813186813187, + "grad_norm": 6.469915866851807, + "learning_rate": 6.190934065934066e-06, + "loss": 0.0913, + "step": 31893 + }, + { + "epoch": 87.62087912087912, + "grad_norm": 16.427419662475586, + "learning_rate": 6.1895604395604395e-06, + "loss": 0.1508, + "step": 31894 + }, + { + "epoch": 87.62362637362638, + "grad_norm": 18.501222610473633, + "learning_rate": 6.188186813186814e-06, + "loss": 0.4186, + "step": 31895 + }, + { + "epoch": 87.62637362637362, + "grad_norm": 13.480587005615234, + "learning_rate": 6.186813186813187e-06, + "loss": 0.2227, + "step": 31896 + }, + { + "epoch": 87.62912087912088, + "grad_norm": 14.55301570892334, + "learning_rate": 6.185439560439561e-06, + "loss": 0.2402, + "step": 31897 + }, + { + "epoch": 87.63186813186813, + "grad_norm": 8.839262962341309, + "learning_rate": 6.184065934065934e-06, + "loss": 0.1105, + "step": 31898 + }, + { + "epoch": 87.63461538461539, + "grad_norm": 16.99480628967285, + "learning_rate": 6.182692307692308e-06, + "loss": 0.3533, + "step": 31899 + }, + { + "epoch": 87.63736263736264, + "grad_norm": 15.554869651794434, + "learning_rate": 6.181318681318682e-06, + "loss": 0.3437, + "step": 31900 + }, + { + "epoch": 87.64010989010988, + "grad_norm": 15.205052375793457, + "learning_rate": 6.179945054945055e-06, + "loss": 0.2757, + "step": 31901 + }, + { + "epoch": 87.64285714285714, + "grad_norm": 11.185470581054688, + "learning_rate": 6.1785714285714285e-06, + "loss": 0.174, + "step": 31902 + }, + { + "epoch": 87.6456043956044, + "grad_norm": 5.339188098907471, + "learning_rate": 6.177197802197803e-06, + "loss": 0.0789, + "step": 31903 + }, + { + "epoch": 87.64835164835165, + "grad_norm": 16.218843460083008, + "learning_rate": 6.175824175824176e-06, + "loss": 0.3737, + "step": 31904 + }, + { + "epoch": 87.6510989010989, + "grad_norm": 6.544125556945801, + "learning_rate": 6.17445054945055e-06, + "loss": 0.0747, + "step": 31905 + }, + { + "epoch": 87.65384615384616, + "grad_norm": 14.265718460083008, + "learning_rate": 6.173076923076923e-06, + "loss": 0.1807, + "step": 31906 + }, + { + "epoch": 87.6565934065934, + "grad_norm": 8.736188888549805, + "learning_rate": 6.171703296703297e-06, + "loss": 0.1092, + "step": 31907 + }, + { + "epoch": 87.65934065934066, + "grad_norm": 4.20789098739624, + "learning_rate": 6.170329670329671e-06, + "loss": 0.0165, + "step": 31908 + }, + { + "epoch": 87.66208791208791, + "grad_norm": 15.191554069519043, + "learning_rate": 6.168956043956044e-06, + "loss": 0.1595, + "step": 31909 + }, + { + "epoch": 87.66483516483517, + "grad_norm": 7.951847076416016, + "learning_rate": 6.167582417582418e-06, + "loss": 0.1156, + "step": 31910 + }, + { + "epoch": 87.66758241758242, + "grad_norm": 19.179231643676758, + "learning_rate": 6.166208791208792e-06, + "loss": 0.4027, + "step": 31911 + }, + { + "epoch": 87.67032967032966, + "grad_norm": 6.549354553222656, + "learning_rate": 6.164835164835165e-06, + "loss": 0.0843, + "step": 31912 + }, + { + "epoch": 87.67307692307692, + "grad_norm": 16.61880874633789, + "learning_rate": 6.1634615384615385e-06, + "loss": 0.2766, + "step": 31913 + }, + { + "epoch": 87.67582417582418, + "grad_norm": 1.6348398923873901, + "learning_rate": 6.162087912087913e-06, + "loss": 0.0148, + "step": 31914 + }, + { + "epoch": 87.67857142857143, + "grad_norm": 6.966528415679932, + "learning_rate": 6.160714285714286e-06, + "loss": 0.07, + "step": 31915 + }, + { + "epoch": 87.68131868131869, + "grad_norm": 18.101871490478516, + "learning_rate": 6.1593406593406595e-06, + "loss": 0.4759, + "step": 31916 + }, + { + "epoch": 87.68406593406593, + "grad_norm": 11.281144142150879, + "learning_rate": 6.157967032967033e-06, + "loss": 0.2406, + "step": 31917 + }, + { + "epoch": 87.68681318681318, + "grad_norm": 8.889554023742676, + "learning_rate": 6.156593406593407e-06, + "loss": 0.1996, + "step": 31918 + }, + { + "epoch": 87.68956043956044, + "grad_norm": 12.899916648864746, + "learning_rate": 6.155219780219781e-06, + "loss": 0.1456, + "step": 31919 + }, + { + "epoch": 87.6923076923077, + "grad_norm": 10.949228286743164, + "learning_rate": 6.153846153846155e-06, + "loss": 0.3592, + "step": 31920 + }, + { + "epoch": 87.69505494505495, + "grad_norm": 26.4256649017334, + "learning_rate": 6.1524725274725274e-06, + "loss": 0.2106, + "step": 31921 + }, + { + "epoch": 87.6978021978022, + "grad_norm": 3.7933294773101807, + "learning_rate": 6.151098901098902e-06, + "loss": 0.0322, + "step": 31922 + }, + { + "epoch": 87.70054945054945, + "grad_norm": 17.573745727539062, + "learning_rate": 6.149725274725275e-06, + "loss": 0.3411, + "step": 31923 + }, + { + "epoch": 87.7032967032967, + "grad_norm": 14.643027305603027, + "learning_rate": 6.1483516483516485e-06, + "loss": 0.36, + "step": 31924 + }, + { + "epoch": 87.70604395604396, + "grad_norm": 10.959258079528809, + "learning_rate": 6.146978021978022e-06, + "loss": 0.1749, + "step": 31925 + }, + { + "epoch": 87.70879120879121, + "grad_norm": 9.265082359313965, + "learning_rate": 6.145604395604395e-06, + "loss": 0.1307, + "step": 31926 + }, + { + "epoch": 87.71153846153847, + "grad_norm": 9.091933250427246, + "learning_rate": 6.1442307692307696e-06, + "loss": 0.1926, + "step": 31927 + }, + { + "epoch": 87.71428571428571, + "grad_norm": 4.677009105682373, + "learning_rate": 6.142857142857143e-06, + "loss": 0.0492, + "step": 31928 + }, + { + "epoch": 87.71703296703296, + "grad_norm": 11.404600143432617, + "learning_rate": 6.141483516483517e-06, + "loss": 0.1712, + "step": 31929 + }, + { + "epoch": 87.71978021978022, + "grad_norm": 10.546223640441895, + "learning_rate": 6.14010989010989e-06, + "loss": 0.2009, + "step": 31930 + }, + { + "epoch": 87.72252747252747, + "grad_norm": 9.065783500671387, + "learning_rate": 6.138736263736264e-06, + "loss": 0.1838, + "step": 31931 + }, + { + "epoch": 87.72527472527473, + "grad_norm": 14.56006908416748, + "learning_rate": 6.1373626373626374e-06, + "loss": 0.3426, + "step": 31932 + }, + { + "epoch": 87.72802197802197, + "grad_norm": 19.851024627685547, + "learning_rate": 6.135989010989012e-06, + "loss": 0.1585, + "step": 31933 + }, + { + "epoch": 87.73076923076923, + "grad_norm": 15.035865783691406, + "learning_rate": 6.134615384615385e-06, + "loss": 0.1488, + "step": 31934 + }, + { + "epoch": 87.73351648351648, + "grad_norm": 3.0834124088287354, + "learning_rate": 6.1332417582417585e-06, + "loss": 0.0444, + "step": 31935 + }, + { + "epoch": 87.73626373626374, + "grad_norm": 10.184588432312012, + "learning_rate": 6.131868131868132e-06, + "loss": 0.1714, + "step": 31936 + }, + { + "epoch": 87.73901098901099, + "grad_norm": 13.50766658782959, + "learning_rate": 6.130494505494506e-06, + "loss": 0.204, + "step": 31937 + }, + { + "epoch": 87.74175824175825, + "grad_norm": 5.512248516082764, + "learning_rate": 6.12912087912088e-06, + "loss": 0.0418, + "step": 31938 + }, + { + "epoch": 87.74450549450549, + "grad_norm": 13.439126014709473, + "learning_rate": 6.127747252747253e-06, + "loss": 0.2868, + "step": 31939 + }, + { + "epoch": 87.74725274725274, + "grad_norm": 7.733755111694336, + "learning_rate": 6.126373626373626e-06, + "loss": 0.0898, + "step": 31940 + }, + { + "epoch": 87.75, + "grad_norm": 8.627480506896973, + "learning_rate": 6.125e-06, + "loss": 0.0858, + "step": 31941 + }, + { + "epoch": 87.75274725274726, + "grad_norm": 20.56868553161621, + "learning_rate": 6.123626373626374e-06, + "loss": 0.332, + "step": 31942 + }, + { + "epoch": 87.75549450549451, + "grad_norm": 4.049103260040283, + "learning_rate": 6.1222527472527475e-06, + "loss": 0.052, + "step": 31943 + }, + { + "epoch": 87.75824175824175, + "grad_norm": 15.991321563720703, + "learning_rate": 6.120879120879122e-06, + "loss": 0.2212, + "step": 31944 + }, + { + "epoch": 87.76098901098901, + "grad_norm": 17.03438377380371, + "learning_rate": 6.119505494505494e-06, + "loss": 0.7542, + "step": 31945 + }, + { + "epoch": 87.76373626373626, + "grad_norm": 2.5028164386749268, + "learning_rate": 6.1181318681318685e-06, + "loss": 0.0385, + "step": 31946 + }, + { + "epoch": 87.76648351648352, + "grad_norm": 14.76892375946045, + "learning_rate": 6.116758241758242e-06, + "loss": 0.3678, + "step": 31947 + }, + { + "epoch": 87.76923076923077, + "grad_norm": 14.684513092041016, + "learning_rate": 6.115384615384616e-06, + "loss": 0.2614, + "step": 31948 + }, + { + "epoch": 87.77197802197803, + "grad_norm": 11.341026306152344, + "learning_rate": 6.11401098901099e-06, + "loss": 0.1349, + "step": 31949 + }, + { + "epoch": 87.77472527472527, + "grad_norm": 22.94228172302246, + "learning_rate": 6.112637362637363e-06, + "loss": 0.3913, + "step": 31950 + }, + { + "epoch": 87.77747252747253, + "grad_norm": 4.483500957489014, + "learning_rate": 6.111263736263736e-06, + "loss": 0.0364, + "step": 31951 + }, + { + "epoch": 87.78021978021978, + "grad_norm": 24.819133758544922, + "learning_rate": 6.109890109890111e-06, + "loss": 0.3025, + "step": 31952 + }, + { + "epoch": 87.78296703296704, + "grad_norm": 12.713839530944824, + "learning_rate": 6.108516483516484e-06, + "loss": 0.4527, + "step": 31953 + }, + { + "epoch": 87.78571428571429, + "grad_norm": 12.428030967712402, + "learning_rate": 6.1071428571428575e-06, + "loss": 0.2518, + "step": 31954 + }, + { + "epoch": 87.78846153846153, + "grad_norm": 5.580554962158203, + "learning_rate": 6.105769230769231e-06, + "loss": 0.0544, + "step": 31955 + }, + { + "epoch": 87.79120879120879, + "grad_norm": 1.4682520627975464, + "learning_rate": 6.104395604395604e-06, + "loss": 0.0148, + "step": 31956 + }, + { + "epoch": 87.79395604395604, + "grad_norm": 10.41675853729248, + "learning_rate": 6.1030219780219785e-06, + "loss": 0.2679, + "step": 31957 + }, + { + "epoch": 87.7967032967033, + "grad_norm": 21.74929428100586, + "learning_rate": 6.101648351648352e-06, + "loss": 0.3389, + "step": 31958 + }, + { + "epoch": 87.79945054945055, + "grad_norm": 13.752191543579102, + "learning_rate": 6.100274725274725e-06, + "loss": 0.5431, + "step": 31959 + }, + { + "epoch": 87.8021978021978, + "grad_norm": 5.6996235847473145, + "learning_rate": 6.098901098901099e-06, + "loss": 0.1057, + "step": 31960 + }, + { + "epoch": 87.80494505494505, + "grad_norm": 15.000452995300293, + "learning_rate": 6.097527472527473e-06, + "loss": 0.3667, + "step": 31961 + }, + { + "epoch": 87.8076923076923, + "grad_norm": 1.5853230953216553, + "learning_rate": 6.0961538461538464e-06, + "loss": 0.0151, + "step": 31962 + }, + { + "epoch": 87.81043956043956, + "grad_norm": 13.540897369384766, + "learning_rate": 6.094780219780221e-06, + "loss": 0.3085, + "step": 31963 + }, + { + "epoch": 87.81318681318682, + "grad_norm": 8.653278350830078, + "learning_rate": 6.093406593406593e-06, + "loss": 0.1273, + "step": 31964 + }, + { + "epoch": 87.81593406593407, + "grad_norm": 8.956756591796875, + "learning_rate": 6.0920329670329675e-06, + "loss": 0.1758, + "step": 31965 + }, + { + "epoch": 87.81868131868131, + "grad_norm": 13.429723739624023, + "learning_rate": 6.090659340659341e-06, + "loss": 0.2104, + "step": 31966 + }, + { + "epoch": 87.82142857142857, + "grad_norm": 4.658572196960449, + "learning_rate": 6.089285714285714e-06, + "loss": 0.0446, + "step": 31967 + }, + { + "epoch": 87.82417582417582, + "grad_norm": 11.254586219787598, + "learning_rate": 6.0879120879120886e-06, + "loss": 0.1452, + "step": 31968 + }, + { + "epoch": 87.82692307692308, + "grad_norm": 2.902174711227417, + "learning_rate": 6.086538461538462e-06, + "loss": 0.0691, + "step": 31969 + }, + { + "epoch": 87.82967032967034, + "grad_norm": 16.047855377197266, + "learning_rate": 6.085164835164835e-06, + "loss": 0.4593, + "step": 31970 + }, + { + "epoch": 87.83241758241758, + "grad_norm": 15.773093223571777, + "learning_rate": 6.083791208791209e-06, + "loss": 0.3655, + "step": 31971 + }, + { + "epoch": 87.83516483516483, + "grad_norm": 18.46091079711914, + "learning_rate": 6.082417582417583e-06, + "loss": 0.2292, + "step": 31972 + }, + { + "epoch": 87.83791208791209, + "grad_norm": 10.007625579833984, + "learning_rate": 6.0810439560439564e-06, + "loss": 0.229, + "step": 31973 + }, + { + "epoch": 87.84065934065934, + "grad_norm": 21.855613708496094, + "learning_rate": 6.07967032967033e-06, + "loss": 0.2321, + "step": 31974 + }, + { + "epoch": 87.8434065934066, + "grad_norm": 5.904469966888428, + "learning_rate": 6.078296703296703e-06, + "loss": 0.0734, + "step": 31975 + }, + { + "epoch": 87.84615384615384, + "grad_norm": 15.167556762695312, + "learning_rate": 6.0769230769230775e-06, + "loss": 0.4272, + "step": 31976 + }, + { + "epoch": 87.8489010989011, + "grad_norm": 3.879323959350586, + "learning_rate": 6.075549450549451e-06, + "loss": 0.0573, + "step": 31977 + }, + { + "epoch": 87.85164835164835, + "grad_norm": 4.35915470123291, + "learning_rate": 6.074175824175824e-06, + "loss": 0.062, + "step": 31978 + }, + { + "epoch": 87.8543956043956, + "grad_norm": 28.18682861328125, + "learning_rate": 6.072802197802198e-06, + "loss": 0.6125, + "step": 31979 + }, + { + "epoch": 87.85714285714286, + "grad_norm": 12.213900566101074, + "learning_rate": 6.071428571428572e-06, + "loss": 0.189, + "step": 31980 + }, + { + "epoch": 87.85989010989012, + "grad_norm": 29.92107391357422, + "learning_rate": 6.070054945054945e-06, + "loss": 0.6811, + "step": 31981 + }, + { + "epoch": 87.86263736263736, + "grad_norm": 20.236974716186523, + "learning_rate": 6.068681318681319e-06, + "loss": 0.2774, + "step": 31982 + }, + { + "epoch": 87.86538461538461, + "grad_norm": 7.5843939781188965, + "learning_rate": 6.067307692307692e-06, + "loss": 0.1713, + "step": 31983 + }, + { + "epoch": 87.86813186813187, + "grad_norm": 11.205771446228027, + "learning_rate": 6.0659340659340665e-06, + "loss": 0.2555, + "step": 31984 + }, + { + "epoch": 87.87087912087912, + "grad_norm": 7.294879913330078, + "learning_rate": 6.06456043956044e-06, + "loss": 0.2286, + "step": 31985 + }, + { + "epoch": 87.87362637362638, + "grad_norm": 11.787589073181152, + "learning_rate": 6.063186813186813e-06, + "loss": 0.2155, + "step": 31986 + }, + { + "epoch": 87.87637362637362, + "grad_norm": 20.956497192382812, + "learning_rate": 6.0618131868131875e-06, + "loss": 0.5714, + "step": 31987 + }, + { + "epoch": 87.87912087912088, + "grad_norm": 10.180927276611328, + "learning_rate": 6.06043956043956e-06, + "loss": 0.1734, + "step": 31988 + }, + { + "epoch": 87.88186813186813, + "grad_norm": 11.771873474121094, + "learning_rate": 6.059065934065934e-06, + "loss": 0.2708, + "step": 31989 + }, + { + "epoch": 87.88461538461539, + "grad_norm": 5.288931846618652, + "learning_rate": 6.057692307692308e-06, + "loss": 0.0936, + "step": 31990 + }, + { + "epoch": 87.88736263736264, + "grad_norm": 7.685833930969238, + "learning_rate": 6.056318681318682e-06, + "loss": 0.1074, + "step": 31991 + }, + { + "epoch": 87.89010989010988, + "grad_norm": 9.509313583374023, + "learning_rate": 6.054945054945055e-06, + "loss": 0.123, + "step": 31992 + }, + { + "epoch": 87.89285714285714, + "grad_norm": 11.066018104553223, + "learning_rate": 6.053571428571429e-06, + "loss": 0.1314, + "step": 31993 + }, + { + "epoch": 87.8956043956044, + "grad_norm": 11.376401901245117, + "learning_rate": 6.052197802197802e-06, + "loss": 0.2173, + "step": 31994 + }, + { + "epoch": 87.89835164835165, + "grad_norm": 26.06157112121582, + "learning_rate": 6.0508241758241765e-06, + "loss": 1.0028, + "step": 31995 + }, + { + "epoch": 87.9010989010989, + "grad_norm": 5.096323013305664, + "learning_rate": 6.04945054945055e-06, + "loss": 0.0437, + "step": 31996 + }, + { + "epoch": 87.90384615384616, + "grad_norm": 9.568424224853516, + "learning_rate": 6.048076923076923e-06, + "loss": 0.1137, + "step": 31997 + }, + { + "epoch": 87.9065934065934, + "grad_norm": 26.636552810668945, + "learning_rate": 6.046703296703297e-06, + "loss": 0.65, + "step": 31998 + }, + { + "epoch": 87.90934065934066, + "grad_norm": 16.8377685546875, + "learning_rate": 6.04532967032967e-06, + "loss": 0.3574, + "step": 31999 + }, + { + "epoch": 87.91208791208791, + "grad_norm": 10.439957618713379, + "learning_rate": 6.043956043956044e-06, + "loss": 0.1501, + "step": 32000 + }, + { + "epoch": 87.91483516483517, + "grad_norm": 12.655117988586426, + "learning_rate": 6.042582417582418e-06, + "loss": 0.3993, + "step": 32001 + }, + { + "epoch": 87.91758241758242, + "grad_norm": 10.724921226501465, + "learning_rate": 6.041208791208792e-06, + "loss": 0.2068, + "step": 32002 + }, + { + "epoch": 87.92032967032966, + "grad_norm": 13.642745018005371, + "learning_rate": 6.039835164835165e-06, + "loss": 0.2369, + "step": 32003 + }, + { + "epoch": 87.92307692307692, + "grad_norm": 6.582082748413086, + "learning_rate": 6.038461538461539e-06, + "loss": 0.1118, + "step": 32004 + }, + { + "epoch": 87.92582417582418, + "grad_norm": 11.083552360534668, + "learning_rate": 6.037087912087912e-06, + "loss": 0.1438, + "step": 32005 + }, + { + "epoch": 87.92857142857143, + "grad_norm": 1.9773579835891724, + "learning_rate": 6.0357142857142865e-06, + "loss": 0.0187, + "step": 32006 + }, + { + "epoch": 87.93131868131869, + "grad_norm": 14.737034797668457, + "learning_rate": 6.034340659340659e-06, + "loss": 0.124, + "step": 32007 + }, + { + "epoch": 87.93406593406593, + "grad_norm": 19.590866088867188, + "learning_rate": 6.032967032967033e-06, + "loss": 0.7272, + "step": 32008 + }, + { + "epoch": 87.93681318681318, + "grad_norm": 8.802602767944336, + "learning_rate": 6.031593406593407e-06, + "loss": 0.1819, + "step": 32009 + }, + { + "epoch": 87.93956043956044, + "grad_norm": 20.05799102783203, + "learning_rate": 6.030219780219781e-06, + "loss": 0.302, + "step": 32010 + }, + { + "epoch": 87.9423076923077, + "grad_norm": 16.228063583374023, + "learning_rate": 6.028846153846154e-06, + "loss": 0.1797, + "step": 32011 + }, + { + "epoch": 87.94505494505495, + "grad_norm": 13.175566673278809, + "learning_rate": 6.027472527472528e-06, + "loss": 0.2562, + "step": 32012 + }, + { + "epoch": 87.9478021978022, + "grad_norm": 9.171085357666016, + "learning_rate": 6.026098901098901e-06, + "loss": 0.2476, + "step": 32013 + }, + { + "epoch": 87.95054945054945, + "grad_norm": 30.111228942871094, + "learning_rate": 6.024725274725275e-06, + "loss": 1.0298, + "step": 32014 + }, + { + "epoch": 87.9532967032967, + "grad_norm": 5.3264994621276855, + "learning_rate": 6.023351648351649e-06, + "loss": 0.0499, + "step": 32015 + }, + { + "epoch": 87.95604395604396, + "grad_norm": 3.5990941524505615, + "learning_rate": 6.021978021978022e-06, + "loss": 0.0534, + "step": 32016 + }, + { + "epoch": 87.95879120879121, + "grad_norm": 15.805832862854004, + "learning_rate": 6.020604395604396e-06, + "loss": 0.2088, + "step": 32017 + }, + { + "epoch": 87.96153846153847, + "grad_norm": 10.541328430175781, + "learning_rate": 6.019230769230769e-06, + "loss": 0.1135, + "step": 32018 + }, + { + "epoch": 87.96428571428571, + "grad_norm": 31.45541763305664, + "learning_rate": 6.017857142857143e-06, + "loss": 1.0703, + "step": 32019 + }, + { + "epoch": 87.96703296703296, + "grad_norm": 10.718809127807617, + "learning_rate": 6.016483516483517e-06, + "loss": 0.1354, + "step": 32020 + }, + { + "epoch": 87.96978021978022, + "grad_norm": 8.685036659240723, + "learning_rate": 6.015109890109891e-06, + "loss": 0.2106, + "step": 32021 + }, + { + "epoch": 87.97252747252747, + "grad_norm": 20.607666015625, + "learning_rate": 6.0137362637362635e-06, + "loss": 0.3406, + "step": 32022 + }, + { + "epoch": 87.97527472527473, + "grad_norm": 5.403628349304199, + "learning_rate": 6.012362637362638e-06, + "loss": 0.0618, + "step": 32023 + }, + { + "epoch": 87.97802197802197, + "grad_norm": 15.433296203613281, + "learning_rate": 6.010989010989011e-06, + "loss": 0.5609, + "step": 32024 + }, + { + "epoch": 87.98076923076923, + "grad_norm": 7.071357727050781, + "learning_rate": 6.0096153846153855e-06, + "loss": 0.2308, + "step": 32025 + }, + { + "epoch": 87.98351648351648, + "grad_norm": 8.947870254516602, + "learning_rate": 6.008241758241759e-06, + "loss": 0.0856, + "step": 32026 + }, + { + "epoch": 87.98626373626374, + "grad_norm": 16.07640266418457, + "learning_rate": 6.006868131868132e-06, + "loss": 0.3136, + "step": 32027 + }, + { + "epoch": 87.98901098901099, + "grad_norm": 11.366787910461426, + "learning_rate": 6.005494505494506e-06, + "loss": 0.1771, + "step": 32028 + }, + { + "epoch": 87.99175824175825, + "grad_norm": 21.208393096923828, + "learning_rate": 6.004120879120879e-06, + "loss": 0.5171, + "step": 32029 + }, + { + "epoch": 87.99450549450549, + "grad_norm": 9.700980186462402, + "learning_rate": 6.002747252747253e-06, + "loss": 0.1597, + "step": 32030 + }, + { + "epoch": 87.99725274725274, + "grad_norm": 7.9479570388793945, + "learning_rate": 6.001373626373627e-06, + "loss": 0.1246, + "step": 32031 + }, + { + "epoch": 88.0, + "grad_norm": 12.815469741821289, + "learning_rate": 6e-06, + "loss": 0.0995, + "step": 32032 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.7658402203856749, + "eval_f1": 0.7693597238346579, + "eval_f1_DuraRiadoRio_64x64": 0.7216494845360825, + "eval_f1_Mole_64x64": 0.828125, + "eval_f1_Quebrado_64x64": 0.8657718120805369, + "eval_f1_RiadoRio_64x64": 0.644927536231884, + "eval_f1_RioFechado_64x64": 0.7863247863247863, + "eval_loss": 1.0813448429107666, + "eval_precision": 0.8151206672756117, + "eval_precision_DuraRiadoRio_64x64": 0.5737704918032787, + "eval_precision_Mole_64x64": 0.9464285714285714, + "eval_precision_Quebrado_64x64": 0.8376623376623377, + "eval_precision_RiadoRio_64x64": 0.717741935483871, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.7675160612799605, + "eval_recall_DuraRiadoRio_64x64": 0.9722222222222222, + "eval_recall_Mole_64x64": 0.7361111111111112, + "eval_recall_Quebrado_64x64": 0.8958333333333334, + "eval_recall_RiadoRio_64x64": 0.5855263157894737, + "eval_recall_RioFechado_64x64": 0.647887323943662, + "eval_runtime": 1.7325, + "eval_samples_per_second": 419.05, + "eval_steps_per_second": 26.551, + "step": 32032 + }, + { + "epoch": 88.00274725274726, + "grad_norm": 13.230315208435059, + "learning_rate": 5.9986263736263736e-06, + "loss": 0.1122, + "step": 32033 + }, + { + "epoch": 88.00549450549451, + "grad_norm": 18.321290969848633, + "learning_rate": 5.997252747252748e-06, + "loss": 0.3649, + "step": 32034 + }, + { + "epoch": 88.00824175824175, + "grad_norm": 19.268802642822266, + "learning_rate": 5.995879120879121e-06, + "loss": 0.6996, + "step": 32035 + }, + { + "epoch": 88.01098901098901, + "grad_norm": 9.68681812286377, + "learning_rate": 5.994505494505495e-06, + "loss": 0.1116, + "step": 32036 + }, + { + "epoch": 88.01373626373626, + "grad_norm": 8.45229434967041, + "learning_rate": 5.993131868131868e-06, + "loss": 0.1559, + "step": 32037 + }, + { + "epoch": 88.01648351648352, + "grad_norm": 1.0978703498840332, + "learning_rate": 5.991758241758242e-06, + "loss": 0.0166, + "step": 32038 + }, + { + "epoch": 88.01923076923077, + "grad_norm": 24.84576416015625, + "learning_rate": 5.990384615384616e-06, + "loss": 0.5673, + "step": 32039 + }, + { + "epoch": 88.02197802197803, + "grad_norm": 5.875227928161621, + "learning_rate": 5.98901098901099e-06, + "loss": 0.095, + "step": 32040 + }, + { + "epoch": 88.02472527472527, + "grad_norm": 5.401178359985352, + "learning_rate": 5.9876373626373625e-06, + "loss": 0.0992, + "step": 32041 + }, + { + "epoch": 88.02747252747253, + "grad_norm": 18.04157829284668, + "learning_rate": 5.986263736263737e-06, + "loss": 0.1757, + "step": 32042 + }, + { + "epoch": 88.03021978021978, + "grad_norm": 5.5348920822143555, + "learning_rate": 5.98489010989011e-06, + "loss": 0.0806, + "step": 32043 + }, + { + "epoch": 88.03296703296704, + "grad_norm": 24.66272735595703, + "learning_rate": 5.9835164835164836e-06, + "loss": 0.6919, + "step": 32044 + }, + { + "epoch": 88.03571428571429, + "grad_norm": 6.7127461433410645, + "learning_rate": 5.982142857142858e-06, + "loss": 0.0456, + "step": 32045 + }, + { + "epoch": 88.03846153846153, + "grad_norm": 31.115657806396484, + "learning_rate": 5.98076923076923e-06, + "loss": 0.4105, + "step": 32046 + }, + { + "epoch": 88.04120879120879, + "grad_norm": 11.491389274597168, + "learning_rate": 5.979395604395605e-06, + "loss": 0.116, + "step": 32047 + }, + { + "epoch": 88.04395604395604, + "grad_norm": 3.505725383758545, + "learning_rate": 5.978021978021978e-06, + "loss": 0.0372, + "step": 32048 + }, + { + "epoch": 88.0467032967033, + "grad_norm": 6.1104044914245605, + "learning_rate": 5.976648351648352e-06, + "loss": 0.098, + "step": 32049 + }, + { + "epoch": 88.04945054945055, + "grad_norm": 4.020818710327148, + "learning_rate": 5.975274725274726e-06, + "loss": 0.0537, + "step": 32050 + }, + { + "epoch": 88.0521978021978, + "grad_norm": 10.9242582321167, + "learning_rate": 5.973901098901099e-06, + "loss": 0.3528, + "step": 32051 + }, + { + "epoch": 88.05494505494505, + "grad_norm": 2.4341037273406982, + "learning_rate": 5.9725274725274725e-06, + "loss": 0.0247, + "step": 32052 + }, + { + "epoch": 88.0576923076923, + "grad_norm": 4.829850673675537, + "learning_rate": 5.971153846153847e-06, + "loss": 0.0463, + "step": 32053 + }, + { + "epoch": 88.06043956043956, + "grad_norm": 5.039707660675049, + "learning_rate": 5.96978021978022e-06, + "loss": 0.1181, + "step": 32054 + }, + { + "epoch": 88.06318681318682, + "grad_norm": 12.621789932250977, + "learning_rate": 5.968406593406594e-06, + "loss": 0.2859, + "step": 32055 + }, + { + "epoch": 88.06593406593407, + "grad_norm": 25.319242477416992, + "learning_rate": 5.967032967032967e-06, + "loss": 0.6999, + "step": 32056 + }, + { + "epoch": 88.06868131868131, + "grad_norm": 8.073473930358887, + "learning_rate": 5.965659340659341e-06, + "loss": 0.0961, + "step": 32057 + }, + { + "epoch": 88.07142857142857, + "grad_norm": 10.714339256286621, + "learning_rate": 5.964285714285715e-06, + "loss": 0.2527, + "step": 32058 + }, + { + "epoch": 88.07417582417582, + "grad_norm": 19.288494110107422, + "learning_rate": 5.962912087912088e-06, + "loss": 0.3627, + "step": 32059 + }, + { + "epoch": 88.07692307692308, + "grad_norm": 15.107023239135742, + "learning_rate": 5.961538461538462e-06, + "loss": 0.3875, + "step": 32060 + }, + { + "epoch": 88.07967032967034, + "grad_norm": 17.332256317138672, + "learning_rate": 5.960164835164835e-06, + "loss": 0.4523, + "step": 32061 + }, + { + "epoch": 88.08241758241758, + "grad_norm": 13.106880187988281, + "learning_rate": 5.958791208791209e-06, + "loss": 0.1743, + "step": 32062 + }, + { + "epoch": 88.08516483516483, + "grad_norm": 10.067440032958984, + "learning_rate": 5.9574175824175825e-06, + "loss": 0.0694, + "step": 32063 + }, + { + "epoch": 88.08791208791209, + "grad_norm": 3.595423936843872, + "learning_rate": 5.956043956043957e-06, + "loss": 0.0485, + "step": 32064 + }, + { + "epoch": 88.09065934065934, + "grad_norm": 0.7433201670646667, + "learning_rate": 5.954670329670329e-06, + "loss": 0.01, + "step": 32065 + }, + { + "epoch": 88.0934065934066, + "grad_norm": 8.910035133361816, + "learning_rate": 5.953296703296704e-06, + "loss": 0.1556, + "step": 32066 + }, + { + "epoch": 88.09615384615384, + "grad_norm": 4.987982273101807, + "learning_rate": 5.951923076923077e-06, + "loss": 0.0458, + "step": 32067 + }, + { + "epoch": 88.0989010989011, + "grad_norm": 14.641881942749023, + "learning_rate": 5.950549450549451e-06, + "loss": 0.4757, + "step": 32068 + }, + { + "epoch": 88.10164835164835, + "grad_norm": 5.359202861785889, + "learning_rate": 5.949175824175825e-06, + "loss": 0.1065, + "step": 32069 + }, + { + "epoch": 88.1043956043956, + "grad_norm": 9.221085548400879, + "learning_rate": 5.947802197802198e-06, + "loss": 0.092, + "step": 32070 + }, + { + "epoch": 88.10714285714286, + "grad_norm": 17.938234329223633, + "learning_rate": 5.9464285714285715e-06, + "loss": 0.576, + "step": 32071 + }, + { + "epoch": 88.10989010989012, + "grad_norm": 14.003509521484375, + "learning_rate": 5.945054945054945e-06, + "loss": 0.1683, + "step": 32072 + }, + { + "epoch": 88.11263736263736, + "grad_norm": 16.614105224609375, + "learning_rate": 5.943681318681319e-06, + "loss": 0.331, + "step": 32073 + }, + { + "epoch": 88.11538461538461, + "grad_norm": 13.613787651062012, + "learning_rate": 5.9423076923076926e-06, + "loss": 0.3312, + "step": 32074 + }, + { + "epoch": 88.11813186813187, + "grad_norm": 21.468576431274414, + "learning_rate": 5.940934065934066e-06, + "loss": 0.4861, + "step": 32075 + }, + { + "epoch": 88.12087912087912, + "grad_norm": 12.652914047241211, + "learning_rate": 5.939560439560439e-06, + "loss": 0.2602, + "step": 32076 + }, + { + "epoch": 88.12362637362638, + "grad_norm": 21.603073120117188, + "learning_rate": 5.938186813186814e-06, + "loss": 0.3298, + "step": 32077 + }, + { + "epoch": 88.12637362637362, + "grad_norm": 11.994595527648926, + "learning_rate": 5.936813186813187e-06, + "loss": 0.1045, + "step": 32078 + }, + { + "epoch": 88.12912087912088, + "grad_norm": 8.597113609313965, + "learning_rate": 5.935439560439561e-06, + "loss": 0.1476, + "step": 32079 + }, + { + "epoch": 88.13186813186813, + "grad_norm": 6.586208343505859, + "learning_rate": 5.934065934065934e-06, + "loss": 0.1012, + "step": 32080 + }, + { + "epoch": 88.13461538461539, + "grad_norm": 17.796228408813477, + "learning_rate": 5.932692307692308e-06, + "loss": 0.3324, + "step": 32081 + }, + { + "epoch": 88.13736263736264, + "grad_norm": 5.888850212097168, + "learning_rate": 5.9313186813186815e-06, + "loss": 0.0897, + "step": 32082 + }, + { + "epoch": 88.14010989010988, + "grad_norm": 3.132730484008789, + "learning_rate": 5.929945054945056e-06, + "loss": 0.0301, + "step": 32083 + }, + { + "epoch": 88.14285714285714, + "grad_norm": 8.089574813842773, + "learning_rate": 5.928571428571429e-06, + "loss": 0.1304, + "step": 32084 + }, + { + "epoch": 88.1456043956044, + "grad_norm": 9.895426750183105, + "learning_rate": 5.9271978021978026e-06, + "loss": 0.1006, + "step": 32085 + }, + { + "epoch": 88.14835164835165, + "grad_norm": 19.714664459228516, + "learning_rate": 5.925824175824176e-06, + "loss": 0.5795, + "step": 32086 + }, + { + "epoch": 88.1510989010989, + "grad_norm": 15.598278999328613, + "learning_rate": 5.924450549450549e-06, + "loss": 0.3048, + "step": 32087 + }, + { + "epoch": 88.15384615384616, + "grad_norm": 22.252628326416016, + "learning_rate": 5.923076923076924e-06, + "loss": 0.5902, + "step": 32088 + }, + { + "epoch": 88.1565934065934, + "grad_norm": 21.459945678710938, + "learning_rate": 5.921703296703297e-06, + "loss": 0.4599, + "step": 32089 + }, + { + "epoch": 88.15934065934066, + "grad_norm": 6.0456366539001465, + "learning_rate": 5.9203296703296705e-06, + "loss": 0.059, + "step": 32090 + }, + { + "epoch": 88.16208791208791, + "grad_norm": 11.032818794250488, + "learning_rate": 5.918956043956044e-06, + "loss": 0.0788, + "step": 32091 + }, + { + "epoch": 88.16483516483517, + "grad_norm": 9.732901573181152, + "learning_rate": 5.917582417582418e-06, + "loss": 0.1819, + "step": 32092 + }, + { + "epoch": 88.16758241758242, + "grad_norm": 9.550895690917969, + "learning_rate": 5.9162087912087915e-06, + "loss": 0.1111, + "step": 32093 + }, + { + "epoch": 88.17032967032966, + "grad_norm": 9.063617706298828, + "learning_rate": 5.914835164835165e-06, + "loss": 0.1862, + "step": 32094 + }, + { + "epoch": 88.17307692307692, + "grad_norm": 13.820181846618652, + "learning_rate": 5.913461538461538e-06, + "loss": 0.1812, + "step": 32095 + }, + { + "epoch": 88.17582417582418, + "grad_norm": 8.731914520263672, + "learning_rate": 5.912087912087913e-06, + "loss": 0.1082, + "step": 32096 + }, + { + "epoch": 88.17857142857143, + "grad_norm": 21.805662155151367, + "learning_rate": 5.910714285714286e-06, + "loss": 0.2999, + "step": 32097 + }, + { + "epoch": 88.18131868131869, + "grad_norm": 10.444795608520508, + "learning_rate": 5.90934065934066e-06, + "loss": 0.2684, + "step": 32098 + }, + { + "epoch": 88.18406593406593, + "grad_norm": 8.549100875854492, + "learning_rate": 5.907967032967033e-06, + "loss": 0.068, + "step": 32099 + }, + { + "epoch": 88.18681318681318, + "grad_norm": 22.597379684448242, + "learning_rate": 5.906593406593407e-06, + "loss": 0.4838, + "step": 32100 + }, + { + "epoch": 88.18956043956044, + "grad_norm": 17.052997589111328, + "learning_rate": 5.9052197802197805e-06, + "loss": 0.3647, + "step": 32101 + }, + { + "epoch": 88.1923076923077, + "grad_norm": 14.670404434204102, + "learning_rate": 5.903846153846154e-06, + "loss": 0.2695, + "step": 32102 + }, + { + "epoch": 88.19505494505495, + "grad_norm": 17.832551956176758, + "learning_rate": 5.902472527472528e-06, + "loss": 0.5219, + "step": 32103 + }, + { + "epoch": 88.1978021978022, + "grad_norm": 21.103561401367188, + "learning_rate": 5.901098901098901e-06, + "loss": 0.2943, + "step": 32104 + }, + { + "epoch": 88.20054945054945, + "grad_norm": 13.248165130615234, + "learning_rate": 5.899725274725275e-06, + "loss": 0.1979, + "step": 32105 + }, + { + "epoch": 88.2032967032967, + "grad_norm": 7.919636249542236, + "learning_rate": 5.898351648351648e-06, + "loss": 0.1286, + "step": 32106 + }, + { + "epoch": 88.20604395604396, + "grad_norm": 9.00396728515625, + "learning_rate": 5.896978021978023e-06, + "loss": 0.1284, + "step": 32107 + }, + { + "epoch": 88.20879120879121, + "grad_norm": 11.468343734741211, + "learning_rate": 5.895604395604396e-06, + "loss": 0.1949, + "step": 32108 + }, + { + "epoch": 88.21153846153847, + "grad_norm": 10.887521743774414, + "learning_rate": 5.894230769230769e-06, + "loss": 0.1612, + "step": 32109 + }, + { + "epoch": 88.21428571428571, + "grad_norm": 15.101058959960938, + "learning_rate": 5.892857142857143e-06, + "loss": 0.2711, + "step": 32110 + }, + { + "epoch": 88.21703296703296, + "grad_norm": 8.368209838867188, + "learning_rate": 5.891483516483517e-06, + "loss": 0.0577, + "step": 32111 + }, + { + "epoch": 88.21978021978022, + "grad_norm": 12.71682357788086, + "learning_rate": 5.8901098901098905e-06, + "loss": 0.1818, + "step": 32112 + }, + { + "epoch": 88.22252747252747, + "grad_norm": 19.92816925048828, + "learning_rate": 5.888736263736265e-06, + "loss": 0.2767, + "step": 32113 + }, + { + "epoch": 88.22527472527473, + "grad_norm": 17.776166915893555, + "learning_rate": 5.887362637362637e-06, + "loss": 0.2484, + "step": 32114 + }, + { + "epoch": 88.22802197802197, + "grad_norm": 5.335463047027588, + "learning_rate": 5.8859890109890116e-06, + "loss": 0.0489, + "step": 32115 + }, + { + "epoch": 88.23076923076923, + "grad_norm": 12.656314849853516, + "learning_rate": 5.884615384615385e-06, + "loss": 0.1765, + "step": 32116 + }, + { + "epoch": 88.23351648351648, + "grad_norm": 10.003867149353027, + "learning_rate": 5.883241758241758e-06, + "loss": 0.1089, + "step": 32117 + }, + { + "epoch": 88.23626373626374, + "grad_norm": 13.407795906066895, + "learning_rate": 5.881868131868133e-06, + "loss": 0.2257, + "step": 32118 + }, + { + "epoch": 88.23901098901099, + "grad_norm": 9.44582748413086, + "learning_rate": 5.880494505494505e-06, + "loss": 0.1283, + "step": 32119 + }, + { + "epoch": 88.24175824175825, + "grad_norm": 7.023289680480957, + "learning_rate": 5.8791208791208794e-06, + "loss": 0.0723, + "step": 32120 + }, + { + "epoch": 88.24450549450549, + "grad_norm": 10.20618724822998, + "learning_rate": 5.877747252747253e-06, + "loss": 0.0433, + "step": 32121 + }, + { + "epoch": 88.24725274725274, + "grad_norm": 12.189908981323242, + "learning_rate": 5.876373626373627e-06, + "loss": 0.1628, + "step": 32122 + }, + { + "epoch": 88.25, + "grad_norm": 3.06361985206604, + "learning_rate": 5.875e-06, + "loss": 0.0271, + "step": 32123 + }, + { + "epoch": 88.25274725274726, + "grad_norm": 14.562618255615234, + "learning_rate": 5.873626373626374e-06, + "loss": 0.2875, + "step": 32124 + }, + { + "epoch": 88.25549450549451, + "grad_norm": 19.109600067138672, + "learning_rate": 5.872252747252747e-06, + "loss": 0.2192, + "step": 32125 + }, + { + "epoch": 88.25824175824175, + "grad_norm": 8.64731216430664, + "learning_rate": 5.8708791208791216e-06, + "loss": 0.0964, + "step": 32126 + }, + { + "epoch": 88.26098901098901, + "grad_norm": 8.789992332458496, + "learning_rate": 5.869505494505495e-06, + "loss": 0.0956, + "step": 32127 + }, + { + "epoch": 88.26373626373626, + "grad_norm": 25.98282241821289, + "learning_rate": 5.868131868131868e-06, + "loss": 0.6424, + "step": 32128 + }, + { + "epoch": 88.26648351648352, + "grad_norm": 5.781885623931885, + "learning_rate": 5.866758241758242e-06, + "loss": 0.0502, + "step": 32129 + }, + { + "epoch": 88.26923076923077, + "grad_norm": 8.51975154876709, + "learning_rate": 5.865384615384616e-06, + "loss": 0.1277, + "step": 32130 + }, + { + "epoch": 88.27197802197803, + "grad_norm": 17.00385093688965, + "learning_rate": 5.8640109890109895e-06, + "loss": 0.4608, + "step": 32131 + }, + { + "epoch": 88.27472527472527, + "grad_norm": 11.692742347717285, + "learning_rate": 5.862637362637363e-06, + "loss": 0.3578, + "step": 32132 + }, + { + "epoch": 88.27747252747253, + "grad_norm": 15.979767799377441, + "learning_rate": 5.861263736263736e-06, + "loss": 0.1927, + "step": 32133 + }, + { + "epoch": 88.28021978021978, + "grad_norm": 6.052129745483398, + "learning_rate": 5.85989010989011e-06, + "loss": 0.0808, + "step": 32134 + }, + { + "epoch": 88.28296703296704, + "grad_norm": 7.108026027679443, + "learning_rate": 5.858516483516484e-06, + "loss": 0.0956, + "step": 32135 + }, + { + "epoch": 88.28571428571429, + "grad_norm": 10.645745277404785, + "learning_rate": 5.857142857142857e-06, + "loss": 0.1768, + "step": 32136 + }, + { + "epoch": 88.28846153846153, + "grad_norm": 9.923428535461426, + "learning_rate": 5.855769230769232e-06, + "loss": 0.1107, + "step": 32137 + }, + { + "epoch": 88.29120879120879, + "grad_norm": 21.567617416381836, + "learning_rate": 5.854395604395604e-06, + "loss": 0.4643, + "step": 32138 + }, + { + "epoch": 88.29395604395604, + "grad_norm": 22.84998321533203, + "learning_rate": 5.853021978021978e-06, + "loss": 0.4959, + "step": 32139 + }, + { + "epoch": 88.2967032967033, + "grad_norm": 5.942764759063721, + "learning_rate": 5.851648351648352e-06, + "loss": 0.146, + "step": 32140 + }, + { + "epoch": 88.29945054945055, + "grad_norm": 12.078466415405273, + "learning_rate": 5.850274725274726e-06, + "loss": 0.1749, + "step": 32141 + }, + { + "epoch": 88.3021978021978, + "grad_norm": 21.082849502563477, + "learning_rate": 5.8489010989010995e-06, + "loss": 0.3027, + "step": 32142 + }, + { + "epoch": 88.30494505494505, + "grad_norm": 18.131134033203125, + "learning_rate": 5.847527472527473e-06, + "loss": 0.4057, + "step": 32143 + }, + { + "epoch": 88.3076923076923, + "grad_norm": 13.074705123901367, + "learning_rate": 5.846153846153846e-06, + "loss": 0.1161, + "step": 32144 + }, + { + "epoch": 88.31043956043956, + "grad_norm": 18.26495361328125, + "learning_rate": 5.8447802197802205e-06, + "loss": 0.3326, + "step": 32145 + }, + { + "epoch": 88.31318681318682, + "grad_norm": 9.486125946044922, + "learning_rate": 5.843406593406594e-06, + "loss": 0.1748, + "step": 32146 + }, + { + "epoch": 88.31593406593407, + "grad_norm": 10.662690162658691, + "learning_rate": 5.842032967032967e-06, + "loss": 0.1705, + "step": 32147 + }, + { + "epoch": 88.31868131868131, + "grad_norm": 3.5287423133850098, + "learning_rate": 5.840659340659341e-06, + "loss": 0.0344, + "step": 32148 + }, + { + "epoch": 88.32142857142857, + "grad_norm": 17.022014617919922, + "learning_rate": 5.839285714285714e-06, + "loss": 0.2439, + "step": 32149 + }, + { + "epoch": 88.32417582417582, + "grad_norm": 10.512726783752441, + "learning_rate": 5.837912087912088e-06, + "loss": 0.255, + "step": 32150 + }, + { + "epoch": 88.32692307692308, + "grad_norm": 7.175230979919434, + "learning_rate": 5.836538461538462e-06, + "loss": 0.0965, + "step": 32151 + }, + { + "epoch": 88.32967032967034, + "grad_norm": 6.60355281829834, + "learning_rate": 5.835164835164835e-06, + "loss": 0.1738, + "step": 32152 + }, + { + "epoch": 88.33241758241758, + "grad_norm": 8.449304580688477, + "learning_rate": 5.833791208791209e-06, + "loss": 0.0787, + "step": 32153 + }, + { + "epoch": 88.33516483516483, + "grad_norm": 6.71054220199585, + "learning_rate": 5.832417582417583e-06, + "loss": 0.1702, + "step": 32154 + }, + { + "epoch": 88.33791208791209, + "grad_norm": 18.76014518737793, + "learning_rate": 5.831043956043956e-06, + "loss": 0.465, + "step": 32155 + }, + { + "epoch": 88.34065934065934, + "grad_norm": 7.263983249664307, + "learning_rate": 5.8296703296703306e-06, + "loss": 0.0748, + "step": 32156 + }, + { + "epoch": 88.3434065934066, + "grad_norm": 4.867333889007568, + "learning_rate": 5.828296703296703e-06, + "loss": 0.0859, + "step": 32157 + }, + { + "epoch": 88.34615384615384, + "grad_norm": 19.11170768737793, + "learning_rate": 5.826923076923077e-06, + "loss": 0.2773, + "step": 32158 + }, + { + "epoch": 88.3489010989011, + "grad_norm": 12.31559944152832, + "learning_rate": 5.825549450549451e-06, + "loss": 0.2693, + "step": 32159 + }, + { + "epoch": 88.35164835164835, + "grad_norm": 7.762775897979736, + "learning_rate": 5.824175824175824e-06, + "loss": 0.0651, + "step": 32160 + }, + { + "epoch": 88.3543956043956, + "grad_norm": 19.318405151367188, + "learning_rate": 5.8228021978021984e-06, + "loss": 0.4166, + "step": 32161 + }, + { + "epoch": 88.35714285714286, + "grad_norm": 16.250835418701172, + "learning_rate": 5.821428571428572e-06, + "loss": 0.3875, + "step": 32162 + }, + { + "epoch": 88.35989010989012, + "grad_norm": 4.874717712402344, + "learning_rate": 5.820054945054945e-06, + "loss": 0.0549, + "step": 32163 + }, + { + "epoch": 88.36263736263736, + "grad_norm": 22.073671340942383, + "learning_rate": 5.818681318681319e-06, + "loss": 0.4518, + "step": 32164 + }, + { + "epoch": 88.36538461538461, + "grad_norm": 16.251802444458008, + "learning_rate": 5.817307692307693e-06, + "loss": 0.3778, + "step": 32165 + }, + { + "epoch": 88.36813186813187, + "grad_norm": 10.85470199584961, + "learning_rate": 5.815934065934066e-06, + "loss": 0.1549, + "step": 32166 + }, + { + "epoch": 88.37087912087912, + "grad_norm": 22.853775024414062, + "learning_rate": 5.81456043956044e-06, + "loss": 0.4035, + "step": 32167 + }, + { + "epoch": 88.37362637362638, + "grad_norm": 14.658513069152832, + "learning_rate": 5.813186813186813e-06, + "loss": 0.1611, + "step": 32168 + }, + { + "epoch": 88.37637362637362, + "grad_norm": 12.01486587524414, + "learning_rate": 5.811813186813187e-06, + "loss": 0.1811, + "step": 32169 + }, + { + "epoch": 88.37912087912088, + "grad_norm": 19.183143615722656, + "learning_rate": 5.810439560439561e-06, + "loss": 0.3203, + "step": 32170 + }, + { + "epoch": 88.38186813186813, + "grad_norm": 5.130391597747803, + "learning_rate": 5.809065934065935e-06, + "loss": 0.0441, + "step": 32171 + }, + { + "epoch": 88.38461538461539, + "grad_norm": 11.303186416625977, + "learning_rate": 5.807692307692308e-06, + "loss": 0.1524, + "step": 32172 + }, + { + "epoch": 88.38736263736264, + "grad_norm": 27.251628875732422, + "learning_rate": 5.806318681318682e-06, + "loss": 0.6365, + "step": 32173 + }, + { + "epoch": 88.39010989010988, + "grad_norm": 25.092973709106445, + "learning_rate": 5.804945054945055e-06, + "loss": 0.4592, + "step": 32174 + }, + { + "epoch": 88.39285714285714, + "grad_norm": 8.472820281982422, + "learning_rate": 5.803571428571429e-06, + "loss": 0.123, + "step": 32175 + }, + { + "epoch": 88.3956043956044, + "grad_norm": 26.256460189819336, + "learning_rate": 5.802197802197803e-06, + "loss": 0.4382, + "step": 32176 + }, + { + "epoch": 88.39835164835165, + "grad_norm": 1.9187928438186646, + "learning_rate": 5.8008241758241755e-06, + "loss": 0.0207, + "step": 32177 + }, + { + "epoch": 88.4010989010989, + "grad_norm": 2.718629837036133, + "learning_rate": 5.79945054945055e-06, + "loss": 0.0169, + "step": 32178 + }, + { + "epoch": 88.40384615384616, + "grad_norm": 5.920636177062988, + "learning_rate": 5.798076923076923e-06, + "loss": 0.1226, + "step": 32179 + }, + { + "epoch": 88.4065934065934, + "grad_norm": 14.299144744873047, + "learning_rate": 5.796703296703297e-06, + "loss": 0.3048, + "step": 32180 + }, + { + "epoch": 88.40934065934066, + "grad_norm": 17.945329666137695, + "learning_rate": 5.79532967032967e-06, + "loss": 0.2738, + "step": 32181 + }, + { + "epoch": 88.41208791208791, + "grad_norm": 10.013057708740234, + "learning_rate": 5.793956043956044e-06, + "loss": 0.1289, + "step": 32182 + }, + { + "epoch": 88.41483516483517, + "grad_norm": 16.763822555541992, + "learning_rate": 5.792582417582418e-06, + "loss": 0.1761, + "step": 32183 + }, + { + "epoch": 88.41758241758242, + "grad_norm": 22.786165237426758, + "learning_rate": 5.791208791208792e-06, + "loss": 0.7982, + "step": 32184 + }, + { + "epoch": 88.42032967032966, + "grad_norm": 9.446374893188477, + "learning_rate": 5.789835164835165e-06, + "loss": 0.1074, + "step": 32185 + }, + { + "epoch": 88.42307692307692, + "grad_norm": 19.03968620300293, + "learning_rate": 5.788461538461539e-06, + "loss": 0.4578, + "step": 32186 + }, + { + "epoch": 88.42582417582418, + "grad_norm": 5.755678653717041, + "learning_rate": 5.787087912087912e-06, + "loss": 0.0785, + "step": 32187 + }, + { + "epoch": 88.42857142857143, + "grad_norm": 12.425384521484375, + "learning_rate": 5.785714285714286e-06, + "loss": 0.1281, + "step": 32188 + }, + { + "epoch": 88.43131868131869, + "grad_norm": 25.984554290771484, + "learning_rate": 5.78434065934066e-06, + "loss": 0.3377, + "step": 32189 + }, + { + "epoch": 88.43406593406593, + "grad_norm": 1.620384693145752, + "learning_rate": 5.782967032967033e-06, + "loss": 0.02, + "step": 32190 + }, + { + "epoch": 88.43681318681318, + "grad_norm": 10.796785354614258, + "learning_rate": 5.7815934065934066e-06, + "loss": 0.1409, + "step": 32191 + }, + { + "epoch": 88.43956043956044, + "grad_norm": 1.118951439857483, + "learning_rate": 5.78021978021978e-06, + "loss": 0.0118, + "step": 32192 + }, + { + "epoch": 88.4423076923077, + "grad_norm": 4.623355865478516, + "learning_rate": 5.778846153846154e-06, + "loss": 0.1043, + "step": 32193 + }, + { + "epoch": 88.44505494505495, + "grad_norm": 20.248804092407227, + "learning_rate": 5.777472527472528e-06, + "loss": 0.3445, + "step": 32194 + }, + { + "epoch": 88.4478021978022, + "grad_norm": 11.790770530700684, + "learning_rate": 5.776098901098902e-06, + "loss": 0.3192, + "step": 32195 + }, + { + "epoch": 88.45054945054945, + "grad_norm": 24.065763473510742, + "learning_rate": 5.7747252747252744e-06, + "loss": 0.6172, + "step": 32196 + }, + { + "epoch": 88.4532967032967, + "grad_norm": 9.940165519714355, + "learning_rate": 5.773351648351649e-06, + "loss": 0.0926, + "step": 32197 + }, + { + "epoch": 88.45604395604396, + "grad_norm": 16.171234130859375, + "learning_rate": 5.771978021978022e-06, + "loss": 0.389, + "step": 32198 + }, + { + "epoch": 88.45879120879121, + "grad_norm": 13.516109466552734, + "learning_rate": 5.770604395604396e-06, + "loss": 0.1766, + "step": 32199 + }, + { + "epoch": 88.46153846153847, + "grad_norm": 15.830169677734375, + "learning_rate": 5.76923076923077e-06, + "loss": 0.4139, + "step": 32200 + }, + { + "epoch": 88.46428571428571, + "grad_norm": 4.577735424041748, + "learning_rate": 5.767857142857143e-06, + "loss": 0.0572, + "step": 32201 + }, + { + "epoch": 88.46703296703296, + "grad_norm": 9.133548736572266, + "learning_rate": 5.766483516483517e-06, + "loss": 0.0799, + "step": 32202 + }, + { + "epoch": 88.46978021978022, + "grad_norm": 14.11489486694336, + "learning_rate": 5.765109890109891e-06, + "loss": 0.2063, + "step": 32203 + }, + { + "epoch": 88.47252747252747, + "grad_norm": 10.497228622436523, + "learning_rate": 5.763736263736264e-06, + "loss": 0.3031, + "step": 32204 + }, + { + "epoch": 88.47527472527473, + "grad_norm": 26.619354248046875, + "learning_rate": 5.762362637362638e-06, + "loss": 0.6134, + "step": 32205 + }, + { + "epoch": 88.47802197802197, + "grad_norm": 5.8268585205078125, + "learning_rate": 5.760989010989011e-06, + "loss": 0.0478, + "step": 32206 + }, + { + "epoch": 88.48076923076923, + "grad_norm": 20.320598602294922, + "learning_rate": 5.7596153846153845e-06, + "loss": 0.3394, + "step": 32207 + }, + { + "epoch": 88.48351648351648, + "grad_norm": 14.154009819030762, + "learning_rate": 5.758241758241759e-06, + "loss": 0.2143, + "step": 32208 + }, + { + "epoch": 88.48626373626374, + "grad_norm": 3.4348561763763428, + "learning_rate": 5.756868131868132e-06, + "loss": 0.0299, + "step": 32209 + }, + { + "epoch": 88.48901098901099, + "grad_norm": 10.394248962402344, + "learning_rate": 5.7554945054945055e-06, + "loss": 0.1938, + "step": 32210 + }, + { + "epoch": 88.49175824175825, + "grad_norm": 17.053512573242188, + "learning_rate": 5.754120879120879e-06, + "loss": 0.3247, + "step": 32211 + }, + { + "epoch": 88.49450549450549, + "grad_norm": 14.509967803955078, + "learning_rate": 5.752747252747253e-06, + "loss": 0.4444, + "step": 32212 + }, + { + "epoch": 88.49725274725274, + "grad_norm": 23.60643196105957, + "learning_rate": 5.751373626373627e-06, + "loss": 0.2514, + "step": 32213 + }, + { + "epoch": 88.5, + "grad_norm": 19.761180877685547, + "learning_rate": 5.750000000000001e-06, + "loss": 0.3266, + "step": 32214 + }, + { + "epoch": 88.50274725274726, + "grad_norm": 2.3390953540802, + "learning_rate": 5.748626373626373e-06, + "loss": 0.0274, + "step": 32215 + }, + { + "epoch": 88.50549450549451, + "grad_norm": 9.539052963256836, + "learning_rate": 5.747252747252748e-06, + "loss": 0.1933, + "step": 32216 + }, + { + "epoch": 88.50824175824175, + "grad_norm": 14.459539413452148, + "learning_rate": 5.745879120879121e-06, + "loss": 0.1151, + "step": 32217 + }, + { + "epoch": 88.51098901098901, + "grad_norm": 16.081220626831055, + "learning_rate": 5.744505494505495e-06, + "loss": 0.1942, + "step": 32218 + }, + { + "epoch": 88.51373626373626, + "grad_norm": 13.184755325317383, + "learning_rate": 5.743131868131869e-06, + "loss": 0.2529, + "step": 32219 + }, + { + "epoch": 88.51648351648352, + "grad_norm": 18.9091796875, + "learning_rate": 5.741758241758242e-06, + "loss": 0.4513, + "step": 32220 + }, + { + "epoch": 88.51923076923077, + "grad_norm": 16.460773468017578, + "learning_rate": 5.7403846153846155e-06, + "loss": 0.2448, + "step": 32221 + }, + { + "epoch": 88.52197802197803, + "grad_norm": 7.988648414611816, + "learning_rate": 5.739010989010989e-06, + "loss": 0.0985, + "step": 32222 + }, + { + "epoch": 88.52472527472527, + "grad_norm": 17.939008712768555, + "learning_rate": 5.737637362637363e-06, + "loss": 0.2861, + "step": 32223 + }, + { + "epoch": 88.52747252747253, + "grad_norm": 16.696210861206055, + "learning_rate": 5.736263736263737e-06, + "loss": 0.3372, + "step": 32224 + }, + { + "epoch": 88.53021978021978, + "grad_norm": 1.963826060295105, + "learning_rate": 5.73489010989011e-06, + "loss": 0.0271, + "step": 32225 + }, + { + "epoch": 88.53296703296704, + "grad_norm": 15.121479034423828, + "learning_rate": 5.7335164835164834e-06, + "loss": 0.3829, + "step": 32226 + }, + { + "epoch": 88.53571428571429, + "grad_norm": 28.772409439086914, + "learning_rate": 5.732142857142858e-06, + "loss": 0.5235, + "step": 32227 + }, + { + "epoch": 88.53846153846153, + "grad_norm": 8.434798240661621, + "learning_rate": 5.730769230769231e-06, + "loss": 0.0961, + "step": 32228 + }, + { + "epoch": 88.54120879120879, + "grad_norm": 23.547588348388672, + "learning_rate": 5.729395604395605e-06, + "loss": 1.0734, + "step": 32229 + }, + { + "epoch": 88.54395604395604, + "grad_norm": 17.68903160095215, + "learning_rate": 5.728021978021978e-06, + "loss": 0.3319, + "step": 32230 + }, + { + "epoch": 88.5467032967033, + "grad_norm": 15.823779106140137, + "learning_rate": 5.726648351648352e-06, + "loss": 0.2009, + "step": 32231 + }, + { + "epoch": 88.54945054945055, + "grad_norm": 15.068875312805176, + "learning_rate": 5.7252747252747256e-06, + "loss": 0.3966, + "step": 32232 + }, + { + "epoch": 88.5521978021978, + "grad_norm": 11.945586204528809, + "learning_rate": 5.723901098901099e-06, + "loss": 0.217, + "step": 32233 + }, + { + "epoch": 88.55494505494505, + "grad_norm": 10.12678337097168, + "learning_rate": 5.722527472527472e-06, + "loss": 0.205, + "step": 32234 + }, + { + "epoch": 88.5576923076923, + "grad_norm": 11.218369483947754, + "learning_rate": 5.721153846153847e-06, + "loss": 0.2491, + "step": 32235 + }, + { + "epoch": 88.56043956043956, + "grad_norm": 9.372091293334961, + "learning_rate": 5.71978021978022e-06, + "loss": 0.1443, + "step": 32236 + }, + { + "epoch": 88.56318681318682, + "grad_norm": 13.06641960144043, + "learning_rate": 5.7184065934065934e-06, + "loss": 0.1717, + "step": 32237 + }, + { + "epoch": 88.56593406593407, + "grad_norm": 12.9103422164917, + "learning_rate": 5.717032967032968e-06, + "loss": 0.1692, + "step": 32238 + }, + { + "epoch": 88.56868131868131, + "grad_norm": 6.016487121582031, + "learning_rate": 5.71565934065934e-06, + "loss": 0.1478, + "step": 32239 + }, + { + "epoch": 88.57142857142857, + "grad_norm": 0.5362880825996399, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.0067, + "step": 32240 + }, + { + "epoch": 88.57417582417582, + "grad_norm": 16.9812068939209, + "learning_rate": 5.712912087912088e-06, + "loss": 0.3476, + "step": 32241 + }, + { + "epoch": 88.57692307692308, + "grad_norm": 5.517860412597656, + "learning_rate": 5.711538461538462e-06, + "loss": 0.0731, + "step": 32242 + }, + { + "epoch": 88.57967032967034, + "grad_norm": 3.9267351627349854, + "learning_rate": 5.710164835164836e-06, + "loss": 0.0401, + "step": 32243 + }, + { + "epoch": 88.58241758241758, + "grad_norm": 4.407007694244385, + "learning_rate": 5.708791208791209e-06, + "loss": 0.0427, + "step": 32244 + }, + { + "epoch": 88.58516483516483, + "grad_norm": 8.283038139343262, + "learning_rate": 5.707417582417582e-06, + "loss": 0.2123, + "step": 32245 + }, + { + "epoch": 88.58791208791209, + "grad_norm": 9.74990463256836, + "learning_rate": 5.706043956043957e-06, + "loss": 0.2177, + "step": 32246 + }, + { + "epoch": 88.59065934065934, + "grad_norm": 10.858975410461426, + "learning_rate": 5.70467032967033e-06, + "loss": 0.1388, + "step": 32247 + }, + { + "epoch": 88.5934065934066, + "grad_norm": 13.302351951599121, + "learning_rate": 5.7032967032967035e-06, + "loss": 0.1656, + "step": 32248 + }, + { + "epoch": 88.59615384615384, + "grad_norm": 20.804964065551758, + "learning_rate": 5.701923076923077e-06, + "loss": 0.3827, + "step": 32249 + }, + { + "epoch": 88.5989010989011, + "grad_norm": 23.364904403686523, + "learning_rate": 5.70054945054945e-06, + "loss": 0.6376, + "step": 32250 + }, + { + "epoch": 88.60164835164835, + "grad_norm": 17.709762573242188, + "learning_rate": 5.6991758241758245e-06, + "loss": 0.3527, + "step": 32251 + }, + { + "epoch": 88.6043956043956, + "grad_norm": 11.612907409667969, + "learning_rate": 5.697802197802198e-06, + "loss": 0.0963, + "step": 32252 + }, + { + "epoch": 88.60714285714286, + "grad_norm": 10.305887222290039, + "learning_rate": 5.696428571428572e-06, + "loss": 0.1222, + "step": 32253 + }, + { + "epoch": 88.60989010989012, + "grad_norm": 9.84564208984375, + "learning_rate": 5.695054945054945e-06, + "loss": 0.1296, + "step": 32254 + }, + { + "epoch": 88.61263736263736, + "grad_norm": 14.13952350616455, + "learning_rate": 5.693681318681319e-06, + "loss": 0.2082, + "step": 32255 + }, + { + "epoch": 88.61538461538461, + "grad_norm": 18.485240936279297, + "learning_rate": 5.692307692307692e-06, + "loss": 0.2956, + "step": 32256 + }, + { + "epoch": 88.61813186813187, + "grad_norm": 6.838532447814941, + "learning_rate": 5.690934065934067e-06, + "loss": 0.1252, + "step": 32257 + }, + { + "epoch": 88.62087912087912, + "grad_norm": 11.612611770629883, + "learning_rate": 5.68956043956044e-06, + "loss": 0.4302, + "step": 32258 + }, + { + "epoch": 88.62362637362638, + "grad_norm": 6.950704097747803, + "learning_rate": 5.6881868131868135e-06, + "loss": 0.0967, + "step": 32259 + }, + { + "epoch": 88.62637362637362, + "grad_norm": 12.136797904968262, + "learning_rate": 5.686813186813187e-06, + "loss": 0.2517, + "step": 32260 + }, + { + "epoch": 88.62912087912088, + "grad_norm": 2.2109293937683105, + "learning_rate": 5.685439560439561e-06, + "loss": 0.0244, + "step": 32261 + }, + { + "epoch": 88.63186813186813, + "grad_norm": 6.380423545837402, + "learning_rate": 5.6840659340659345e-06, + "loss": 0.1286, + "step": 32262 + }, + { + "epoch": 88.63461538461539, + "grad_norm": 12.332697868347168, + "learning_rate": 5.682692307692308e-06, + "loss": 0.3208, + "step": 32263 + }, + { + "epoch": 88.63736263736264, + "grad_norm": 6.446285247802734, + "learning_rate": 5.681318681318681e-06, + "loss": 0.1071, + "step": 32264 + }, + { + "epoch": 88.64010989010988, + "grad_norm": 5.536640644073486, + "learning_rate": 5.679945054945055e-06, + "loss": 0.0796, + "step": 32265 + }, + { + "epoch": 88.64285714285714, + "grad_norm": 19.183862686157227, + "learning_rate": 5.678571428571429e-06, + "loss": 0.4217, + "step": 32266 + }, + { + "epoch": 88.6456043956044, + "grad_norm": 7.220212936401367, + "learning_rate": 5.6771978021978024e-06, + "loss": 0.1088, + "step": 32267 + }, + { + "epoch": 88.64835164835165, + "grad_norm": 12.335994720458984, + "learning_rate": 5.675824175824176e-06, + "loss": 0.2524, + "step": 32268 + }, + { + "epoch": 88.6510989010989, + "grad_norm": 16.12173080444336, + "learning_rate": 5.674450549450549e-06, + "loss": 0.3087, + "step": 32269 + }, + { + "epoch": 88.65384615384616, + "grad_norm": 6.390649318695068, + "learning_rate": 5.6730769230769235e-06, + "loss": 0.0687, + "step": 32270 + }, + { + "epoch": 88.6565934065934, + "grad_norm": 13.861452102661133, + "learning_rate": 5.671703296703297e-06, + "loss": 0.1255, + "step": 32271 + }, + { + "epoch": 88.65934065934066, + "grad_norm": 17.361703872680664, + "learning_rate": 5.670329670329671e-06, + "loss": 0.4506, + "step": 32272 + }, + { + "epoch": 88.66208791208791, + "grad_norm": 5.429934024810791, + "learning_rate": 5.668956043956044e-06, + "loss": 0.0623, + "step": 32273 + }, + { + "epoch": 88.66483516483517, + "grad_norm": 2.51250958442688, + "learning_rate": 5.667582417582418e-06, + "loss": 0.0327, + "step": 32274 + }, + { + "epoch": 88.66758241758242, + "grad_norm": 15.848457336425781, + "learning_rate": 5.666208791208791e-06, + "loss": 0.4477, + "step": 32275 + }, + { + "epoch": 88.67032967032966, + "grad_norm": 14.363801002502441, + "learning_rate": 5.664835164835166e-06, + "loss": 0.3361, + "step": 32276 + }, + { + "epoch": 88.67307692307692, + "grad_norm": 12.955278396606445, + "learning_rate": 5.663461538461539e-06, + "loss": 0.3151, + "step": 32277 + }, + { + "epoch": 88.67582417582418, + "grad_norm": 26.346050262451172, + "learning_rate": 5.6620879120879124e-06, + "loss": 0.5665, + "step": 32278 + }, + { + "epoch": 88.67857142857143, + "grad_norm": 13.350541114807129, + "learning_rate": 5.660714285714286e-06, + "loss": 0.1426, + "step": 32279 + }, + { + "epoch": 88.68131868131869, + "grad_norm": 10.310678482055664, + "learning_rate": 5.659340659340659e-06, + "loss": 0.2513, + "step": 32280 + }, + { + "epoch": 88.68406593406593, + "grad_norm": 6.998711585998535, + "learning_rate": 5.6579670329670335e-06, + "loss": 0.1012, + "step": 32281 + }, + { + "epoch": 88.68681318681318, + "grad_norm": 13.235652923583984, + "learning_rate": 5.656593406593407e-06, + "loss": 0.1396, + "step": 32282 + }, + { + "epoch": 88.68956043956044, + "grad_norm": 14.500519752502441, + "learning_rate": 5.65521978021978e-06, + "loss": 0.2267, + "step": 32283 + }, + { + "epoch": 88.6923076923077, + "grad_norm": 3.368234872817993, + "learning_rate": 5.653846153846154e-06, + "loss": 0.0254, + "step": 32284 + }, + { + "epoch": 88.69505494505495, + "grad_norm": 7.5263543128967285, + "learning_rate": 5.652472527472528e-06, + "loss": 0.1484, + "step": 32285 + }, + { + "epoch": 88.6978021978022, + "grad_norm": 5.668903350830078, + "learning_rate": 5.651098901098901e-06, + "loss": 0.0855, + "step": 32286 + }, + { + "epoch": 88.70054945054945, + "grad_norm": 32.992855072021484, + "learning_rate": 5.649725274725276e-06, + "loss": 0.9754, + "step": 32287 + }, + { + "epoch": 88.7032967032967, + "grad_norm": 10.498006820678711, + "learning_rate": 5.648351648351648e-06, + "loss": 0.256, + "step": 32288 + }, + { + "epoch": 88.70604395604396, + "grad_norm": 2.7627713680267334, + "learning_rate": 5.6469780219780225e-06, + "loss": 0.041, + "step": 32289 + }, + { + "epoch": 88.70879120879121, + "grad_norm": 13.60184097290039, + "learning_rate": 5.645604395604396e-06, + "loss": 0.2066, + "step": 32290 + }, + { + "epoch": 88.71153846153847, + "grad_norm": 19.685272216796875, + "learning_rate": 5.64423076923077e-06, + "loss": 0.2398, + "step": 32291 + }, + { + "epoch": 88.71428571428571, + "grad_norm": 9.610630989074707, + "learning_rate": 5.642857142857143e-06, + "loss": 0.1169, + "step": 32292 + }, + { + "epoch": 88.71703296703296, + "grad_norm": 15.001116752624512, + "learning_rate": 5.641483516483517e-06, + "loss": 0.2082, + "step": 32293 + }, + { + "epoch": 88.71978021978022, + "grad_norm": 11.247815132141113, + "learning_rate": 5.64010989010989e-06, + "loss": 0.1427, + "step": 32294 + }, + { + "epoch": 88.72252747252747, + "grad_norm": 7.9950385093688965, + "learning_rate": 5.638736263736264e-06, + "loss": 0.1672, + "step": 32295 + }, + { + "epoch": 88.72527472527473, + "grad_norm": 6.18699312210083, + "learning_rate": 5.637362637362638e-06, + "loss": 0.0726, + "step": 32296 + }, + { + "epoch": 88.72802197802197, + "grad_norm": 14.656978607177734, + "learning_rate": 5.6359890109890106e-06, + "loss": 0.2131, + "step": 32297 + }, + { + "epoch": 88.73076923076923, + "grad_norm": 3.399268627166748, + "learning_rate": 5.634615384615385e-06, + "loss": 0.094, + "step": 32298 + }, + { + "epoch": 88.73351648351648, + "grad_norm": 6.477850437164307, + "learning_rate": 5.633241758241758e-06, + "loss": 0.1065, + "step": 32299 + }, + { + "epoch": 88.73626373626374, + "grad_norm": 9.211129188537598, + "learning_rate": 5.6318681318681325e-06, + "loss": 0.1691, + "step": 32300 + }, + { + "epoch": 88.73901098901099, + "grad_norm": 5.817075252532959, + "learning_rate": 5.630494505494506e-06, + "loss": 0.0696, + "step": 32301 + }, + { + "epoch": 88.74175824175825, + "grad_norm": 3.220918893814087, + "learning_rate": 5.629120879120879e-06, + "loss": 0.0481, + "step": 32302 + }, + { + "epoch": 88.74450549450549, + "grad_norm": 6.994171619415283, + "learning_rate": 5.627747252747253e-06, + "loss": 0.1394, + "step": 32303 + }, + { + "epoch": 88.74725274725274, + "grad_norm": 7.666689395904541, + "learning_rate": 5.626373626373627e-06, + "loss": 0.1254, + "step": 32304 + }, + { + "epoch": 88.75, + "grad_norm": 19.142053604125977, + "learning_rate": 5.625e-06, + "loss": 0.2334, + "step": 32305 + }, + { + "epoch": 88.75274725274726, + "grad_norm": 10.201416969299316, + "learning_rate": 5.623626373626374e-06, + "loss": 0.1654, + "step": 32306 + }, + { + "epoch": 88.75549450549451, + "grad_norm": 9.746310234069824, + "learning_rate": 5.622252747252747e-06, + "loss": 0.2628, + "step": 32307 + }, + { + "epoch": 88.75824175824175, + "grad_norm": 19.103796005249023, + "learning_rate": 5.620879120879121e-06, + "loss": 0.1994, + "step": 32308 + }, + { + "epoch": 88.76098901098901, + "grad_norm": 4.051782608032227, + "learning_rate": 5.619505494505495e-06, + "loss": 0.0527, + "step": 32309 + }, + { + "epoch": 88.76373626373626, + "grad_norm": 12.509922981262207, + "learning_rate": 5.618131868131868e-06, + "loss": 0.1458, + "step": 32310 + }, + { + "epoch": 88.76648351648352, + "grad_norm": 15.571525573730469, + "learning_rate": 5.6167582417582425e-06, + "loss": 0.3219, + "step": 32311 + }, + { + "epoch": 88.76923076923077, + "grad_norm": 1.3594156503677368, + "learning_rate": 5.615384615384615e-06, + "loss": 0.0224, + "step": 32312 + }, + { + "epoch": 88.77197802197803, + "grad_norm": 11.451986312866211, + "learning_rate": 5.614010989010989e-06, + "loss": 0.2295, + "step": 32313 + }, + { + "epoch": 88.77472527472527, + "grad_norm": 7.575131416320801, + "learning_rate": 5.612637362637363e-06, + "loss": 0.086, + "step": 32314 + }, + { + "epoch": 88.77747252747253, + "grad_norm": 13.569931030273438, + "learning_rate": 5.611263736263737e-06, + "loss": 0.3092, + "step": 32315 + }, + { + "epoch": 88.78021978021978, + "grad_norm": 2.0661160945892334, + "learning_rate": 5.60989010989011e-06, + "loss": 0.0189, + "step": 32316 + }, + { + "epoch": 88.78296703296704, + "grad_norm": 18.846694946289062, + "learning_rate": 5.608516483516484e-06, + "loss": 0.3247, + "step": 32317 + }, + { + "epoch": 88.78571428571429, + "grad_norm": 2.320605754852295, + "learning_rate": 5.607142857142857e-06, + "loss": 0.0222, + "step": 32318 + }, + { + "epoch": 88.78846153846153, + "grad_norm": 16.583587646484375, + "learning_rate": 5.6057692307692314e-06, + "loss": 0.2793, + "step": 32319 + }, + { + "epoch": 88.79120879120879, + "grad_norm": 2.469392776489258, + "learning_rate": 5.604395604395605e-06, + "loss": 0.0322, + "step": 32320 + }, + { + "epoch": 88.79395604395604, + "grad_norm": 23.675142288208008, + "learning_rate": 5.603021978021978e-06, + "loss": 0.5575, + "step": 32321 + }, + { + "epoch": 88.7967032967033, + "grad_norm": 7.47494649887085, + "learning_rate": 5.601648351648352e-06, + "loss": 0.2455, + "step": 32322 + }, + { + "epoch": 88.79945054945055, + "grad_norm": 15.784353256225586, + "learning_rate": 5.600274725274726e-06, + "loss": 0.2035, + "step": 32323 + }, + { + "epoch": 88.8021978021978, + "grad_norm": 14.808995246887207, + "learning_rate": 5.598901098901099e-06, + "loss": 0.24, + "step": 32324 + }, + { + "epoch": 88.80494505494505, + "grad_norm": 21.225643157958984, + "learning_rate": 5.597527472527473e-06, + "loss": 0.3857, + "step": 32325 + }, + { + "epoch": 88.8076923076923, + "grad_norm": 10.079485893249512, + "learning_rate": 5.596153846153846e-06, + "loss": 0.1422, + "step": 32326 + }, + { + "epoch": 88.81043956043956, + "grad_norm": 8.414599418640137, + "learning_rate": 5.5947802197802195e-06, + "loss": 0.1285, + "step": 32327 + }, + { + "epoch": 88.81318681318682, + "grad_norm": 7.788127899169922, + "learning_rate": 5.593406593406594e-06, + "loss": 0.1483, + "step": 32328 + }, + { + "epoch": 88.81593406593407, + "grad_norm": 14.13367748260498, + "learning_rate": 5.592032967032967e-06, + "loss": 0.2938, + "step": 32329 + }, + { + "epoch": 88.81868131868131, + "grad_norm": 9.7401123046875, + "learning_rate": 5.5906593406593415e-06, + "loss": 0.1818, + "step": 32330 + }, + { + "epoch": 88.82142857142857, + "grad_norm": 5.330321311950684, + "learning_rate": 5.589285714285714e-06, + "loss": 0.0985, + "step": 32331 + }, + { + "epoch": 88.82417582417582, + "grad_norm": 24.270734786987305, + "learning_rate": 5.587912087912088e-06, + "loss": 0.6389, + "step": 32332 + }, + { + "epoch": 88.82692307692308, + "grad_norm": 7.639594078063965, + "learning_rate": 5.586538461538462e-06, + "loss": 0.153, + "step": 32333 + }, + { + "epoch": 88.82967032967034, + "grad_norm": 14.336689949035645, + "learning_rate": 5.585164835164836e-06, + "loss": 0.1633, + "step": 32334 + }, + { + "epoch": 88.83241758241758, + "grad_norm": 24.23680305480957, + "learning_rate": 5.583791208791209e-06, + "loss": 0.56, + "step": 32335 + }, + { + "epoch": 88.83516483516483, + "grad_norm": 13.568634033203125, + "learning_rate": 5.582417582417583e-06, + "loss": 0.2018, + "step": 32336 + }, + { + "epoch": 88.83791208791209, + "grad_norm": 0.7390239238739014, + "learning_rate": 5.581043956043956e-06, + "loss": 0.0088, + "step": 32337 + }, + { + "epoch": 88.84065934065934, + "grad_norm": 13.08464527130127, + "learning_rate": 5.5796703296703296e-06, + "loss": 0.1635, + "step": 32338 + }, + { + "epoch": 88.8434065934066, + "grad_norm": 7.254478454589844, + "learning_rate": 5.578296703296704e-06, + "loss": 0.1005, + "step": 32339 + }, + { + "epoch": 88.84615384615384, + "grad_norm": 6.293534755706787, + "learning_rate": 5.576923076923077e-06, + "loss": 0.0747, + "step": 32340 + }, + { + "epoch": 88.8489010989011, + "grad_norm": 11.98370361328125, + "learning_rate": 5.575549450549451e-06, + "loss": 0.2423, + "step": 32341 + }, + { + "epoch": 88.85164835164835, + "grad_norm": 15.02284049987793, + "learning_rate": 5.574175824175824e-06, + "loss": 0.4601, + "step": 32342 + }, + { + "epoch": 88.8543956043956, + "grad_norm": 6.455854415893555, + "learning_rate": 5.572802197802198e-06, + "loss": 0.0453, + "step": 32343 + }, + { + "epoch": 88.85714285714286, + "grad_norm": 15.88371467590332, + "learning_rate": 5.571428571428572e-06, + "loss": 0.3331, + "step": 32344 + }, + { + "epoch": 88.85989010989012, + "grad_norm": 8.26119327545166, + "learning_rate": 5.570054945054946e-06, + "loss": 0.2464, + "step": 32345 + }, + { + "epoch": 88.86263736263736, + "grad_norm": 21.211669921875, + "learning_rate": 5.5686813186813185e-06, + "loss": 0.3904, + "step": 32346 + }, + { + "epoch": 88.86538461538461, + "grad_norm": 3.3644495010375977, + "learning_rate": 5.567307692307693e-06, + "loss": 0.0473, + "step": 32347 + }, + { + "epoch": 88.86813186813187, + "grad_norm": 15.799912452697754, + "learning_rate": 5.565934065934066e-06, + "loss": 0.3637, + "step": 32348 + }, + { + "epoch": 88.87087912087912, + "grad_norm": 4.001442909240723, + "learning_rate": 5.56456043956044e-06, + "loss": 0.0494, + "step": 32349 + }, + { + "epoch": 88.87362637362638, + "grad_norm": 5.096249103546143, + "learning_rate": 5.563186813186813e-06, + "loss": 0.0663, + "step": 32350 + }, + { + "epoch": 88.87637362637362, + "grad_norm": 14.33781909942627, + "learning_rate": 5.561813186813187e-06, + "loss": 0.3895, + "step": 32351 + }, + { + "epoch": 88.87912087912088, + "grad_norm": 4.725517272949219, + "learning_rate": 5.560439560439561e-06, + "loss": 0.0923, + "step": 32352 + }, + { + "epoch": 88.88186813186813, + "grad_norm": 7.294503688812256, + "learning_rate": 5.559065934065934e-06, + "loss": 0.1325, + "step": 32353 + }, + { + "epoch": 88.88461538461539, + "grad_norm": 22.582096099853516, + "learning_rate": 5.557692307692308e-06, + "loss": 0.1825, + "step": 32354 + }, + { + "epoch": 88.88736263736264, + "grad_norm": 7.484366416931152, + "learning_rate": 5.556318681318681e-06, + "loss": 0.1107, + "step": 32355 + }, + { + "epoch": 88.89010989010988, + "grad_norm": 12.909453392028809, + "learning_rate": 5.554945054945055e-06, + "loss": 0.2333, + "step": 32356 + }, + { + "epoch": 88.89285714285714, + "grad_norm": 8.57568073272705, + "learning_rate": 5.5535714285714285e-06, + "loss": 0.1347, + "step": 32357 + }, + { + "epoch": 88.8956043956044, + "grad_norm": 7.764471054077148, + "learning_rate": 5.552197802197803e-06, + "loss": 0.1507, + "step": 32358 + }, + { + "epoch": 88.89835164835165, + "grad_norm": 17.87113380432129, + "learning_rate": 5.550824175824176e-06, + "loss": 0.4924, + "step": 32359 + }, + { + "epoch": 88.9010989010989, + "grad_norm": 12.759227752685547, + "learning_rate": 5.54945054945055e-06, + "loss": 0.2163, + "step": 32360 + }, + { + "epoch": 88.90384615384616, + "grad_norm": 23.556930541992188, + "learning_rate": 5.548076923076923e-06, + "loss": 0.4346, + "step": 32361 + }, + { + "epoch": 88.9065934065934, + "grad_norm": 11.661018371582031, + "learning_rate": 5.546703296703297e-06, + "loss": 0.1315, + "step": 32362 + }, + { + "epoch": 88.90934065934066, + "grad_norm": 15.206514358520508, + "learning_rate": 5.545329670329671e-06, + "loss": 0.273, + "step": 32363 + }, + { + "epoch": 88.91208791208791, + "grad_norm": 9.771547317504883, + "learning_rate": 5.543956043956045e-06, + "loss": 0.1948, + "step": 32364 + }, + { + "epoch": 88.91483516483517, + "grad_norm": 9.154326438903809, + "learning_rate": 5.5425824175824175e-06, + "loss": 0.228, + "step": 32365 + }, + { + "epoch": 88.91758241758242, + "grad_norm": 2.3593244552612305, + "learning_rate": 5.541208791208792e-06, + "loss": 0.0837, + "step": 32366 + }, + { + "epoch": 88.92032967032966, + "grad_norm": 13.32730484008789, + "learning_rate": 5.539835164835165e-06, + "loss": 0.1907, + "step": 32367 + }, + { + "epoch": 88.92307692307692, + "grad_norm": 13.352635383605957, + "learning_rate": 5.5384615384615385e-06, + "loss": 0.1595, + "step": 32368 + }, + { + "epoch": 88.92582417582418, + "grad_norm": 4.441988468170166, + "learning_rate": 5.537087912087913e-06, + "loss": 0.0704, + "step": 32369 + }, + { + "epoch": 88.92857142857143, + "grad_norm": 16.874649047851562, + "learning_rate": 5.535714285714285e-06, + "loss": 0.4049, + "step": 32370 + }, + { + "epoch": 88.93131868131869, + "grad_norm": 10.33715534210205, + "learning_rate": 5.53434065934066e-06, + "loss": 0.1265, + "step": 32371 + }, + { + "epoch": 88.93406593406593, + "grad_norm": 9.773907661437988, + "learning_rate": 5.532967032967033e-06, + "loss": 0.1189, + "step": 32372 + }, + { + "epoch": 88.93681318681318, + "grad_norm": 9.309137344360352, + "learning_rate": 5.531593406593407e-06, + "loss": 0.1336, + "step": 32373 + }, + { + "epoch": 88.93956043956044, + "grad_norm": 9.489477157592773, + "learning_rate": 5.530219780219781e-06, + "loss": 0.1904, + "step": 32374 + }, + { + "epoch": 88.9423076923077, + "grad_norm": 6.634662628173828, + "learning_rate": 5.528846153846154e-06, + "loss": 0.163, + "step": 32375 + }, + { + "epoch": 88.94505494505495, + "grad_norm": 18.738109588623047, + "learning_rate": 5.5274725274725275e-06, + "loss": 0.3481, + "step": 32376 + }, + { + "epoch": 88.9478021978022, + "grad_norm": 2.951580762863159, + "learning_rate": 5.526098901098902e-06, + "loss": 0.0448, + "step": 32377 + }, + { + "epoch": 88.95054945054945, + "grad_norm": 14.203383445739746, + "learning_rate": 5.524725274725275e-06, + "loss": 0.266, + "step": 32378 + }, + { + "epoch": 88.9532967032967, + "grad_norm": 9.081777572631836, + "learning_rate": 5.5233516483516486e-06, + "loss": 0.1416, + "step": 32379 + }, + { + "epoch": 88.95604395604396, + "grad_norm": 20.53479766845703, + "learning_rate": 5.521978021978022e-06, + "loss": 0.504, + "step": 32380 + }, + { + "epoch": 88.95879120879121, + "grad_norm": 10.436986923217773, + "learning_rate": 5.520604395604396e-06, + "loss": 0.1894, + "step": 32381 + }, + { + "epoch": 88.96153846153847, + "grad_norm": 13.267265319824219, + "learning_rate": 5.51923076923077e-06, + "loss": 0.1961, + "step": 32382 + }, + { + "epoch": 88.96428571428571, + "grad_norm": 10.057168960571289, + "learning_rate": 5.517857142857143e-06, + "loss": 0.2352, + "step": 32383 + }, + { + "epoch": 88.96703296703296, + "grad_norm": 11.33119010925293, + "learning_rate": 5.5164835164835164e-06, + "loss": 0.1647, + "step": 32384 + }, + { + "epoch": 88.96978021978022, + "grad_norm": 2.612729787826538, + "learning_rate": 5.51510989010989e-06, + "loss": 0.0298, + "step": 32385 + }, + { + "epoch": 88.97252747252747, + "grad_norm": 6.436648368835449, + "learning_rate": 5.513736263736264e-06, + "loss": 0.0664, + "step": 32386 + }, + { + "epoch": 88.97527472527473, + "grad_norm": 4.925643444061279, + "learning_rate": 5.5123626373626375e-06, + "loss": 0.0544, + "step": 32387 + }, + { + "epoch": 88.97802197802197, + "grad_norm": 20.069705963134766, + "learning_rate": 5.510989010989012e-06, + "loss": 0.3573, + "step": 32388 + }, + { + "epoch": 88.98076923076923, + "grad_norm": 16.788541793823242, + "learning_rate": 5.509615384615384e-06, + "loss": 0.365, + "step": 32389 + }, + { + "epoch": 88.98351648351648, + "grad_norm": 23.72406578063965, + "learning_rate": 5.5082417582417586e-06, + "loss": 0.576, + "step": 32390 + }, + { + "epoch": 88.98626373626374, + "grad_norm": 7.453587055206299, + "learning_rate": 5.506868131868132e-06, + "loss": 0.1114, + "step": 32391 + }, + { + "epoch": 88.98901098901099, + "grad_norm": 11.968439102172852, + "learning_rate": 5.505494505494506e-06, + "loss": 0.1976, + "step": 32392 + }, + { + "epoch": 88.99175824175825, + "grad_norm": 13.92976188659668, + "learning_rate": 5.50412087912088e-06, + "loss": 0.2515, + "step": 32393 + }, + { + "epoch": 88.99450549450549, + "grad_norm": 17.415077209472656, + "learning_rate": 5.502747252747253e-06, + "loss": 0.3022, + "step": 32394 + }, + { + "epoch": 88.99725274725274, + "grad_norm": 10.477431297302246, + "learning_rate": 5.5013736263736265e-06, + "loss": 0.1067, + "step": 32395 + }, + { + "epoch": 89.0, + "grad_norm": 74.93746948242188, + "learning_rate": 5.500000000000001e-06, + "loss": 1.8463, + "step": 32396 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.571625344352617, + "eval_f1": 0.5426225531698503, + "eval_f1_DuraRiadoRio_64x64": 0.54, + "eval_f1_Mole_64x64": 0.6576576576576577, + "eval_f1_Quebrado_64x64": 0.7238605898123325, + "eval_f1_RiadoRio_64x64": 0.5354969574036511, + "eval_f1_RioFechado_64x64": 0.25609756097560976, + "eval_loss": 3.0008208751678467, + "eval_precision": 0.766269005915435, + "eval_precision_DuraRiadoRio_64x64": 0.9642857142857143, + "eval_precision_Mole_64x64": 0.9358974358974359, + "eval_precision_Quebrado_64x64": 0.5895196506550219, + "eval_precision_RiadoRio_64x64": 0.3870967741935484, + "eval_precision_RioFechado_64x64": 0.9545454545454546, + "eval_recall": 0.5671505642039371, + "eval_recall_DuraRiadoRio_64x64": 0.375, + "eval_recall_Mole_64x64": 0.5069444444444444, + "eval_recall_Quebrado_64x64": 0.9375, + "eval_recall_RiadoRio_64x64": 0.868421052631579, + "eval_recall_RioFechado_64x64": 0.14788732394366197, + "eval_runtime": 1.7289, + "eval_samples_per_second": 419.919, + "eval_steps_per_second": 26.606, + "step": 32396 + }, + { + "epoch": 89.00274725274726, + "grad_norm": 5.119385242462158, + "learning_rate": 5.498626373626374e-06, + "loss": 0.0783, + "step": 32397 + }, + { + "epoch": 89.00549450549451, + "grad_norm": 15.644735336303711, + "learning_rate": 5.4972527472527475e-06, + "loss": 0.2658, + "step": 32398 + }, + { + "epoch": 89.00824175824175, + "grad_norm": 7.262630939483643, + "learning_rate": 5.495879120879121e-06, + "loss": 0.0944, + "step": 32399 + }, + { + "epoch": 89.01098901098901, + "grad_norm": 17.02825927734375, + "learning_rate": 5.494505494505494e-06, + "loss": 0.2932, + "step": 32400 + }, + { + "epoch": 89.01373626373626, + "grad_norm": 26.1815185546875, + "learning_rate": 5.493131868131869e-06, + "loss": 0.4395, + "step": 32401 + }, + { + "epoch": 89.01648351648352, + "grad_norm": 19.24222755432129, + "learning_rate": 5.491758241758242e-06, + "loss": 0.3285, + "step": 32402 + }, + { + "epoch": 89.01923076923077, + "grad_norm": 12.047699928283691, + "learning_rate": 5.490384615384616e-06, + "loss": 0.1047, + "step": 32403 + }, + { + "epoch": 89.02197802197803, + "grad_norm": 4.992770671844482, + "learning_rate": 5.489010989010989e-06, + "loss": 0.0409, + "step": 32404 + }, + { + "epoch": 89.02472527472527, + "grad_norm": 12.977851867675781, + "learning_rate": 5.487637362637363e-06, + "loss": 0.1558, + "step": 32405 + }, + { + "epoch": 89.02747252747253, + "grad_norm": 8.216584205627441, + "learning_rate": 5.4862637362637365e-06, + "loss": 0.0943, + "step": 32406 + }, + { + "epoch": 89.03021978021978, + "grad_norm": 10.45729923248291, + "learning_rate": 5.484890109890111e-06, + "loss": 0.2025, + "step": 32407 + }, + { + "epoch": 89.03296703296704, + "grad_norm": 15.054234504699707, + "learning_rate": 5.483516483516483e-06, + "loss": 0.2479, + "step": 32408 + }, + { + "epoch": 89.03571428571429, + "grad_norm": 16.344600677490234, + "learning_rate": 5.4821428571428575e-06, + "loss": 0.3626, + "step": 32409 + }, + { + "epoch": 89.03846153846153, + "grad_norm": 11.572469711303711, + "learning_rate": 5.480769230769231e-06, + "loss": 0.1335, + "step": 32410 + }, + { + "epoch": 89.04120879120879, + "grad_norm": 21.628713607788086, + "learning_rate": 5.479395604395604e-06, + "loss": 0.4378, + "step": 32411 + }, + { + "epoch": 89.04395604395604, + "grad_norm": 23.53664207458496, + "learning_rate": 5.478021978021979e-06, + "loss": 0.5788, + "step": 32412 + }, + { + "epoch": 89.0467032967033, + "grad_norm": 12.88405704498291, + "learning_rate": 5.476648351648352e-06, + "loss": 0.3537, + "step": 32413 + }, + { + "epoch": 89.04945054945055, + "grad_norm": 16.182655334472656, + "learning_rate": 5.475274725274725e-06, + "loss": 0.38, + "step": 32414 + }, + { + "epoch": 89.0521978021978, + "grad_norm": 4.4468817710876465, + "learning_rate": 5.473901098901099e-06, + "loss": 0.0495, + "step": 32415 + }, + { + "epoch": 89.05494505494505, + "grad_norm": 16.242300033569336, + "learning_rate": 5.472527472527473e-06, + "loss": 0.2119, + "step": 32416 + }, + { + "epoch": 89.0576923076923, + "grad_norm": 8.569578170776367, + "learning_rate": 5.4711538461538465e-06, + "loss": 0.1338, + "step": 32417 + }, + { + "epoch": 89.06043956043956, + "grad_norm": 3.6693835258483887, + "learning_rate": 5.46978021978022e-06, + "loss": 0.0326, + "step": 32418 + }, + { + "epoch": 89.06318681318682, + "grad_norm": 16.925010681152344, + "learning_rate": 5.468406593406593e-06, + "loss": 0.2729, + "step": 32419 + }, + { + "epoch": 89.06593406593407, + "grad_norm": 8.288947105407715, + "learning_rate": 5.4670329670329675e-06, + "loss": 0.0952, + "step": 32420 + }, + { + "epoch": 89.06868131868131, + "grad_norm": 23.089078903198242, + "learning_rate": 5.465659340659341e-06, + "loss": 0.7495, + "step": 32421 + }, + { + "epoch": 89.07142857142857, + "grad_norm": 9.878985404968262, + "learning_rate": 5.464285714285715e-06, + "loss": 0.4171, + "step": 32422 + }, + { + "epoch": 89.07417582417582, + "grad_norm": 18.236003875732422, + "learning_rate": 5.462912087912088e-06, + "loss": 0.5271, + "step": 32423 + }, + { + "epoch": 89.07692307692308, + "grad_norm": 20.597034454345703, + "learning_rate": 5.461538461538462e-06, + "loss": 0.5136, + "step": 32424 + }, + { + "epoch": 89.07967032967034, + "grad_norm": 12.420125961303711, + "learning_rate": 5.4601648351648354e-06, + "loss": 0.3975, + "step": 32425 + }, + { + "epoch": 89.08241758241758, + "grad_norm": 8.549038887023926, + "learning_rate": 5.458791208791209e-06, + "loss": 0.0841, + "step": 32426 + }, + { + "epoch": 89.08516483516483, + "grad_norm": 15.677083969116211, + "learning_rate": 5.457417582417583e-06, + "loss": 0.2522, + "step": 32427 + }, + { + "epoch": 89.08791208791209, + "grad_norm": 11.931901931762695, + "learning_rate": 5.456043956043956e-06, + "loss": 0.1123, + "step": 32428 + }, + { + "epoch": 89.09065934065934, + "grad_norm": 11.307126998901367, + "learning_rate": 5.45467032967033e-06, + "loss": 0.2101, + "step": 32429 + }, + { + "epoch": 89.0934065934066, + "grad_norm": 21.51169204711914, + "learning_rate": 5.453296703296703e-06, + "loss": 0.3416, + "step": 32430 + }, + { + "epoch": 89.09615384615384, + "grad_norm": 17.5589656829834, + "learning_rate": 5.4519230769230776e-06, + "loss": 0.3271, + "step": 32431 + }, + { + "epoch": 89.0989010989011, + "grad_norm": 15.257911682128906, + "learning_rate": 5.450549450549451e-06, + "loss": 0.2289, + "step": 32432 + }, + { + "epoch": 89.10164835164835, + "grad_norm": 3.4406559467315674, + "learning_rate": 5.449175824175824e-06, + "loss": 0.0599, + "step": 32433 + }, + { + "epoch": 89.1043956043956, + "grad_norm": 11.75068187713623, + "learning_rate": 5.447802197802198e-06, + "loss": 0.1668, + "step": 32434 + }, + { + "epoch": 89.10714285714286, + "grad_norm": 2.835326671600342, + "learning_rate": 5.446428571428572e-06, + "loss": 0.0323, + "step": 32435 + }, + { + "epoch": 89.10989010989012, + "grad_norm": 12.528861045837402, + "learning_rate": 5.4450549450549454e-06, + "loss": 0.1682, + "step": 32436 + }, + { + "epoch": 89.11263736263736, + "grad_norm": 17.232009887695312, + "learning_rate": 5.443681318681319e-06, + "loss": 0.2117, + "step": 32437 + }, + { + "epoch": 89.11538461538461, + "grad_norm": 15.571564674377441, + "learning_rate": 5.442307692307692e-06, + "loss": 0.4145, + "step": 32438 + }, + { + "epoch": 89.11813186813187, + "grad_norm": 7.2592878341674805, + "learning_rate": 5.4409340659340665e-06, + "loss": 0.112, + "step": 32439 + }, + { + "epoch": 89.12087912087912, + "grad_norm": 16.75953483581543, + "learning_rate": 5.43956043956044e-06, + "loss": 0.2527, + "step": 32440 + }, + { + "epoch": 89.12362637362638, + "grad_norm": 13.833111763000488, + "learning_rate": 5.438186813186813e-06, + "loss": 0.2733, + "step": 32441 + }, + { + "epoch": 89.12637362637362, + "grad_norm": 3.2542037963867188, + "learning_rate": 5.436813186813187e-06, + "loss": 0.0266, + "step": 32442 + }, + { + "epoch": 89.12912087912088, + "grad_norm": 22.386701583862305, + "learning_rate": 5.43543956043956e-06, + "loss": 0.6661, + "step": 32443 + }, + { + "epoch": 89.13186813186813, + "grad_norm": 1.727840542793274, + "learning_rate": 5.434065934065934e-06, + "loss": 0.0162, + "step": 32444 + }, + { + "epoch": 89.13461538461539, + "grad_norm": 15.756929397583008, + "learning_rate": 5.432692307692308e-06, + "loss": 0.2891, + "step": 32445 + }, + { + "epoch": 89.13736263736264, + "grad_norm": 15.174537658691406, + "learning_rate": 5.431318681318682e-06, + "loss": 0.2912, + "step": 32446 + }, + { + "epoch": 89.14010989010988, + "grad_norm": 12.084759712219238, + "learning_rate": 5.429945054945055e-06, + "loss": 0.2305, + "step": 32447 + }, + { + "epoch": 89.14285714285714, + "grad_norm": 5.722995281219482, + "learning_rate": 5.428571428571429e-06, + "loss": 0.0572, + "step": 32448 + }, + { + "epoch": 89.1456043956044, + "grad_norm": 8.499241828918457, + "learning_rate": 5.427197802197802e-06, + "loss": 0.0969, + "step": 32449 + }, + { + "epoch": 89.14835164835165, + "grad_norm": 15.843632698059082, + "learning_rate": 5.4258241758241765e-06, + "loss": 0.4987, + "step": 32450 + }, + { + "epoch": 89.1510989010989, + "grad_norm": 17.820886611938477, + "learning_rate": 5.42445054945055e-06, + "loss": 0.6884, + "step": 32451 + }, + { + "epoch": 89.15384615384616, + "grad_norm": 13.459563255310059, + "learning_rate": 5.423076923076923e-06, + "loss": 0.3799, + "step": 32452 + }, + { + "epoch": 89.1565934065934, + "grad_norm": 9.826465606689453, + "learning_rate": 5.421703296703297e-06, + "loss": 0.2081, + "step": 32453 + }, + { + "epoch": 89.15934065934066, + "grad_norm": 10.48430347442627, + "learning_rate": 5.420329670329671e-06, + "loss": 0.1617, + "step": 32454 + }, + { + "epoch": 89.16208791208791, + "grad_norm": 18.1213321685791, + "learning_rate": 5.418956043956044e-06, + "loss": 0.4056, + "step": 32455 + }, + { + "epoch": 89.16483516483517, + "grad_norm": 12.173783302307129, + "learning_rate": 5.417582417582418e-06, + "loss": 0.2463, + "step": 32456 + }, + { + "epoch": 89.16758241758242, + "grad_norm": 8.34512710571289, + "learning_rate": 5.416208791208791e-06, + "loss": 0.1133, + "step": 32457 + }, + { + "epoch": 89.17032967032966, + "grad_norm": 8.379592895507812, + "learning_rate": 5.414835164835165e-06, + "loss": 0.1067, + "step": 32458 + }, + { + "epoch": 89.17307692307692, + "grad_norm": 20.244525909423828, + "learning_rate": 5.413461538461539e-06, + "loss": 0.3316, + "step": 32459 + }, + { + "epoch": 89.17582417582418, + "grad_norm": 4.277378559112549, + "learning_rate": 5.412087912087912e-06, + "loss": 0.0617, + "step": 32460 + }, + { + "epoch": 89.17857142857143, + "grad_norm": 10.227354049682617, + "learning_rate": 5.4107142857142865e-06, + "loss": 0.2346, + "step": 32461 + }, + { + "epoch": 89.18131868131869, + "grad_norm": 13.279084205627441, + "learning_rate": 5.409340659340659e-06, + "loss": 0.2231, + "step": 32462 + }, + { + "epoch": 89.18406593406593, + "grad_norm": 17.172561645507812, + "learning_rate": 5.407967032967033e-06, + "loss": 0.3269, + "step": 32463 + }, + { + "epoch": 89.18681318681318, + "grad_norm": 21.455259323120117, + "learning_rate": 5.406593406593407e-06, + "loss": 0.5895, + "step": 32464 + }, + { + "epoch": 89.18956043956044, + "grad_norm": 21.184640884399414, + "learning_rate": 5.405219780219781e-06, + "loss": 0.2155, + "step": 32465 + }, + { + "epoch": 89.1923076923077, + "grad_norm": 5.04498291015625, + "learning_rate": 5.403846153846154e-06, + "loss": 0.0956, + "step": 32466 + }, + { + "epoch": 89.19505494505495, + "grad_norm": 5.780994415283203, + "learning_rate": 5.402472527472528e-06, + "loss": 0.1109, + "step": 32467 + }, + { + "epoch": 89.1978021978022, + "grad_norm": 12.810940742492676, + "learning_rate": 5.401098901098901e-06, + "loss": 0.2371, + "step": 32468 + }, + { + "epoch": 89.20054945054945, + "grad_norm": 13.321123123168945, + "learning_rate": 5.3997252747252755e-06, + "loss": 0.1481, + "step": 32469 + }, + { + "epoch": 89.2032967032967, + "grad_norm": 15.78043270111084, + "learning_rate": 5.398351648351649e-06, + "loss": 0.2994, + "step": 32470 + }, + { + "epoch": 89.20604395604396, + "grad_norm": 1.382991075515747, + "learning_rate": 5.396978021978022e-06, + "loss": 0.0226, + "step": 32471 + }, + { + "epoch": 89.20879120879121, + "grad_norm": 30.28536033630371, + "learning_rate": 5.395604395604396e-06, + "loss": 0.6408, + "step": 32472 + }, + { + "epoch": 89.21153846153847, + "grad_norm": 8.434617042541504, + "learning_rate": 5.394230769230769e-06, + "loss": 0.0926, + "step": 32473 + }, + { + "epoch": 89.21428571428571, + "grad_norm": 15.761215209960938, + "learning_rate": 5.392857142857143e-06, + "loss": 0.1901, + "step": 32474 + }, + { + "epoch": 89.21703296703296, + "grad_norm": 4.816170692443848, + "learning_rate": 5.391483516483517e-06, + "loss": 0.0629, + "step": 32475 + }, + { + "epoch": 89.21978021978022, + "grad_norm": 18.1033935546875, + "learning_rate": 5.39010989010989e-06, + "loss": 0.1872, + "step": 32476 + }, + { + "epoch": 89.22252747252747, + "grad_norm": 11.271927833557129, + "learning_rate": 5.388736263736264e-06, + "loss": 0.2504, + "step": 32477 + }, + { + "epoch": 89.22527472527473, + "grad_norm": 9.928404808044434, + "learning_rate": 5.387362637362638e-06, + "loss": 0.1241, + "step": 32478 + }, + { + "epoch": 89.22802197802197, + "grad_norm": 9.261213302612305, + "learning_rate": 5.385989010989011e-06, + "loss": 0.1396, + "step": 32479 + }, + { + "epoch": 89.23076923076923, + "grad_norm": 15.72085189819336, + "learning_rate": 5.3846153846153855e-06, + "loss": 0.2319, + "step": 32480 + }, + { + "epoch": 89.23351648351648, + "grad_norm": 15.066574096679688, + "learning_rate": 5.383241758241758e-06, + "loss": 0.3514, + "step": 32481 + }, + { + "epoch": 89.23626373626374, + "grad_norm": 3.2677829265594482, + "learning_rate": 5.381868131868132e-06, + "loss": 0.0977, + "step": 32482 + }, + { + "epoch": 89.23901098901099, + "grad_norm": 5.895129203796387, + "learning_rate": 5.380494505494506e-06, + "loss": 0.1111, + "step": 32483 + }, + { + "epoch": 89.24175824175825, + "grad_norm": 17.599760055541992, + "learning_rate": 5.379120879120879e-06, + "loss": 0.461, + "step": 32484 + }, + { + "epoch": 89.24450549450549, + "grad_norm": 5.315162658691406, + "learning_rate": 5.377747252747253e-06, + "loss": 0.0667, + "step": 32485 + }, + { + "epoch": 89.24725274725274, + "grad_norm": 12.017104148864746, + "learning_rate": 5.376373626373627e-06, + "loss": 0.1577, + "step": 32486 + }, + { + "epoch": 89.25, + "grad_norm": 4.993090629577637, + "learning_rate": 5.375e-06, + "loss": 0.0646, + "step": 32487 + }, + { + "epoch": 89.25274725274726, + "grad_norm": 19.92702293395996, + "learning_rate": 5.373626373626374e-06, + "loss": 0.2828, + "step": 32488 + }, + { + "epoch": 89.25549450549451, + "grad_norm": 14.26962661743164, + "learning_rate": 5.372252747252748e-06, + "loss": 0.1953, + "step": 32489 + }, + { + "epoch": 89.25824175824175, + "grad_norm": 7.56519889831543, + "learning_rate": 5.3708791208791204e-06, + "loss": 0.1355, + "step": 32490 + }, + { + "epoch": 89.26098901098901, + "grad_norm": 13.891796112060547, + "learning_rate": 5.369505494505495e-06, + "loss": 0.1165, + "step": 32491 + }, + { + "epoch": 89.26373626373626, + "grad_norm": 9.121536254882812, + "learning_rate": 5.368131868131868e-06, + "loss": 0.1255, + "step": 32492 + }, + { + "epoch": 89.26648351648352, + "grad_norm": 19.37184715270996, + "learning_rate": 5.366758241758242e-06, + "loss": 0.4835, + "step": 32493 + }, + { + "epoch": 89.26923076923077, + "grad_norm": 9.6345853805542, + "learning_rate": 5.365384615384616e-06, + "loss": 0.1375, + "step": 32494 + }, + { + "epoch": 89.27197802197803, + "grad_norm": 8.709965705871582, + "learning_rate": 5.364010989010989e-06, + "loss": 0.197, + "step": 32495 + }, + { + "epoch": 89.27472527472527, + "grad_norm": 10.551793098449707, + "learning_rate": 5.3626373626373626e-06, + "loss": 0.1669, + "step": 32496 + }, + { + "epoch": 89.27747252747253, + "grad_norm": 9.09523868560791, + "learning_rate": 5.361263736263737e-06, + "loss": 0.1813, + "step": 32497 + }, + { + "epoch": 89.28021978021978, + "grad_norm": 10.556694984436035, + "learning_rate": 5.35989010989011e-06, + "loss": 0.2518, + "step": 32498 + }, + { + "epoch": 89.28296703296704, + "grad_norm": 22.049354553222656, + "learning_rate": 5.358516483516484e-06, + "loss": 0.6257, + "step": 32499 + }, + { + "epoch": 89.28571428571429, + "grad_norm": 15.416509628295898, + "learning_rate": 5.357142857142857e-06, + "loss": 0.2862, + "step": 32500 + }, + { + "epoch": 89.28846153846153, + "grad_norm": 18.43447494506836, + "learning_rate": 5.355769230769231e-06, + "loss": 0.47, + "step": 32501 + }, + { + "epoch": 89.29120879120879, + "grad_norm": 9.576010704040527, + "learning_rate": 5.354395604395605e-06, + "loss": 0.0818, + "step": 32502 + }, + { + "epoch": 89.29395604395604, + "grad_norm": 13.155699729919434, + "learning_rate": 5.353021978021978e-06, + "loss": 0.276, + "step": 32503 + }, + { + "epoch": 89.2967032967033, + "grad_norm": 18.094974517822266, + "learning_rate": 5.351648351648352e-06, + "loss": 0.3573, + "step": 32504 + }, + { + "epoch": 89.29945054945055, + "grad_norm": 10.890885353088379, + "learning_rate": 5.350274725274725e-06, + "loss": 0.2128, + "step": 32505 + }, + { + "epoch": 89.3021978021978, + "grad_norm": 4.812002182006836, + "learning_rate": 5.348901098901099e-06, + "loss": 0.0534, + "step": 32506 + }, + { + "epoch": 89.30494505494505, + "grad_norm": 8.698836326599121, + "learning_rate": 5.347527472527473e-06, + "loss": 0.122, + "step": 32507 + }, + { + "epoch": 89.3076923076923, + "grad_norm": 22.772260665893555, + "learning_rate": 5.346153846153847e-06, + "loss": 0.3806, + "step": 32508 + }, + { + "epoch": 89.31043956043956, + "grad_norm": 16.133359909057617, + "learning_rate": 5.34478021978022e-06, + "loss": 0.2579, + "step": 32509 + }, + { + "epoch": 89.31318681318682, + "grad_norm": 13.495061874389648, + "learning_rate": 5.343406593406594e-06, + "loss": 0.1729, + "step": 32510 + }, + { + "epoch": 89.31593406593407, + "grad_norm": 8.871519088745117, + "learning_rate": 5.342032967032967e-06, + "loss": 0.1409, + "step": 32511 + }, + { + "epoch": 89.31868131868131, + "grad_norm": 21.904794692993164, + "learning_rate": 5.340659340659341e-06, + "loss": 0.5737, + "step": 32512 + }, + { + "epoch": 89.32142857142857, + "grad_norm": 10.951716423034668, + "learning_rate": 5.339285714285715e-06, + "loss": 0.1286, + "step": 32513 + }, + { + "epoch": 89.32417582417582, + "grad_norm": 16.324670791625977, + "learning_rate": 5.337912087912088e-06, + "loss": 0.1254, + "step": 32514 + }, + { + "epoch": 89.32692307692308, + "grad_norm": 4.146649360656738, + "learning_rate": 5.3365384615384615e-06, + "loss": 0.1181, + "step": 32515 + }, + { + "epoch": 89.32967032967034, + "grad_norm": 7.887308120727539, + "learning_rate": 5.335164835164835e-06, + "loss": 0.0893, + "step": 32516 + }, + { + "epoch": 89.33241758241758, + "grad_norm": 4.678948402404785, + "learning_rate": 5.333791208791209e-06, + "loss": 0.1032, + "step": 32517 + }, + { + "epoch": 89.33516483516483, + "grad_norm": 10.088488578796387, + "learning_rate": 5.332417582417583e-06, + "loss": 0.1908, + "step": 32518 + }, + { + "epoch": 89.33791208791209, + "grad_norm": 19.836959838867188, + "learning_rate": 5.331043956043956e-06, + "loss": 0.3334, + "step": 32519 + }, + { + "epoch": 89.34065934065934, + "grad_norm": 14.884293556213379, + "learning_rate": 5.329670329670329e-06, + "loss": 0.2856, + "step": 32520 + }, + { + "epoch": 89.3434065934066, + "grad_norm": 4.226125717163086, + "learning_rate": 5.328296703296704e-06, + "loss": 0.0677, + "step": 32521 + }, + { + "epoch": 89.34615384615384, + "grad_norm": 30.38922119140625, + "learning_rate": 5.326923076923077e-06, + "loss": 1.0805, + "step": 32522 + }, + { + "epoch": 89.3489010989011, + "grad_norm": 13.066479682922363, + "learning_rate": 5.325549450549451e-06, + "loss": 0.1811, + "step": 32523 + }, + { + "epoch": 89.35164835164835, + "grad_norm": 33.458580017089844, + "learning_rate": 5.324175824175824e-06, + "loss": 0.5104, + "step": 32524 + }, + { + "epoch": 89.3543956043956, + "grad_norm": 15.590556144714355, + "learning_rate": 5.322802197802198e-06, + "loss": 0.2966, + "step": 32525 + }, + { + "epoch": 89.35714285714286, + "grad_norm": 21.795822143554688, + "learning_rate": 5.3214285714285715e-06, + "loss": 0.6181, + "step": 32526 + }, + { + "epoch": 89.35989010989012, + "grad_norm": 9.221729278564453, + "learning_rate": 5.320054945054946e-06, + "loss": 0.1146, + "step": 32527 + }, + { + "epoch": 89.36263736263736, + "grad_norm": 17.74073600769043, + "learning_rate": 5.318681318681319e-06, + "loss": 0.2376, + "step": 32528 + }, + { + "epoch": 89.36538461538461, + "grad_norm": 18.232633590698242, + "learning_rate": 5.317307692307693e-06, + "loss": 0.44, + "step": 32529 + }, + { + "epoch": 89.36813186813187, + "grad_norm": 18.31723976135254, + "learning_rate": 5.315934065934066e-06, + "loss": 0.277, + "step": 32530 + }, + { + "epoch": 89.37087912087912, + "grad_norm": 11.351506233215332, + "learning_rate": 5.3145604395604394e-06, + "loss": 0.218, + "step": 32531 + }, + { + "epoch": 89.37362637362638, + "grad_norm": 24.954753875732422, + "learning_rate": 5.313186813186814e-06, + "loss": 0.5704, + "step": 32532 + }, + { + "epoch": 89.37637362637362, + "grad_norm": 7.975396156311035, + "learning_rate": 5.311813186813187e-06, + "loss": 0.0952, + "step": 32533 + }, + { + "epoch": 89.37912087912088, + "grad_norm": 14.63398551940918, + "learning_rate": 5.3104395604395605e-06, + "loss": 0.2372, + "step": 32534 + }, + { + "epoch": 89.38186813186813, + "grad_norm": 11.5654296875, + "learning_rate": 5.309065934065934e-06, + "loss": 0.2354, + "step": 32535 + }, + { + "epoch": 89.38461538461539, + "grad_norm": 18.138364791870117, + "learning_rate": 5.307692307692308e-06, + "loss": 0.2103, + "step": 32536 + }, + { + "epoch": 89.38736263736264, + "grad_norm": 14.237802505493164, + "learning_rate": 5.3063186813186816e-06, + "loss": 0.4147, + "step": 32537 + }, + { + "epoch": 89.39010989010988, + "grad_norm": 18.116371154785156, + "learning_rate": 5.304945054945056e-06, + "loss": 0.4419, + "step": 32538 + }, + { + "epoch": 89.39285714285714, + "grad_norm": 19.759431838989258, + "learning_rate": 5.303571428571428e-06, + "loss": 0.5373, + "step": 32539 + }, + { + "epoch": 89.3956043956044, + "grad_norm": 13.980018615722656, + "learning_rate": 5.302197802197803e-06, + "loss": 0.2761, + "step": 32540 + }, + { + "epoch": 89.39835164835165, + "grad_norm": 14.796914100646973, + "learning_rate": 5.300824175824176e-06, + "loss": 0.2295, + "step": 32541 + }, + { + "epoch": 89.4010989010989, + "grad_norm": 2.344637632369995, + "learning_rate": 5.29945054945055e-06, + "loss": 0.0255, + "step": 32542 + }, + { + "epoch": 89.40384615384616, + "grad_norm": 13.056792259216309, + "learning_rate": 5.298076923076924e-06, + "loss": 0.1151, + "step": 32543 + }, + { + "epoch": 89.4065934065934, + "grad_norm": 18.203399658203125, + "learning_rate": 5.296703296703297e-06, + "loss": 0.4712, + "step": 32544 + }, + { + "epoch": 89.40934065934066, + "grad_norm": 2.6842215061187744, + "learning_rate": 5.2953296703296705e-06, + "loss": 0.0251, + "step": 32545 + }, + { + "epoch": 89.41208791208791, + "grad_norm": 19.66562271118164, + "learning_rate": 5.293956043956044e-06, + "loss": 0.2064, + "step": 32546 + }, + { + "epoch": 89.41483516483517, + "grad_norm": 11.557967185974121, + "learning_rate": 5.292582417582418e-06, + "loss": 0.1658, + "step": 32547 + }, + { + "epoch": 89.41758241758242, + "grad_norm": 3.742990732192993, + "learning_rate": 5.291208791208791e-06, + "loss": 0.0291, + "step": 32548 + }, + { + "epoch": 89.42032967032966, + "grad_norm": 10.856792449951172, + "learning_rate": 5.289835164835165e-06, + "loss": 0.1628, + "step": 32549 + }, + { + "epoch": 89.42307692307692, + "grad_norm": 12.00568675994873, + "learning_rate": 5.288461538461538e-06, + "loss": 0.2097, + "step": 32550 + }, + { + "epoch": 89.42582417582418, + "grad_norm": 1.4172923564910889, + "learning_rate": 5.287087912087913e-06, + "loss": 0.0087, + "step": 32551 + }, + { + "epoch": 89.42857142857143, + "grad_norm": 13.451988220214844, + "learning_rate": 5.285714285714286e-06, + "loss": 0.1502, + "step": 32552 + }, + { + "epoch": 89.43131868131869, + "grad_norm": 13.366680145263672, + "learning_rate": 5.2843406593406595e-06, + "loss": 0.205, + "step": 32553 + }, + { + "epoch": 89.43406593406593, + "grad_norm": 6.880402088165283, + "learning_rate": 5.282967032967033e-06, + "loss": 0.1991, + "step": 32554 + }, + { + "epoch": 89.43681318681318, + "grad_norm": 12.692851066589355, + "learning_rate": 5.281593406593407e-06, + "loss": 0.2106, + "step": 32555 + }, + { + "epoch": 89.43956043956044, + "grad_norm": 10.255226135253906, + "learning_rate": 5.2802197802197805e-06, + "loss": 0.0678, + "step": 32556 + }, + { + "epoch": 89.4423076923077, + "grad_norm": 19.77231788635254, + "learning_rate": 5.278846153846155e-06, + "loss": 0.5272, + "step": 32557 + }, + { + "epoch": 89.44505494505495, + "grad_norm": 4.604933738708496, + "learning_rate": 5.277472527472527e-06, + "loss": 0.0455, + "step": 32558 + }, + { + "epoch": 89.4478021978022, + "grad_norm": 13.855547904968262, + "learning_rate": 5.276098901098902e-06, + "loss": 0.1329, + "step": 32559 + }, + { + "epoch": 89.45054945054945, + "grad_norm": 2.872227907180786, + "learning_rate": 5.274725274725275e-06, + "loss": 0.0166, + "step": 32560 + }, + { + "epoch": 89.4532967032967, + "grad_norm": 5.6800408363342285, + "learning_rate": 5.273351648351648e-06, + "loss": 0.0444, + "step": 32561 + }, + { + "epoch": 89.45604395604396, + "grad_norm": 11.762727737426758, + "learning_rate": 5.271978021978023e-06, + "loss": 0.4189, + "step": 32562 + }, + { + "epoch": 89.45879120879121, + "grad_norm": 17.110321044921875, + "learning_rate": 5.270604395604395e-06, + "loss": 0.2453, + "step": 32563 + }, + { + "epoch": 89.46153846153847, + "grad_norm": 7.984512805938721, + "learning_rate": 5.2692307692307695e-06, + "loss": 0.1449, + "step": 32564 + }, + { + "epoch": 89.46428571428571, + "grad_norm": 6.277474403381348, + "learning_rate": 5.267857142857143e-06, + "loss": 0.104, + "step": 32565 + }, + { + "epoch": 89.46703296703296, + "grad_norm": 7.227784633636475, + "learning_rate": 5.266483516483517e-06, + "loss": 0.1389, + "step": 32566 + }, + { + "epoch": 89.46978021978022, + "grad_norm": 7.867751121520996, + "learning_rate": 5.2651098901098905e-06, + "loss": 0.1007, + "step": 32567 + }, + { + "epoch": 89.47252747252747, + "grad_norm": 12.315282821655273, + "learning_rate": 5.263736263736264e-06, + "loss": 0.1002, + "step": 32568 + }, + { + "epoch": 89.47527472527473, + "grad_norm": 0.5282121300697327, + "learning_rate": 5.262362637362637e-06, + "loss": 0.0075, + "step": 32569 + }, + { + "epoch": 89.47802197802197, + "grad_norm": 5.115437984466553, + "learning_rate": 5.260989010989012e-06, + "loss": 0.0728, + "step": 32570 + }, + { + "epoch": 89.48076923076923, + "grad_norm": 12.812970161437988, + "learning_rate": 5.259615384615385e-06, + "loss": 0.2826, + "step": 32571 + }, + { + "epoch": 89.48351648351648, + "grad_norm": 11.224787712097168, + "learning_rate": 5.258241758241758e-06, + "loss": 0.1218, + "step": 32572 + }, + { + "epoch": 89.48626373626374, + "grad_norm": 3.477586030960083, + "learning_rate": 5.256868131868132e-06, + "loss": 0.047, + "step": 32573 + }, + { + "epoch": 89.48901098901099, + "grad_norm": 6.401697158813477, + "learning_rate": 5.255494505494506e-06, + "loss": 0.1096, + "step": 32574 + }, + { + "epoch": 89.49175824175825, + "grad_norm": 4.154203414916992, + "learning_rate": 5.2541208791208795e-06, + "loss": 0.0365, + "step": 32575 + }, + { + "epoch": 89.49450549450549, + "grad_norm": 21.550548553466797, + "learning_rate": 5.252747252747253e-06, + "loss": 0.7543, + "step": 32576 + }, + { + "epoch": 89.49725274725274, + "grad_norm": 3.7403647899627686, + "learning_rate": 5.251373626373626e-06, + "loss": 0.0296, + "step": 32577 + }, + { + "epoch": 89.5, + "grad_norm": 5.8790764808654785, + "learning_rate": 5.25e-06, + "loss": 0.1141, + "step": 32578 + }, + { + "epoch": 89.50274725274726, + "grad_norm": 13.233736991882324, + "learning_rate": 5.248626373626374e-06, + "loss": 0.5066, + "step": 32579 + }, + { + "epoch": 89.50549450549451, + "grad_norm": 3.4221246242523193, + "learning_rate": 5.247252747252747e-06, + "loss": 0.0434, + "step": 32580 + }, + { + "epoch": 89.50824175824175, + "grad_norm": 16.810279846191406, + "learning_rate": 5.245879120879122e-06, + "loss": 0.2576, + "step": 32581 + }, + { + "epoch": 89.51098901098901, + "grad_norm": 4.593317985534668, + "learning_rate": 5.244505494505494e-06, + "loss": 0.0637, + "step": 32582 + }, + { + "epoch": 89.51373626373626, + "grad_norm": 11.227289199829102, + "learning_rate": 5.2431318681318684e-06, + "loss": 0.154, + "step": 32583 + }, + { + "epoch": 89.51648351648352, + "grad_norm": 19.05693817138672, + "learning_rate": 5.241758241758242e-06, + "loss": 0.2371, + "step": 32584 + }, + { + "epoch": 89.51923076923077, + "grad_norm": 10.012060165405273, + "learning_rate": 5.240384615384616e-06, + "loss": 0.1578, + "step": 32585 + }, + { + "epoch": 89.52197802197803, + "grad_norm": 13.697284698486328, + "learning_rate": 5.2390109890109895e-06, + "loss": 0.3381, + "step": 32586 + }, + { + "epoch": 89.52472527472527, + "grad_norm": 10.57455062866211, + "learning_rate": 5.237637362637363e-06, + "loss": 0.1704, + "step": 32587 + }, + { + "epoch": 89.52747252747253, + "grad_norm": 15.719741821289062, + "learning_rate": 5.236263736263736e-06, + "loss": 0.653, + "step": 32588 + }, + { + "epoch": 89.53021978021978, + "grad_norm": 18.138423919677734, + "learning_rate": 5.23489010989011e-06, + "loss": 0.4592, + "step": 32589 + }, + { + "epoch": 89.53296703296704, + "grad_norm": 23.07487678527832, + "learning_rate": 5.233516483516484e-06, + "loss": 0.4527, + "step": 32590 + }, + { + "epoch": 89.53571428571429, + "grad_norm": 4.125237464904785, + "learning_rate": 5.232142857142857e-06, + "loss": 0.0298, + "step": 32591 + }, + { + "epoch": 89.53846153846153, + "grad_norm": 11.894344329833984, + "learning_rate": 5.230769230769231e-06, + "loss": 0.2642, + "step": 32592 + }, + { + "epoch": 89.54120879120879, + "grad_norm": 13.004977226257324, + "learning_rate": 5.229395604395604e-06, + "loss": 0.1953, + "step": 32593 + }, + { + "epoch": 89.54395604395604, + "grad_norm": 5.946544170379639, + "learning_rate": 5.2280219780219785e-06, + "loss": 0.1379, + "step": 32594 + }, + { + "epoch": 89.5467032967033, + "grad_norm": 9.519980430603027, + "learning_rate": 5.226648351648352e-06, + "loss": 0.206, + "step": 32595 + }, + { + "epoch": 89.54945054945055, + "grad_norm": 4.6661553382873535, + "learning_rate": 5.225274725274726e-06, + "loss": 0.0558, + "step": 32596 + }, + { + "epoch": 89.5521978021978, + "grad_norm": 4.911896228790283, + "learning_rate": 5.223901098901099e-06, + "loss": 0.0434, + "step": 32597 + }, + { + "epoch": 89.55494505494505, + "grad_norm": 15.588668823242188, + "learning_rate": 5.222527472527473e-06, + "loss": 0.2623, + "step": 32598 + }, + { + "epoch": 89.5576923076923, + "grad_norm": 7.832574367523193, + "learning_rate": 5.221153846153846e-06, + "loss": 0.1221, + "step": 32599 + }, + { + "epoch": 89.56043956043956, + "grad_norm": 2.809000015258789, + "learning_rate": 5.219780219780221e-06, + "loss": 0.0288, + "step": 32600 + }, + { + "epoch": 89.56318681318682, + "grad_norm": 11.267572402954102, + "learning_rate": 5.218406593406594e-06, + "loss": 0.3326, + "step": 32601 + }, + { + "epoch": 89.56593406593407, + "grad_norm": 12.678829193115234, + "learning_rate": 5.217032967032967e-06, + "loss": 0.2422, + "step": 32602 + }, + { + "epoch": 89.56868131868131, + "grad_norm": 9.96052360534668, + "learning_rate": 5.215659340659341e-06, + "loss": 0.1519, + "step": 32603 + }, + { + "epoch": 89.57142857142857, + "grad_norm": 9.063374519348145, + "learning_rate": 5.214285714285714e-06, + "loss": 0.1584, + "step": 32604 + }, + { + "epoch": 89.57417582417582, + "grad_norm": 8.299848556518555, + "learning_rate": 5.2129120879120885e-06, + "loss": 0.1158, + "step": 32605 + }, + { + "epoch": 89.57692307692308, + "grad_norm": 9.320646286010742, + "learning_rate": 5.211538461538461e-06, + "loss": 0.112, + "step": 32606 + }, + { + "epoch": 89.57967032967034, + "grad_norm": 21.471328735351562, + "learning_rate": 5.210164835164835e-06, + "loss": 0.478, + "step": 32607 + }, + { + "epoch": 89.58241758241758, + "grad_norm": 7.6489667892456055, + "learning_rate": 5.208791208791209e-06, + "loss": 0.0807, + "step": 32608 + }, + { + "epoch": 89.58516483516483, + "grad_norm": 6.501486301422119, + "learning_rate": 5.207417582417583e-06, + "loss": 0.1555, + "step": 32609 + }, + { + "epoch": 89.58791208791209, + "grad_norm": 5.457647800445557, + "learning_rate": 5.206043956043956e-06, + "loss": 0.0945, + "step": 32610 + }, + { + "epoch": 89.59065934065934, + "grad_norm": 9.208219528198242, + "learning_rate": 5.20467032967033e-06, + "loss": 0.1267, + "step": 32611 + }, + { + "epoch": 89.5934065934066, + "grad_norm": 12.344244003295898, + "learning_rate": 5.203296703296703e-06, + "loss": 0.2906, + "step": 32612 + }, + { + "epoch": 89.59615384615384, + "grad_norm": 11.277458190917969, + "learning_rate": 5.201923076923077e-06, + "loss": 0.1435, + "step": 32613 + }, + { + "epoch": 89.5989010989011, + "grad_norm": 20.136133193969727, + "learning_rate": 5.200549450549451e-06, + "loss": 0.4846, + "step": 32614 + }, + { + "epoch": 89.60164835164835, + "grad_norm": 12.809638977050781, + "learning_rate": 5.199175824175825e-06, + "loss": 0.1519, + "step": 32615 + }, + { + "epoch": 89.6043956043956, + "grad_norm": 9.73038101196289, + "learning_rate": 5.197802197802198e-06, + "loss": 0.1747, + "step": 32616 + }, + { + "epoch": 89.60714285714286, + "grad_norm": 20.662464141845703, + "learning_rate": 5.196428571428572e-06, + "loss": 0.333, + "step": 32617 + }, + { + "epoch": 89.60989010989012, + "grad_norm": 21.62677574157715, + "learning_rate": 5.195054945054945e-06, + "loss": 0.5225, + "step": 32618 + }, + { + "epoch": 89.61263736263736, + "grad_norm": 18.311176300048828, + "learning_rate": 5.193681318681319e-06, + "loss": 0.2695, + "step": 32619 + }, + { + "epoch": 89.61538461538461, + "grad_norm": 14.313017845153809, + "learning_rate": 5.192307692307693e-06, + "loss": 0.1332, + "step": 32620 + }, + { + "epoch": 89.61813186813187, + "grad_norm": 20.901214599609375, + "learning_rate": 5.1909340659340655e-06, + "loss": 0.2806, + "step": 32621 + }, + { + "epoch": 89.62087912087912, + "grad_norm": 13.093522071838379, + "learning_rate": 5.18956043956044e-06, + "loss": 0.2052, + "step": 32622 + }, + { + "epoch": 89.62362637362638, + "grad_norm": 9.56733226776123, + "learning_rate": 5.188186813186813e-06, + "loss": 0.2027, + "step": 32623 + }, + { + "epoch": 89.62637362637362, + "grad_norm": 9.456620216369629, + "learning_rate": 5.1868131868131874e-06, + "loss": 0.213, + "step": 32624 + }, + { + "epoch": 89.62912087912088, + "grad_norm": 21.790788650512695, + "learning_rate": 5.185439560439561e-06, + "loss": 0.3622, + "step": 32625 + }, + { + "epoch": 89.63186813186813, + "grad_norm": 18.31719398498535, + "learning_rate": 5.184065934065934e-06, + "loss": 0.2992, + "step": 32626 + }, + { + "epoch": 89.63461538461539, + "grad_norm": 17.03262710571289, + "learning_rate": 5.182692307692308e-06, + "loss": 0.3503, + "step": 32627 + }, + { + "epoch": 89.63736263736264, + "grad_norm": 12.270536422729492, + "learning_rate": 5.181318681318682e-06, + "loss": 0.1302, + "step": 32628 + }, + { + "epoch": 89.64010989010988, + "grad_norm": 6.504302501678467, + "learning_rate": 5.179945054945055e-06, + "loss": 0.0436, + "step": 32629 + }, + { + "epoch": 89.64285714285714, + "grad_norm": 6.656987190246582, + "learning_rate": 5.1785714285714296e-06, + "loss": 0.1569, + "step": 32630 + }, + { + "epoch": 89.6456043956044, + "grad_norm": 15.277420043945312, + "learning_rate": 5.177197802197802e-06, + "loss": 0.4351, + "step": 32631 + }, + { + "epoch": 89.64835164835165, + "grad_norm": 17.098499298095703, + "learning_rate": 5.175824175824176e-06, + "loss": 0.3724, + "step": 32632 + }, + { + "epoch": 89.6510989010989, + "grad_norm": 9.991243362426758, + "learning_rate": 5.17445054945055e-06, + "loss": 0.2434, + "step": 32633 + }, + { + "epoch": 89.65384615384616, + "grad_norm": 18.07759666442871, + "learning_rate": 5.173076923076923e-06, + "loss": 0.6022, + "step": 32634 + }, + { + "epoch": 89.6565934065934, + "grad_norm": 8.63695240020752, + "learning_rate": 5.171703296703297e-06, + "loss": 0.0695, + "step": 32635 + }, + { + "epoch": 89.65934065934066, + "grad_norm": 6.635504245758057, + "learning_rate": 5.17032967032967e-06, + "loss": 0.14, + "step": 32636 + }, + { + "epoch": 89.66208791208791, + "grad_norm": 19.348894119262695, + "learning_rate": 5.168956043956044e-06, + "loss": 0.2196, + "step": 32637 + }, + { + "epoch": 89.66483516483517, + "grad_norm": 12.76374626159668, + "learning_rate": 5.167582417582418e-06, + "loss": 0.1385, + "step": 32638 + }, + { + "epoch": 89.66758241758242, + "grad_norm": 2.5989716053009033, + "learning_rate": 5.166208791208792e-06, + "loss": 0.0354, + "step": 32639 + }, + { + "epoch": 89.67032967032966, + "grad_norm": 8.455329895019531, + "learning_rate": 5.1648351648351645e-06, + "loss": 0.1103, + "step": 32640 + }, + { + "epoch": 89.67307692307692, + "grad_norm": 20.07447052001953, + "learning_rate": 5.163461538461539e-06, + "loss": 0.4169, + "step": 32641 + }, + { + "epoch": 89.67582417582418, + "grad_norm": 20.47115135192871, + "learning_rate": 5.162087912087912e-06, + "loss": 0.3716, + "step": 32642 + }, + { + "epoch": 89.67857142857143, + "grad_norm": 8.027440071105957, + "learning_rate": 5.160714285714286e-06, + "loss": 0.1284, + "step": 32643 + }, + { + "epoch": 89.68131868131869, + "grad_norm": 21.385236740112305, + "learning_rate": 5.15934065934066e-06, + "loss": 0.2977, + "step": 32644 + }, + { + "epoch": 89.68406593406593, + "grad_norm": 12.953581809997559, + "learning_rate": 5.157967032967033e-06, + "loss": 0.4928, + "step": 32645 + }, + { + "epoch": 89.68681318681318, + "grad_norm": 8.95053768157959, + "learning_rate": 5.156593406593407e-06, + "loss": 0.0911, + "step": 32646 + }, + { + "epoch": 89.68956043956044, + "grad_norm": 16.502408981323242, + "learning_rate": 5.155219780219781e-06, + "loss": 0.4976, + "step": 32647 + }, + { + "epoch": 89.6923076923077, + "grad_norm": 6.51194953918457, + "learning_rate": 5.153846153846154e-06, + "loss": 0.1021, + "step": 32648 + }, + { + "epoch": 89.69505494505495, + "grad_norm": 4.453144073486328, + "learning_rate": 5.152472527472528e-06, + "loss": 0.0621, + "step": 32649 + }, + { + "epoch": 89.6978021978022, + "grad_norm": 11.192972183227539, + "learning_rate": 5.151098901098901e-06, + "loss": 0.1691, + "step": 32650 + }, + { + "epoch": 89.70054945054945, + "grad_norm": 6.634974479675293, + "learning_rate": 5.1497252747252745e-06, + "loss": 0.0743, + "step": 32651 + }, + { + "epoch": 89.7032967032967, + "grad_norm": 13.445738792419434, + "learning_rate": 5.148351648351649e-06, + "loss": 0.2907, + "step": 32652 + }, + { + "epoch": 89.70604395604396, + "grad_norm": 15.991630554199219, + "learning_rate": 5.146978021978022e-06, + "loss": 0.2684, + "step": 32653 + }, + { + "epoch": 89.70879120879121, + "grad_norm": 22.47895050048828, + "learning_rate": 5.145604395604396e-06, + "loss": 0.406, + "step": 32654 + }, + { + "epoch": 89.71153846153847, + "grad_norm": 9.442703247070312, + "learning_rate": 5.144230769230769e-06, + "loss": 0.1521, + "step": 32655 + }, + { + "epoch": 89.71428571428571, + "grad_norm": 11.704850196838379, + "learning_rate": 5.142857142857143e-06, + "loss": 0.2687, + "step": 32656 + }, + { + "epoch": 89.71703296703296, + "grad_norm": 5.533562183380127, + "learning_rate": 5.141483516483517e-06, + "loss": 0.0292, + "step": 32657 + }, + { + "epoch": 89.71978021978022, + "grad_norm": 9.002917289733887, + "learning_rate": 5.140109890109891e-06, + "loss": 0.2233, + "step": 32658 + }, + { + "epoch": 89.72252747252747, + "grad_norm": 7.038732051849365, + "learning_rate": 5.138736263736264e-06, + "loss": 0.0919, + "step": 32659 + }, + { + "epoch": 89.72527472527473, + "grad_norm": 8.463774681091309, + "learning_rate": 5.137362637362638e-06, + "loss": 0.1536, + "step": 32660 + }, + { + "epoch": 89.72802197802197, + "grad_norm": 6.694606304168701, + "learning_rate": 5.135989010989011e-06, + "loss": 0.0715, + "step": 32661 + }, + { + "epoch": 89.73076923076923, + "grad_norm": 8.87706470489502, + "learning_rate": 5.1346153846153845e-06, + "loss": 0.1092, + "step": 32662 + }, + { + "epoch": 89.73351648351648, + "grad_norm": 9.950396537780762, + "learning_rate": 5.133241758241759e-06, + "loss": 0.1255, + "step": 32663 + }, + { + "epoch": 89.73626373626374, + "grad_norm": 31.30156707763672, + "learning_rate": 5.131868131868132e-06, + "loss": 0.8164, + "step": 32664 + }, + { + "epoch": 89.73901098901099, + "grad_norm": 27.354272842407227, + "learning_rate": 5.130494505494506e-06, + "loss": 0.3799, + "step": 32665 + }, + { + "epoch": 89.74175824175825, + "grad_norm": 8.541900634765625, + "learning_rate": 5.129120879120879e-06, + "loss": 0.2161, + "step": 32666 + }, + { + "epoch": 89.74450549450549, + "grad_norm": 2.834949254989624, + "learning_rate": 5.127747252747253e-06, + "loss": 0.018, + "step": 32667 + }, + { + "epoch": 89.74725274725274, + "grad_norm": 6.154713153839111, + "learning_rate": 5.126373626373627e-06, + "loss": 0.079, + "step": 32668 + }, + { + "epoch": 89.75, + "grad_norm": 8.959928512573242, + "learning_rate": 5.125e-06, + "loss": 0.1559, + "step": 32669 + }, + { + "epoch": 89.75274725274726, + "grad_norm": 5.162120819091797, + "learning_rate": 5.1236263736263735e-06, + "loss": 0.0674, + "step": 32670 + }, + { + "epoch": 89.75549450549451, + "grad_norm": 20.727651596069336, + "learning_rate": 5.122252747252748e-06, + "loss": 0.1947, + "step": 32671 + }, + { + "epoch": 89.75824175824175, + "grad_norm": 15.563849449157715, + "learning_rate": 5.120879120879121e-06, + "loss": 0.7294, + "step": 32672 + }, + { + "epoch": 89.76098901098901, + "grad_norm": 15.801077842712402, + "learning_rate": 5.119505494505495e-06, + "loss": 0.1626, + "step": 32673 + }, + { + "epoch": 89.76373626373626, + "grad_norm": 7.888123989105225, + "learning_rate": 5.118131868131868e-06, + "loss": 0.0756, + "step": 32674 + }, + { + "epoch": 89.76648351648352, + "grad_norm": 3.0663509368896484, + "learning_rate": 5.116758241758242e-06, + "loss": 0.0194, + "step": 32675 + }, + { + "epoch": 89.76923076923077, + "grad_norm": 9.56839370727539, + "learning_rate": 5.115384615384616e-06, + "loss": 0.2339, + "step": 32676 + }, + { + "epoch": 89.77197802197803, + "grad_norm": 6.072405815124512, + "learning_rate": 5.114010989010989e-06, + "loss": 0.0653, + "step": 32677 + }, + { + "epoch": 89.77472527472527, + "grad_norm": 10.988457679748535, + "learning_rate": 5.112637362637363e-06, + "loss": 0.1946, + "step": 32678 + }, + { + "epoch": 89.77747252747253, + "grad_norm": 7.990356922149658, + "learning_rate": 5.111263736263737e-06, + "loss": 0.1153, + "step": 32679 + }, + { + "epoch": 89.78021978021978, + "grad_norm": 5.690628528594971, + "learning_rate": 5.10989010989011e-06, + "loss": 0.0685, + "step": 32680 + }, + { + "epoch": 89.78296703296704, + "grad_norm": 6.436811923980713, + "learning_rate": 5.1085164835164835e-06, + "loss": 0.1165, + "step": 32681 + }, + { + "epoch": 89.78571428571429, + "grad_norm": 13.526034355163574, + "learning_rate": 5.107142857142858e-06, + "loss": 0.1667, + "step": 32682 + }, + { + "epoch": 89.78846153846153, + "grad_norm": 5.58041524887085, + "learning_rate": 5.105769230769231e-06, + "loss": 0.0853, + "step": 32683 + }, + { + "epoch": 89.79120879120879, + "grad_norm": 6.884444713592529, + "learning_rate": 5.1043956043956045e-06, + "loss": 0.1104, + "step": 32684 + }, + { + "epoch": 89.79395604395604, + "grad_norm": 13.65628433227539, + "learning_rate": 5.103021978021978e-06, + "loss": 0.1361, + "step": 32685 + }, + { + "epoch": 89.7967032967033, + "grad_norm": 2.786109209060669, + "learning_rate": 5.101648351648352e-06, + "loss": 0.0353, + "step": 32686 + }, + { + "epoch": 89.79945054945055, + "grad_norm": 2.050291061401367, + "learning_rate": 5.100274725274726e-06, + "loss": 0.0173, + "step": 32687 + }, + { + "epoch": 89.8021978021978, + "grad_norm": 24.83379554748535, + "learning_rate": 5.0989010989011e-06, + "loss": 0.6192, + "step": 32688 + }, + { + "epoch": 89.80494505494505, + "grad_norm": 7.195954322814941, + "learning_rate": 5.0975274725274724e-06, + "loss": 0.1426, + "step": 32689 + }, + { + "epoch": 89.8076923076923, + "grad_norm": 18.00765037536621, + "learning_rate": 5.096153846153847e-06, + "loss": 0.4214, + "step": 32690 + }, + { + "epoch": 89.81043956043956, + "grad_norm": 13.621343612670898, + "learning_rate": 5.09478021978022e-06, + "loss": 0.3105, + "step": 32691 + }, + { + "epoch": 89.81318681318682, + "grad_norm": 4.2295637130737305, + "learning_rate": 5.0934065934065935e-06, + "loss": 0.0399, + "step": 32692 + }, + { + "epoch": 89.81593406593407, + "grad_norm": 22.680072784423828, + "learning_rate": 5.092032967032967e-06, + "loss": 0.3602, + "step": 32693 + }, + { + "epoch": 89.81868131868131, + "grad_norm": 25.42296600341797, + "learning_rate": 5.09065934065934e-06, + "loss": 0.7956, + "step": 32694 + }, + { + "epoch": 89.82142857142857, + "grad_norm": 6.953625679016113, + "learning_rate": 5.0892857142857146e-06, + "loss": 0.0659, + "step": 32695 + }, + { + "epoch": 89.82417582417582, + "grad_norm": 14.7027587890625, + "learning_rate": 5.087912087912088e-06, + "loss": 0.231, + "step": 32696 + }, + { + "epoch": 89.82692307692308, + "grad_norm": 9.665170669555664, + "learning_rate": 5.086538461538462e-06, + "loss": 0.1627, + "step": 32697 + }, + { + "epoch": 89.82967032967034, + "grad_norm": 10.182103157043457, + "learning_rate": 5.085164835164835e-06, + "loss": 0.0779, + "step": 32698 + }, + { + "epoch": 89.83241758241758, + "grad_norm": 11.939546585083008, + "learning_rate": 5.083791208791209e-06, + "loss": 0.0898, + "step": 32699 + }, + { + "epoch": 89.83516483516483, + "grad_norm": 7.457083225250244, + "learning_rate": 5.0824175824175824e-06, + "loss": 0.1002, + "step": 32700 + }, + { + "epoch": 89.83791208791209, + "grad_norm": 3.03548526763916, + "learning_rate": 5.081043956043957e-06, + "loss": 0.022, + "step": 32701 + }, + { + "epoch": 89.84065934065934, + "grad_norm": 4.5515360832214355, + "learning_rate": 5.07967032967033e-06, + "loss": 0.0945, + "step": 32702 + }, + { + "epoch": 89.8434065934066, + "grad_norm": 5.3846259117126465, + "learning_rate": 5.0782967032967035e-06, + "loss": 0.0821, + "step": 32703 + }, + { + "epoch": 89.84615384615384, + "grad_norm": 19.626985549926758, + "learning_rate": 5.076923076923077e-06, + "loss": 0.3263, + "step": 32704 + }, + { + "epoch": 89.8489010989011, + "grad_norm": 7.153541088104248, + "learning_rate": 5.075549450549451e-06, + "loss": 0.1918, + "step": 32705 + }, + { + "epoch": 89.85164835164835, + "grad_norm": 22.996639251708984, + "learning_rate": 5.074175824175825e-06, + "loss": 0.5308, + "step": 32706 + }, + { + "epoch": 89.8543956043956, + "grad_norm": 4.636667728424072, + "learning_rate": 5.072802197802198e-06, + "loss": 0.0734, + "step": 32707 + }, + { + "epoch": 89.85714285714286, + "grad_norm": 8.475375175476074, + "learning_rate": 5.071428571428571e-06, + "loss": 0.1161, + "step": 32708 + }, + { + "epoch": 89.85989010989012, + "grad_norm": 8.863007545471191, + "learning_rate": 5.070054945054945e-06, + "loss": 0.1166, + "step": 32709 + }, + { + "epoch": 89.86263736263736, + "grad_norm": 0.8873282670974731, + "learning_rate": 5.068681318681319e-06, + "loss": 0.0119, + "step": 32710 + }, + { + "epoch": 89.86538461538461, + "grad_norm": 7.772644519805908, + "learning_rate": 5.0673076923076925e-06, + "loss": 0.2381, + "step": 32711 + }, + { + "epoch": 89.86813186813187, + "grad_norm": 18.075984954833984, + "learning_rate": 5.065934065934067e-06, + "loss": 0.4382, + "step": 32712 + }, + { + "epoch": 89.87087912087912, + "grad_norm": 8.255614280700684, + "learning_rate": 5.064560439560439e-06, + "loss": 0.0475, + "step": 32713 + }, + { + "epoch": 89.87362637362638, + "grad_norm": 18.376989364624023, + "learning_rate": 5.0631868131868135e-06, + "loss": 0.1948, + "step": 32714 + }, + { + "epoch": 89.87637362637362, + "grad_norm": 4.281146049499512, + "learning_rate": 5.061813186813187e-06, + "loss": 0.0527, + "step": 32715 + }, + { + "epoch": 89.87912087912088, + "grad_norm": 1.462265133857727, + "learning_rate": 5.060439560439561e-06, + "loss": 0.0128, + "step": 32716 + }, + { + "epoch": 89.88186813186813, + "grad_norm": 19.074678421020508, + "learning_rate": 5.059065934065935e-06, + "loss": 0.3853, + "step": 32717 + }, + { + "epoch": 89.88461538461539, + "grad_norm": 8.154446601867676, + "learning_rate": 5.057692307692308e-06, + "loss": 0.1149, + "step": 32718 + }, + { + "epoch": 89.88736263736264, + "grad_norm": 14.022209167480469, + "learning_rate": 5.056318681318681e-06, + "loss": 0.1826, + "step": 32719 + }, + { + "epoch": 89.89010989010988, + "grad_norm": 16.074846267700195, + "learning_rate": 5.054945054945056e-06, + "loss": 0.2289, + "step": 32720 + }, + { + "epoch": 89.89285714285714, + "grad_norm": 3.758857488632202, + "learning_rate": 5.053571428571429e-06, + "loss": 0.0515, + "step": 32721 + }, + { + "epoch": 89.8956043956044, + "grad_norm": 18.0717716217041, + "learning_rate": 5.0521978021978025e-06, + "loss": 0.3427, + "step": 32722 + }, + { + "epoch": 89.89835164835165, + "grad_norm": 2.14327335357666, + "learning_rate": 5.050824175824176e-06, + "loss": 0.0302, + "step": 32723 + }, + { + "epoch": 89.9010989010989, + "grad_norm": 3.5968849658966064, + "learning_rate": 5.049450549450549e-06, + "loss": 0.0385, + "step": 32724 + }, + { + "epoch": 89.90384615384616, + "grad_norm": 10.474114418029785, + "learning_rate": 5.0480769230769235e-06, + "loss": 0.2257, + "step": 32725 + }, + { + "epoch": 89.9065934065934, + "grad_norm": 7.773931503295898, + "learning_rate": 5.046703296703297e-06, + "loss": 0.1709, + "step": 32726 + }, + { + "epoch": 89.90934065934066, + "grad_norm": 6.379452228546143, + "learning_rate": 5.04532967032967e-06, + "loss": 0.1401, + "step": 32727 + }, + { + "epoch": 89.91208791208791, + "grad_norm": 17.858760833740234, + "learning_rate": 5.043956043956044e-06, + "loss": 0.4346, + "step": 32728 + }, + { + "epoch": 89.91483516483517, + "grad_norm": 11.237058639526367, + "learning_rate": 5.042582417582418e-06, + "loss": 0.3305, + "step": 32729 + }, + { + "epoch": 89.91758241758242, + "grad_norm": 27.645814895629883, + "learning_rate": 5.0412087912087914e-06, + "loss": 0.5488, + "step": 32730 + }, + { + "epoch": 89.92032967032966, + "grad_norm": 5.846032619476318, + "learning_rate": 5.039835164835166e-06, + "loss": 0.0908, + "step": 32731 + }, + { + "epoch": 89.92307692307692, + "grad_norm": 13.844992637634277, + "learning_rate": 5.038461538461538e-06, + "loss": 0.4184, + "step": 32732 + }, + { + "epoch": 89.92582417582418, + "grad_norm": 7.921014785766602, + "learning_rate": 5.0370879120879125e-06, + "loss": 0.147, + "step": 32733 + }, + { + "epoch": 89.92857142857143, + "grad_norm": 7.807804584503174, + "learning_rate": 5.035714285714286e-06, + "loss": 0.1076, + "step": 32734 + }, + { + "epoch": 89.93131868131869, + "grad_norm": 13.57774543762207, + "learning_rate": 5.03434065934066e-06, + "loss": 0.1751, + "step": 32735 + }, + { + "epoch": 89.93406593406593, + "grad_norm": 0.5652915835380554, + "learning_rate": 5.0329670329670336e-06, + "loss": 0.0063, + "step": 32736 + }, + { + "epoch": 89.93681318681318, + "grad_norm": 13.69231128692627, + "learning_rate": 5.031593406593407e-06, + "loss": 0.3783, + "step": 32737 + }, + { + "epoch": 89.93956043956044, + "grad_norm": 5.084652900695801, + "learning_rate": 5.03021978021978e-06, + "loss": 0.0503, + "step": 32738 + }, + { + "epoch": 89.9423076923077, + "grad_norm": 10.12621021270752, + "learning_rate": 5.028846153846154e-06, + "loss": 0.1477, + "step": 32739 + }, + { + "epoch": 89.94505494505495, + "grad_norm": 13.97375202178955, + "learning_rate": 5.027472527472528e-06, + "loss": 0.3217, + "step": 32740 + }, + { + "epoch": 89.9478021978022, + "grad_norm": 21.519546508789062, + "learning_rate": 5.0260989010989014e-06, + "loss": 0.6056, + "step": 32741 + }, + { + "epoch": 89.95054945054945, + "grad_norm": 6.819830894470215, + "learning_rate": 5.024725274725275e-06, + "loss": 0.0921, + "step": 32742 + }, + { + "epoch": 89.9532967032967, + "grad_norm": 16.362573623657227, + "learning_rate": 5.023351648351648e-06, + "loss": 0.1875, + "step": 32743 + }, + { + "epoch": 89.95604395604396, + "grad_norm": 15.050623893737793, + "learning_rate": 5.0219780219780225e-06, + "loss": 0.2619, + "step": 32744 + }, + { + "epoch": 89.95879120879121, + "grad_norm": 18.935972213745117, + "learning_rate": 5.020604395604396e-06, + "loss": 0.2796, + "step": 32745 + }, + { + "epoch": 89.96153846153847, + "grad_norm": 10.917959213256836, + "learning_rate": 5.019230769230769e-06, + "loss": 0.0929, + "step": 32746 + }, + { + "epoch": 89.96428571428571, + "grad_norm": 6.241260051727295, + "learning_rate": 5.017857142857143e-06, + "loss": 0.0859, + "step": 32747 + }, + { + "epoch": 89.96703296703296, + "grad_norm": 18.596420288085938, + "learning_rate": 5.016483516483517e-06, + "loss": 0.3284, + "step": 32748 + }, + { + "epoch": 89.96978021978022, + "grad_norm": 3.8115384578704834, + "learning_rate": 5.01510989010989e-06, + "loss": 0.0418, + "step": 32749 + }, + { + "epoch": 89.97252747252747, + "grad_norm": 8.663274765014648, + "learning_rate": 5.013736263736264e-06, + "loss": 0.2007, + "step": 32750 + }, + { + "epoch": 89.97527472527473, + "grad_norm": 9.04207706451416, + "learning_rate": 5.012362637362637e-06, + "loss": 0.205, + "step": 32751 + }, + { + "epoch": 89.97802197802197, + "grad_norm": 5.7750983238220215, + "learning_rate": 5.0109890109890115e-06, + "loss": 0.0905, + "step": 32752 + }, + { + "epoch": 89.98076923076923, + "grad_norm": 16.430561065673828, + "learning_rate": 5.009615384615385e-06, + "loss": 0.3138, + "step": 32753 + }, + { + "epoch": 89.98351648351648, + "grad_norm": 6.77622127532959, + "learning_rate": 5.008241758241758e-06, + "loss": 0.0593, + "step": 32754 + }, + { + "epoch": 89.98626373626374, + "grad_norm": 17.630741119384766, + "learning_rate": 5.0068681318681325e-06, + "loss": 0.4589, + "step": 32755 + }, + { + "epoch": 89.98901098901099, + "grad_norm": 21.197965621948242, + "learning_rate": 5.005494505494505e-06, + "loss": 0.3605, + "step": 32756 + }, + { + "epoch": 89.99175824175825, + "grad_norm": 16.34840202331543, + "learning_rate": 5.004120879120879e-06, + "loss": 0.4116, + "step": 32757 + }, + { + "epoch": 89.99450549450549, + "grad_norm": 13.120607376098633, + "learning_rate": 5.002747252747253e-06, + "loss": 0.2029, + "step": 32758 + }, + { + "epoch": 89.99725274725274, + "grad_norm": 5.39251708984375, + "learning_rate": 5.001373626373627e-06, + "loss": 0.0646, + "step": 32759 + }, + { + "epoch": 90.0, + "grad_norm": 2.0977840423583984, + "learning_rate": 5e-06, + "loss": 0.0148, + "step": 32760 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.8622589531680441, + "eval_f1": 0.8675144914840647, + "eval_f1_DuraRiadoRio_64x64": 0.8989547038327527, + "eval_f1_Mole_64x64": 0.8880597014925373, + "eval_f1_Quebrado_64x64": 0.8489208633093526, + "eval_f1_RiadoRio_64x64": 0.764367816091954, + "eval_f1_RioFechado_64x64": 0.9372693726937269, + "eval_loss": 0.5000524520874023, + "eval_precision": 0.88108797779611, + "eval_precision_DuraRiadoRio_64x64": 0.9020979020979021, + "eval_precision_Mole_64x64": 0.9596774193548387, + "eval_precision_Quebrado_64x64": 0.8805970149253731, + "eval_precision_RiadoRio_64x64": 0.6785714285714286, + "eval_precision_RioFechado_64x64": 0.9844961240310077, + "eval_recall": 0.8622065727699532, + "eval_recall_DuraRiadoRio_64x64": 0.8958333333333334, + "eval_recall_Mole_64x64": 0.8263888888888888, + "eval_recall_Quebrado_64x64": 0.8194444444444444, + "eval_recall_RiadoRio_64x64": 0.875, + "eval_recall_RioFechado_64x64": 0.8943661971830986, + "eval_runtime": 1.7555, + "eval_samples_per_second": 413.549, + "eval_steps_per_second": 26.203, + "step": 32760 + }, + { + "epoch": 90.00274725274726, + "grad_norm": 14.430015563964844, + "learning_rate": 4.998626373626374e-06, + "loss": 0.4109, + "step": 32761 + }, + { + "epoch": 90.00549450549451, + "grad_norm": 8.482211112976074, + "learning_rate": 4.997252747252747e-06, + "loss": 0.1009, + "step": 32762 + }, + { + "epoch": 90.00824175824175, + "grad_norm": 3.135866641998291, + "learning_rate": 4.9958791208791215e-06, + "loss": 0.0308, + "step": 32763 + }, + { + "epoch": 90.01098901098901, + "grad_norm": 5.367469787597656, + "learning_rate": 4.994505494505495e-06, + "loss": 0.063, + "step": 32764 + }, + { + "epoch": 90.01373626373626, + "grad_norm": 13.520844459533691, + "learning_rate": 4.993131868131868e-06, + "loss": 0.1968, + "step": 32765 + }, + { + "epoch": 90.01648351648352, + "grad_norm": 4.121726036071777, + "learning_rate": 4.991758241758242e-06, + "loss": 0.065, + "step": 32766 + }, + { + "epoch": 90.01923076923077, + "grad_norm": 13.770544052124023, + "learning_rate": 4.990384615384615e-06, + "loss": 0.2602, + "step": 32767 + }, + { + "epoch": 90.02197802197803, + "grad_norm": 1.3652565479278564, + "learning_rate": 4.989010989010989e-06, + "loss": 0.0227, + "step": 32768 + }, + { + "epoch": 90.02472527472527, + "grad_norm": 10.317119598388672, + "learning_rate": 4.987637362637363e-06, + "loss": 0.4354, + "step": 32769 + }, + { + "epoch": 90.02747252747253, + "grad_norm": 6.619006156921387, + "learning_rate": 4.986263736263737e-06, + "loss": 0.1208, + "step": 32770 + }, + { + "epoch": 90.03021978021978, + "grad_norm": 5.992372512817383, + "learning_rate": 4.98489010989011e-06, + "loss": 0.0749, + "step": 32771 + }, + { + "epoch": 90.03296703296704, + "grad_norm": 23.904664993286133, + "learning_rate": 4.983516483516484e-06, + "loss": 0.4676, + "step": 32772 + }, + { + "epoch": 90.03571428571429, + "grad_norm": 12.758367538452148, + "learning_rate": 4.982142857142857e-06, + "loss": 0.2806, + "step": 32773 + }, + { + "epoch": 90.03846153846153, + "grad_norm": 3.5857911109924316, + "learning_rate": 4.9807692307692315e-06, + "loss": 0.0285, + "step": 32774 + }, + { + "epoch": 90.04120879120879, + "grad_norm": 20.73973846435547, + "learning_rate": 4.979395604395604e-06, + "loss": 0.3165, + "step": 32775 + }, + { + "epoch": 90.04395604395604, + "grad_norm": 17.359027862548828, + "learning_rate": 4.978021978021978e-06, + "loss": 0.2294, + "step": 32776 + }, + { + "epoch": 90.0467032967033, + "grad_norm": 14.049896240234375, + "learning_rate": 4.976648351648352e-06, + "loss": 0.308, + "step": 32777 + }, + { + "epoch": 90.04945054945055, + "grad_norm": 4.455606460571289, + "learning_rate": 4.975274725274726e-06, + "loss": 0.0631, + "step": 32778 + }, + { + "epoch": 90.0521978021978, + "grad_norm": 3.4569497108459473, + "learning_rate": 4.973901098901099e-06, + "loss": 0.0686, + "step": 32779 + }, + { + "epoch": 90.05494505494505, + "grad_norm": 9.356689453125, + "learning_rate": 4.972527472527473e-06, + "loss": 0.2046, + "step": 32780 + }, + { + "epoch": 90.0576923076923, + "grad_norm": 12.590656280517578, + "learning_rate": 4.971153846153846e-06, + "loss": 0.2367, + "step": 32781 + }, + { + "epoch": 90.06043956043956, + "grad_norm": 12.25792121887207, + "learning_rate": 4.96978021978022e-06, + "loss": 0.2545, + "step": 32782 + }, + { + "epoch": 90.06318681318682, + "grad_norm": 9.352471351623535, + "learning_rate": 4.968406593406594e-06, + "loss": 0.0781, + "step": 32783 + }, + { + "epoch": 90.06593406593407, + "grad_norm": 1.3697845935821533, + "learning_rate": 4.967032967032967e-06, + "loss": 0.0166, + "step": 32784 + }, + { + "epoch": 90.06868131868131, + "grad_norm": 13.79068660736084, + "learning_rate": 4.965659340659341e-06, + "loss": 0.2436, + "step": 32785 + }, + { + "epoch": 90.07142857142857, + "grad_norm": 9.357232093811035, + "learning_rate": 4.964285714285714e-06, + "loss": 0.2196, + "step": 32786 + }, + { + "epoch": 90.07417582417582, + "grad_norm": 14.692902565002441, + "learning_rate": 4.962912087912088e-06, + "loss": 0.3509, + "step": 32787 + }, + { + "epoch": 90.07692307692308, + "grad_norm": 11.46784496307373, + "learning_rate": 4.961538461538462e-06, + "loss": 0.3104, + "step": 32788 + }, + { + "epoch": 90.07967032967034, + "grad_norm": 5.8329176902771, + "learning_rate": 4.960164835164836e-06, + "loss": 0.0667, + "step": 32789 + }, + { + "epoch": 90.08241758241758, + "grad_norm": 20.26093292236328, + "learning_rate": 4.9587912087912085e-06, + "loss": 0.3396, + "step": 32790 + }, + { + "epoch": 90.08516483516483, + "grad_norm": 15.400221824645996, + "learning_rate": 4.957417582417583e-06, + "loss": 0.3817, + "step": 32791 + }, + { + "epoch": 90.08791208791209, + "grad_norm": 19.30956268310547, + "learning_rate": 4.956043956043956e-06, + "loss": 0.2912, + "step": 32792 + }, + { + "epoch": 90.09065934065934, + "grad_norm": 8.758572578430176, + "learning_rate": 4.9546703296703305e-06, + "loss": 0.1758, + "step": 32793 + }, + { + "epoch": 90.0934065934066, + "grad_norm": 19.661882400512695, + "learning_rate": 4.953296703296704e-06, + "loss": 0.5536, + "step": 32794 + }, + { + "epoch": 90.09615384615384, + "grad_norm": 16.60549545288086, + "learning_rate": 4.951923076923077e-06, + "loss": 0.1636, + "step": 32795 + }, + { + "epoch": 90.0989010989011, + "grad_norm": 20.62625503540039, + "learning_rate": 4.950549450549451e-06, + "loss": 0.3909, + "step": 32796 + }, + { + "epoch": 90.10164835164835, + "grad_norm": 13.486844062805176, + "learning_rate": 4.949175824175824e-06, + "loss": 0.4195, + "step": 32797 + }, + { + "epoch": 90.1043956043956, + "grad_norm": 17.190725326538086, + "learning_rate": 4.947802197802198e-06, + "loss": 0.332, + "step": 32798 + }, + { + "epoch": 90.10714285714286, + "grad_norm": 19.51032829284668, + "learning_rate": 4.946428571428572e-06, + "loss": 0.317, + "step": 32799 + }, + { + "epoch": 90.10989010989012, + "grad_norm": 29.655155181884766, + "learning_rate": 4.945054945054945e-06, + "loss": 0.3237, + "step": 32800 + }, + { + "epoch": 90.11263736263736, + "grad_norm": 12.320075988769531, + "learning_rate": 4.9436813186813186e-06, + "loss": 0.2374, + "step": 32801 + }, + { + "epoch": 90.11538461538461, + "grad_norm": 11.672796249389648, + "learning_rate": 4.942307692307693e-06, + "loss": 0.1525, + "step": 32802 + }, + { + "epoch": 90.11813186813187, + "grad_norm": 14.649063110351562, + "learning_rate": 4.940934065934066e-06, + "loss": 0.3649, + "step": 32803 + }, + { + "epoch": 90.12087912087912, + "grad_norm": 5.192213535308838, + "learning_rate": 4.93956043956044e-06, + "loss": 0.0911, + "step": 32804 + }, + { + "epoch": 90.12362637362638, + "grad_norm": 7.815594673156738, + "learning_rate": 4.938186813186813e-06, + "loss": 0.1471, + "step": 32805 + }, + { + "epoch": 90.12637362637362, + "grad_norm": 5.128730773925781, + "learning_rate": 4.936813186813187e-06, + "loss": 0.0899, + "step": 32806 + }, + { + "epoch": 90.12912087912088, + "grad_norm": 11.547297477722168, + "learning_rate": 4.935439560439561e-06, + "loss": 0.2189, + "step": 32807 + }, + { + "epoch": 90.13186813186813, + "grad_norm": 23.13691520690918, + "learning_rate": 4.934065934065935e-06, + "loss": 0.6102, + "step": 32808 + }, + { + "epoch": 90.13461538461539, + "grad_norm": 23.929790496826172, + "learning_rate": 4.9326923076923075e-06, + "loss": 0.6635, + "step": 32809 + }, + { + "epoch": 90.13736263736264, + "grad_norm": 28.558996200561523, + "learning_rate": 4.931318681318682e-06, + "loss": 0.6006, + "step": 32810 + }, + { + "epoch": 90.14010989010988, + "grad_norm": 7.769251823425293, + "learning_rate": 4.929945054945055e-06, + "loss": 0.0737, + "step": 32811 + }, + { + "epoch": 90.14285714285714, + "grad_norm": 2.0984878540039062, + "learning_rate": 4.9285714285714286e-06, + "loss": 0.0277, + "step": 32812 + }, + { + "epoch": 90.1456043956044, + "grad_norm": 13.427484512329102, + "learning_rate": 4.927197802197803e-06, + "loss": 0.2426, + "step": 32813 + }, + { + "epoch": 90.14835164835165, + "grad_norm": 11.969902992248535, + "learning_rate": 4.925824175824175e-06, + "loss": 0.2948, + "step": 32814 + }, + { + "epoch": 90.1510989010989, + "grad_norm": 4.076528072357178, + "learning_rate": 4.92445054945055e-06, + "loss": 0.0642, + "step": 32815 + }, + { + "epoch": 90.15384615384616, + "grad_norm": 25.419639587402344, + "learning_rate": 4.923076923076923e-06, + "loss": 0.6029, + "step": 32816 + }, + { + "epoch": 90.1565934065934, + "grad_norm": 18.709964752197266, + "learning_rate": 4.921703296703297e-06, + "loss": 0.3138, + "step": 32817 + }, + { + "epoch": 90.15934065934066, + "grad_norm": 8.00913143157959, + "learning_rate": 4.920329670329671e-06, + "loss": 0.0605, + "step": 32818 + }, + { + "epoch": 90.16208791208791, + "grad_norm": 12.884906768798828, + "learning_rate": 4.918956043956044e-06, + "loss": 0.1944, + "step": 32819 + }, + { + "epoch": 90.16483516483517, + "grad_norm": 5.917790412902832, + "learning_rate": 4.9175824175824175e-06, + "loss": 0.0498, + "step": 32820 + }, + { + "epoch": 90.16758241758242, + "grad_norm": 16.767627716064453, + "learning_rate": 4.916208791208792e-06, + "loss": 0.3355, + "step": 32821 + }, + { + "epoch": 90.17032967032966, + "grad_norm": 23.121864318847656, + "learning_rate": 4.914835164835165e-06, + "loss": 0.3012, + "step": 32822 + }, + { + "epoch": 90.17307692307692, + "grad_norm": 21.28803825378418, + "learning_rate": 4.913461538461539e-06, + "loss": 0.1966, + "step": 32823 + }, + { + "epoch": 90.17582417582418, + "grad_norm": 12.450121879577637, + "learning_rate": 4.912087912087912e-06, + "loss": 0.1923, + "step": 32824 + }, + { + "epoch": 90.17857142857143, + "grad_norm": 8.521897315979004, + "learning_rate": 4.910714285714286e-06, + "loss": 0.0765, + "step": 32825 + }, + { + "epoch": 90.18131868131869, + "grad_norm": 8.741480827331543, + "learning_rate": 4.90934065934066e-06, + "loss": 0.1662, + "step": 32826 + }, + { + "epoch": 90.18406593406593, + "grad_norm": 21.54202651977539, + "learning_rate": 4.907967032967033e-06, + "loss": 0.3894, + "step": 32827 + }, + { + "epoch": 90.18681318681318, + "grad_norm": 13.561059951782227, + "learning_rate": 4.906593406593407e-06, + "loss": 0.192, + "step": 32828 + }, + { + "epoch": 90.18956043956044, + "grad_norm": 13.907316207885742, + "learning_rate": 4.90521978021978e-06, + "loss": 0.1963, + "step": 32829 + }, + { + "epoch": 90.1923076923077, + "grad_norm": 10.987053871154785, + "learning_rate": 4.903846153846154e-06, + "loss": 0.1845, + "step": 32830 + }, + { + "epoch": 90.19505494505495, + "grad_norm": 6.781715393066406, + "learning_rate": 4.9024725274725275e-06, + "loss": 0.0836, + "step": 32831 + }, + { + "epoch": 90.1978021978022, + "grad_norm": 5.8358845710754395, + "learning_rate": 4.901098901098902e-06, + "loss": 0.1082, + "step": 32832 + }, + { + "epoch": 90.20054945054945, + "grad_norm": 13.391219139099121, + "learning_rate": 4.899725274725274e-06, + "loss": 0.3213, + "step": 32833 + }, + { + "epoch": 90.2032967032967, + "grad_norm": 7.752917289733887, + "learning_rate": 4.898351648351649e-06, + "loss": 0.121, + "step": 32834 + }, + { + "epoch": 90.20604395604396, + "grad_norm": 5.413966655731201, + "learning_rate": 4.896978021978022e-06, + "loss": 0.0791, + "step": 32835 + }, + { + "epoch": 90.20879120879121, + "grad_norm": 26.56465721130371, + "learning_rate": 4.895604395604396e-06, + "loss": 0.5101, + "step": 32836 + }, + { + "epoch": 90.21153846153847, + "grad_norm": 18.142902374267578, + "learning_rate": 4.89423076923077e-06, + "loss": 0.3216, + "step": 32837 + }, + { + "epoch": 90.21428571428571, + "grad_norm": 10.438087463378906, + "learning_rate": 4.892857142857143e-06, + "loss": 0.1417, + "step": 32838 + }, + { + "epoch": 90.21703296703296, + "grad_norm": 20.080585479736328, + "learning_rate": 4.8914835164835165e-06, + "loss": 0.4958, + "step": 32839 + }, + { + "epoch": 90.21978021978022, + "grad_norm": 14.269461631774902, + "learning_rate": 4.89010989010989e-06, + "loss": 0.2067, + "step": 32840 + }, + { + "epoch": 90.22252747252747, + "grad_norm": 1.5093824863433838, + "learning_rate": 4.888736263736264e-06, + "loss": 0.017, + "step": 32841 + }, + { + "epoch": 90.22527472527473, + "grad_norm": 23.511938095092773, + "learning_rate": 4.8873626373626376e-06, + "loss": 0.6513, + "step": 32842 + }, + { + "epoch": 90.22802197802197, + "grad_norm": 3.00726318359375, + "learning_rate": 4.885989010989011e-06, + "loss": 0.0419, + "step": 32843 + }, + { + "epoch": 90.23076923076923, + "grad_norm": 28.832551956176758, + "learning_rate": 4.884615384615384e-06, + "loss": 0.6909, + "step": 32844 + }, + { + "epoch": 90.23351648351648, + "grad_norm": 8.107301712036133, + "learning_rate": 4.883241758241759e-06, + "loss": 0.1126, + "step": 32845 + }, + { + "epoch": 90.23626373626374, + "grad_norm": 15.359733581542969, + "learning_rate": 4.881868131868132e-06, + "loss": 0.6032, + "step": 32846 + }, + { + "epoch": 90.23901098901099, + "grad_norm": 9.800042152404785, + "learning_rate": 4.880494505494506e-06, + "loss": 0.0977, + "step": 32847 + }, + { + "epoch": 90.24175824175825, + "grad_norm": 3.4134771823883057, + "learning_rate": 4.879120879120879e-06, + "loss": 0.0371, + "step": 32848 + }, + { + "epoch": 90.24450549450549, + "grad_norm": 15.24282455444336, + "learning_rate": 4.877747252747253e-06, + "loss": 0.2275, + "step": 32849 + }, + { + "epoch": 90.24725274725274, + "grad_norm": 4.417858600616455, + "learning_rate": 4.8763736263736265e-06, + "loss": 0.0748, + "step": 32850 + }, + { + "epoch": 90.25, + "grad_norm": 17.55755043029785, + "learning_rate": 4.875000000000001e-06, + "loss": 0.2223, + "step": 32851 + }, + { + "epoch": 90.25274725274726, + "grad_norm": 8.019024848937988, + "learning_rate": 4.873626373626374e-06, + "loss": 0.0977, + "step": 32852 + }, + { + "epoch": 90.25549450549451, + "grad_norm": 11.946748733520508, + "learning_rate": 4.8722527472527476e-06, + "loss": 0.1702, + "step": 32853 + }, + { + "epoch": 90.25824175824175, + "grad_norm": 2.848994493484497, + "learning_rate": 4.870879120879121e-06, + "loss": 0.0422, + "step": 32854 + }, + { + "epoch": 90.26098901098901, + "grad_norm": 6.49729585647583, + "learning_rate": 4.869505494505494e-06, + "loss": 0.0402, + "step": 32855 + }, + { + "epoch": 90.26373626373626, + "grad_norm": 10.560138702392578, + "learning_rate": 4.868131868131869e-06, + "loss": 0.0606, + "step": 32856 + }, + { + "epoch": 90.26648351648352, + "grad_norm": 4.4045796394348145, + "learning_rate": 4.866758241758242e-06, + "loss": 0.087, + "step": 32857 + }, + { + "epoch": 90.26923076923077, + "grad_norm": 13.506778717041016, + "learning_rate": 4.8653846153846155e-06, + "loss": 0.2914, + "step": 32858 + }, + { + "epoch": 90.27197802197803, + "grad_norm": 9.42805004119873, + "learning_rate": 4.864010989010989e-06, + "loss": 0.1977, + "step": 32859 + }, + { + "epoch": 90.27472527472527, + "grad_norm": 11.703226089477539, + "learning_rate": 4.862637362637363e-06, + "loss": 0.2015, + "step": 32860 + }, + { + "epoch": 90.27747252747253, + "grad_norm": 14.407105445861816, + "learning_rate": 4.8612637362637365e-06, + "loss": 0.3647, + "step": 32861 + }, + { + "epoch": 90.28021978021978, + "grad_norm": 8.050097465515137, + "learning_rate": 4.85989010989011e-06, + "loss": 0.1451, + "step": 32862 + }, + { + "epoch": 90.28296703296704, + "grad_norm": 10.874382972717285, + "learning_rate": 4.858516483516483e-06, + "loss": 0.2293, + "step": 32863 + }, + { + "epoch": 90.28571428571429, + "grad_norm": 7.733025550842285, + "learning_rate": 4.857142857142858e-06, + "loss": 0.1188, + "step": 32864 + }, + { + "epoch": 90.28846153846153, + "grad_norm": 4.303512096405029, + "learning_rate": 4.855769230769231e-06, + "loss": 0.0554, + "step": 32865 + }, + { + "epoch": 90.29120879120879, + "grad_norm": 10.835576057434082, + "learning_rate": 4.854395604395605e-06, + "loss": 0.113, + "step": 32866 + }, + { + "epoch": 90.29395604395604, + "grad_norm": 6.795154094696045, + "learning_rate": 4.853021978021978e-06, + "loss": 0.0765, + "step": 32867 + }, + { + "epoch": 90.2967032967033, + "grad_norm": 29.88027572631836, + "learning_rate": 4.851648351648352e-06, + "loss": 1.1698, + "step": 32868 + }, + { + "epoch": 90.29945054945055, + "grad_norm": 4.951770305633545, + "learning_rate": 4.8502747252747255e-06, + "loss": 0.0526, + "step": 32869 + }, + { + "epoch": 90.3021978021978, + "grad_norm": 6.760656833648682, + "learning_rate": 4.848901098901099e-06, + "loss": 0.0496, + "step": 32870 + }, + { + "epoch": 90.30494505494505, + "grad_norm": 10.793121337890625, + "learning_rate": 4.847527472527473e-06, + "loss": 0.1646, + "step": 32871 + }, + { + "epoch": 90.3076923076923, + "grad_norm": 14.671550750732422, + "learning_rate": 4.846153846153846e-06, + "loss": 0.3507, + "step": 32872 + }, + { + "epoch": 90.31043956043956, + "grad_norm": 2.815471887588501, + "learning_rate": 4.84478021978022e-06, + "loss": 0.0355, + "step": 32873 + }, + { + "epoch": 90.31318681318682, + "grad_norm": 17.363149642944336, + "learning_rate": 4.843406593406593e-06, + "loss": 0.1851, + "step": 32874 + }, + { + "epoch": 90.31593406593407, + "grad_norm": 21.38909339904785, + "learning_rate": 4.842032967032968e-06, + "loss": 0.5293, + "step": 32875 + }, + { + "epoch": 90.31868131868131, + "grad_norm": 16.942405700683594, + "learning_rate": 4.840659340659341e-06, + "loss": 0.3839, + "step": 32876 + }, + { + "epoch": 90.32142857142857, + "grad_norm": 13.802386283874512, + "learning_rate": 4.839285714285714e-06, + "loss": 0.181, + "step": 32877 + }, + { + "epoch": 90.32417582417582, + "grad_norm": 7.791040897369385, + "learning_rate": 4.837912087912088e-06, + "loss": 0.1968, + "step": 32878 + }, + { + "epoch": 90.32692307692308, + "grad_norm": 9.146533966064453, + "learning_rate": 4.836538461538462e-06, + "loss": 0.1206, + "step": 32879 + }, + { + "epoch": 90.32967032967034, + "grad_norm": 11.348424911499023, + "learning_rate": 4.8351648351648355e-06, + "loss": 0.1967, + "step": 32880 + }, + { + "epoch": 90.33241758241758, + "grad_norm": 5.230147838592529, + "learning_rate": 4.83379120879121e-06, + "loss": 0.0824, + "step": 32881 + }, + { + "epoch": 90.33516483516483, + "grad_norm": 8.454610824584961, + "learning_rate": 4.832417582417582e-06, + "loss": 0.1547, + "step": 32882 + }, + { + "epoch": 90.33791208791209, + "grad_norm": 18.539430618286133, + "learning_rate": 4.8310439560439566e-06, + "loss": 0.2546, + "step": 32883 + }, + { + "epoch": 90.34065934065934, + "grad_norm": 17.567873001098633, + "learning_rate": 4.82967032967033e-06, + "loss": 0.3128, + "step": 32884 + }, + { + "epoch": 90.3434065934066, + "grad_norm": 8.074982643127441, + "learning_rate": 4.828296703296703e-06, + "loss": 0.1279, + "step": 32885 + }, + { + "epoch": 90.34615384615384, + "grad_norm": 10.521867752075195, + "learning_rate": 4.826923076923078e-06, + "loss": 0.3353, + "step": 32886 + }, + { + "epoch": 90.3489010989011, + "grad_norm": 12.410567283630371, + "learning_rate": 4.82554945054945e-06, + "loss": 0.162, + "step": 32887 + }, + { + "epoch": 90.35164835164835, + "grad_norm": 21.282094955444336, + "learning_rate": 4.8241758241758244e-06, + "loss": 0.263, + "step": 32888 + }, + { + "epoch": 90.3543956043956, + "grad_norm": 4.954302787780762, + "learning_rate": 4.822802197802198e-06, + "loss": 0.0419, + "step": 32889 + }, + { + "epoch": 90.35714285714286, + "grad_norm": 6.301264762878418, + "learning_rate": 4.821428571428572e-06, + "loss": 0.0566, + "step": 32890 + }, + { + "epoch": 90.35989010989012, + "grad_norm": 12.171038627624512, + "learning_rate": 4.820054945054945e-06, + "loss": 0.1393, + "step": 32891 + }, + { + "epoch": 90.36263736263736, + "grad_norm": 15.86115550994873, + "learning_rate": 4.818681318681319e-06, + "loss": 0.4177, + "step": 32892 + }, + { + "epoch": 90.36538461538461, + "grad_norm": 9.550786972045898, + "learning_rate": 4.817307692307692e-06, + "loss": 0.1083, + "step": 32893 + }, + { + "epoch": 90.36813186813187, + "grad_norm": 16.375619888305664, + "learning_rate": 4.8159340659340666e-06, + "loss": 0.3185, + "step": 32894 + }, + { + "epoch": 90.37087912087912, + "grad_norm": 11.756714820861816, + "learning_rate": 4.81456043956044e-06, + "loss": 0.1816, + "step": 32895 + }, + { + "epoch": 90.37362637362638, + "grad_norm": 4.1989898681640625, + "learning_rate": 4.813186813186813e-06, + "loss": 0.0353, + "step": 32896 + }, + { + "epoch": 90.37637362637362, + "grad_norm": 5.983080863952637, + "learning_rate": 4.811813186813187e-06, + "loss": 0.0851, + "step": 32897 + }, + { + "epoch": 90.37912087912088, + "grad_norm": 16.47395896911621, + "learning_rate": 4.810439560439561e-06, + "loss": 0.3575, + "step": 32898 + }, + { + "epoch": 90.38186813186813, + "grad_norm": 21.473979949951172, + "learning_rate": 4.8090659340659345e-06, + "loss": 0.4507, + "step": 32899 + }, + { + "epoch": 90.38461538461539, + "grad_norm": 13.183109283447266, + "learning_rate": 4.807692307692308e-06, + "loss": 0.2574, + "step": 32900 + }, + { + "epoch": 90.38736263736264, + "grad_norm": 14.744168281555176, + "learning_rate": 4.806318681318681e-06, + "loss": 0.2581, + "step": 32901 + }, + { + "epoch": 90.39010989010988, + "grad_norm": 25.82952308654785, + "learning_rate": 4.804945054945055e-06, + "loss": 0.4245, + "step": 32902 + }, + { + "epoch": 90.39285714285714, + "grad_norm": 8.472684860229492, + "learning_rate": 4.803571428571429e-06, + "loss": 0.126, + "step": 32903 + }, + { + "epoch": 90.3956043956044, + "grad_norm": 15.125480651855469, + "learning_rate": 4.802197802197802e-06, + "loss": 0.2209, + "step": 32904 + }, + { + "epoch": 90.39835164835165, + "grad_norm": 8.679557800292969, + "learning_rate": 4.800824175824177e-06, + "loss": 0.081, + "step": 32905 + }, + { + "epoch": 90.4010989010989, + "grad_norm": 10.404257774353027, + "learning_rate": 4.799450549450549e-06, + "loss": 0.2114, + "step": 32906 + }, + { + "epoch": 90.40384615384616, + "grad_norm": 23.103097915649414, + "learning_rate": 4.798076923076923e-06, + "loss": 0.4165, + "step": 32907 + }, + { + "epoch": 90.4065934065934, + "grad_norm": 3.390817642211914, + "learning_rate": 4.796703296703297e-06, + "loss": 0.0339, + "step": 32908 + }, + { + "epoch": 90.40934065934066, + "grad_norm": 14.173356056213379, + "learning_rate": 4.795329670329671e-06, + "loss": 0.3563, + "step": 32909 + }, + { + "epoch": 90.41208791208791, + "grad_norm": 2.5933103561401367, + "learning_rate": 4.7939560439560445e-06, + "loss": 0.0245, + "step": 32910 + }, + { + "epoch": 90.41483516483517, + "grad_norm": 3.279479503631592, + "learning_rate": 4.792582417582418e-06, + "loss": 0.0185, + "step": 32911 + }, + { + "epoch": 90.41758241758242, + "grad_norm": 16.995885848999023, + "learning_rate": 4.791208791208791e-06, + "loss": 0.1868, + "step": 32912 + }, + { + "epoch": 90.42032967032966, + "grad_norm": 6.055034160614014, + "learning_rate": 4.7898351648351655e-06, + "loss": 0.1382, + "step": 32913 + }, + { + "epoch": 90.42307692307692, + "grad_norm": 25.2229061126709, + "learning_rate": 4.788461538461539e-06, + "loss": 0.6111, + "step": 32914 + }, + { + "epoch": 90.42582417582418, + "grad_norm": 12.264394760131836, + "learning_rate": 4.787087912087912e-06, + "loss": 0.2543, + "step": 32915 + }, + { + "epoch": 90.42857142857143, + "grad_norm": 9.396346092224121, + "learning_rate": 4.785714285714286e-06, + "loss": 0.1389, + "step": 32916 + }, + { + "epoch": 90.43131868131869, + "grad_norm": 6.336543083190918, + "learning_rate": 4.784340659340659e-06, + "loss": 0.0613, + "step": 32917 + }, + { + "epoch": 90.43406593406593, + "grad_norm": 10.960043907165527, + "learning_rate": 4.782967032967033e-06, + "loss": 0.2054, + "step": 32918 + }, + { + "epoch": 90.43681318681318, + "grad_norm": 9.13908863067627, + "learning_rate": 4.781593406593407e-06, + "loss": 0.1029, + "step": 32919 + }, + { + "epoch": 90.43956043956044, + "grad_norm": 8.958348274230957, + "learning_rate": 4.78021978021978e-06, + "loss": 0.1399, + "step": 32920 + }, + { + "epoch": 90.4423076923077, + "grad_norm": 12.221879959106445, + "learning_rate": 4.778846153846154e-06, + "loss": 0.2865, + "step": 32921 + }, + { + "epoch": 90.44505494505495, + "grad_norm": 14.164764404296875, + "learning_rate": 4.777472527472528e-06, + "loss": 0.1864, + "step": 32922 + }, + { + "epoch": 90.4478021978022, + "grad_norm": 3.3999414443969727, + "learning_rate": 4.776098901098901e-06, + "loss": 0.0425, + "step": 32923 + }, + { + "epoch": 90.45054945054945, + "grad_norm": 8.509586334228516, + "learning_rate": 4.7747252747252756e-06, + "loss": 0.134, + "step": 32924 + }, + { + "epoch": 90.4532967032967, + "grad_norm": 1.7679080963134766, + "learning_rate": 4.773351648351648e-06, + "loss": 0.0267, + "step": 32925 + }, + { + "epoch": 90.45604395604396, + "grad_norm": 5.906362533569336, + "learning_rate": 4.771978021978022e-06, + "loss": 0.1053, + "step": 32926 + }, + { + "epoch": 90.45879120879121, + "grad_norm": 9.794849395751953, + "learning_rate": 4.770604395604396e-06, + "loss": 0.0991, + "step": 32927 + }, + { + "epoch": 90.46153846153847, + "grad_norm": 11.648588180541992, + "learning_rate": 4.769230769230769e-06, + "loss": 0.1754, + "step": 32928 + }, + { + "epoch": 90.46428571428571, + "grad_norm": 5.833909034729004, + "learning_rate": 4.7678571428571434e-06, + "loss": 0.1478, + "step": 32929 + }, + { + "epoch": 90.46703296703296, + "grad_norm": 18.025575637817383, + "learning_rate": 4.766483516483517e-06, + "loss": 0.3607, + "step": 32930 + }, + { + "epoch": 90.46978021978022, + "grad_norm": 12.662541389465332, + "learning_rate": 4.76510989010989e-06, + "loss": 0.2025, + "step": 32931 + }, + { + "epoch": 90.47252747252747, + "grad_norm": 14.753361701965332, + "learning_rate": 4.763736263736264e-06, + "loss": 0.2738, + "step": 32932 + }, + { + "epoch": 90.47527472527473, + "grad_norm": 13.804073333740234, + "learning_rate": 4.762362637362638e-06, + "loss": 0.2754, + "step": 32933 + }, + { + "epoch": 90.47802197802197, + "grad_norm": 15.78707218170166, + "learning_rate": 4.760989010989011e-06, + "loss": 0.404, + "step": 32934 + }, + { + "epoch": 90.48076923076923, + "grad_norm": 14.789299964904785, + "learning_rate": 4.759615384615385e-06, + "loss": 0.1668, + "step": 32935 + }, + { + "epoch": 90.48351648351648, + "grad_norm": 7.915855884552002, + "learning_rate": 4.758241758241758e-06, + "loss": 0.1305, + "step": 32936 + }, + { + "epoch": 90.48626373626374, + "grad_norm": 8.416269302368164, + "learning_rate": 4.756868131868132e-06, + "loss": 0.1055, + "step": 32937 + }, + { + "epoch": 90.48901098901099, + "grad_norm": 15.957980155944824, + "learning_rate": 4.755494505494506e-06, + "loss": 0.2775, + "step": 32938 + }, + { + "epoch": 90.49175824175825, + "grad_norm": 4.729548454284668, + "learning_rate": 4.75412087912088e-06, + "loss": 0.0555, + "step": 32939 + }, + { + "epoch": 90.49450549450549, + "grad_norm": 7.19159460067749, + "learning_rate": 4.752747252747253e-06, + "loss": 0.193, + "step": 32940 + }, + { + "epoch": 90.49725274725274, + "grad_norm": 4.316619873046875, + "learning_rate": 4.751373626373627e-06, + "loss": 0.0555, + "step": 32941 + }, + { + "epoch": 90.5, + "grad_norm": 9.404864311218262, + "learning_rate": 4.75e-06, + "loss": 0.1435, + "step": 32942 + }, + { + "epoch": 90.50274725274726, + "grad_norm": 9.649462699890137, + "learning_rate": 4.748626373626374e-06, + "loss": 0.2433, + "step": 32943 + }, + { + "epoch": 90.50549450549451, + "grad_norm": 4.239348888397217, + "learning_rate": 4.747252747252748e-06, + "loss": 0.0678, + "step": 32944 + }, + { + "epoch": 90.50824175824175, + "grad_norm": 0.9263874888420105, + "learning_rate": 4.7458791208791205e-06, + "loss": 0.0106, + "step": 32945 + }, + { + "epoch": 90.51098901098901, + "grad_norm": 13.69008731842041, + "learning_rate": 4.744505494505495e-06, + "loss": 0.2747, + "step": 32946 + }, + { + "epoch": 90.51373626373626, + "grad_norm": 23.265132904052734, + "learning_rate": 4.743131868131868e-06, + "loss": 0.8501, + "step": 32947 + }, + { + "epoch": 90.51648351648352, + "grad_norm": 13.86165714263916, + "learning_rate": 4.741758241758242e-06, + "loss": 0.2264, + "step": 32948 + }, + { + "epoch": 90.51923076923077, + "grad_norm": 4.495087623596191, + "learning_rate": 4.740384615384615e-06, + "loss": 0.0377, + "step": 32949 + }, + { + "epoch": 90.52197802197803, + "grad_norm": 19.060611724853516, + "learning_rate": 4.739010989010989e-06, + "loss": 0.3725, + "step": 32950 + }, + { + "epoch": 90.52472527472527, + "grad_norm": 6.079518795013428, + "learning_rate": 4.737637362637363e-06, + "loss": 0.1196, + "step": 32951 + }, + { + "epoch": 90.52747252747253, + "grad_norm": 10.179625511169434, + "learning_rate": 4.736263736263737e-06, + "loss": 0.1832, + "step": 32952 + }, + { + "epoch": 90.53021978021978, + "grad_norm": 2.4193575382232666, + "learning_rate": 4.73489010989011e-06, + "loss": 0.0163, + "step": 32953 + }, + { + "epoch": 90.53296703296704, + "grad_norm": 9.145822525024414, + "learning_rate": 4.733516483516484e-06, + "loss": 0.1716, + "step": 32954 + }, + { + "epoch": 90.53571428571429, + "grad_norm": 9.652751922607422, + "learning_rate": 4.732142857142857e-06, + "loss": 0.1598, + "step": 32955 + }, + { + "epoch": 90.53846153846153, + "grad_norm": 5.916280269622803, + "learning_rate": 4.730769230769231e-06, + "loss": 0.0942, + "step": 32956 + }, + { + "epoch": 90.54120879120879, + "grad_norm": 6.383100986480713, + "learning_rate": 4.729395604395605e-06, + "loss": 0.1136, + "step": 32957 + }, + { + "epoch": 90.54395604395604, + "grad_norm": 9.87180233001709, + "learning_rate": 4.728021978021978e-06, + "loss": 0.1237, + "step": 32958 + }, + { + "epoch": 90.5467032967033, + "grad_norm": 4.002161502838135, + "learning_rate": 4.7266483516483516e-06, + "loss": 0.0582, + "step": 32959 + }, + { + "epoch": 90.54945054945055, + "grad_norm": 14.28614616394043, + "learning_rate": 4.725274725274725e-06, + "loss": 0.2009, + "step": 32960 + }, + { + "epoch": 90.5521978021978, + "grad_norm": 3.085336685180664, + "learning_rate": 4.723901098901099e-06, + "loss": 0.0238, + "step": 32961 + }, + { + "epoch": 90.55494505494505, + "grad_norm": 6.309864044189453, + "learning_rate": 4.722527472527473e-06, + "loss": 0.0761, + "step": 32962 + }, + { + "epoch": 90.5576923076923, + "grad_norm": 14.736313819885254, + "learning_rate": 4.721153846153847e-06, + "loss": 0.4096, + "step": 32963 + }, + { + "epoch": 90.56043956043956, + "grad_norm": 4.868067264556885, + "learning_rate": 4.7197802197802194e-06, + "loss": 0.0334, + "step": 32964 + }, + { + "epoch": 90.56318681318682, + "grad_norm": 18.664804458618164, + "learning_rate": 4.718406593406594e-06, + "loss": 0.3975, + "step": 32965 + }, + { + "epoch": 90.56593406593407, + "grad_norm": 19.68391990661621, + "learning_rate": 4.717032967032967e-06, + "loss": 0.4321, + "step": 32966 + }, + { + "epoch": 90.56868131868131, + "grad_norm": 9.01779556274414, + "learning_rate": 4.715659340659341e-06, + "loss": 0.2522, + "step": 32967 + }, + { + "epoch": 90.57142857142857, + "grad_norm": 10.565374374389648, + "learning_rate": 4.714285714285715e-06, + "loss": 0.1111, + "step": 32968 + }, + { + "epoch": 90.57417582417582, + "grad_norm": 9.547016143798828, + "learning_rate": 4.712912087912088e-06, + "loss": 0.1937, + "step": 32969 + }, + { + "epoch": 90.57692307692308, + "grad_norm": 4.589754581451416, + "learning_rate": 4.711538461538462e-06, + "loss": 0.0547, + "step": 32970 + }, + { + "epoch": 90.57967032967034, + "grad_norm": 10.446488380432129, + "learning_rate": 4.710164835164836e-06, + "loss": 0.1134, + "step": 32971 + }, + { + "epoch": 90.58241758241758, + "grad_norm": 6.353317737579346, + "learning_rate": 4.708791208791209e-06, + "loss": 0.1066, + "step": 32972 + }, + { + "epoch": 90.58516483516483, + "grad_norm": 16.169225692749023, + "learning_rate": 4.707417582417583e-06, + "loss": 0.3298, + "step": 32973 + }, + { + "epoch": 90.58791208791209, + "grad_norm": 8.931462287902832, + "learning_rate": 4.706043956043956e-06, + "loss": 0.1108, + "step": 32974 + }, + { + "epoch": 90.59065934065934, + "grad_norm": 11.023113250732422, + "learning_rate": 4.7046703296703295e-06, + "loss": 0.121, + "step": 32975 + }, + { + "epoch": 90.5934065934066, + "grad_norm": 8.860258102416992, + "learning_rate": 4.703296703296704e-06, + "loss": 0.1713, + "step": 32976 + }, + { + "epoch": 90.59615384615384, + "grad_norm": 12.947619438171387, + "learning_rate": 4.701923076923077e-06, + "loss": 0.4301, + "step": 32977 + }, + { + "epoch": 90.5989010989011, + "grad_norm": 6.705014228820801, + "learning_rate": 4.7005494505494505e-06, + "loss": 0.0908, + "step": 32978 + }, + { + "epoch": 90.60164835164835, + "grad_norm": 14.393077850341797, + "learning_rate": 4.699175824175824e-06, + "loss": 0.21, + "step": 32979 + }, + { + "epoch": 90.6043956043956, + "grad_norm": 18.06627655029297, + "learning_rate": 4.697802197802198e-06, + "loss": 0.2098, + "step": 32980 + }, + { + "epoch": 90.60714285714286, + "grad_norm": 8.899731636047363, + "learning_rate": 4.696428571428572e-06, + "loss": 0.1897, + "step": 32981 + }, + { + "epoch": 90.60989010989012, + "grad_norm": 3.27555251121521, + "learning_rate": 4.695054945054946e-06, + "loss": 0.0329, + "step": 32982 + }, + { + "epoch": 90.61263736263736, + "grad_norm": 19.368459701538086, + "learning_rate": 4.693681318681318e-06, + "loss": 0.2661, + "step": 32983 + }, + { + "epoch": 90.61538461538461, + "grad_norm": 16.694042205810547, + "learning_rate": 4.692307692307693e-06, + "loss": 0.2536, + "step": 32984 + }, + { + "epoch": 90.61813186813187, + "grad_norm": 16.411882400512695, + "learning_rate": 4.690934065934066e-06, + "loss": 0.2366, + "step": 32985 + }, + { + "epoch": 90.62087912087912, + "grad_norm": 10.92194652557373, + "learning_rate": 4.68956043956044e-06, + "loss": 0.2341, + "step": 32986 + }, + { + "epoch": 90.62362637362638, + "grad_norm": 13.781994819641113, + "learning_rate": 4.688186813186814e-06, + "loss": 0.2822, + "step": 32987 + }, + { + "epoch": 90.62637362637362, + "grad_norm": 11.693017959594727, + "learning_rate": 4.686813186813187e-06, + "loss": 0.3175, + "step": 32988 + }, + { + "epoch": 90.62912087912088, + "grad_norm": 17.704370498657227, + "learning_rate": 4.6854395604395605e-06, + "loss": 0.6903, + "step": 32989 + }, + { + "epoch": 90.63186813186813, + "grad_norm": 14.941689491271973, + "learning_rate": 4.684065934065934e-06, + "loss": 0.2961, + "step": 32990 + }, + { + "epoch": 90.63461538461539, + "grad_norm": 4.424571514129639, + "learning_rate": 4.682692307692308e-06, + "loss": 0.0847, + "step": 32991 + }, + { + "epoch": 90.63736263736264, + "grad_norm": 10.123958587646484, + "learning_rate": 4.681318681318682e-06, + "loss": 0.1877, + "step": 32992 + }, + { + "epoch": 90.64010989010988, + "grad_norm": 9.988696098327637, + "learning_rate": 4.679945054945055e-06, + "loss": 0.2626, + "step": 32993 + }, + { + "epoch": 90.64285714285714, + "grad_norm": 17.88661766052246, + "learning_rate": 4.6785714285714284e-06, + "loss": 0.1969, + "step": 32994 + }, + { + "epoch": 90.6456043956044, + "grad_norm": 13.964818954467773, + "learning_rate": 4.677197802197803e-06, + "loss": 0.2733, + "step": 32995 + }, + { + "epoch": 90.64835164835165, + "grad_norm": 20.168325424194336, + "learning_rate": 4.675824175824176e-06, + "loss": 0.612, + "step": 32996 + }, + { + "epoch": 90.6510989010989, + "grad_norm": 13.174094200134277, + "learning_rate": 4.67445054945055e-06, + "loss": 0.2947, + "step": 32997 + }, + { + "epoch": 90.65384615384616, + "grad_norm": 13.67240047454834, + "learning_rate": 4.673076923076923e-06, + "loss": 0.1561, + "step": 32998 + }, + { + "epoch": 90.6565934065934, + "grad_norm": 23.168684005737305, + "learning_rate": 4.671703296703297e-06, + "loss": 0.3977, + "step": 32999 + }, + { + "epoch": 90.65934065934066, + "grad_norm": 6.815909385681152, + "learning_rate": 4.6703296703296706e-06, + "loss": 0.0787, + "step": 33000 + }, + { + "epoch": 90.66208791208791, + "grad_norm": 6.881357669830322, + "learning_rate": 4.668956043956044e-06, + "loss": 0.0506, + "step": 33001 + }, + { + "epoch": 90.66483516483517, + "grad_norm": 15.318790435791016, + "learning_rate": 4.667582417582417e-06, + "loss": 0.3017, + "step": 33002 + }, + { + "epoch": 90.66758241758242, + "grad_norm": 25.1892147064209, + "learning_rate": 4.666208791208792e-06, + "loss": 0.4531, + "step": 33003 + }, + { + "epoch": 90.67032967032966, + "grad_norm": 11.892529487609863, + "learning_rate": 4.664835164835165e-06, + "loss": 0.1925, + "step": 33004 + }, + { + "epoch": 90.67307692307692, + "grad_norm": 19.104265213012695, + "learning_rate": 4.6634615384615384e-06, + "loss": 0.4756, + "step": 33005 + }, + { + "epoch": 90.67582417582418, + "grad_norm": 4.22150182723999, + "learning_rate": 4.662087912087913e-06, + "loss": 0.071, + "step": 33006 + }, + { + "epoch": 90.67857142857143, + "grad_norm": 16.613889694213867, + "learning_rate": 4.660714285714285e-06, + "loss": 0.2284, + "step": 33007 + }, + { + "epoch": 90.68131868131869, + "grad_norm": 5.5431599617004395, + "learning_rate": 4.6593406593406595e-06, + "loss": 0.1077, + "step": 33008 + }, + { + "epoch": 90.68406593406593, + "grad_norm": 7.105922698974609, + "learning_rate": 4.657967032967033e-06, + "loss": 0.062, + "step": 33009 + }, + { + "epoch": 90.68681318681318, + "grad_norm": 4.645521640777588, + "learning_rate": 4.656593406593407e-06, + "loss": 0.052, + "step": 33010 + }, + { + "epoch": 90.68956043956044, + "grad_norm": 0.48265960812568665, + "learning_rate": 4.655219780219781e-06, + "loss": 0.0065, + "step": 33011 + }, + { + "epoch": 90.6923076923077, + "grad_norm": 11.55660343170166, + "learning_rate": 4.653846153846154e-06, + "loss": 0.0915, + "step": 33012 + }, + { + "epoch": 90.69505494505495, + "grad_norm": 15.929359436035156, + "learning_rate": 4.652472527472527e-06, + "loss": 0.3599, + "step": 33013 + }, + { + "epoch": 90.6978021978022, + "grad_norm": 5.07352352142334, + "learning_rate": 4.651098901098902e-06, + "loss": 0.0565, + "step": 33014 + }, + { + "epoch": 90.70054945054945, + "grad_norm": 8.34587287902832, + "learning_rate": 4.649725274725275e-06, + "loss": 0.0998, + "step": 33015 + }, + { + "epoch": 90.7032967032967, + "grad_norm": 21.556161880493164, + "learning_rate": 4.6483516483516485e-06, + "loss": 0.2433, + "step": 33016 + }, + { + "epoch": 90.70604395604396, + "grad_norm": 2.288054943084717, + "learning_rate": 4.646978021978022e-06, + "loss": 0.0223, + "step": 33017 + }, + { + "epoch": 90.70879120879121, + "grad_norm": 1.124263048171997, + "learning_rate": 4.645604395604396e-06, + "loss": 0.0125, + "step": 33018 + }, + { + "epoch": 90.71153846153847, + "grad_norm": 0.9831543564796448, + "learning_rate": 4.6442307692307695e-06, + "loss": 0.0105, + "step": 33019 + }, + { + "epoch": 90.71428571428571, + "grad_norm": 4.171717643737793, + "learning_rate": 4.642857142857143e-06, + "loss": 0.0465, + "step": 33020 + }, + { + "epoch": 90.71703296703296, + "grad_norm": 5.915694713592529, + "learning_rate": 4.641483516483517e-06, + "loss": 0.0779, + "step": 33021 + }, + { + "epoch": 90.71978021978022, + "grad_norm": 10.14270305633545, + "learning_rate": 4.64010989010989e-06, + "loss": 0.1394, + "step": 33022 + }, + { + "epoch": 90.72252747252747, + "grad_norm": 10.678894996643066, + "learning_rate": 4.638736263736264e-06, + "loss": 0.1438, + "step": 33023 + }, + { + "epoch": 90.72527472527473, + "grad_norm": 6.352903842926025, + "learning_rate": 4.637362637362637e-06, + "loss": 0.0589, + "step": 33024 + }, + { + "epoch": 90.72802197802197, + "grad_norm": 13.677922248840332, + "learning_rate": 4.635989010989012e-06, + "loss": 0.1388, + "step": 33025 + }, + { + "epoch": 90.73076923076923, + "grad_norm": 11.312665939331055, + "learning_rate": 4.634615384615385e-06, + "loss": 0.1526, + "step": 33026 + }, + { + "epoch": 90.73351648351648, + "grad_norm": 10.245368957519531, + "learning_rate": 4.6332417582417585e-06, + "loss": 0.1637, + "step": 33027 + }, + { + "epoch": 90.73626373626374, + "grad_norm": 10.992820739746094, + "learning_rate": 4.631868131868132e-06, + "loss": 0.1738, + "step": 33028 + }, + { + "epoch": 90.73901098901099, + "grad_norm": 10.13691520690918, + "learning_rate": 4.630494505494506e-06, + "loss": 0.1075, + "step": 33029 + }, + { + "epoch": 90.74175824175825, + "grad_norm": 18.896955490112305, + "learning_rate": 4.6291208791208795e-06, + "loss": 0.3934, + "step": 33030 + }, + { + "epoch": 90.74450549450549, + "grad_norm": 5.464174747467041, + "learning_rate": 4.627747252747253e-06, + "loss": 0.079, + "step": 33031 + }, + { + "epoch": 90.74725274725274, + "grad_norm": 11.026638984680176, + "learning_rate": 4.626373626373626e-06, + "loss": 0.2255, + "step": 33032 + }, + { + "epoch": 90.75, + "grad_norm": 4.595437049865723, + "learning_rate": 4.625e-06, + "loss": 0.0565, + "step": 33033 + }, + { + "epoch": 90.75274725274726, + "grad_norm": 18.310949325561523, + "learning_rate": 4.623626373626374e-06, + "loss": 0.5269, + "step": 33034 + }, + { + "epoch": 90.75549450549451, + "grad_norm": 8.865538597106934, + "learning_rate": 4.6222527472527474e-06, + "loss": 0.1468, + "step": 33035 + }, + { + "epoch": 90.75824175824175, + "grad_norm": 9.2353515625, + "learning_rate": 4.620879120879121e-06, + "loss": 0.1826, + "step": 33036 + }, + { + "epoch": 90.76098901098901, + "grad_norm": 17.82160186767578, + "learning_rate": 4.619505494505494e-06, + "loss": 0.1815, + "step": 33037 + }, + { + "epoch": 90.76373626373626, + "grad_norm": 15.522313117980957, + "learning_rate": 4.6181318681318685e-06, + "loss": 0.1296, + "step": 33038 + }, + { + "epoch": 90.76648351648352, + "grad_norm": 13.144475936889648, + "learning_rate": 4.616758241758242e-06, + "loss": 0.3254, + "step": 33039 + }, + { + "epoch": 90.76923076923077, + "grad_norm": 4.831239223480225, + "learning_rate": 4.615384615384616e-06, + "loss": 0.0513, + "step": 33040 + }, + { + "epoch": 90.77197802197803, + "grad_norm": 8.516650199890137, + "learning_rate": 4.614010989010989e-06, + "loss": 0.1181, + "step": 33041 + }, + { + "epoch": 90.77472527472527, + "grad_norm": 15.99706745147705, + "learning_rate": 4.612637362637363e-06, + "loss": 0.2922, + "step": 33042 + }, + { + "epoch": 90.77747252747253, + "grad_norm": 2.15002179145813, + "learning_rate": 4.611263736263736e-06, + "loss": 0.0313, + "step": 33043 + }, + { + "epoch": 90.78021978021978, + "grad_norm": 5.740364074707031, + "learning_rate": 4.609890109890111e-06, + "loss": 0.0654, + "step": 33044 + }, + { + "epoch": 90.78296703296704, + "grad_norm": 5.699802875518799, + "learning_rate": 4.608516483516484e-06, + "loss": 0.0973, + "step": 33045 + }, + { + "epoch": 90.78571428571429, + "grad_norm": 14.507906913757324, + "learning_rate": 4.6071428571428574e-06, + "loss": 0.6766, + "step": 33046 + }, + { + "epoch": 90.78846153846153, + "grad_norm": 4.007754325866699, + "learning_rate": 4.605769230769231e-06, + "loss": 0.0581, + "step": 33047 + }, + { + "epoch": 90.79120879120879, + "grad_norm": 5.188833236694336, + "learning_rate": 4.604395604395604e-06, + "loss": 0.0755, + "step": 33048 + }, + { + "epoch": 90.79395604395604, + "grad_norm": 15.891155242919922, + "learning_rate": 4.6030219780219785e-06, + "loss": 0.3412, + "step": 33049 + }, + { + "epoch": 90.7967032967033, + "grad_norm": 16.525074005126953, + "learning_rate": 4.601648351648352e-06, + "loss": 0.3006, + "step": 33050 + }, + { + "epoch": 90.79945054945055, + "grad_norm": 1.5300750732421875, + "learning_rate": 4.600274725274725e-06, + "loss": 0.0173, + "step": 33051 + }, + { + "epoch": 90.8021978021978, + "grad_norm": 12.99997329711914, + "learning_rate": 4.598901098901099e-06, + "loss": 0.4081, + "step": 33052 + }, + { + "epoch": 90.80494505494505, + "grad_norm": 1.9842008352279663, + "learning_rate": 4.597527472527473e-06, + "loss": 0.0307, + "step": 33053 + }, + { + "epoch": 90.8076923076923, + "grad_norm": 4.634109020233154, + "learning_rate": 4.596153846153846e-06, + "loss": 0.0672, + "step": 33054 + }, + { + "epoch": 90.81043956043956, + "grad_norm": 14.690337181091309, + "learning_rate": 4.594780219780221e-06, + "loss": 0.2586, + "step": 33055 + }, + { + "epoch": 90.81318681318682, + "grad_norm": 14.53385066986084, + "learning_rate": 4.593406593406593e-06, + "loss": 0.4241, + "step": 33056 + }, + { + "epoch": 90.81593406593407, + "grad_norm": 14.741891860961914, + "learning_rate": 4.5920329670329675e-06, + "loss": 0.2115, + "step": 33057 + }, + { + "epoch": 90.81868131868131, + "grad_norm": 15.630651473999023, + "learning_rate": 4.590659340659341e-06, + "loss": 0.5187, + "step": 33058 + }, + { + "epoch": 90.82142857142857, + "grad_norm": 13.605744361877441, + "learning_rate": 4.589285714285715e-06, + "loss": 0.2387, + "step": 33059 + }, + { + "epoch": 90.82417582417582, + "grad_norm": 15.714249610900879, + "learning_rate": 4.587912087912088e-06, + "loss": 0.3797, + "step": 33060 + }, + { + "epoch": 90.82692307692308, + "grad_norm": 21.29839515686035, + "learning_rate": 4.586538461538462e-06, + "loss": 0.2331, + "step": 33061 + }, + { + "epoch": 90.82967032967034, + "grad_norm": 25.904062271118164, + "learning_rate": 4.585164835164835e-06, + "loss": 0.4609, + "step": 33062 + }, + { + "epoch": 90.83241758241758, + "grad_norm": 6.770239353179932, + "learning_rate": 4.583791208791209e-06, + "loss": 0.0933, + "step": 33063 + }, + { + "epoch": 90.83516483516483, + "grad_norm": 6.007946968078613, + "learning_rate": 4.582417582417583e-06, + "loss": 0.1039, + "step": 33064 + }, + { + "epoch": 90.83791208791209, + "grad_norm": 13.609024047851562, + "learning_rate": 4.5810439560439556e-06, + "loss": 0.1218, + "step": 33065 + }, + { + "epoch": 90.84065934065934, + "grad_norm": 17.728519439697266, + "learning_rate": 4.57967032967033e-06, + "loss": 0.2072, + "step": 33066 + }, + { + "epoch": 90.8434065934066, + "grad_norm": 10.752286911010742, + "learning_rate": 4.578296703296703e-06, + "loss": 0.1373, + "step": 33067 + }, + { + "epoch": 90.84615384615384, + "grad_norm": 7.008963108062744, + "learning_rate": 4.5769230769230775e-06, + "loss": 0.0451, + "step": 33068 + }, + { + "epoch": 90.8489010989011, + "grad_norm": 22.276031494140625, + "learning_rate": 4.575549450549451e-06, + "loss": 0.4236, + "step": 33069 + }, + { + "epoch": 90.85164835164835, + "grad_norm": 28.37334442138672, + "learning_rate": 4.574175824175824e-06, + "loss": 0.7756, + "step": 33070 + }, + { + "epoch": 90.8543956043956, + "grad_norm": 19.635311126708984, + "learning_rate": 4.572802197802198e-06, + "loss": 0.2023, + "step": 33071 + }, + { + "epoch": 90.85714285714286, + "grad_norm": 12.814043998718262, + "learning_rate": 4.571428571428572e-06, + "loss": 0.2654, + "step": 33072 + }, + { + "epoch": 90.85989010989012, + "grad_norm": 17.57236671447754, + "learning_rate": 4.570054945054945e-06, + "loss": 0.6171, + "step": 33073 + }, + { + "epoch": 90.86263736263736, + "grad_norm": 22.323034286499023, + "learning_rate": 4.568681318681319e-06, + "loss": 0.4892, + "step": 33074 + }, + { + "epoch": 90.86538461538461, + "grad_norm": 7.012242794036865, + "learning_rate": 4.567307692307692e-06, + "loss": 0.0742, + "step": 33075 + }, + { + "epoch": 90.86813186813187, + "grad_norm": 12.690409660339355, + "learning_rate": 4.565934065934066e-06, + "loss": 0.2443, + "step": 33076 + }, + { + "epoch": 90.87087912087912, + "grad_norm": 18.24883460998535, + "learning_rate": 4.56456043956044e-06, + "loss": 0.3774, + "step": 33077 + }, + { + "epoch": 90.87362637362638, + "grad_norm": 18.791749954223633, + "learning_rate": 4.563186813186813e-06, + "loss": 0.2219, + "step": 33078 + }, + { + "epoch": 90.87637362637362, + "grad_norm": 19.335420608520508, + "learning_rate": 4.5618131868131875e-06, + "loss": 0.3628, + "step": 33079 + }, + { + "epoch": 90.87912087912088, + "grad_norm": 6.850909233093262, + "learning_rate": 4.56043956043956e-06, + "loss": 0.1887, + "step": 33080 + }, + { + "epoch": 90.88186813186813, + "grad_norm": 3.1817853450775146, + "learning_rate": 4.559065934065934e-06, + "loss": 0.0349, + "step": 33081 + }, + { + "epoch": 90.88461538461539, + "grad_norm": 7.058234691619873, + "learning_rate": 4.557692307692308e-06, + "loss": 0.0879, + "step": 33082 + }, + { + "epoch": 90.88736263736264, + "grad_norm": 14.955994606018066, + "learning_rate": 4.556318681318682e-06, + "loss": 0.2734, + "step": 33083 + }, + { + "epoch": 90.89010989010988, + "grad_norm": 25.95578956604004, + "learning_rate": 4.554945054945055e-06, + "loss": 0.8672, + "step": 33084 + }, + { + "epoch": 90.89285714285714, + "grad_norm": 2.8575127124786377, + "learning_rate": 4.553571428571429e-06, + "loss": 0.0206, + "step": 33085 + }, + { + "epoch": 90.8956043956044, + "grad_norm": 11.71467113494873, + "learning_rate": 4.552197802197802e-06, + "loss": 0.2334, + "step": 33086 + }, + { + "epoch": 90.89835164835165, + "grad_norm": 3.427941083908081, + "learning_rate": 4.5508241758241764e-06, + "loss": 0.0476, + "step": 33087 + }, + { + "epoch": 90.9010989010989, + "grad_norm": 14.922404289245605, + "learning_rate": 4.54945054945055e-06, + "loss": 0.3906, + "step": 33088 + }, + { + "epoch": 90.90384615384616, + "grad_norm": 12.842673301696777, + "learning_rate": 4.548076923076923e-06, + "loss": 0.2621, + "step": 33089 + }, + { + "epoch": 90.9065934065934, + "grad_norm": 8.54492473602295, + "learning_rate": 4.546703296703297e-06, + "loss": 0.0719, + "step": 33090 + }, + { + "epoch": 90.90934065934066, + "grad_norm": 12.855758666992188, + "learning_rate": 4.545329670329671e-06, + "loss": 0.3549, + "step": 33091 + }, + { + "epoch": 90.91208791208791, + "grad_norm": 8.702183723449707, + "learning_rate": 4.543956043956044e-06, + "loss": 0.1327, + "step": 33092 + }, + { + "epoch": 90.91483516483517, + "grad_norm": 11.786080360412598, + "learning_rate": 4.542582417582418e-06, + "loss": 0.1596, + "step": 33093 + }, + { + "epoch": 90.91758241758242, + "grad_norm": 13.837808609008789, + "learning_rate": 4.541208791208791e-06, + "loss": 0.2789, + "step": 33094 + }, + { + "epoch": 90.92032967032966, + "grad_norm": 16.631532669067383, + "learning_rate": 4.5398351648351645e-06, + "loss": 0.3592, + "step": 33095 + }, + { + "epoch": 90.92307692307692, + "grad_norm": 21.446191787719727, + "learning_rate": 4.538461538461539e-06, + "loss": 0.406, + "step": 33096 + }, + { + "epoch": 90.92582417582418, + "grad_norm": 8.27004623413086, + "learning_rate": 4.537087912087912e-06, + "loss": 0.1865, + "step": 33097 + }, + { + "epoch": 90.92857142857143, + "grad_norm": 20.667570114135742, + "learning_rate": 4.5357142857142865e-06, + "loss": 0.5111, + "step": 33098 + }, + { + "epoch": 90.93131868131869, + "grad_norm": 17.68840789794922, + "learning_rate": 4.534340659340659e-06, + "loss": 0.3641, + "step": 33099 + }, + { + "epoch": 90.93406593406593, + "grad_norm": 14.00259780883789, + "learning_rate": 4.532967032967033e-06, + "loss": 0.343, + "step": 33100 + }, + { + "epoch": 90.93681318681318, + "grad_norm": 5.857490539550781, + "learning_rate": 4.531593406593407e-06, + "loss": 0.0709, + "step": 33101 + }, + { + "epoch": 90.93956043956044, + "grad_norm": 9.462139129638672, + "learning_rate": 4.530219780219781e-06, + "loss": 0.1207, + "step": 33102 + }, + { + "epoch": 90.9423076923077, + "grad_norm": 16.944583892822266, + "learning_rate": 4.528846153846154e-06, + "loss": 0.2723, + "step": 33103 + }, + { + "epoch": 90.94505494505495, + "grad_norm": 9.313616752624512, + "learning_rate": 4.527472527472528e-06, + "loss": 0.1383, + "step": 33104 + }, + { + "epoch": 90.9478021978022, + "grad_norm": 13.728147506713867, + "learning_rate": 4.526098901098901e-06, + "loss": 0.2551, + "step": 33105 + }, + { + "epoch": 90.95054945054945, + "grad_norm": 5.426362037658691, + "learning_rate": 4.5247252747252746e-06, + "loss": 0.0733, + "step": 33106 + }, + { + "epoch": 90.9532967032967, + "grad_norm": 12.396247863769531, + "learning_rate": 4.523351648351649e-06, + "loss": 0.1924, + "step": 33107 + }, + { + "epoch": 90.95604395604396, + "grad_norm": 15.242387771606445, + "learning_rate": 4.521978021978022e-06, + "loss": 0.3127, + "step": 33108 + }, + { + "epoch": 90.95879120879121, + "grad_norm": 11.93581771850586, + "learning_rate": 4.520604395604396e-06, + "loss": 0.1145, + "step": 33109 + }, + { + "epoch": 90.96153846153847, + "grad_norm": 16.154346466064453, + "learning_rate": 4.519230769230769e-06, + "loss": 0.2398, + "step": 33110 + }, + { + "epoch": 90.96428571428571, + "grad_norm": 4.583212852478027, + "learning_rate": 4.517857142857143e-06, + "loss": 0.0711, + "step": 33111 + }, + { + "epoch": 90.96703296703296, + "grad_norm": 2.5276036262512207, + "learning_rate": 4.516483516483517e-06, + "loss": 0.0297, + "step": 33112 + }, + { + "epoch": 90.96978021978022, + "grad_norm": 17.17351531982422, + "learning_rate": 4.515109890109891e-06, + "loss": 0.171, + "step": 33113 + }, + { + "epoch": 90.97252747252747, + "grad_norm": 9.576983451843262, + "learning_rate": 4.5137362637362635e-06, + "loss": 0.2119, + "step": 33114 + }, + { + "epoch": 90.97527472527473, + "grad_norm": 5.196107864379883, + "learning_rate": 4.512362637362638e-06, + "loss": 0.0544, + "step": 33115 + }, + { + "epoch": 90.97802197802197, + "grad_norm": 14.997870445251465, + "learning_rate": 4.510989010989011e-06, + "loss": 0.2437, + "step": 33116 + }, + { + "epoch": 90.98076923076923, + "grad_norm": 11.417621612548828, + "learning_rate": 4.509615384615385e-06, + "loss": 0.1821, + "step": 33117 + }, + { + "epoch": 90.98351648351648, + "grad_norm": 18.136470794677734, + "learning_rate": 4.508241758241758e-06, + "loss": 0.3229, + "step": 33118 + }, + { + "epoch": 90.98626373626374, + "grad_norm": 35.74235534667969, + "learning_rate": 4.506868131868132e-06, + "loss": 0.5006, + "step": 33119 + }, + { + "epoch": 90.98901098901099, + "grad_norm": 14.944023132324219, + "learning_rate": 4.505494505494506e-06, + "loss": 0.4553, + "step": 33120 + }, + { + "epoch": 90.99175824175825, + "grad_norm": 14.948332786560059, + "learning_rate": 4.504120879120879e-06, + "loss": 0.2589, + "step": 33121 + }, + { + "epoch": 90.99450549450549, + "grad_norm": 6.107563495635986, + "learning_rate": 4.502747252747253e-06, + "loss": 0.1377, + "step": 33122 + }, + { + "epoch": 90.99725274725274, + "grad_norm": 15.03592586517334, + "learning_rate": 4.501373626373626e-06, + "loss": 0.3137, + "step": 33123 + }, + { + "epoch": 91.0, + "grad_norm": 25.844982147216797, + "learning_rate": 4.5e-06, + "loss": 0.2192, + "step": 33124 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.859504132231405, + "eval_f1": 0.8587238221718854, + "eval_f1_DuraRiadoRio_64x64": 0.8253968253968254, + "eval_f1_Mole_64x64": 0.9215017064846417, + "eval_f1_Quebrado_64x64": 0.872852233676976, + "eval_f1_RiadoRio_64x64": 0.7920792079207921, + "eval_f1_RioFechado_64x64": 0.8817891373801917, + "eval_loss": 0.6180627346038818, + "eval_precision": 0.8669336680530495, + "eval_precision_DuraRiadoRio_64x64": 0.9629629629629629, + "eval_precision_Mole_64x64": 0.9060402684563759, + "eval_precision_Quebrado_64x64": 0.8639455782312925, + "eval_precision_RiadoRio_64x64": 0.7947019867549668, + "eval_precision_RioFechado_64x64": 0.8070175438596491, + "eval_recall": 0.8605942673585373, + "eval_recall_DuraRiadoRio_64x64": 0.7222222222222222, + "eval_recall_Mole_64x64": 0.9375, + "eval_recall_Quebrado_64x64": 0.8819444444444444, + "eval_recall_RiadoRio_64x64": 0.7894736842105263, + "eval_recall_RioFechado_64x64": 0.971830985915493, + "eval_runtime": 1.7597, + "eval_samples_per_second": 412.58, + "eval_steps_per_second": 26.141, + "step": 33124 + }, + { + "epoch": 91.00274725274726, + "grad_norm": 7.63163948059082, + "learning_rate": 4.4986263736263735e-06, + "loss": 0.1116, + "step": 33125 + }, + { + "epoch": 91.00549450549451, + "grad_norm": 9.869379043579102, + "learning_rate": 4.497252747252748e-06, + "loss": 0.2009, + "step": 33126 + }, + { + "epoch": 91.00824175824175, + "grad_norm": 7.788296699523926, + "learning_rate": 4.495879120879121e-06, + "loss": 0.0953, + "step": 33127 + }, + { + "epoch": 91.01098901098901, + "grad_norm": 13.856799125671387, + "learning_rate": 4.494505494505495e-06, + "loss": 0.208, + "step": 33128 + }, + { + "epoch": 91.01373626373626, + "grad_norm": 8.080530166625977, + "learning_rate": 4.493131868131868e-06, + "loss": 0.0861, + "step": 33129 + }, + { + "epoch": 91.01648351648352, + "grad_norm": 1.937677264213562, + "learning_rate": 4.491758241758242e-06, + "loss": 0.0284, + "step": 33130 + }, + { + "epoch": 91.01923076923077, + "grad_norm": 6.7778449058532715, + "learning_rate": 4.490384615384616e-06, + "loss": 0.0611, + "step": 33131 + }, + { + "epoch": 91.02197802197803, + "grad_norm": 2.0442512035369873, + "learning_rate": 4.48901098901099e-06, + "loss": 0.0392, + "step": 33132 + }, + { + "epoch": 91.02472527472527, + "grad_norm": 21.655715942382812, + "learning_rate": 4.4876373626373625e-06, + "loss": 0.5791, + "step": 33133 + }, + { + "epoch": 91.02747252747253, + "grad_norm": 8.414648056030273, + "learning_rate": 4.486263736263737e-06, + "loss": 0.1762, + "step": 33134 + }, + { + "epoch": 91.03021978021978, + "grad_norm": 3.482654094696045, + "learning_rate": 4.48489010989011e-06, + "loss": 0.0483, + "step": 33135 + }, + { + "epoch": 91.03296703296704, + "grad_norm": 13.048781394958496, + "learning_rate": 4.4835164835164835e-06, + "loss": 0.2062, + "step": 33136 + }, + { + "epoch": 91.03571428571429, + "grad_norm": 5.095400810241699, + "learning_rate": 4.482142857142858e-06, + "loss": 0.0713, + "step": 33137 + }, + { + "epoch": 91.03846153846153, + "grad_norm": 20.46018409729004, + "learning_rate": 4.48076923076923e-06, + "loss": 0.6724, + "step": 33138 + }, + { + "epoch": 91.04120879120879, + "grad_norm": 18.562692642211914, + "learning_rate": 4.479395604395605e-06, + "loss": 0.5024, + "step": 33139 + }, + { + "epoch": 91.04395604395604, + "grad_norm": 12.0707368850708, + "learning_rate": 4.478021978021978e-06, + "loss": 0.2977, + "step": 33140 + }, + { + "epoch": 91.0467032967033, + "grad_norm": 15.302020072937012, + "learning_rate": 4.476648351648352e-06, + "loss": 0.2438, + "step": 33141 + }, + { + "epoch": 91.04945054945055, + "grad_norm": 5.427065372467041, + "learning_rate": 4.475274725274726e-06, + "loss": 0.1573, + "step": 33142 + }, + { + "epoch": 91.0521978021978, + "grad_norm": 6.95584774017334, + "learning_rate": 4.473901098901099e-06, + "loss": 0.1612, + "step": 33143 + }, + { + "epoch": 91.05494505494505, + "grad_norm": 4.0684590339660645, + "learning_rate": 4.4725274725274725e-06, + "loss": 0.0685, + "step": 33144 + }, + { + "epoch": 91.0576923076923, + "grad_norm": 11.686251640319824, + "learning_rate": 4.471153846153847e-06, + "loss": 0.2981, + "step": 33145 + }, + { + "epoch": 91.06043956043956, + "grad_norm": 14.96948528289795, + "learning_rate": 4.46978021978022e-06, + "loss": 0.2821, + "step": 33146 + }, + { + "epoch": 91.06318681318682, + "grad_norm": 1.9902243614196777, + "learning_rate": 4.4684065934065936e-06, + "loss": 0.0608, + "step": 33147 + }, + { + "epoch": 91.06593406593407, + "grad_norm": 18.576812744140625, + "learning_rate": 4.467032967032967e-06, + "loss": 0.2147, + "step": 33148 + }, + { + "epoch": 91.06868131868131, + "grad_norm": 7.583390235900879, + "learning_rate": 4.465659340659341e-06, + "loss": 0.0783, + "step": 33149 + }, + { + "epoch": 91.07142857142857, + "grad_norm": 16.44508171081543, + "learning_rate": 4.464285714285715e-06, + "loss": 0.7291, + "step": 33150 + }, + { + "epoch": 91.07417582417582, + "grad_norm": 3.5547237396240234, + "learning_rate": 4.462912087912088e-06, + "loss": 0.042, + "step": 33151 + }, + { + "epoch": 91.07692307692308, + "grad_norm": 9.986855506896973, + "learning_rate": 4.4615384615384614e-06, + "loss": 0.1069, + "step": 33152 + }, + { + "epoch": 91.07967032967034, + "grad_norm": 8.322766304016113, + "learning_rate": 4.460164835164835e-06, + "loss": 0.089, + "step": 33153 + }, + { + "epoch": 91.08241758241758, + "grad_norm": 5.344404220581055, + "learning_rate": 4.458791208791209e-06, + "loss": 0.0653, + "step": 33154 + }, + { + "epoch": 91.08516483516483, + "grad_norm": 18.328508377075195, + "learning_rate": 4.4574175824175825e-06, + "loss": 0.2143, + "step": 33155 + }, + { + "epoch": 91.08791208791209, + "grad_norm": 15.01467227935791, + "learning_rate": 4.456043956043957e-06, + "loss": 0.1455, + "step": 33156 + }, + { + "epoch": 91.09065934065934, + "grad_norm": 7.128836154937744, + "learning_rate": 4.454670329670329e-06, + "loss": 0.1047, + "step": 33157 + }, + { + "epoch": 91.0934065934066, + "grad_norm": 9.480789184570312, + "learning_rate": 4.4532967032967036e-06, + "loss": 0.2001, + "step": 33158 + }, + { + "epoch": 91.09615384615384, + "grad_norm": 2.1630971431732178, + "learning_rate": 4.451923076923077e-06, + "loss": 0.0261, + "step": 33159 + }, + { + "epoch": 91.0989010989011, + "grad_norm": 0.8470394611358643, + "learning_rate": 4.450549450549451e-06, + "loss": 0.0099, + "step": 33160 + }, + { + "epoch": 91.10164835164835, + "grad_norm": 7.00169038772583, + "learning_rate": 4.449175824175825e-06, + "loss": 0.1967, + "step": 33161 + }, + { + "epoch": 91.1043956043956, + "grad_norm": 4.194622993469238, + "learning_rate": 4.447802197802198e-06, + "loss": 0.0921, + "step": 33162 + }, + { + "epoch": 91.10714285714286, + "grad_norm": 0.9613305926322937, + "learning_rate": 4.4464285714285715e-06, + "loss": 0.0124, + "step": 33163 + }, + { + "epoch": 91.10989010989012, + "grad_norm": 11.48202896118164, + "learning_rate": 4.445054945054946e-06, + "loss": 0.2388, + "step": 33164 + }, + { + "epoch": 91.11263736263736, + "grad_norm": 8.12852954864502, + "learning_rate": 4.443681318681319e-06, + "loss": 0.1227, + "step": 33165 + }, + { + "epoch": 91.11538461538461, + "grad_norm": 9.220122337341309, + "learning_rate": 4.4423076923076925e-06, + "loss": 0.1178, + "step": 33166 + }, + { + "epoch": 91.11813186813187, + "grad_norm": 8.164027214050293, + "learning_rate": 4.440934065934066e-06, + "loss": 0.1549, + "step": 33167 + }, + { + "epoch": 91.12087912087912, + "grad_norm": 4.160140037536621, + "learning_rate": 4.439560439560439e-06, + "loss": 0.0446, + "step": 33168 + }, + { + "epoch": 91.12362637362638, + "grad_norm": 18.57479476928711, + "learning_rate": 4.438186813186814e-06, + "loss": 0.3121, + "step": 33169 + }, + { + "epoch": 91.12637362637362, + "grad_norm": 25.282718658447266, + "learning_rate": 4.436813186813187e-06, + "loss": 0.4475, + "step": 33170 + }, + { + "epoch": 91.12912087912088, + "grad_norm": 16.679533004760742, + "learning_rate": 4.435439560439561e-06, + "loss": 0.2342, + "step": 33171 + }, + { + "epoch": 91.13186813186813, + "grad_norm": 5.545039176940918, + "learning_rate": 4.434065934065934e-06, + "loss": 0.0321, + "step": 33172 + }, + { + "epoch": 91.13461538461539, + "grad_norm": 11.476592063903809, + "learning_rate": 4.432692307692308e-06, + "loss": 0.1622, + "step": 33173 + }, + { + "epoch": 91.13736263736264, + "grad_norm": 9.820422172546387, + "learning_rate": 4.4313186813186815e-06, + "loss": 0.2156, + "step": 33174 + }, + { + "epoch": 91.14010989010988, + "grad_norm": 4.367415428161621, + "learning_rate": 4.429945054945056e-06, + "loss": 0.0419, + "step": 33175 + }, + { + "epoch": 91.14285714285714, + "grad_norm": 10.020123481750488, + "learning_rate": 4.428571428571428e-06, + "loss": 0.1082, + "step": 33176 + }, + { + "epoch": 91.1456043956044, + "grad_norm": 22.22593116760254, + "learning_rate": 4.4271978021978025e-06, + "loss": 0.799, + "step": 33177 + }, + { + "epoch": 91.14835164835165, + "grad_norm": 19.065460205078125, + "learning_rate": 4.425824175824176e-06, + "loss": 0.4026, + "step": 33178 + }, + { + "epoch": 91.1510989010989, + "grad_norm": 11.89227294921875, + "learning_rate": 4.424450549450549e-06, + "loss": 0.1794, + "step": 33179 + }, + { + "epoch": 91.15384615384616, + "grad_norm": 13.100921630859375, + "learning_rate": 4.423076923076924e-06, + "loss": 0.3584, + "step": 33180 + }, + { + "epoch": 91.1565934065934, + "grad_norm": 3.7938919067382812, + "learning_rate": 4.421703296703297e-06, + "loss": 0.0366, + "step": 33181 + }, + { + "epoch": 91.15934065934066, + "grad_norm": 7.014585971832275, + "learning_rate": 4.42032967032967e-06, + "loss": 0.0898, + "step": 33182 + }, + { + "epoch": 91.16208791208791, + "grad_norm": 13.616922378540039, + "learning_rate": 4.418956043956044e-06, + "loss": 0.3056, + "step": 33183 + }, + { + "epoch": 91.16483516483517, + "grad_norm": 18.93259048461914, + "learning_rate": 4.417582417582418e-06, + "loss": 0.4683, + "step": 33184 + }, + { + "epoch": 91.16758241758242, + "grad_norm": 2.1215105056762695, + "learning_rate": 4.4162087912087915e-06, + "loss": 0.0181, + "step": 33185 + }, + { + "epoch": 91.17032967032966, + "grad_norm": 21.500974655151367, + "learning_rate": 4.414835164835165e-06, + "loss": 0.509, + "step": 33186 + }, + { + "epoch": 91.17307692307692, + "grad_norm": 27.781261444091797, + "learning_rate": 4.413461538461538e-06, + "loss": 0.6087, + "step": 33187 + }, + { + "epoch": 91.17582417582418, + "grad_norm": 7.137402534484863, + "learning_rate": 4.4120879120879126e-06, + "loss": 0.1564, + "step": 33188 + }, + { + "epoch": 91.17857142857143, + "grad_norm": 5.230558395385742, + "learning_rate": 4.410714285714286e-06, + "loss": 0.0559, + "step": 33189 + }, + { + "epoch": 91.18131868131869, + "grad_norm": 5.963040351867676, + "learning_rate": 4.40934065934066e-06, + "loss": 0.0901, + "step": 33190 + }, + { + "epoch": 91.18406593406593, + "grad_norm": 11.809335708618164, + "learning_rate": 4.407967032967033e-06, + "loss": 0.3355, + "step": 33191 + }, + { + "epoch": 91.18681318681318, + "grad_norm": 13.958837509155273, + "learning_rate": 4.406593406593407e-06, + "loss": 0.3435, + "step": 33192 + }, + { + "epoch": 91.18956043956044, + "grad_norm": 18.47137451171875, + "learning_rate": 4.4052197802197804e-06, + "loss": 0.2297, + "step": 33193 + }, + { + "epoch": 91.1923076923077, + "grad_norm": 3.0160436630249023, + "learning_rate": 4.403846153846154e-06, + "loss": 0.0403, + "step": 33194 + }, + { + "epoch": 91.19505494505495, + "grad_norm": 10.336447715759277, + "learning_rate": 4.402472527472528e-06, + "loss": 0.0658, + "step": 33195 + }, + { + "epoch": 91.1978021978022, + "grad_norm": 7.194045543670654, + "learning_rate": 4.4010989010989015e-06, + "loss": 0.0649, + "step": 33196 + }, + { + "epoch": 91.20054945054945, + "grad_norm": 8.7477388381958, + "learning_rate": 4.399725274725275e-06, + "loss": 0.1356, + "step": 33197 + }, + { + "epoch": 91.2032967032967, + "grad_norm": 2.2188096046447754, + "learning_rate": 4.398351648351648e-06, + "loss": 0.0143, + "step": 33198 + }, + { + "epoch": 91.20604395604396, + "grad_norm": 9.940762519836426, + "learning_rate": 4.3969780219780226e-06, + "loss": 0.1855, + "step": 33199 + }, + { + "epoch": 91.20879120879121, + "grad_norm": 6.796417713165283, + "learning_rate": 4.395604395604396e-06, + "loss": 0.132, + "step": 33200 + }, + { + "epoch": 91.21153846153847, + "grad_norm": 1.6238510608673096, + "learning_rate": 4.394230769230769e-06, + "loss": 0.0136, + "step": 33201 + }, + { + "epoch": 91.21428571428571, + "grad_norm": 3.8572633266448975, + "learning_rate": 4.392857142857143e-06, + "loss": 0.0325, + "step": 33202 + }, + { + "epoch": 91.21703296703296, + "grad_norm": 5.930454254150391, + "learning_rate": 4.391483516483517e-06, + "loss": 0.0663, + "step": 33203 + }, + { + "epoch": 91.21978021978022, + "grad_norm": 11.169008255004883, + "learning_rate": 4.3901098901098904e-06, + "loss": 0.14, + "step": 33204 + }, + { + "epoch": 91.22252747252747, + "grad_norm": 13.119637489318848, + "learning_rate": 4.388736263736264e-06, + "loss": 0.4017, + "step": 33205 + }, + { + "epoch": 91.22527472527473, + "grad_norm": 16.802515029907227, + "learning_rate": 4.387362637362637e-06, + "loss": 0.1556, + "step": 33206 + }, + { + "epoch": 91.22802197802197, + "grad_norm": 12.357522010803223, + "learning_rate": 4.3859890109890115e-06, + "loss": 0.4073, + "step": 33207 + }, + { + "epoch": 91.23076923076923, + "grad_norm": 8.505192756652832, + "learning_rate": 4.384615384615385e-06, + "loss": 0.2154, + "step": 33208 + }, + { + "epoch": 91.23351648351648, + "grad_norm": 2.362748622894287, + "learning_rate": 4.383241758241758e-06, + "loss": 0.021, + "step": 33209 + }, + { + "epoch": 91.23626373626374, + "grad_norm": 10.340432167053223, + "learning_rate": 4.381868131868132e-06, + "loss": 0.2115, + "step": 33210 + }, + { + "epoch": 91.23901098901099, + "grad_norm": 22.759246826171875, + "learning_rate": 4.380494505494505e-06, + "loss": 0.6418, + "step": 33211 + }, + { + "epoch": 91.24175824175825, + "grad_norm": 26.70856285095215, + "learning_rate": 4.379120879120879e-06, + "loss": 0.4732, + "step": 33212 + }, + { + "epoch": 91.24450549450549, + "grad_norm": 22.129993438720703, + "learning_rate": 4.377747252747253e-06, + "loss": 0.4913, + "step": 33213 + }, + { + "epoch": 91.24725274725274, + "grad_norm": 11.972176551818848, + "learning_rate": 4.376373626373627e-06, + "loss": 0.2289, + "step": 33214 + }, + { + "epoch": 91.25, + "grad_norm": 13.5741605758667, + "learning_rate": 4.375e-06, + "loss": 0.486, + "step": 33215 + }, + { + "epoch": 91.25274725274726, + "grad_norm": 15.394926071166992, + "learning_rate": 4.373626373626374e-06, + "loss": 0.2577, + "step": 33216 + }, + { + "epoch": 91.25549450549451, + "grad_norm": 16.132747650146484, + "learning_rate": 4.372252747252747e-06, + "loss": 0.1159, + "step": 33217 + }, + { + "epoch": 91.25824175824175, + "grad_norm": 12.993297576904297, + "learning_rate": 4.3708791208791215e-06, + "loss": 0.1206, + "step": 33218 + }, + { + "epoch": 91.26098901098901, + "grad_norm": 7.2649760246276855, + "learning_rate": 4.369505494505495e-06, + "loss": 0.0739, + "step": 33219 + }, + { + "epoch": 91.26373626373626, + "grad_norm": 5.6786394119262695, + "learning_rate": 4.368131868131868e-06, + "loss": 0.0604, + "step": 33220 + }, + { + "epoch": 91.26648351648352, + "grad_norm": 18.825498580932617, + "learning_rate": 4.366758241758242e-06, + "loss": 0.2982, + "step": 33221 + }, + { + "epoch": 91.26923076923077, + "grad_norm": 11.009590148925781, + "learning_rate": 4.365384615384616e-06, + "loss": 0.1555, + "step": 33222 + }, + { + "epoch": 91.27197802197803, + "grad_norm": 5.539228916168213, + "learning_rate": 4.364010989010989e-06, + "loss": 0.0655, + "step": 33223 + }, + { + "epoch": 91.27472527472527, + "grad_norm": 22.373659133911133, + "learning_rate": 4.362637362637363e-06, + "loss": 0.3137, + "step": 33224 + }, + { + "epoch": 91.27747252747253, + "grad_norm": 30.50071144104004, + "learning_rate": 4.361263736263736e-06, + "loss": 0.6525, + "step": 33225 + }, + { + "epoch": 91.28021978021978, + "grad_norm": 4.710818767547607, + "learning_rate": 4.35989010989011e-06, + "loss": 0.0663, + "step": 33226 + }, + { + "epoch": 91.28296703296704, + "grad_norm": 9.135607719421387, + "learning_rate": 4.358516483516484e-06, + "loss": 0.0977, + "step": 33227 + }, + { + "epoch": 91.28571428571429, + "grad_norm": 20.34774398803711, + "learning_rate": 4.357142857142857e-06, + "loss": 0.5047, + "step": 33228 + }, + { + "epoch": 91.28846153846153, + "grad_norm": 10.74863338470459, + "learning_rate": 4.3557692307692315e-06, + "loss": 0.2135, + "step": 33229 + }, + { + "epoch": 91.29120879120879, + "grad_norm": 1.6277438402175903, + "learning_rate": 4.354395604395604e-06, + "loss": 0.015, + "step": 33230 + }, + { + "epoch": 91.29395604395604, + "grad_norm": 9.099785804748535, + "learning_rate": 4.353021978021978e-06, + "loss": 0.0858, + "step": 33231 + }, + { + "epoch": 91.2967032967033, + "grad_norm": 5.360671520233154, + "learning_rate": 4.351648351648352e-06, + "loss": 0.0682, + "step": 33232 + }, + { + "epoch": 91.29945054945055, + "grad_norm": 29.016084671020508, + "learning_rate": 4.350274725274726e-06, + "loss": 0.6488, + "step": 33233 + }, + { + "epoch": 91.3021978021978, + "grad_norm": 17.53185272216797, + "learning_rate": 4.348901098901099e-06, + "loss": 0.1889, + "step": 33234 + }, + { + "epoch": 91.30494505494505, + "grad_norm": 22.28704071044922, + "learning_rate": 4.347527472527473e-06, + "loss": 0.2345, + "step": 33235 + }, + { + "epoch": 91.3076923076923, + "grad_norm": 2.6791210174560547, + "learning_rate": 4.346153846153846e-06, + "loss": 0.0235, + "step": 33236 + }, + { + "epoch": 91.31043956043956, + "grad_norm": 8.759243965148926, + "learning_rate": 4.3447802197802205e-06, + "loss": 0.1145, + "step": 33237 + }, + { + "epoch": 91.31318681318682, + "grad_norm": 21.674589157104492, + "learning_rate": 4.343406593406594e-06, + "loss": 0.5894, + "step": 33238 + }, + { + "epoch": 91.31593406593407, + "grad_norm": 12.250383377075195, + "learning_rate": 4.342032967032967e-06, + "loss": 0.212, + "step": 33239 + }, + { + "epoch": 91.31868131868131, + "grad_norm": 24.44532585144043, + "learning_rate": 4.340659340659341e-06, + "loss": 0.5597, + "step": 33240 + }, + { + "epoch": 91.32142857142857, + "grad_norm": 11.675594329833984, + "learning_rate": 4.339285714285714e-06, + "loss": 0.1883, + "step": 33241 + }, + { + "epoch": 91.32417582417582, + "grad_norm": 2.2358477115631104, + "learning_rate": 4.337912087912088e-06, + "loss": 0.0247, + "step": 33242 + }, + { + "epoch": 91.32692307692308, + "grad_norm": 11.443554878234863, + "learning_rate": 4.336538461538462e-06, + "loss": 0.0942, + "step": 33243 + }, + { + "epoch": 91.32967032967034, + "grad_norm": 6.929312229156494, + "learning_rate": 4.335164835164835e-06, + "loss": 0.0535, + "step": 33244 + }, + { + "epoch": 91.33241758241758, + "grad_norm": 10.218297004699707, + "learning_rate": 4.333791208791209e-06, + "loss": 0.2666, + "step": 33245 + }, + { + "epoch": 91.33516483516483, + "grad_norm": 15.367624282836914, + "learning_rate": 4.332417582417583e-06, + "loss": 0.3538, + "step": 33246 + }, + { + "epoch": 91.33791208791209, + "grad_norm": 4.0955328941345215, + "learning_rate": 4.331043956043956e-06, + "loss": 0.0628, + "step": 33247 + }, + { + "epoch": 91.34065934065934, + "grad_norm": 11.239507675170898, + "learning_rate": 4.3296703296703305e-06, + "loss": 0.1069, + "step": 33248 + }, + { + "epoch": 91.3434065934066, + "grad_norm": 11.672208786010742, + "learning_rate": 4.328296703296703e-06, + "loss": 0.1617, + "step": 33249 + }, + { + "epoch": 91.34615384615384, + "grad_norm": 11.458559036254883, + "learning_rate": 4.326923076923077e-06, + "loss": 0.4255, + "step": 33250 + }, + { + "epoch": 91.3489010989011, + "grad_norm": 7.429336071014404, + "learning_rate": 4.325549450549451e-06, + "loss": 0.0978, + "step": 33251 + }, + { + "epoch": 91.35164835164835, + "grad_norm": 18.012603759765625, + "learning_rate": 4.324175824175825e-06, + "loss": 0.5054, + "step": 33252 + }, + { + "epoch": 91.3543956043956, + "grad_norm": 14.341805458068848, + "learning_rate": 4.322802197802198e-06, + "loss": 0.4648, + "step": 33253 + }, + { + "epoch": 91.35714285714286, + "grad_norm": 5.369882106781006, + "learning_rate": 4.321428571428572e-06, + "loss": 0.0598, + "step": 33254 + }, + { + "epoch": 91.35989010989012, + "grad_norm": 17.044958114624023, + "learning_rate": 4.320054945054945e-06, + "loss": 0.35, + "step": 33255 + }, + { + "epoch": 91.36263736263736, + "grad_norm": 21.14150047302246, + "learning_rate": 4.318681318681319e-06, + "loss": 0.348, + "step": 33256 + }, + { + "epoch": 91.36538461538461, + "grad_norm": 4.442233085632324, + "learning_rate": 4.317307692307693e-06, + "loss": 0.0375, + "step": 33257 + }, + { + "epoch": 91.36813186813187, + "grad_norm": 12.86197280883789, + "learning_rate": 4.3159340659340654e-06, + "loss": 0.1871, + "step": 33258 + }, + { + "epoch": 91.37087912087912, + "grad_norm": 7.163751602172852, + "learning_rate": 4.31456043956044e-06, + "loss": 0.0941, + "step": 33259 + }, + { + "epoch": 91.37362637362638, + "grad_norm": 15.952248573303223, + "learning_rate": 4.313186813186813e-06, + "loss": 0.2605, + "step": 33260 + }, + { + "epoch": 91.37637362637362, + "grad_norm": 2.1505470275878906, + "learning_rate": 4.311813186813187e-06, + "loss": 0.0346, + "step": 33261 + }, + { + "epoch": 91.37912087912088, + "grad_norm": 14.938117027282715, + "learning_rate": 4.310439560439561e-06, + "loss": 0.3431, + "step": 33262 + }, + { + "epoch": 91.38186813186813, + "grad_norm": 8.474371910095215, + "learning_rate": 4.309065934065934e-06, + "loss": 0.1316, + "step": 33263 + }, + { + "epoch": 91.38461538461539, + "grad_norm": 14.972168922424316, + "learning_rate": 4.3076923076923076e-06, + "loss": 0.174, + "step": 33264 + }, + { + "epoch": 91.38736263736264, + "grad_norm": 17.888954162597656, + "learning_rate": 4.306318681318682e-06, + "loss": 0.1966, + "step": 33265 + }, + { + "epoch": 91.39010989010988, + "grad_norm": 14.10888957977295, + "learning_rate": 4.304945054945055e-06, + "loss": 0.3138, + "step": 33266 + }, + { + "epoch": 91.39285714285714, + "grad_norm": 5.352777004241943, + "learning_rate": 4.303571428571429e-06, + "loss": 0.0875, + "step": 33267 + }, + { + "epoch": 91.3956043956044, + "grad_norm": 14.126832008361816, + "learning_rate": 4.302197802197802e-06, + "loss": 0.38, + "step": 33268 + }, + { + "epoch": 91.39835164835165, + "grad_norm": 5.969874858856201, + "learning_rate": 4.300824175824176e-06, + "loss": 0.0794, + "step": 33269 + }, + { + "epoch": 91.4010989010989, + "grad_norm": 10.78979778289795, + "learning_rate": 4.29945054945055e-06, + "loss": 0.0815, + "step": 33270 + }, + { + "epoch": 91.40384615384616, + "grad_norm": 11.341033935546875, + "learning_rate": 4.298076923076923e-06, + "loss": 0.1959, + "step": 33271 + }, + { + "epoch": 91.4065934065934, + "grad_norm": 19.866531372070312, + "learning_rate": 4.296703296703297e-06, + "loss": 0.3786, + "step": 33272 + }, + { + "epoch": 91.40934065934066, + "grad_norm": 5.734976768493652, + "learning_rate": 4.29532967032967e-06, + "loss": 0.0944, + "step": 33273 + }, + { + "epoch": 91.41208791208791, + "grad_norm": 17.309301376342773, + "learning_rate": 4.293956043956044e-06, + "loss": 0.3732, + "step": 33274 + }, + { + "epoch": 91.41483516483517, + "grad_norm": 5.846484661102295, + "learning_rate": 4.292582417582418e-06, + "loss": 0.053, + "step": 33275 + }, + { + "epoch": 91.41758241758242, + "grad_norm": 16.75156593322754, + "learning_rate": 4.291208791208792e-06, + "loss": 0.2688, + "step": 33276 + }, + { + "epoch": 91.42032967032966, + "grad_norm": 3.901902914047241, + "learning_rate": 4.289835164835165e-06, + "loss": 0.0433, + "step": 33277 + }, + { + "epoch": 91.42307692307692, + "grad_norm": 7.380453586578369, + "learning_rate": 4.288461538461539e-06, + "loss": 0.1292, + "step": 33278 + }, + { + "epoch": 91.42582417582418, + "grad_norm": 20.75201988220215, + "learning_rate": 4.287087912087912e-06, + "loss": 0.4803, + "step": 33279 + }, + { + "epoch": 91.42857142857143, + "grad_norm": 6.060437202453613, + "learning_rate": 4.285714285714286e-06, + "loss": 0.0775, + "step": 33280 + }, + { + "epoch": 91.43131868131869, + "grad_norm": 20.80560874938965, + "learning_rate": 4.28434065934066e-06, + "loss": 0.1442, + "step": 33281 + }, + { + "epoch": 91.43406593406593, + "grad_norm": 16.029661178588867, + "learning_rate": 4.282967032967033e-06, + "loss": 0.2992, + "step": 33282 + }, + { + "epoch": 91.43681318681318, + "grad_norm": 25.05900764465332, + "learning_rate": 4.2815934065934065e-06, + "loss": 0.8883, + "step": 33283 + }, + { + "epoch": 91.43956043956044, + "grad_norm": 26.087970733642578, + "learning_rate": 4.28021978021978e-06, + "loss": 0.5985, + "step": 33284 + }, + { + "epoch": 91.4423076923077, + "grad_norm": 12.897941589355469, + "learning_rate": 4.278846153846154e-06, + "loss": 0.1628, + "step": 33285 + }, + { + "epoch": 91.44505494505495, + "grad_norm": 11.09160327911377, + "learning_rate": 4.277472527472528e-06, + "loss": 0.1466, + "step": 33286 + }, + { + "epoch": 91.4478021978022, + "grad_norm": 9.109708786010742, + "learning_rate": 4.276098901098901e-06, + "loss": 0.1683, + "step": 33287 + }, + { + "epoch": 91.45054945054945, + "grad_norm": 5.713860988616943, + "learning_rate": 4.274725274725274e-06, + "loss": 0.0859, + "step": 33288 + }, + { + "epoch": 91.4532967032967, + "grad_norm": 35.47671127319336, + "learning_rate": 4.273351648351649e-06, + "loss": 1.0252, + "step": 33289 + }, + { + "epoch": 91.45604395604396, + "grad_norm": 8.551647186279297, + "learning_rate": 4.271978021978022e-06, + "loss": 0.2391, + "step": 33290 + }, + { + "epoch": 91.45879120879121, + "grad_norm": 13.60165786743164, + "learning_rate": 4.270604395604396e-06, + "loss": 0.4143, + "step": 33291 + }, + { + "epoch": 91.46153846153847, + "grad_norm": 4.978531360626221, + "learning_rate": 4.269230769230769e-06, + "loss": 0.0989, + "step": 33292 + }, + { + "epoch": 91.46428571428571, + "grad_norm": 18.394786834716797, + "learning_rate": 4.267857142857143e-06, + "loss": 0.362, + "step": 33293 + }, + { + "epoch": 91.46703296703296, + "grad_norm": 2.317138671875, + "learning_rate": 4.2664835164835165e-06, + "loss": 0.0295, + "step": 33294 + }, + { + "epoch": 91.46978021978022, + "grad_norm": 3.3363938331604004, + "learning_rate": 4.265109890109891e-06, + "loss": 0.0312, + "step": 33295 + }, + { + "epoch": 91.47252747252747, + "grad_norm": 11.960125923156738, + "learning_rate": 4.263736263736264e-06, + "loss": 0.3176, + "step": 33296 + }, + { + "epoch": 91.47527472527473, + "grad_norm": 18.641748428344727, + "learning_rate": 4.262362637362638e-06, + "loss": 0.3505, + "step": 33297 + }, + { + "epoch": 91.47802197802197, + "grad_norm": 19.11153793334961, + "learning_rate": 4.260989010989011e-06, + "loss": 0.4473, + "step": 33298 + }, + { + "epoch": 91.48076923076923, + "grad_norm": 0.5269991755485535, + "learning_rate": 4.2596153846153844e-06, + "loss": 0.0074, + "step": 33299 + }, + { + "epoch": 91.48351648351648, + "grad_norm": 5.4761505126953125, + "learning_rate": 4.258241758241759e-06, + "loss": 0.0782, + "step": 33300 + }, + { + "epoch": 91.48626373626374, + "grad_norm": 17.44891357421875, + "learning_rate": 4.256868131868132e-06, + "loss": 0.4769, + "step": 33301 + }, + { + "epoch": 91.48901098901099, + "grad_norm": 13.374841690063477, + "learning_rate": 4.2554945054945055e-06, + "loss": 0.2035, + "step": 33302 + }, + { + "epoch": 91.49175824175825, + "grad_norm": 11.597232818603516, + "learning_rate": 4.254120879120879e-06, + "loss": 0.0893, + "step": 33303 + }, + { + "epoch": 91.49450549450549, + "grad_norm": 7.734536647796631, + "learning_rate": 4.252747252747253e-06, + "loss": 0.0855, + "step": 33304 + }, + { + "epoch": 91.49725274725274, + "grad_norm": 17.437416076660156, + "learning_rate": 4.2513736263736266e-06, + "loss": 0.2899, + "step": 33305 + }, + { + "epoch": 91.5, + "grad_norm": 7.332820892333984, + "learning_rate": 4.250000000000001e-06, + "loss": 0.0656, + "step": 33306 + }, + { + "epoch": 91.50274725274726, + "grad_norm": 11.566861152648926, + "learning_rate": 4.248626373626373e-06, + "loss": 0.2024, + "step": 33307 + }, + { + "epoch": 91.50549450549451, + "grad_norm": 2.300549030303955, + "learning_rate": 4.247252747252748e-06, + "loss": 0.0395, + "step": 33308 + }, + { + "epoch": 91.50824175824175, + "grad_norm": 8.426687240600586, + "learning_rate": 4.245879120879121e-06, + "loss": 0.1556, + "step": 33309 + }, + { + "epoch": 91.51098901098901, + "grad_norm": 5.808200359344482, + "learning_rate": 4.244505494505495e-06, + "loss": 0.0588, + "step": 33310 + }, + { + "epoch": 91.51373626373626, + "grad_norm": 8.052913665771484, + "learning_rate": 4.243131868131869e-06, + "loss": 0.1137, + "step": 33311 + }, + { + "epoch": 91.51648351648352, + "grad_norm": 14.897144317626953, + "learning_rate": 4.241758241758242e-06, + "loss": 0.1462, + "step": 33312 + }, + { + "epoch": 91.51923076923077, + "grad_norm": 6.944656848907471, + "learning_rate": 4.2403846153846155e-06, + "loss": 0.1051, + "step": 33313 + }, + { + "epoch": 91.52197802197803, + "grad_norm": 13.69627857208252, + "learning_rate": 4.239010989010989e-06, + "loss": 0.1741, + "step": 33314 + }, + { + "epoch": 91.52472527472527, + "grad_norm": 7.487452983856201, + "learning_rate": 4.237637362637363e-06, + "loss": 0.0599, + "step": 33315 + }, + { + "epoch": 91.52747252747253, + "grad_norm": 5.318849086761475, + "learning_rate": 4.236263736263736e-06, + "loss": 0.0514, + "step": 33316 + }, + { + "epoch": 91.53021978021978, + "grad_norm": 16.430482864379883, + "learning_rate": 4.23489010989011e-06, + "loss": 0.4672, + "step": 33317 + }, + { + "epoch": 91.53296703296704, + "grad_norm": 13.656815528869629, + "learning_rate": 4.233516483516483e-06, + "loss": 0.3603, + "step": 33318 + }, + { + "epoch": 91.53571428571429, + "grad_norm": 4.814915180206299, + "learning_rate": 4.232142857142858e-06, + "loss": 0.0876, + "step": 33319 + }, + { + "epoch": 91.53846153846153, + "grad_norm": 15.52582836151123, + "learning_rate": 4.230769230769231e-06, + "loss": 0.2332, + "step": 33320 + }, + { + "epoch": 91.54120879120879, + "grad_norm": 11.30849552154541, + "learning_rate": 4.2293956043956045e-06, + "loss": 0.1732, + "step": 33321 + }, + { + "epoch": 91.54395604395604, + "grad_norm": 9.256702423095703, + "learning_rate": 4.228021978021978e-06, + "loss": 0.1262, + "step": 33322 + }, + { + "epoch": 91.5467032967033, + "grad_norm": 27.172348022460938, + "learning_rate": 4.226648351648352e-06, + "loss": 0.4563, + "step": 33323 + }, + { + "epoch": 91.54945054945055, + "grad_norm": 4.192007541656494, + "learning_rate": 4.2252747252747255e-06, + "loss": 0.0567, + "step": 33324 + }, + { + "epoch": 91.5521978021978, + "grad_norm": 12.23500919342041, + "learning_rate": 4.2239010989011e-06, + "loss": 0.2486, + "step": 33325 + }, + { + "epoch": 91.55494505494505, + "grad_norm": 15.942490577697754, + "learning_rate": 4.222527472527472e-06, + "loss": 0.2145, + "step": 33326 + }, + { + "epoch": 91.5576923076923, + "grad_norm": 6.263942718505859, + "learning_rate": 4.221153846153847e-06, + "loss": 0.1372, + "step": 33327 + }, + { + "epoch": 91.56043956043956, + "grad_norm": 16.145999908447266, + "learning_rate": 4.21978021978022e-06, + "loss": 0.1609, + "step": 33328 + }, + { + "epoch": 91.56318681318682, + "grad_norm": 2.5870306491851807, + "learning_rate": 4.218406593406593e-06, + "loss": 0.0252, + "step": 33329 + }, + { + "epoch": 91.56593406593407, + "grad_norm": 5.156924724578857, + "learning_rate": 4.217032967032968e-06, + "loss": 0.0671, + "step": 33330 + }, + { + "epoch": 91.56868131868131, + "grad_norm": 1.5254369974136353, + "learning_rate": 4.21565934065934e-06, + "loss": 0.0162, + "step": 33331 + }, + { + "epoch": 91.57142857142857, + "grad_norm": 8.438250541687012, + "learning_rate": 4.2142857142857145e-06, + "loss": 0.1091, + "step": 33332 + }, + { + "epoch": 91.57417582417582, + "grad_norm": 6.494385719299316, + "learning_rate": 4.212912087912088e-06, + "loss": 0.089, + "step": 33333 + }, + { + "epoch": 91.57692307692308, + "grad_norm": 4.28731107711792, + "learning_rate": 4.211538461538462e-06, + "loss": 0.0816, + "step": 33334 + }, + { + "epoch": 91.57967032967034, + "grad_norm": 11.146509170532227, + "learning_rate": 4.2101648351648355e-06, + "loss": 0.1862, + "step": 33335 + }, + { + "epoch": 91.58241758241758, + "grad_norm": 19.88381576538086, + "learning_rate": 4.208791208791209e-06, + "loss": 0.4654, + "step": 33336 + }, + { + "epoch": 91.58516483516483, + "grad_norm": 7.621565818786621, + "learning_rate": 4.207417582417582e-06, + "loss": 0.1824, + "step": 33337 + }, + { + "epoch": 91.58791208791209, + "grad_norm": 5.3778557777404785, + "learning_rate": 4.206043956043957e-06, + "loss": 0.0531, + "step": 33338 + }, + { + "epoch": 91.59065934065934, + "grad_norm": 33.6501579284668, + "learning_rate": 4.20467032967033e-06, + "loss": 0.4913, + "step": 33339 + }, + { + "epoch": 91.5934065934066, + "grad_norm": 2.9999356269836426, + "learning_rate": 4.203296703296703e-06, + "loss": 0.0266, + "step": 33340 + }, + { + "epoch": 91.59615384615384, + "grad_norm": 11.902921676635742, + "learning_rate": 4.201923076923077e-06, + "loss": 0.3555, + "step": 33341 + }, + { + "epoch": 91.5989010989011, + "grad_norm": 18.225610733032227, + "learning_rate": 4.200549450549451e-06, + "loss": 0.4089, + "step": 33342 + }, + { + "epoch": 91.60164835164835, + "grad_norm": 15.256194114685059, + "learning_rate": 4.1991758241758245e-06, + "loss": 0.2457, + "step": 33343 + }, + { + "epoch": 91.6043956043956, + "grad_norm": 6.978710174560547, + "learning_rate": 4.197802197802198e-06, + "loss": 0.162, + "step": 33344 + }, + { + "epoch": 91.60714285714286, + "grad_norm": 0.9082265496253967, + "learning_rate": 4.196428571428571e-06, + "loss": 0.0103, + "step": 33345 + }, + { + "epoch": 91.60989010989012, + "grad_norm": 8.690892219543457, + "learning_rate": 4.195054945054945e-06, + "loss": 0.1634, + "step": 33346 + }, + { + "epoch": 91.61263736263736, + "grad_norm": 14.692486763000488, + "learning_rate": 4.193681318681319e-06, + "loss": 0.331, + "step": 33347 + }, + { + "epoch": 91.61538461538461, + "grad_norm": 5.600006580352783, + "learning_rate": 4.192307692307692e-06, + "loss": 0.0544, + "step": 33348 + }, + { + "epoch": 91.61813186813187, + "grad_norm": 2.326784610748291, + "learning_rate": 4.190934065934067e-06, + "loss": 0.0201, + "step": 33349 + }, + { + "epoch": 91.62087912087912, + "grad_norm": 20.108503341674805, + "learning_rate": 4.189560439560439e-06, + "loss": 0.2371, + "step": 33350 + }, + { + "epoch": 91.62362637362638, + "grad_norm": 14.837599754333496, + "learning_rate": 4.1881868131868134e-06, + "loss": 0.2096, + "step": 33351 + }, + { + "epoch": 91.62637362637362, + "grad_norm": 13.892574310302734, + "learning_rate": 4.186813186813187e-06, + "loss": 0.0993, + "step": 33352 + }, + { + "epoch": 91.62912087912088, + "grad_norm": 10.422213554382324, + "learning_rate": 4.185439560439561e-06, + "loss": 0.0728, + "step": 33353 + }, + { + "epoch": 91.63186813186813, + "grad_norm": 20.915363311767578, + "learning_rate": 4.1840659340659345e-06, + "loss": 0.3538, + "step": 33354 + }, + { + "epoch": 91.63461538461539, + "grad_norm": 8.1802978515625, + "learning_rate": 4.182692307692308e-06, + "loss": 0.1401, + "step": 33355 + }, + { + "epoch": 91.63736263736264, + "grad_norm": 28.41487693786621, + "learning_rate": 4.181318681318681e-06, + "loss": 0.3954, + "step": 33356 + }, + { + "epoch": 91.64010989010988, + "grad_norm": 16.794410705566406, + "learning_rate": 4.179945054945055e-06, + "loss": 0.1562, + "step": 33357 + }, + { + "epoch": 91.64285714285714, + "grad_norm": 21.376853942871094, + "learning_rate": 4.178571428571429e-06, + "loss": 0.2588, + "step": 33358 + }, + { + "epoch": 91.6456043956044, + "grad_norm": 11.466743469238281, + "learning_rate": 4.177197802197802e-06, + "loss": 0.1467, + "step": 33359 + }, + { + "epoch": 91.64835164835165, + "grad_norm": 28.881200790405273, + "learning_rate": 4.175824175824176e-06, + "loss": 0.3164, + "step": 33360 + }, + { + "epoch": 91.6510989010989, + "grad_norm": 11.46989631652832, + "learning_rate": 4.174450549450549e-06, + "loss": 0.1858, + "step": 33361 + }, + { + "epoch": 91.65384615384616, + "grad_norm": 14.429298400878906, + "learning_rate": 4.1730769230769235e-06, + "loss": 0.1852, + "step": 33362 + }, + { + "epoch": 91.6565934065934, + "grad_norm": 8.384202003479004, + "learning_rate": 4.171703296703297e-06, + "loss": 0.0829, + "step": 33363 + }, + { + "epoch": 91.65934065934066, + "grad_norm": 19.805362701416016, + "learning_rate": 4.170329670329671e-06, + "loss": 0.3402, + "step": 33364 + }, + { + "epoch": 91.66208791208791, + "grad_norm": 11.058213233947754, + "learning_rate": 4.168956043956044e-06, + "loss": 0.1764, + "step": 33365 + }, + { + "epoch": 91.66483516483517, + "grad_norm": 5.026715278625488, + "learning_rate": 4.167582417582418e-06, + "loss": 0.0622, + "step": 33366 + }, + { + "epoch": 91.66758241758242, + "grad_norm": 11.951141357421875, + "learning_rate": 4.166208791208791e-06, + "loss": 0.2103, + "step": 33367 + }, + { + "epoch": 91.67032967032966, + "grad_norm": 13.30675983428955, + "learning_rate": 4.164835164835166e-06, + "loss": 0.1876, + "step": 33368 + }, + { + "epoch": 91.67307692307692, + "grad_norm": 1.3271498680114746, + "learning_rate": 4.163461538461539e-06, + "loss": 0.0149, + "step": 33369 + }, + { + "epoch": 91.67582417582418, + "grad_norm": 9.62199878692627, + "learning_rate": 4.162087912087912e-06, + "loss": 0.143, + "step": 33370 + }, + { + "epoch": 91.67857142857143, + "grad_norm": 13.852967262268066, + "learning_rate": 4.160714285714286e-06, + "loss": 0.2118, + "step": 33371 + }, + { + "epoch": 91.68131868131869, + "grad_norm": 3.3465778827667236, + "learning_rate": 4.159340659340659e-06, + "loss": 0.0463, + "step": 33372 + }, + { + "epoch": 91.68406593406593, + "grad_norm": 17.96808624267578, + "learning_rate": 4.1579670329670335e-06, + "loss": 0.3412, + "step": 33373 + }, + { + "epoch": 91.68681318681318, + "grad_norm": 11.261309623718262, + "learning_rate": 4.156593406593407e-06, + "loss": 0.1716, + "step": 33374 + }, + { + "epoch": 91.68956043956044, + "grad_norm": 31.405956268310547, + "learning_rate": 4.15521978021978e-06, + "loss": 1.2001, + "step": 33375 + }, + { + "epoch": 91.6923076923077, + "grad_norm": 4.191878795623779, + "learning_rate": 4.153846153846154e-06, + "loss": 0.0403, + "step": 33376 + }, + { + "epoch": 91.69505494505495, + "grad_norm": 10.986857414245605, + "learning_rate": 4.152472527472528e-06, + "loss": 0.1832, + "step": 33377 + }, + { + "epoch": 91.6978021978022, + "grad_norm": 17.56527328491211, + "learning_rate": 4.151098901098901e-06, + "loss": 0.3802, + "step": 33378 + }, + { + "epoch": 91.70054945054945, + "grad_norm": 4.885556221008301, + "learning_rate": 4.149725274725275e-06, + "loss": 0.0814, + "step": 33379 + }, + { + "epoch": 91.7032967032967, + "grad_norm": 19.272417068481445, + "learning_rate": 4.148351648351648e-06, + "loss": 0.2022, + "step": 33380 + }, + { + "epoch": 91.70604395604396, + "grad_norm": 8.044471740722656, + "learning_rate": 4.146978021978022e-06, + "loss": 0.2177, + "step": 33381 + }, + { + "epoch": 91.70879120879121, + "grad_norm": 27.418811798095703, + "learning_rate": 4.145604395604396e-06, + "loss": 0.5149, + "step": 33382 + }, + { + "epoch": 91.71153846153847, + "grad_norm": 16.987098693847656, + "learning_rate": 4.14423076923077e-06, + "loss": 0.201, + "step": 33383 + }, + { + "epoch": 91.71428571428571, + "grad_norm": 16.843935012817383, + "learning_rate": 4.142857142857143e-06, + "loss": 0.1966, + "step": 33384 + }, + { + "epoch": 91.71703296703296, + "grad_norm": 2.3260014057159424, + "learning_rate": 4.141483516483517e-06, + "loss": 0.0285, + "step": 33385 + }, + { + "epoch": 91.71978021978022, + "grad_norm": 18.94908332824707, + "learning_rate": 4.14010989010989e-06, + "loss": 0.5164, + "step": 33386 + }, + { + "epoch": 91.72252747252747, + "grad_norm": 12.517436981201172, + "learning_rate": 4.138736263736264e-06, + "loss": 0.323, + "step": 33387 + }, + { + "epoch": 91.72527472527473, + "grad_norm": 14.853811264038086, + "learning_rate": 4.137362637362638e-06, + "loss": 0.1671, + "step": 33388 + }, + { + "epoch": 91.72802197802197, + "grad_norm": 14.468260765075684, + "learning_rate": 4.1359890109890105e-06, + "loss": 0.1966, + "step": 33389 + }, + { + "epoch": 91.73076923076923, + "grad_norm": 12.895960807800293, + "learning_rate": 4.134615384615385e-06, + "loss": 0.1782, + "step": 33390 + }, + { + "epoch": 91.73351648351648, + "grad_norm": 4.873347282409668, + "learning_rate": 4.133241758241758e-06, + "loss": 0.0306, + "step": 33391 + }, + { + "epoch": 91.73626373626374, + "grad_norm": 6.852480411529541, + "learning_rate": 4.1318681318681324e-06, + "loss": 0.1025, + "step": 33392 + }, + { + "epoch": 91.73901098901099, + "grad_norm": 13.133649826049805, + "learning_rate": 4.130494505494506e-06, + "loss": 0.2964, + "step": 33393 + }, + { + "epoch": 91.74175824175825, + "grad_norm": 3.026599645614624, + "learning_rate": 4.129120879120879e-06, + "loss": 0.0549, + "step": 33394 + }, + { + "epoch": 91.74450549450549, + "grad_norm": 6.905823230743408, + "learning_rate": 4.127747252747253e-06, + "loss": 0.1933, + "step": 33395 + }, + { + "epoch": 91.74725274725274, + "grad_norm": 2.837777853012085, + "learning_rate": 4.126373626373627e-06, + "loss": 0.0303, + "step": 33396 + }, + { + "epoch": 91.75, + "grad_norm": 6.164054870605469, + "learning_rate": 4.125e-06, + "loss": 0.0865, + "step": 33397 + }, + { + "epoch": 91.75274725274726, + "grad_norm": 6.6467509269714355, + "learning_rate": 4.1236263736263746e-06, + "loss": 0.0813, + "step": 33398 + }, + { + "epoch": 91.75549450549451, + "grad_norm": 16.69719696044922, + "learning_rate": 4.122252747252747e-06, + "loss": 0.6322, + "step": 33399 + }, + { + "epoch": 91.75824175824175, + "grad_norm": 21.393905639648438, + "learning_rate": 4.120879120879121e-06, + "loss": 0.2923, + "step": 33400 + }, + { + "epoch": 91.76098901098901, + "grad_norm": 16.88657569885254, + "learning_rate": 4.119505494505495e-06, + "loss": 0.318, + "step": 33401 + }, + { + "epoch": 91.76373626373626, + "grad_norm": 16.196807861328125, + "learning_rate": 4.118131868131868e-06, + "loss": 0.2152, + "step": 33402 + }, + { + "epoch": 91.76648351648352, + "grad_norm": 13.525352478027344, + "learning_rate": 4.116758241758242e-06, + "loss": 0.1149, + "step": 33403 + }, + { + "epoch": 91.76923076923077, + "grad_norm": 12.184825897216797, + "learning_rate": 4.115384615384615e-06, + "loss": 0.3069, + "step": 33404 + }, + { + "epoch": 91.77197802197803, + "grad_norm": 17.719877243041992, + "learning_rate": 4.114010989010989e-06, + "loss": 0.3506, + "step": 33405 + }, + { + "epoch": 91.77472527472527, + "grad_norm": 15.719561576843262, + "learning_rate": 4.112637362637363e-06, + "loss": 0.5316, + "step": 33406 + }, + { + "epoch": 91.77747252747253, + "grad_norm": 18.17578887939453, + "learning_rate": 4.111263736263737e-06, + "loss": 0.1836, + "step": 33407 + }, + { + "epoch": 91.78021978021978, + "grad_norm": 4.780701637268066, + "learning_rate": 4.1098901098901095e-06, + "loss": 0.0481, + "step": 33408 + }, + { + "epoch": 91.78296703296704, + "grad_norm": 4.774885177612305, + "learning_rate": 4.108516483516484e-06, + "loss": 0.0893, + "step": 33409 + }, + { + "epoch": 91.78571428571429, + "grad_norm": 20.639820098876953, + "learning_rate": 4.107142857142857e-06, + "loss": 0.4085, + "step": 33410 + }, + { + "epoch": 91.78846153846153, + "grad_norm": 24.925081253051758, + "learning_rate": 4.105769230769231e-06, + "loss": 0.6496, + "step": 33411 + }, + { + "epoch": 91.79120879120879, + "grad_norm": 28.349000930786133, + "learning_rate": 4.104395604395605e-06, + "loss": 0.3608, + "step": 33412 + }, + { + "epoch": 91.79395604395604, + "grad_norm": 11.63691520690918, + "learning_rate": 4.103021978021978e-06, + "loss": 0.1585, + "step": 33413 + }, + { + "epoch": 91.7967032967033, + "grad_norm": 10.572203636169434, + "learning_rate": 4.101648351648352e-06, + "loss": 0.199, + "step": 33414 + }, + { + "epoch": 91.79945054945055, + "grad_norm": 19.07229995727539, + "learning_rate": 4.100274725274726e-06, + "loss": 0.3253, + "step": 33415 + }, + { + "epoch": 91.8021978021978, + "grad_norm": 3.826846122741699, + "learning_rate": 4.098901098901099e-06, + "loss": 0.0228, + "step": 33416 + }, + { + "epoch": 91.80494505494505, + "grad_norm": 13.183246612548828, + "learning_rate": 4.097527472527473e-06, + "loss": 0.2085, + "step": 33417 + }, + { + "epoch": 91.8076923076923, + "grad_norm": 22.74388313293457, + "learning_rate": 4.096153846153846e-06, + "loss": 0.5573, + "step": 33418 + }, + { + "epoch": 91.81043956043956, + "grad_norm": 8.68423843383789, + "learning_rate": 4.0947802197802195e-06, + "loss": 0.1249, + "step": 33419 + }, + { + "epoch": 91.81318681318682, + "grad_norm": 15.237915992736816, + "learning_rate": 4.093406593406594e-06, + "loss": 0.2094, + "step": 33420 + }, + { + "epoch": 91.81593406593407, + "grad_norm": 2.389148712158203, + "learning_rate": 4.092032967032967e-06, + "loss": 0.019, + "step": 33421 + }, + { + "epoch": 91.81868131868131, + "grad_norm": 13.444997787475586, + "learning_rate": 4.090659340659341e-06, + "loss": 0.2337, + "step": 33422 + }, + { + "epoch": 91.82142857142857, + "grad_norm": 17.551225662231445, + "learning_rate": 4.089285714285714e-06, + "loss": 0.4283, + "step": 33423 + }, + { + "epoch": 91.82417582417582, + "grad_norm": 2.581634998321533, + "learning_rate": 4.087912087912088e-06, + "loss": 0.0342, + "step": 33424 + }, + { + "epoch": 91.82692307692308, + "grad_norm": 5.484612464904785, + "learning_rate": 4.086538461538462e-06, + "loss": 0.0752, + "step": 33425 + }, + { + "epoch": 91.82967032967034, + "grad_norm": 23.111337661743164, + "learning_rate": 4.085164835164836e-06, + "loss": 0.5273, + "step": 33426 + }, + { + "epoch": 91.83241758241758, + "grad_norm": 13.749448776245117, + "learning_rate": 4.083791208791209e-06, + "loss": 0.3634, + "step": 33427 + }, + { + "epoch": 91.83516483516483, + "grad_norm": 28.84903907775879, + "learning_rate": 4.082417582417583e-06, + "loss": 1.0007, + "step": 33428 + }, + { + "epoch": 91.83791208791209, + "grad_norm": 13.593884468078613, + "learning_rate": 4.081043956043956e-06, + "loss": 0.4084, + "step": 33429 + }, + { + "epoch": 91.84065934065934, + "grad_norm": 13.46895694732666, + "learning_rate": 4.07967032967033e-06, + "loss": 0.1747, + "step": 33430 + }, + { + "epoch": 91.8434065934066, + "grad_norm": 14.36894416809082, + "learning_rate": 4.078296703296704e-06, + "loss": 0.1267, + "step": 33431 + }, + { + "epoch": 91.84615384615384, + "grad_norm": 0.8735141158103943, + "learning_rate": 4.076923076923077e-06, + "loss": 0.0114, + "step": 33432 + }, + { + "epoch": 91.8489010989011, + "grad_norm": 4.450873851776123, + "learning_rate": 4.075549450549451e-06, + "loss": 0.0742, + "step": 33433 + }, + { + "epoch": 91.85164835164835, + "grad_norm": 3.830307722091675, + "learning_rate": 4.074175824175824e-06, + "loss": 0.0654, + "step": 33434 + }, + { + "epoch": 91.8543956043956, + "grad_norm": 15.658770561218262, + "learning_rate": 4.072802197802198e-06, + "loss": 0.2389, + "step": 33435 + }, + { + "epoch": 91.85714285714286, + "grad_norm": 3.3074867725372314, + "learning_rate": 4.071428571428572e-06, + "loss": 0.0297, + "step": 33436 + }, + { + "epoch": 91.85989010989012, + "grad_norm": 13.62309741973877, + "learning_rate": 4.070054945054945e-06, + "loss": 0.2123, + "step": 33437 + }, + { + "epoch": 91.86263736263736, + "grad_norm": 11.138836860656738, + "learning_rate": 4.0686813186813185e-06, + "loss": 0.227, + "step": 33438 + }, + { + "epoch": 91.86538461538461, + "grad_norm": 4.696771621704102, + "learning_rate": 4.067307692307693e-06, + "loss": 0.0668, + "step": 33439 + }, + { + "epoch": 91.86813186813187, + "grad_norm": 5.989482879638672, + "learning_rate": 4.065934065934066e-06, + "loss": 0.0617, + "step": 33440 + }, + { + "epoch": 91.87087912087912, + "grad_norm": 16.06829833984375, + "learning_rate": 4.06456043956044e-06, + "loss": 0.3065, + "step": 33441 + }, + { + "epoch": 91.87362637362638, + "grad_norm": 8.181735038757324, + "learning_rate": 4.063186813186813e-06, + "loss": 0.1793, + "step": 33442 + }, + { + "epoch": 91.87637362637362, + "grad_norm": 28.503005981445312, + "learning_rate": 4.061813186813187e-06, + "loss": 0.6262, + "step": 33443 + }, + { + "epoch": 91.87912087912088, + "grad_norm": 5.7520880699157715, + "learning_rate": 4.060439560439561e-06, + "loss": 0.1075, + "step": 33444 + }, + { + "epoch": 91.88186813186813, + "grad_norm": 14.236199378967285, + "learning_rate": 4.059065934065934e-06, + "loss": 0.3723, + "step": 33445 + }, + { + "epoch": 91.88461538461539, + "grad_norm": 23.75975227355957, + "learning_rate": 4.057692307692308e-06, + "loss": 0.4819, + "step": 33446 + }, + { + "epoch": 91.88736263736264, + "grad_norm": 17.416431427001953, + "learning_rate": 4.056318681318682e-06, + "loss": 0.4749, + "step": 33447 + }, + { + "epoch": 91.89010989010988, + "grad_norm": 22.26056671142578, + "learning_rate": 4.054945054945055e-06, + "loss": 0.5403, + "step": 33448 + }, + { + "epoch": 91.89285714285714, + "grad_norm": 7.221053123474121, + "learning_rate": 4.0535714285714285e-06, + "loss": 0.0613, + "step": 33449 + }, + { + "epoch": 91.8956043956044, + "grad_norm": 8.608183860778809, + "learning_rate": 4.052197802197803e-06, + "loss": 0.1556, + "step": 33450 + }, + { + "epoch": 91.89835164835165, + "grad_norm": 6.946850299835205, + "learning_rate": 4.050824175824176e-06, + "loss": 0.1122, + "step": 33451 + }, + { + "epoch": 91.9010989010989, + "grad_norm": 8.292964935302734, + "learning_rate": 4.0494505494505496e-06, + "loss": 0.1241, + "step": 33452 + }, + { + "epoch": 91.90384615384616, + "grad_norm": 8.801651000976562, + "learning_rate": 4.048076923076923e-06, + "loss": 0.1695, + "step": 33453 + }, + { + "epoch": 91.9065934065934, + "grad_norm": 14.80258560180664, + "learning_rate": 4.046703296703297e-06, + "loss": 0.2267, + "step": 33454 + }, + { + "epoch": 91.90934065934066, + "grad_norm": 2.744367837905884, + "learning_rate": 4.045329670329671e-06, + "loss": 0.0418, + "step": 33455 + }, + { + "epoch": 91.91208791208791, + "grad_norm": 8.182266235351562, + "learning_rate": 4.043956043956045e-06, + "loss": 0.2935, + "step": 33456 + }, + { + "epoch": 91.91483516483517, + "grad_norm": 1.2905858755111694, + "learning_rate": 4.0425824175824174e-06, + "loss": 0.0154, + "step": 33457 + }, + { + "epoch": 91.91758241758242, + "grad_norm": 10.482710838317871, + "learning_rate": 4.041208791208792e-06, + "loss": 0.112, + "step": 33458 + }, + { + "epoch": 91.92032967032966, + "grad_norm": 2.8254106044769287, + "learning_rate": 4.039835164835165e-06, + "loss": 0.0398, + "step": 33459 + }, + { + "epoch": 91.92307692307692, + "grad_norm": 21.933658599853516, + "learning_rate": 4.0384615384615385e-06, + "loss": 0.6956, + "step": 33460 + }, + { + "epoch": 91.92582417582418, + "grad_norm": 2.886770486831665, + "learning_rate": 4.037087912087912e-06, + "loss": 0.034, + "step": 33461 + }, + { + "epoch": 91.92857142857143, + "grad_norm": 11.103946685791016, + "learning_rate": 4.035714285714285e-06, + "loss": 0.1828, + "step": 33462 + }, + { + "epoch": 91.93131868131869, + "grad_norm": 6.241270542144775, + "learning_rate": 4.0343406593406596e-06, + "loss": 0.0849, + "step": 33463 + }, + { + "epoch": 91.93406593406593, + "grad_norm": 4.310317039489746, + "learning_rate": 4.032967032967033e-06, + "loss": 0.0373, + "step": 33464 + }, + { + "epoch": 91.93681318681318, + "grad_norm": 18.28458023071289, + "learning_rate": 4.031593406593407e-06, + "loss": 0.2802, + "step": 33465 + }, + { + "epoch": 91.93956043956044, + "grad_norm": 7.618433952331543, + "learning_rate": 4.03021978021978e-06, + "loss": 0.1368, + "step": 33466 + }, + { + "epoch": 91.9423076923077, + "grad_norm": 6.874605655670166, + "learning_rate": 4.028846153846154e-06, + "loss": 0.0968, + "step": 33467 + }, + { + "epoch": 91.94505494505495, + "grad_norm": 6.495124816894531, + "learning_rate": 4.0274725274725274e-06, + "loss": 0.1108, + "step": 33468 + }, + { + "epoch": 91.9478021978022, + "grad_norm": 5.243141174316406, + "learning_rate": 4.026098901098902e-06, + "loss": 0.0453, + "step": 33469 + }, + { + "epoch": 91.95054945054945, + "grad_norm": 4.803700923919678, + "learning_rate": 4.024725274725275e-06, + "loss": 0.0738, + "step": 33470 + }, + { + "epoch": 91.9532967032967, + "grad_norm": 7.647545337677002, + "learning_rate": 4.0233516483516485e-06, + "loss": 0.1313, + "step": 33471 + }, + { + "epoch": 91.95604395604396, + "grad_norm": 11.364655494689941, + "learning_rate": 4.021978021978022e-06, + "loss": 0.1835, + "step": 33472 + }, + { + "epoch": 91.95879120879121, + "grad_norm": 8.761503219604492, + "learning_rate": 4.020604395604396e-06, + "loss": 0.1532, + "step": 33473 + }, + { + "epoch": 91.96153846153847, + "grad_norm": 18.625099182128906, + "learning_rate": 4.01923076923077e-06, + "loss": 0.4272, + "step": 33474 + }, + { + "epoch": 91.96428571428571, + "grad_norm": 15.643017768859863, + "learning_rate": 4.017857142857143e-06, + "loss": 0.4513, + "step": 33475 + }, + { + "epoch": 91.96703296703296, + "grad_norm": 15.73151969909668, + "learning_rate": 4.016483516483516e-06, + "loss": 0.2391, + "step": 33476 + }, + { + "epoch": 91.96978021978022, + "grad_norm": 6.5346598625183105, + "learning_rate": 4.01510989010989e-06, + "loss": 0.1104, + "step": 33477 + }, + { + "epoch": 91.97252747252747, + "grad_norm": 10.921926498413086, + "learning_rate": 4.013736263736264e-06, + "loss": 0.1722, + "step": 33478 + }, + { + "epoch": 91.97527472527473, + "grad_norm": 3.969473361968994, + "learning_rate": 4.0123626373626375e-06, + "loss": 0.1189, + "step": 33479 + }, + { + "epoch": 91.97802197802197, + "grad_norm": 9.163311958312988, + "learning_rate": 4.010989010989012e-06, + "loss": 0.1455, + "step": 33480 + }, + { + "epoch": 91.98076923076923, + "grad_norm": 16.4129581451416, + "learning_rate": 4.009615384615384e-06, + "loss": 0.1732, + "step": 33481 + }, + { + "epoch": 91.98351648351648, + "grad_norm": 9.906005859375, + "learning_rate": 4.0082417582417585e-06, + "loss": 0.187, + "step": 33482 + }, + { + "epoch": 91.98626373626374, + "grad_norm": 8.554662704467773, + "learning_rate": 4.006868131868132e-06, + "loss": 0.0788, + "step": 33483 + }, + { + "epoch": 91.98901098901099, + "grad_norm": 3.653965473175049, + "learning_rate": 4.005494505494506e-06, + "loss": 0.0934, + "step": 33484 + }, + { + "epoch": 91.99175824175825, + "grad_norm": 12.093218803405762, + "learning_rate": 4.00412087912088e-06, + "loss": 0.1334, + "step": 33485 + }, + { + "epoch": 91.99450549450549, + "grad_norm": 19.631189346313477, + "learning_rate": 4.002747252747253e-06, + "loss": 0.3376, + "step": 33486 + }, + { + "epoch": 91.99725274725274, + "grad_norm": 5.152737617492676, + "learning_rate": 4.001373626373626e-06, + "loss": 0.0758, + "step": 33487 + }, + { + "epoch": 92.0, + "grad_norm": 76.13890838623047, + "learning_rate": 4.000000000000001e-06, + "loss": 2.2473, + "step": 33488 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.8305785123966942, + "eval_f1": 0.8322560985356653, + "eval_f1_DuraRiadoRio_64x64": 0.8571428571428571, + "eval_f1_Mole_64x64": 0.9333333333333333, + "eval_f1_Quebrado_64x64": 0.8148148148148148, + "eval_f1_RiadoRio_64x64": 0.7673716012084593, + "eval_f1_RioFechado_64x64": 0.7886178861788617, + "eval_loss": 0.6959831118583679, + "eval_precision": 0.8506422977170531, + "eval_precision_DuraRiadoRio_64x64": 0.9344262295081968, + "eval_precision_Mole_64x64": 0.9432624113475178, + "eval_precision_Quebrado_64x64": 0.7333333333333333, + "eval_precision_RiadoRio_64x64": 0.7094972067039106, + "eval_precision_RioFechado_64x64": 0.9326923076923077, + "eval_recall": 0.8301138703566429, + "eval_recall_DuraRiadoRio_64x64": 0.7916666666666666, + "eval_recall_Mole_64x64": 0.9236111111111112, + "eval_recall_Quebrado_64x64": 0.9166666666666666, + "eval_recall_RiadoRio_64x64": 0.8355263157894737, + "eval_recall_RioFechado_64x64": 0.6830985915492958, + "eval_runtime": 1.7803, + "eval_samples_per_second": 407.803, + "eval_steps_per_second": 25.839, + "step": 33488 + }, + { + "epoch": 92.00274725274726, + "grad_norm": 25.07500648498535, + "learning_rate": 3.998626373626374e-06, + "loss": 0.6158, + "step": 33489 + }, + { + "epoch": 92.00549450549451, + "grad_norm": 8.343083381652832, + "learning_rate": 3.9972527472527475e-06, + "loss": 0.1312, + "step": 33490 + }, + { + "epoch": 92.00824175824175, + "grad_norm": 7.785335540771484, + "learning_rate": 3.995879120879121e-06, + "loss": 0.0835, + "step": 33491 + }, + { + "epoch": 92.01098901098901, + "grad_norm": 3.7082736492156982, + "learning_rate": 3.994505494505494e-06, + "loss": 0.0416, + "step": 33492 + }, + { + "epoch": 92.01373626373626, + "grad_norm": 12.834092140197754, + "learning_rate": 3.9931318681318685e-06, + "loss": 0.1704, + "step": 33493 + }, + { + "epoch": 92.01648351648352, + "grad_norm": 9.42101001739502, + "learning_rate": 3.991758241758242e-06, + "loss": 0.2151, + "step": 33494 + }, + { + "epoch": 92.01923076923077, + "grad_norm": 3.3776438236236572, + "learning_rate": 3.990384615384615e-06, + "loss": 0.0454, + "step": 33495 + }, + { + "epoch": 92.02197802197803, + "grad_norm": 9.288820266723633, + "learning_rate": 3.989010989010989e-06, + "loss": 0.1678, + "step": 33496 + }, + { + "epoch": 92.02472527472527, + "grad_norm": 4.125410556793213, + "learning_rate": 3.987637362637363e-06, + "loss": 0.0377, + "step": 33497 + }, + { + "epoch": 92.02747252747253, + "grad_norm": 3.622812509536743, + "learning_rate": 3.9862637362637364e-06, + "loss": 0.0546, + "step": 33498 + }, + { + "epoch": 92.03021978021978, + "grad_norm": 7.75495719909668, + "learning_rate": 3.984890109890111e-06, + "loss": 0.0993, + "step": 33499 + }, + { + "epoch": 92.03296703296704, + "grad_norm": 15.711060523986816, + "learning_rate": 3.983516483516483e-06, + "loss": 0.127, + "step": 33500 + }, + { + "epoch": 92.03571428571429, + "grad_norm": 7.18302583694458, + "learning_rate": 3.9821428571428575e-06, + "loss": 0.084, + "step": 33501 + }, + { + "epoch": 92.03846153846153, + "grad_norm": 7.007455825805664, + "learning_rate": 3.980769230769231e-06, + "loss": 0.1056, + "step": 33502 + }, + { + "epoch": 92.04120879120879, + "grad_norm": 17.533039093017578, + "learning_rate": 3.979395604395605e-06, + "loss": 0.4343, + "step": 33503 + }, + { + "epoch": 92.04395604395604, + "grad_norm": 20.122848510742188, + "learning_rate": 3.9780219780219786e-06, + "loss": 0.4537, + "step": 33504 + }, + { + "epoch": 92.0467032967033, + "grad_norm": 11.439804077148438, + "learning_rate": 3.976648351648352e-06, + "loss": 0.1846, + "step": 33505 + }, + { + "epoch": 92.04945054945055, + "grad_norm": 15.2208833694458, + "learning_rate": 3.975274725274725e-06, + "loss": 0.4101, + "step": 33506 + }, + { + "epoch": 92.0521978021978, + "grad_norm": 3.6669840812683105, + "learning_rate": 3.973901098901099e-06, + "loss": 0.0375, + "step": 33507 + }, + { + "epoch": 92.05494505494505, + "grad_norm": 6.395400524139404, + "learning_rate": 3.972527472527473e-06, + "loss": 0.0624, + "step": 33508 + }, + { + "epoch": 92.0576923076923, + "grad_norm": 6.830106258392334, + "learning_rate": 3.9711538461538464e-06, + "loss": 0.0736, + "step": 33509 + }, + { + "epoch": 92.06043956043956, + "grad_norm": 27.38303565979004, + "learning_rate": 3.96978021978022e-06, + "loss": 0.6248, + "step": 33510 + }, + { + "epoch": 92.06318681318682, + "grad_norm": 6.857335090637207, + "learning_rate": 3.968406593406593e-06, + "loss": 0.0872, + "step": 33511 + }, + { + "epoch": 92.06593406593407, + "grad_norm": 0.8786152601242065, + "learning_rate": 3.9670329670329675e-06, + "loss": 0.0087, + "step": 33512 + }, + { + "epoch": 92.06868131868131, + "grad_norm": 15.492753982543945, + "learning_rate": 3.965659340659341e-06, + "loss": 0.119, + "step": 33513 + }, + { + "epoch": 92.07142857142857, + "grad_norm": 13.71956729888916, + "learning_rate": 3.964285714285714e-06, + "loss": 0.3088, + "step": 33514 + }, + { + "epoch": 92.07417582417582, + "grad_norm": 9.775666236877441, + "learning_rate": 3.962912087912088e-06, + "loss": 0.1972, + "step": 33515 + }, + { + "epoch": 92.07692307692308, + "grad_norm": 8.535798072814941, + "learning_rate": 3.961538461538462e-06, + "loss": 0.1618, + "step": 33516 + }, + { + "epoch": 92.07967032967034, + "grad_norm": 19.077495574951172, + "learning_rate": 3.960164835164835e-06, + "loss": 0.1852, + "step": 33517 + }, + { + "epoch": 92.08241758241758, + "grad_norm": 2.6433680057525635, + "learning_rate": 3.958791208791209e-06, + "loss": 0.0449, + "step": 33518 + }, + { + "epoch": 92.08516483516483, + "grad_norm": 10.135987281799316, + "learning_rate": 3.957417582417582e-06, + "loss": 0.1498, + "step": 33519 + }, + { + "epoch": 92.08791208791209, + "grad_norm": 19.7755184173584, + "learning_rate": 3.9560439560439565e-06, + "loss": 0.2969, + "step": 33520 + }, + { + "epoch": 92.09065934065934, + "grad_norm": 5.737820625305176, + "learning_rate": 3.95467032967033e-06, + "loss": 0.0895, + "step": 33521 + }, + { + "epoch": 92.0934065934066, + "grad_norm": 9.49122428894043, + "learning_rate": 3.953296703296703e-06, + "loss": 0.1858, + "step": 33522 + }, + { + "epoch": 92.09615384615384, + "grad_norm": 18.192068099975586, + "learning_rate": 3.9519230769230775e-06, + "loss": 0.459, + "step": 33523 + }, + { + "epoch": 92.0989010989011, + "grad_norm": 17.62742042541504, + "learning_rate": 3.95054945054945e-06, + "loss": 0.6137, + "step": 33524 + }, + { + "epoch": 92.10164835164835, + "grad_norm": 13.104447364807129, + "learning_rate": 3.949175824175824e-06, + "loss": 0.1866, + "step": 33525 + }, + { + "epoch": 92.1043956043956, + "grad_norm": 16.048954010009766, + "learning_rate": 3.947802197802198e-06, + "loss": 0.2863, + "step": 33526 + }, + { + "epoch": 92.10714285714286, + "grad_norm": 7.159791469573975, + "learning_rate": 3.946428571428572e-06, + "loss": 0.0981, + "step": 33527 + }, + { + "epoch": 92.10989010989012, + "grad_norm": 2.633064031600952, + "learning_rate": 3.945054945054945e-06, + "loss": 0.0271, + "step": 33528 + }, + { + "epoch": 92.11263736263736, + "grad_norm": 8.520979881286621, + "learning_rate": 3.943681318681319e-06, + "loss": 0.1241, + "step": 33529 + }, + { + "epoch": 92.11538461538461, + "grad_norm": 10.256770133972168, + "learning_rate": 3.942307692307692e-06, + "loss": 0.1106, + "step": 33530 + }, + { + "epoch": 92.11813186813187, + "grad_norm": 15.048385620117188, + "learning_rate": 3.9409340659340665e-06, + "loss": 0.3976, + "step": 33531 + }, + { + "epoch": 92.12087912087912, + "grad_norm": 9.89542293548584, + "learning_rate": 3.93956043956044e-06, + "loss": 0.2005, + "step": 33532 + }, + { + "epoch": 92.12362637362638, + "grad_norm": 7.289571285247803, + "learning_rate": 3.938186813186813e-06, + "loss": 0.0879, + "step": 33533 + }, + { + "epoch": 92.12637362637362, + "grad_norm": 13.293538093566895, + "learning_rate": 3.936813186813187e-06, + "loss": 0.3155, + "step": 33534 + }, + { + "epoch": 92.12912087912088, + "grad_norm": 14.166800498962402, + "learning_rate": 3.93543956043956e-06, + "loss": 0.3443, + "step": 33535 + }, + { + "epoch": 92.13186813186813, + "grad_norm": 10.971183776855469, + "learning_rate": 3.934065934065934e-06, + "loss": 0.2202, + "step": 33536 + }, + { + "epoch": 92.13461538461539, + "grad_norm": 16.343685150146484, + "learning_rate": 3.932692307692308e-06, + "loss": 0.2013, + "step": 33537 + }, + { + "epoch": 92.13736263736264, + "grad_norm": 3.1637401580810547, + "learning_rate": 3.931318681318682e-06, + "loss": 0.0441, + "step": 33538 + }, + { + "epoch": 92.14010989010988, + "grad_norm": 5.19548225402832, + "learning_rate": 3.929945054945055e-06, + "loss": 0.0684, + "step": 33539 + }, + { + "epoch": 92.14285714285714, + "grad_norm": 16.16816520690918, + "learning_rate": 3.928571428571429e-06, + "loss": 0.3461, + "step": 33540 + }, + { + "epoch": 92.1456043956044, + "grad_norm": 17.835172653198242, + "learning_rate": 3.927197802197802e-06, + "loss": 0.168, + "step": 33541 + }, + { + "epoch": 92.14835164835165, + "grad_norm": 12.529788970947266, + "learning_rate": 3.9258241758241765e-06, + "loss": 0.1719, + "step": 33542 + }, + { + "epoch": 92.1510989010989, + "grad_norm": 23.073015213012695, + "learning_rate": 3.924450549450549e-06, + "loss": 0.5103, + "step": 33543 + }, + { + "epoch": 92.15384615384616, + "grad_norm": 12.462701797485352, + "learning_rate": 3.923076923076923e-06, + "loss": 0.1174, + "step": 33544 + }, + { + "epoch": 92.1565934065934, + "grad_norm": 23.10495948791504, + "learning_rate": 3.921703296703297e-06, + "loss": 0.7462, + "step": 33545 + }, + { + "epoch": 92.15934065934066, + "grad_norm": 12.609155654907227, + "learning_rate": 3.920329670329671e-06, + "loss": 0.2564, + "step": 33546 + }, + { + "epoch": 92.16208791208791, + "grad_norm": 6.417806625366211, + "learning_rate": 3.918956043956044e-06, + "loss": 0.2608, + "step": 33547 + }, + { + "epoch": 92.16483516483517, + "grad_norm": 17.2653751373291, + "learning_rate": 3.917582417582418e-06, + "loss": 0.3248, + "step": 33548 + }, + { + "epoch": 92.16758241758242, + "grad_norm": 13.483097076416016, + "learning_rate": 3.916208791208791e-06, + "loss": 0.1776, + "step": 33549 + }, + { + "epoch": 92.17032967032966, + "grad_norm": 14.88502025604248, + "learning_rate": 3.914835164835165e-06, + "loss": 0.4051, + "step": 33550 + }, + { + "epoch": 92.17307692307692, + "grad_norm": 2.969341278076172, + "learning_rate": 3.913461538461539e-06, + "loss": 0.0298, + "step": 33551 + }, + { + "epoch": 92.17582417582418, + "grad_norm": 15.147189140319824, + "learning_rate": 3.912087912087912e-06, + "loss": 0.2054, + "step": 33552 + }, + { + "epoch": 92.17857142857143, + "grad_norm": 16.25726318359375, + "learning_rate": 3.910714285714286e-06, + "loss": 0.317, + "step": 33553 + }, + { + "epoch": 92.18131868131869, + "grad_norm": 6.0326690673828125, + "learning_rate": 3.909340659340659e-06, + "loss": 0.0846, + "step": 33554 + }, + { + "epoch": 92.18406593406593, + "grad_norm": 5.358896255493164, + "learning_rate": 3.907967032967033e-06, + "loss": 0.0655, + "step": 33555 + }, + { + "epoch": 92.18681318681318, + "grad_norm": 17.230892181396484, + "learning_rate": 3.906593406593407e-06, + "loss": 0.2665, + "step": 33556 + }, + { + "epoch": 92.18956043956044, + "grad_norm": 4.682418346405029, + "learning_rate": 3.905219780219781e-06, + "loss": 0.0627, + "step": 33557 + }, + { + "epoch": 92.1923076923077, + "grad_norm": 12.73193073272705, + "learning_rate": 3.9038461538461535e-06, + "loss": 0.2162, + "step": 33558 + }, + { + "epoch": 92.19505494505495, + "grad_norm": 17.943567276000977, + "learning_rate": 3.902472527472528e-06, + "loss": 0.1881, + "step": 33559 + }, + { + "epoch": 92.1978021978022, + "grad_norm": 10.957099914550781, + "learning_rate": 3.901098901098901e-06, + "loss": 0.1682, + "step": 33560 + }, + { + "epoch": 92.20054945054945, + "grad_norm": 15.864255905151367, + "learning_rate": 3.8997252747252755e-06, + "loss": 0.4293, + "step": 33561 + }, + { + "epoch": 92.2032967032967, + "grad_norm": 8.147428512573242, + "learning_rate": 3.898351648351649e-06, + "loss": 0.1646, + "step": 33562 + }, + { + "epoch": 92.20604395604396, + "grad_norm": 15.070356369018555, + "learning_rate": 3.896978021978022e-06, + "loss": 0.324, + "step": 33563 + }, + { + "epoch": 92.20879120879121, + "grad_norm": 7.601548194885254, + "learning_rate": 3.895604395604396e-06, + "loss": 0.1097, + "step": 33564 + }, + { + "epoch": 92.21153846153847, + "grad_norm": 12.849230766296387, + "learning_rate": 3.894230769230769e-06, + "loss": 0.4052, + "step": 33565 + }, + { + "epoch": 92.21428571428571, + "grad_norm": 16.440134048461914, + "learning_rate": 3.892857142857143e-06, + "loss": 0.3891, + "step": 33566 + }, + { + "epoch": 92.21703296703296, + "grad_norm": 14.864461898803711, + "learning_rate": 3.891483516483517e-06, + "loss": 0.3055, + "step": 33567 + }, + { + "epoch": 92.21978021978022, + "grad_norm": 32.899967193603516, + "learning_rate": 3.89010989010989e-06, + "loss": 0.5838, + "step": 33568 + }, + { + "epoch": 92.22252747252747, + "grad_norm": 18.222660064697266, + "learning_rate": 3.8887362637362636e-06, + "loss": 0.2852, + "step": 33569 + }, + { + "epoch": 92.22527472527473, + "grad_norm": 2.6911067962646484, + "learning_rate": 3.887362637362638e-06, + "loss": 0.0271, + "step": 33570 + }, + { + "epoch": 92.22802197802197, + "grad_norm": 12.911772727966309, + "learning_rate": 3.885989010989011e-06, + "loss": 0.3181, + "step": 33571 + }, + { + "epoch": 92.23076923076923, + "grad_norm": 16.517751693725586, + "learning_rate": 3.884615384615385e-06, + "loss": 0.376, + "step": 33572 + }, + { + "epoch": 92.23351648351648, + "grad_norm": 5.2714948654174805, + "learning_rate": 3.883241758241758e-06, + "loss": 0.0737, + "step": 33573 + }, + { + "epoch": 92.23626373626374, + "grad_norm": 9.822720527648926, + "learning_rate": 3.881868131868132e-06, + "loss": 0.2781, + "step": 33574 + }, + { + "epoch": 92.23901098901099, + "grad_norm": 3.121908187866211, + "learning_rate": 3.880494505494506e-06, + "loss": 0.033, + "step": 33575 + }, + { + "epoch": 92.24175824175825, + "grad_norm": 11.327259063720703, + "learning_rate": 3.87912087912088e-06, + "loss": 0.2813, + "step": 33576 + }, + { + "epoch": 92.24450549450549, + "grad_norm": 3.552269697189331, + "learning_rate": 3.8777472527472525e-06, + "loss": 0.0366, + "step": 33577 + }, + { + "epoch": 92.24725274725274, + "grad_norm": 14.001546859741211, + "learning_rate": 3.876373626373627e-06, + "loss": 0.1809, + "step": 33578 + }, + { + "epoch": 92.25, + "grad_norm": 6.808346748352051, + "learning_rate": 3.875e-06, + "loss": 0.0826, + "step": 33579 + }, + { + "epoch": 92.25274725274726, + "grad_norm": 12.563080787658691, + "learning_rate": 3.8736263736263736e-06, + "loss": 0.2747, + "step": 33580 + }, + { + "epoch": 92.25549450549451, + "grad_norm": 20.433576583862305, + "learning_rate": 3.872252747252748e-06, + "loss": 0.2972, + "step": 33581 + }, + { + "epoch": 92.25824175824175, + "grad_norm": 9.943195343017578, + "learning_rate": 3.87087912087912e-06, + "loss": 0.2818, + "step": 33582 + }, + { + "epoch": 92.26098901098901, + "grad_norm": 22.201581954956055, + "learning_rate": 3.869505494505495e-06, + "loss": 0.4039, + "step": 33583 + }, + { + "epoch": 92.26373626373626, + "grad_norm": 8.410898208618164, + "learning_rate": 3.868131868131868e-06, + "loss": 0.1449, + "step": 33584 + }, + { + "epoch": 92.26648351648352, + "grad_norm": 9.604663848876953, + "learning_rate": 3.866758241758242e-06, + "loss": 0.209, + "step": 33585 + }, + { + "epoch": 92.26923076923077, + "grad_norm": 5.375556945800781, + "learning_rate": 3.865384615384616e-06, + "loss": 0.1179, + "step": 33586 + }, + { + "epoch": 92.27197802197803, + "grad_norm": 1.2999303340911865, + "learning_rate": 3.864010989010989e-06, + "loss": 0.0167, + "step": 33587 + }, + { + "epoch": 92.27472527472527, + "grad_norm": 6.729778289794922, + "learning_rate": 3.8626373626373625e-06, + "loss": 0.0887, + "step": 33588 + }, + { + "epoch": 92.27747252747253, + "grad_norm": 7.1460089683532715, + "learning_rate": 3.861263736263737e-06, + "loss": 0.1413, + "step": 33589 + }, + { + "epoch": 92.28021978021978, + "grad_norm": 9.012297630310059, + "learning_rate": 3.85989010989011e-06, + "loss": 0.1104, + "step": 33590 + }, + { + "epoch": 92.28296703296704, + "grad_norm": 13.261335372924805, + "learning_rate": 3.858516483516484e-06, + "loss": 0.0977, + "step": 33591 + }, + { + "epoch": 92.28571428571429, + "grad_norm": 11.962494850158691, + "learning_rate": 3.857142857142857e-06, + "loss": 0.3034, + "step": 33592 + }, + { + "epoch": 92.28846153846153, + "grad_norm": 18.340496063232422, + "learning_rate": 3.855769230769231e-06, + "loss": 0.3887, + "step": 33593 + }, + { + "epoch": 92.29120879120879, + "grad_norm": 12.980542182922363, + "learning_rate": 3.854395604395605e-06, + "loss": 0.1222, + "step": 33594 + }, + { + "epoch": 92.29395604395604, + "grad_norm": 3.8647053241729736, + "learning_rate": 3.853021978021978e-06, + "loss": 0.043, + "step": 33595 + }, + { + "epoch": 92.2967032967033, + "grad_norm": 2.2142207622528076, + "learning_rate": 3.851648351648352e-06, + "loss": 0.0277, + "step": 33596 + }, + { + "epoch": 92.29945054945055, + "grad_norm": 11.019347190856934, + "learning_rate": 3.850274725274725e-06, + "loss": 0.3133, + "step": 33597 + }, + { + "epoch": 92.3021978021978, + "grad_norm": 8.536866188049316, + "learning_rate": 3.848901098901099e-06, + "loss": 0.1001, + "step": 33598 + }, + { + "epoch": 92.30494505494505, + "grad_norm": 21.854106903076172, + "learning_rate": 3.8475274725274725e-06, + "loss": 0.3397, + "step": 33599 + }, + { + "epoch": 92.3076923076923, + "grad_norm": 4.892168998718262, + "learning_rate": 3.846153846153847e-06, + "loss": 0.0547, + "step": 33600 + }, + { + "epoch": 92.31043956043956, + "grad_norm": 9.136359214782715, + "learning_rate": 3.844780219780219e-06, + "loss": 0.0882, + "step": 33601 + }, + { + "epoch": 92.31318681318682, + "grad_norm": 6.251282691955566, + "learning_rate": 3.843406593406594e-06, + "loss": 0.1075, + "step": 33602 + }, + { + "epoch": 92.31593406593407, + "grad_norm": 14.326631546020508, + "learning_rate": 3.842032967032967e-06, + "loss": 0.3456, + "step": 33603 + }, + { + "epoch": 92.31868131868131, + "grad_norm": 6.291903972625732, + "learning_rate": 3.840659340659341e-06, + "loss": 0.0582, + "step": 33604 + }, + { + "epoch": 92.32142857142857, + "grad_norm": 13.978751182556152, + "learning_rate": 3.839285714285715e-06, + "loss": 0.4546, + "step": 33605 + }, + { + "epoch": 92.32417582417582, + "grad_norm": 16.785701751708984, + "learning_rate": 3.837912087912088e-06, + "loss": 0.249, + "step": 33606 + }, + { + "epoch": 92.32692307692308, + "grad_norm": 27.605981826782227, + "learning_rate": 3.8365384615384615e-06, + "loss": 0.8307, + "step": 33607 + }, + { + "epoch": 92.32967032967034, + "grad_norm": 23.52981948852539, + "learning_rate": 3.835164835164836e-06, + "loss": 0.4207, + "step": 33608 + }, + { + "epoch": 92.33241758241758, + "grad_norm": 9.324645042419434, + "learning_rate": 3.833791208791209e-06, + "loss": 0.1322, + "step": 33609 + }, + { + "epoch": 92.33516483516483, + "grad_norm": 6.326438903808594, + "learning_rate": 3.8324175824175826e-06, + "loss": 0.0761, + "step": 33610 + }, + { + "epoch": 92.33791208791209, + "grad_norm": 7.977450847625732, + "learning_rate": 3.831043956043956e-06, + "loss": 0.0804, + "step": 33611 + }, + { + "epoch": 92.34065934065934, + "grad_norm": 7.258315086364746, + "learning_rate": 3.829670329670329e-06, + "loss": 0.105, + "step": 33612 + }, + { + "epoch": 92.3434065934066, + "grad_norm": 6.226988792419434, + "learning_rate": 3.828296703296704e-06, + "loss": 0.0914, + "step": 33613 + }, + { + "epoch": 92.34615384615384, + "grad_norm": 6.6630401611328125, + "learning_rate": 3.826923076923077e-06, + "loss": 0.1075, + "step": 33614 + }, + { + "epoch": 92.3489010989011, + "grad_norm": 9.541921615600586, + "learning_rate": 3.825549450549451e-06, + "loss": 0.1549, + "step": 33615 + }, + { + "epoch": 92.35164835164835, + "grad_norm": 8.77209186553955, + "learning_rate": 3.824175824175824e-06, + "loss": 0.1606, + "step": 33616 + }, + { + "epoch": 92.3543956043956, + "grad_norm": 20.03653335571289, + "learning_rate": 3.822802197802198e-06, + "loss": 0.3353, + "step": 33617 + }, + { + "epoch": 92.35714285714286, + "grad_norm": 2.217808485031128, + "learning_rate": 3.8214285714285715e-06, + "loss": 0.025, + "step": 33618 + }, + { + "epoch": 92.35989010989012, + "grad_norm": 15.915313720703125, + "learning_rate": 3.820054945054946e-06, + "loss": 0.2004, + "step": 33619 + }, + { + "epoch": 92.36263736263736, + "grad_norm": 11.703434944152832, + "learning_rate": 3.818681318681319e-06, + "loss": 0.1953, + "step": 33620 + }, + { + "epoch": 92.36538461538461, + "grad_norm": 5.941529750823975, + "learning_rate": 3.8173076923076926e-06, + "loss": 0.0947, + "step": 33621 + }, + { + "epoch": 92.36813186813187, + "grad_norm": 18.343568801879883, + "learning_rate": 3.815934065934066e-06, + "loss": 0.2905, + "step": 33622 + }, + { + "epoch": 92.37087912087912, + "grad_norm": 7.096217155456543, + "learning_rate": 3.81456043956044e-06, + "loss": 0.1172, + "step": 33623 + }, + { + "epoch": 92.37362637362638, + "grad_norm": 8.927106857299805, + "learning_rate": 3.8131868131868136e-06, + "loss": 0.1152, + "step": 33624 + }, + { + "epoch": 92.37637362637362, + "grad_norm": 8.124238014221191, + "learning_rate": 3.8118131868131875e-06, + "loss": 0.2164, + "step": 33625 + }, + { + "epoch": 92.37912087912088, + "grad_norm": 3.9654946327209473, + "learning_rate": 3.8104395604395605e-06, + "loss": 0.0463, + "step": 33626 + }, + { + "epoch": 92.38186813186813, + "grad_norm": 21.94767189025879, + "learning_rate": 3.8090659340659343e-06, + "loss": 0.2398, + "step": 33627 + }, + { + "epoch": 92.38461538461539, + "grad_norm": 15.472702026367188, + "learning_rate": 3.807692307692308e-06, + "loss": 0.4299, + "step": 33628 + }, + { + "epoch": 92.38736263736264, + "grad_norm": 9.904151916503906, + "learning_rate": 3.8063186813186815e-06, + "loss": 0.0773, + "step": 33629 + }, + { + "epoch": 92.39010989010988, + "grad_norm": 4.078361511230469, + "learning_rate": 3.804945054945055e-06, + "loss": 0.0573, + "step": 33630 + }, + { + "epoch": 92.39285714285714, + "grad_norm": 9.867377281188965, + "learning_rate": 3.8035714285714288e-06, + "loss": 0.1054, + "step": 33631 + }, + { + "epoch": 92.3956043956044, + "grad_norm": 11.456622123718262, + "learning_rate": 3.802197802197802e-06, + "loss": 0.1441, + "step": 33632 + }, + { + "epoch": 92.39835164835165, + "grad_norm": 2.448899030685425, + "learning_rate": 3.800824175824176e-06, + "loss": 0.0358, + "step": 33633 + }, + { + "epoch": 92.4010989010989, + "grad_norm": 21.720720291137695, + "learning_rate": 3.79945054945055e-06, + "loss": 0.2603, + "step": 33634 + }, + { + "epoch": 92.40384615384616, + "grad_norm": 7.296236038208008, + "learning_rate": 3.798076923076923e-06, + "loss": 0.0963, + "step": 33635 + }, + { + "epoch": 92.4065934065934, + "grad_norm": 13.104748725891113, + "learning_rate": 3.7967032967032966e-06, + "loss": 0.2612, + "step": 33636 + }, + { + "epoch": 92.40934065934066, + "grad_norm": 12.008536338806152, + "learning_rate": 3.7953296703296705e-06, + "loss": 0.0835, + "step": 33637 + }, + { + "epoch": 92.41208791208791, + "grad_norm": 4.700465202331543, + "learning_rate": 3.7939560439560443e-06, + "loss": 0.036, + "step": 33638 + }, + { + "epoch": 92.41483516483517, + "grad_norm": 4.31317138671875, + "learning_rate": 3.792582417582418e-06, + "loss": 0.048, + "step": 33639 + }, + { + "epoch": 92.41758241758242, + "grad_norm": 5.493133544921875, + "learning_rate": 3.791208791208791e-06, + "loss": 0.0626, + "step": 33640 + }, + { + "epoch": 92.42032967032966, + "grad_norm": 4.267011642456055, + "learning_rate": 3.789835164835165e-06, + "loss": 0.0555, + "step": 33641 + }, + { + "epoch": 92.42307692307692, + "grad_norm": 13.347338676452637, + "learning_rate": 3.7884615384615388e-06, + "loss": 0.2536, + "step": 33642 + }, + { + "epoch": 92.42582417582418, + "grad_norm": 14.978252410888672, + "learning_rate": 3.7870879120879126e-06, + "loss": 0.5932, + "step": 33643 + }, + { + "epoch": 92.42857142857143, + "grad_norm": 2.7916533946990967, + "learning_rate": 3.785714285714286e-06, + "loss": 0.0312, + "step": 33644 + }, + { + "epoch": 92.43131868131869, + "grad_norm": 5.261600494384766, + "learning_rate": 3.7843406593406594e-06, + "loss": 0.0347, + "step": 33645 + }, + { + "epoch": 92.43406593406593, + "grad_norm": 4.507025241851807, + "learning_rate": 3.782967032967033e-06, + "loss": 0.0275, + "step": 33646 + }, + { + "epoch": 92.43681318681318, + "grad_norm": 9.203248977661133, + "learning_rate": 3.7815934065934067e-06, + "loss": 0.2177, + "step": 33647 + }, + { + "epoch": 92.43956043956044, + "grad_norm": 22.766489028930664, + "learning_rate": 3.7802197802197805e-06, + "loss": 0.3563, + "step": 33648 + }, + { + "epoch": 92.4423076923077, + "grad_norm": 10.423232078552246, + "learning_rate": 3.7788461538461543e-06, + "loss": 0.1705, + "step": 33649 + }, + { + "epoch": 92.44505494505495, + "grad_norm": 1.1494381427764893, + "learning_rate": 3.7774725274725273e-06, + "loss": 0.0105, + "step": 33650 + }, + { + "epoch": 92.4478021978022, + "grad_norm": 7.348025798797607, + "learning_rate": 3.776098901098901e-06, + "loss": 0.0765, + "step": 33651 + }, + { + "epoch": 92.45054945054945, + "grad_norm": 18.72365951538086, + "learning_rate": 3.774725274725275e-06, + "loss": 0.497, + "step": 33652 + }, + { + "epoch": 92.4532967032967, + "grad_norm": 11.251389503479004, + "learning_rate": 3.773351648351649e-06, + "loss": 0.2198, + "step": 33653 + }, + { + "epoch": 92.45604395604396, + "grad_norm": 13.906996726989746, + "learning_rate": 3.7719780219780226e-06, + "loss": 0.2007, + "step": 33654 + }, + { + "epoch": 92.45879120879121, + "grad_norm": 7.521195888519287, + "learning_rate": 3.7706043956043956e-06, + "loss": 0.2294, + "step": 33655 + }, + { + "epoch": 92.46153846153847, + "grad_norm": 14.151080131530762, + "learning_rate": 3.7692307692307694e-06, + "loss": 0.3316, + "step": 33656 + }, + { + "epoch": 92.46428571428571, + "grad_norm": 8.983583450317383, + "learning_rate": 3.7678571428571433e-06, + "loss": 0.1439, + "step": 33657 + }, + { + "epoch": 92.46703296703296, + "grad_norm": 6.393967628479004, + "learning_rate": 3.7664835164835167e-06, + "loss": 0.1014, + "step": 33658 + }, + { + "epoch": 92.46978021978022, + "grad_norm": 15.945433616638184, + "learning_rate": 3.76510989010989e-06, + "loss": 0.3261, + "step": 33659 + }, + { + "epoch": 92.47252747252747, + "grad_norm": 14.541337013244629, + "learning_rate": 3.763736263736264e-06, + "loss": 0.3208, + "step": 33660 + }, + { + "epoch": 92.47527472527473, + "grad_norm": 28.033946990966797, + "learning_rate": 3.7623626373626373e-06, + "loss": 0.2608, + "step": 33661 + }, + { + "epoch": 92.47802197802197, + "grad_norm": 6.877303600311279, + "learning_rate": 3.760989010989011e-06, + "loss": 0.0583, + "step": 33662 + }, + { + "epoch": 92.48076923076923, + "grad_norm": 15.532002449035645, + "learning_rate": 3.759615384615385e-06, + "loss": 0.3377, + "step": 33663 + }, + { + "epoch": 92.48351648351648, + "grad_norm": 17.824264526367188, + "learning_rate": 3.758241758241758e-06, + "loss": 0.2466, + "step": 33664 + }, + { + "epoch": 92.48626373626374, + "grad_norm": 15.496986389160156, + "learning_rate": 3.756868131868132e-06, + "loss": 0.1048, + "step": 33665 + }, + { + "epoch": 92.48901098901099, + "grad_norm": 5.100418567657471, + "learning_rate": 3.7554945054945056e-06, + "loss": 0.0493, + "step": 33666 + }, + { + "epoch": 92.49175824175825, + "grad_norm": 14.78124713897705, + "learning_rate": 3.7541208791208795e-06, + "loss": 0.1993, + "step": 33667 + }, + { + "epoch": 92.49450549450549, + "grad_norm": 7.421626091003418, + "learning_rate": 3.7527472527472533e-06, + "loss": 0.1401, + "step": 33668 + }, + { + "epoch": 92.49725274725274, + "grad_norm": 18.403322219848633, + "learning_rate": 3.7513736263736263e-06, + "loss": 0.3029, + "step": 33669 + }, + { + "epoch": 92.5, + "grad_norm": 5.055917263031006, + "learning_rate": 3.75e-06, + "loss": 0.0528, + "step": 33670 + }, + { + "epoch": 92.50274725274726, + "grad_norm": 3.0781171321868896, + "learning_rate": 3.748626373626374e-06, + "loss": 0.043, + "step": 33671 + }, + { + "epoch": 92.50549450549451, + "grad_norm": 8.150609970092773, + "learning_rate": 3.7472527472527478e-06, + "loss": 0.0802, + "step": 33672 + }, + { + "epoch": 92.50824175824175, + "grad_norm": 8.004434585571289, + "learning_rate": 3.745879120879121e-06, + "loss": 0.0533, + "step": 33673 + }, + { + "epoch": 92.51098901098901, + "grad_norm": 21.560142517089844, + "learning_rate": 3.7445054945054946e-06, + "loss": 0.3652, + "step": 33674 + }, + { + "epoch": 92.51373626373626, + "grad_norm": 22.81139373779297, + "learning_rate": 3.7431318681318684e-06, + "loss": 0.5921, + "step": 33675 + }, + { + "epoch": 92.51648351648352, + "grad_norm": 10.783202171325684, + "learning_rate": 3.741758241758242e-06, + "loss": 0.1614, + "step": 33676 + }, + { + "epoch": 92.51923076923077, + "grad_norm": 9.807392120361328, + "learning_rate": 3.7403846153846156e-06, + "loss": 0.1162, + "step": 33677 + }, + { + "epoch": 92.52197802197803, + "grad_norm": 7.140248775482178, + "learning_rate": 3.7390109890109895e-06, + "loss": 0.0805, + "step": 33678 + }, + { + "epoch": 92.52472527472527, + "grad_norm": 4.6878743171691895, + "learning_rate": 3.7376373626373625e-06, + "loss": 0.0694, + "step": 33679 + }, + { + "epoch": 92.52747252747253, + "grad_norm": 23.41989517211914, + "learning_rate": 3.7362637362637363e-06, + "loss": 0.2197, + "step": 33680 + }, + { + "epoch": 92.53021978021978, + "grad_norm": 13.318459510803223, + "learning_rate": 3.73489010989011e-06, + "loss": 0.2356, + "step": 33681 + }, + { + "epoch": 92.53296703296704, + "grad_norm": 5.785183429718018, + "learning_rate": 3.733516483516484e-06, + "loss": 0.105, + "step": 33682 + }, + { + "epoch": 92.53571428571429, + "grad_norm": 4.989871025085449, + "learning_rate": 3.7321428571428578e-06, + "loss": 0.0782, + "step": 33683 + }, + { + "epoch": 92.53846153846153, + "grad_norm": 8.143255233764648, + "learning_rate": 3.7307692307692308e-06, + "loss": 0.0435, + "step": 33684 + }, + { + "epoch": 92.54120879120879, + "grad_norm": 6.263935089111328, + "learning_rate": 3.7293956043956046e-06, + "loss": 0.0995, + "step": 33685 + }, + { + "epoch": 92.54395604395604, + "grad_norm": 6.427128314971924, + "learning_rate": 3.7280219780219784e-06, + "loss": 0.0839, + "step": 33686 + }, + { + "epoch": 92.5467032967033, + "grad_norm": 19.267122268676758, + "learning_rate": 3.7266483516483522e-06, + "loss": 0.341, + "step": 33687 + }, + { + "epoch": 92.54945054945055, + "grad_norm": 19.128238677978516, + "learning_rate": 3.7252747252747252e-06, + "loss": 0.4033, + "step": 33688 + }, + { + "epoch": 92.5521978021978, + "grad_norm": 19.872695922851562, + "learning_rate": 3.723901098901099e-06, + "loss": 0.3033, + "step": 33689 + }, + { + "epoch": 92.55494505494505, + "grad_norm": 3.703192949295044, + "learning_rate": 3.7225274725274725e-06, + "loss": 0.0418, + "step": 33690 + }, + { + "epoch": 92.5576923076923, + "grad_norm": 6.06003999710083, + "learning_rate": 3.7211538461538463e-06, + "loss": 0.0648, + "step": 33691 + }, + { + "epoch": 92.56043956043956, + "grad_norm": 13.826127052307129, + "learning_rate": 3.71978021978022e-06, + "loss": 0.1914, + "step": 33692 + }, + { + "epoch": 92.56318681318682, + "grad_norm": 18.716466903686523, + "learning_rate": 3.718406593406593e-06, + "loss": 0.2455, + "step": 33693 + }, + { + "epoch": 92.56593406593407, + "grad_norm": 18.045949935913086, + "learning_rate": 3.717032967032967e-06, + "loss": 0.3349, + "step": 33694 + }, + { + "epoch": 92.56868131868131, + "grad_norm": 11.210363388061523, + "learning_rate": 3.7156593406593408e-06, + "loss": 0.172, + "step": 33695 + }, + { + "epoch": 92.57142857142857, + "grad_norm": 6.319199562072754, + "learning_rate": 3.7142857142857146e-06, + "loss": 0.1057, + "step": 33696 + }, + { + "epoch": 92.57417582417582, + "grad_norm": 13.109885215759277, + "learning_rate": 3.7129120879120884e-06, + "loss": 0.1546, + "step": 33697 + }, + { + "epoch": 92.57692307692308, + "grad_norm": 16.964115142822266, + "learning_rate": 3.7115384615384614e-06, + "loss": 0.259, + "step": 33698 + }, + { + "epoch": 92.57967032967034, + "grad_norm": 17.233766555786133, + "learning_rate": 3.7101648351648352e-06, + "loss": 0.4246, + "step": 33699 + }, + { + "epoch": 92.58241758241758, + "grad_norm": 9.580911636352539, + "learning_rate": 3.708791208791209e-06, + "loss": 0.1217, + "step": 33700 + }, + { + "epoch": 92.58516483516483, + "grad_norm": 9.571669578552246, + "learning_rate": 3.707417582417583e-06, + "loss": 0.1335, + "step": 33701 + }, + { + "epoch": 92.58791208791209, + "grad_norm": 11.026190757751465, + "learning_rate": 3.7060439560439563e-06, + "loss": 0.2071, + "step": 33702 + }, + { + "epoch": 92.59065934065934, + "grad_norm": 7.447274208068848, + "learning_rate": 3.7046703296703297e-06, + "loss": 0.0817, + "step": 33703 + }, + { + "epoch": 92.5934065934066, + "grad_norm": 15.36739730834961, + "learning_rate": 3.7032967032967036e-06, + "loss": 0.3081, + "step": 33704 + }, + { + "epoch": 92.59615384615384, + "grad_norm": 23.88888931274414, + "learning_rate": 3.701923076923077e-06, + "loss": 0.4107, + "step": 33705 + }, + { + "epoch": 92.5989010989011, + "grad_norm": 25.769439697265625, + "learning_rate": 3.7005494505494508e-06, + "loss": 0.3592, + "step": 33706 + }, + { + "epoch": 92.60164835164835, + "grad_norm": 14.663125991821289, + "learning_rate": 3.6991758241758246e-06, + "loss": 0.2084, + "step": 33707 + }, + { + "epoch": 92.6043956043956, + "grad_norm": 8.481369018554688, + "learning_rate": 3.6978021978021976e-06, + "loss": 0.3358, + "step": 33708 + }, + { + "epoch": 92.60714285714286, + "grad_norm": 20.252853393554688, + "learning_rate": 3.6964285714285714e-06, + "loss": 0.2982, + "step": 33709 + }, + { + "epoch": 92.60989010989012, + "grad_norm": 24.14071273803711, + "learning_rate": 3.6950549450549453e-06, + "loss": 0.4501, + "step": 33710 + }, + { + "epoch": 92.61263736263736, + "grad_norm": 7.991549015045166, + "learning_rate": 3.693681318681319e-06, + "loss": 0.1213, + "step": 33711 + }, + { + "epoch": 92.61538461538461, + "grad_norm": 9.521306037902832, + "learning_rate": 3.692307692307693e-06, + "loss": 0.1191, + "step": 33712 + }, + { + "epoch": 92.61813186813187, + "grad_norm": 17.964061737060547, + "learning_rate": 3.690934065934066e-06, + "loss": 0.3057, + "step": 33713 + }, + { + "epoch": 92.62087912087912, + "grad_norm": 21.194719314575195, + "learning_rate": 3.6895604395604397e-06, + "loss": 0.6017, + "step": 33714 + }, + { + "epoch": 92.62362637362638, + "grad_norm": 6.213077545166016, + "learning_rate": 3.6881868131868136e-06, + "loss": 0.1744, + "step": 33715 + }, + { + "epoch": 92.62637362637362, + "grad_norm": 30.598852157592773, + "learning_rate": 3.6868131868131874e-06, + "loss": 0.8032, + "step": 33716 + }, + { + "epoch": 92.62912087912088, + "grad_norm": 27.777605056762695, + "learning_rate": 3.6854395604395604e-06, + "loss": 0.7369, + "step": 33717 + }, + { + "epoch": 92.63186813186813, + "grad_norm": 15.639720916748047, + "learning_rate": 3.684065934065934e-06, + "loss": 0.4982, + "step": 33718 + }, + { + "epoch": 92.63461538461539, + "grad_norm": 15.99985122680664, + "learning_rate": 3.682692307692308e-06, + "loss": 0.2313, + "step": 33719 + }, + { + "epoch": 92.63736263736264, + "grad_norm": 9.894896507263184, + "learning_rate": 3.6813186813186814e-06, + "loss": 0.1002, + "step": 33720 + }, + { + "epoch": 92.64010989010988, + "grad_norm": 13.412778854370117, + "learning_rate": 3.6799450549450553e-06, + "loss": 0.0823, + "step": 33721 + }, + { + "epoch": 92.64285714285714, + "grad_norm": 20.04302406311035, + "learning_rate": 3.6785714285714283e-06, + "loss": 0.509, + "step": 33722 + }, + { + "epoch": 92.6456043956044, + "grad_norm": 19.700130462646484, + "learning_rate": 3.677197802197802e-06, + "loss": 0.5422, + "step": 33723 + }, + { + "epoch": 92.64835164835165, + "grad_norm": 18.544891357421875, + "learning_rate": 3.675824175824176e-06, + "loss": 0.3753, + "step": 33724 + }, + { + "epoch": 92.6510989010989, + "grad_norm": 12.773635864257812, + "learning_rate": 3.6744505494505498e-06, + "loss": 0.442, + "step": 33725 + }, + { + "epoch": 92.65384615384616, + "grad_norm": 12.5746431350708, + "learning_rate": 3.6730769230769236e-06, + "loss": 0.146, + "step": 33726 + }, + { + "epoch": 92.6565934065934, + "grad_norm": 5.380099296569824, + "learning_rate": 3.6717032967032966e-06, + "loss": 0.0673, + "step": 33727 + }, + { + "epoch": 92.65934065934066, + "grad_norm": 7.506653308868408, + "learning_rate": 3.6703296703296704e-06, + "loss": 0.1927, + "step": 33728 + }, + { + "epoch": 92.66208791208791, + "grad_norm": 9.891140937805176, + "learning_rate": 3.6689560439560442e-06, + "loss": 0.098, + "step": 33729 + }, + { + "epoch": 92.66483516483517, + "grad_norm": 10.542078018188477, + "learning_rate": 3.667582417582418e-06, + "loss": 0.1596, + "step": 33730 + }, + { + "epoch": 92.66758241758242, + "grad_norm": 16.764057159423828, + "learning_rate": 3.666208791208792e-06, + "loss": 0.3384, + "step": 33731 + }, + { + "epoch": 92.67032967032966, + "grad_norm": 10.533077239990234, + "learning_rate": 3.664835164835165e-06, + "loss": 0.1016, + "step": 33732 + }, + { + "epoch": 92.67307692307692, + "grad_norm": 21.914962768554688, + "learning_rate": 3.6634615384615387e-06, + "loss": 0.5893, + "step": 33733 + }, + { + "epoch": 92.67582417582418, + "grad_norm": 11.294289588928223, + "learning_rate": 3.662087912087912e-06, + "loss": 0.1973, + "step": 33734 + }, + { + "epoch": 92.67857142857143, + "grad_norm": 13.770614624023438, + "learning_rate": 3.660714285714286e-06, + "loss": 0.2524, + "step": 33735 + }, + { + "epoch": 92.68131868131869, + "grad_norm": 9.916810989379883, + "learning_rate": 3.6593406593406598e-06, + "loss": 0.1002, + "step": 33736 + }, + { + "epoch": 92.68406593406593, + "grad_norm": 15.181841850280762, + "learning_rate": 3.6579670329670328e-06, + "loss": 0.2567, + "step": 33737 + }, + { + "epoch": 92.68681318681318, + "grad_norm": 6.523815155029297, + "learning_rate": 3.6565934065934066e-06, + "loss": 0.0724, + "step": 33738 + }, + { + "epoch": 92.68956043956044, + "grad_norm": 4.185260772705078, + "learning_rate": 3.6552197802197804e-06, + "loss": 0.0654, + "step": 33739 + }, + { + "epoch": 92.6923076923077, + "grad_norm": 4.070618152618408, + "learning_rate": 3.6538461538461542e-06, + "loss": 0.0871, + "step": 33740 + }, + { + "epoch": 92.69505494505495, + "grad_norm": 12.221741676330566, + "learning_rate": 3.652472527472528e-06, + "loss": 0.1668, + "step": 33741 + }, + { + "epoch": 92.6978021978022, + "grad_norm": 12.549838066101074, + "learning_rate": 3.651098901098901e-06, + "loss": 0.1641, + "step": 33742 + }, + { + "epoch": 92.70054945054945, + "grad_norm": 10.944387435913086, + "learning_rate": 3.649725274725275e-06, + "loss": 0.1373, + "step": 33743 + }, + { + "epoch": 92.7032967032967, + "grad_norm": 11.87499713897705, + "learning_rate": 3.6483516483516487e-06, + "loss": 0.161, + "step": 33744 + }, + { + "epoch": 92.70604395604396, + "grad_norm": 12.33441162109375, + "learning_rate": 3.6469780219780225e-06, + "loss": 0.2568, + "step": 33745 + }, + { + "epoch": 92.70879120879121, + "grad_norm": 19.231773376464844, + "learning_rate": 3.6456043956043955e-06, + "loss": 0.2652, + "step": 33746 + }, + { + "epoch": 92.71153846153847, + "grad_norm": 3.7017619609832764, + "learning_rate": 3.6442307692307694e-06, + "loss": 0.0564, + "step": 33747 + }, + { + "epoch": 92.71428571428571, + "grad_norm": 1.8104337453842163, + "learning_rate": 3.642857142857143e-06, + "loss": 0.0186, + "step": 33748 + }, + { + "epoch": 92.71703296703296, + "grad_norm": 16.149879455566406, + "learning_rate": 3.6414835164835166e-06, + "loss": 0.4419, + "step": 33749 + }, + { + "epoch": 92.71978021978022, + "grad_norm": 12.189834594726562, + "learning_rate": 3.6401098901098904e-06, + "loss": 0.1622, + "step": 33750 + }, + { + "epoch": 92.72252747252747, + "grad_norm": 12.533821105957031, + "learning_rate": 3.6387362637362634e-06, + "loss": 0.0842, + "step": 33751 + }, + { + "epoch": 92.72527472527473, + "grad_norm": 13.47462272644043, + "learning_rate": 3.6373626373626372e-06, + "loss": 0.1771, + "step": 33752 + }, + { + "epoch": 92.72802197802197, + "grad_norm": 13.802639961242676, + "learning_rate": 3.635989010989011e-06, + "loss": 0.2177, + "step": 33753 + }, + { + "epoch": 92.73076923076923, + "grad_norm": 14.081727981567383, + "learning_rate": 3.634615384615385e-06, + "loss": 0.1845, + "step": 33754 + }, + { + "epoch": 92.73351648351648, + "grad_norm": 19.143878936767578, + "learning_rate": 3.6332417582417587e-06, + "loss": 0.4112, + "step": 33755 + }, + { + "epoch": 92.73626373626374, + "grad_norm": 15.549596786499023, + "learning_rate": 3.6318681318681317e-06, + "loss": 0.2663, + "step": 33756 + }, + { + "epoch": 92.73901098901099, + "grad_norm": 12.92509937286377, + "learning_rate": 3.6304945054945055e-06, + "loss": 0.2891, + "step": 33757 + }, + { + "epoch": 92.74175824175825, + "grad_norm": 18.89752960205078, + "learning_rate": 3.6291208791208794e-06, + "loss": 0.3797, + "step": 33758 + }, + { + "epoch": 92.74450549450549, + "grad_norm": 10.551626205444336, + "learning_rate": 3.627747252747253e-06, + "loss": 0.1432, + "step": 33759 + }, + { + "epoch": 92.74725274725274, + "grad_norm": 6.933927059173584, + "learning_rate": 3.626373626373627e-06, + "loss": 0.0894, + "step": 33760 + }, + { + "epoch": 92.75, + "grad_norm": 17.843217849731445, + "learning_rate": 3.625e-06, + "loss": 0.446, + "step": 33761 + }, + { + "epoch": 92.75274725274726, + "grad_norm": 5.796021938323975, + "learning_rate": 3.623626373626374e-06, + "loss": 0.0648, + "step": 33762 + }, + { + "epoch": 92.75549450549451, + "grad_norm": 17.600656509399414, + "learning_rate": 3.6222527472527473e-06, + "loss": 0.487, + "step": 33763 + }, + { + "epoch": 92.75824175824175, + "grad_norm": 10.699726104736328, + "learning_rate": 3.620879120879121e-06, + "loss": 0.1887, + "step": 33764 + }, + { + "epoch": 92.76098901098901, + "grad_norm": 12.565356254577637, + "learning_rate": 3.619505494505495e-06, + "loss": 0.1992, + "step": 33765 + }, + { + "epoch": 92.76373626373626, + "grad_norm": 14.344808578491211, + "learning_rate": 3.618131868131868e-06, + "loss": 0.3385, + "step": 33766 + }, + { + "epoch": 92.76648351648352, + "grad_norm": 30.529356002807617, + "learning_rate": 3.6167582417582417e-06, + "loss": 0.5237, + "step": 33767 + }, + { + "epoch": 92.76923076923077, + "grad_norm": 24.792776107788086, + "learning_rate": 3.6153846153846156e-06, + "loss": 0.397, + "step": 33768 + }, + { + "epoch": 92.77197802197803, + "grad_norm": 16.600257873535156, + "learning_rate": 3.6140109890109894e-06, + "loss": 0.2412, + "step": 33769 + }, + { + "epoch": 92.77472527472527, + "grad_norm": 11.235847473144531, + "learning_rate": 3.6126373626373624e-06, + "loss": 0.1206, + "step": 33770 + }, + { + "epoch": 92.77747252747253, + "grad_norm": 18.587858200073242, + "learning_rate": 3.611263736263736e-06, + "loss": 0.4351, + "step": 33771 + }, + { + "epoch": 92.78021978021978, + "grad_norm": 2.2448244094848633, + "learning_rate": 3.60989010989011e-06, + "loss": 0.0288, + "step": 33772 + }, + { + "epoch": 92.78296703296704, + "grad_norm": 8.757246971130371, + "learning_rate": 3.608516483516484e-06, + "loss": 0.1048, + "step": 33773 + }, + { + "epoch": 92.78571428571429, + "grad_norm": 8.358359336853027, + "learning_rate": 3.6071428571428577e-06, + "loss": 0.129, + "step": 33774 + }, + { + "epoch": 92.78846153846153, + "grad_norm": 8.681585311889648, + "learning_rate": 3.6057692307692307e-06, + "loss": 0.078, + "step": 33775 + }, + { + "epoch": 92.79120879120879, + "grad_norm": 4.307797431945801, + "learning_rate": 3.6043956043956045e-06, + "loss": 0.0523, + "step": 33776 + }, + { + "epoch": 92.79395604395604, + "grad_norm": 5.7193779945373535, + "learning_rate": 3.6030219780219783e-06, + "loss": 0.1108, + "step": 33777 + }, + { + "epoch": 92.7967032967033, + "grad_norm": 10.159317016601562, + "learning_rate": 3.6016483516483517e-06, + "loss": 0.1132, + "step": 33778 + }, + { + "epoch": 92.79945054945055, + "grad_norm": 25.032316207885742, + "learning_rate": 3.6002747252747256e-06, + "loss": 0.3367, + "step": 33779 + }, + { + "epoch": 92.8021978021978, + "grad_norm": 5.028210639953613, + "learning_rate": 3.598901098901099e-06, + "loss": 0.0865, + "step": 33780 + }, + { + "epoch": 92.80494505494505, + "grad_norm": 7.035611152648926, + "learning_rate": 3.5975274725274724e-06, + "loss": 0.2026, + "step": 33781 + }, + { + "epoch": 92.8076923076923, + "grad_norm": 13.179698944091797, + "learning_rate": 3.5961538461538462e-06, + "loss": 0.1831, + "step": 33782 + }, + { + "epoch": 92.81043956043956, + "grad_norm": 9.234780311584473, + "learning_rate": 3.59478021978022e-06, + "loss": 0.163, + "step": 33783 + }, + { + "epoch": 92.81318681318682, + "grad_norm": 15.290091514587402, + "learning_rate": 3.593406593406594e-06, + "loss": 0.2059, + "step": 33784 + }, + { + "epoch": 92.81593406593407, + "grad_norm": 11.18134593963623, + "learning_rate": 3.592032967032967e-06, + "loss": 0.3162, + "step": 33785 + }, + { + "epoch": 92.81868131868131, + "grad_norm": 4.431969165802002, + "learning_rate": 3.5906593406593407e-06, + "loss": 0.0378, + "step": 33786 + }, + { + "epoch": 92.82142857142857, + "grad_norm": 15.513647079467773, + "learning_rate": 3.5892857142857145e-06, + "loss": 0.3328, + "step": 33787 + }, + { + "epoch": 92.82417582417582, + "grad_norm": 18.612550735473633, + "learning_rate": 3.5879120879120884e-06, + "loss": 0.1936, + "step": 33788 + }, + { + "epoch": 92.82692307692308, + "grad_norm": 13.456876754760742, + "learning_rate": 3.586538461538462e-06, + "loss": 0.2386, + "step": 33789 + }, + { + "epoch": 92.82967032967034, + "grad_norm": 10.95397663116455, + "learning_rate": 3.585164835164835e-06, + "loss": 0.157, + "step": 33790 + }, + { + "epoch": 92.83241758241758, + "grad_norm": 4.374282360076904, + "learning_rate": 3.583791208791209e-06, + "loss": 0.027, + "step": 33791 + }, + { + "epoch": 92.83516483516483, + "grad_norm": 16.74703025817871, + "learning_rate": 3.582417582417583e-06, + "loss": 0.5523, + "step": 33792 + }, + { + "epoch": 92.83791208791209, + "grad_norm": 8.831122398376465, + "learning_rate": 3.5810439560439562e-06, + "loss": 0.1779, + "step": 33793 + }, + { + "epoch": 92.84065934065934, + "grad_norm": 32.203250885009766, + "learning_rate": 3.57967032967033e-06, + "loss": 0.7589, + "step": 33794 + }, + { + "epoch": 92.8434065934066, + "grad_norm": 7.880232334136963, + "learning_rate": 3.578296703296703e-06, + "loss": 0.069, + "step": 33795 + }, + { + "epoch": 92.84615384615384, + "grad_norm": 3.3231847286224365, + "learning_rate": 3.576923076923077e-06, + "loss": 0.0418, + "step": 33796 + }, + { + "epoch": 92.8489010989011, + "grad_norm": 8.689840316772461, + "learning_rate": 3.5755494505494507e-06, + "loss": 0.1866, + "step": 33797 + }, + { + "epoch": 92.85164835164835, + "grad_norm": 20.592697143554688, + "learning_rate": 3.5741758241758245e-06, + "loss": 0.2797, + "step": 33798 + }, + { + "epoch": 92.8543956043956, + "grad_norm": 20.692567825317383, + "learning_rate": 3.5728021978021975e-06, + "loss": 0.3526, + "step": 33799 + }, + { + "epoch": 92.85714285714286, + "grad_norm": 23.11130714416504, + "learning_rate": 3.5714285714285714e-06, + "loss": 0.4763, + "step": 33800 + }, + { + "epoch": 92.85989010989012, + "grad_norm": 6.010763645172119, + "learning_rate": 3.570054945054945e-06, + "loss": 0.0887, + "step": 33801 + }, + { + "epoch": 92.86263736263736, + "grad_norm": 11.559328079223633, + "learning_rate": 3.568681318681319e-06, + "loss": 0.139, + "step": 33802 + }, + { + "epoch": 92.86538461538461, + "grad_norm": 19.02971649169922, + "learning_rate": 3.567307692307693e-06, + "loss": 0.5434, + "step": 33803 + }, + { + "epoch": 92.86813186813187, + "grad_norm": 7.764729022979736, + "learning_rate": 3.565934065934066e-06, + "loss": 0.1517, + "step": 33804 + }, + { + "epoch": 92.87087912087912, + "grad_norm": 7.2521562576293945, + "learning_rate": 3.5645604395604397e-06, + "loss": 0.1073, + "step": 33805 + }, + { + "epoch": 92.87362637362638, + "grad_norm": 12.81309986114502, + "learning_rate": 3.5631868131868135e-06, + "loss": 0.1537, + "step": 33806 + }, + { + "epoch": 92.87637362637362, + "grad_norm": 14.11176586151123, + "learning_rate": 3.561813186813187e-06, + "loss": 0.1509, + "step": 33807 + }, + { + "epoch": 92.87912087912088, + "grad_norm": 13.709578514099121, + "learning_rate": 3.5604395604395607e-06, + "loss": 0.1662, + "step": 33808 + }, + { + "epoch": 92.88186813186813, + "grad_norm": 18.053165435791016, + "learning_rate": 3.559065934065934e-06, + "loss": 0.3658, + "step": 33809 + }, + { + "epoch": 92.88461538461539, + "grad_norm": 4.7501301765441895, + "learning_rate": 3.5576923076923075e-06, + "loss": 0.0772, + "step": 33810 + }, + { + "epoch": 92.88736263736264, + "grad_norm": 19.077795028686523, + "learning_rate": 3.5563186813186814e-06, + "loss": 0.3541, + "step": 33811 + }, + { + "epoch": 92.89010989010988, + "grad_norm": 6.189171314239502, + "learning_rate": 3.554945054945055e-06, + "loss": 0.0784, + "step": 33812 + }, + { + "epoch": 92.89285714285714, + "grad_norm": 18.901674270629883, + "learning_rate": 3.553571428571429e-06, + "loss": 0.3313, + "step": 33813 + }, + { + "epoch": 92.8956043956044, + "grad_norm": 9.366689682006836, + "learning_rate": 3.552197802197802e-06, + "loss": 0.124, + "step": 33814 + }, + { + "epoch": 92.89835164835165, + "grad_norm": 11.719470977783203, + "learning_rate": 3.550824175824176e-06, + "loss": 0.3121, + "step": 33815 + }, + { + "epoch": 92.9010989010989, + "grad_norm": 9.938857078552246, + "learning_rate": 3.5494505494505497e-06, + "loss": 0.2076, + "step": 33816 + }, + { + "epoch": 92.90384615384616, + "grad_norm": 5.823910713195801, + "learning_rate": 3.5480769230769235e-06, + "loss": 0.0449, + "step": 33817 + }, + { + "epoch": 92.9065934065934, + "grad_norm": 13.537572860717773, + "learning_rate": 3.5467032967032973e-06, + "loss": 0.1427, + "step": 33818 + }, + { + "epoch": 92.90934065934066, + "grad_norm": 14.578792572021484, + "learning_rate": 3.5453296703296703e-06, + "loss": 0.1624, + "step": 33819 + }, + { + "epoch": 92.91208791208791, + "grad_norm": 4.402021408081055, + "learning_rate": 3.543956043956044e-06, + "loss": 0.0465, + "step": 33820 + }, + { + "epoch": 92.91483516483517, + "grad_norm": 14.537332534790039, + "learning_rate": 3.542582417582418e-06, + "loss": 0.3125, + "step": 33821 + }, + { + "epoch": 92.91758241758242, + "grad_norm": 6.425475120544434, + "learning_rate": 3.5412087912087914e-06, + "loss": 0.1226, + "step": 33822 + }, + { + "epoch": 92.92032967032966, + "grad_norm": 9.001082420349121, + "learning_rate": 3.5398351648351652e-06, + "loss": 0.2478, + "step": 33823 + }, + { + "epoch": 92.92307692307692, + "grad_norm": 15.746752738952637, + "learning_rate": 3.538461538461538e-06, + "loss": 0.1867, + "step": 33824 + }, + { + "epoch": 92.92582417582418, + "grad_norm": 10.504616737365723, + "learning_rate": 3.537087912087912e-06, + "loss": 0.0788, + "step": 33825 + }, + { + "epoch": 92.92857142857143, + "grad_norm": 9.236038208007812, + "learning_rate": 3.535714285714286e-06, + "loss": 0.1098, + "step": 33826 + }, + { + "epoch": 92.93131868131869, + "grad_norm": 3.599592924118042, + "learning_rate": 3.5343406593406597e-06, + "loss": 0.0364, + "step": 33827 + }, + { + "epoch": 92.93406593406593, + "grad_norm": 11.110326766967773, + "learning_rate": 3.5329670329670327e-06, + "loss": 0.2597, + "step": 33828 + }, + { + "epoch": 92.93681318681318, + "grad_norm": 19.20187759399414, + "learning_rate": 3.5315934065934065e-06, + "loss": 0.6959, + "step": 33829 + }, + { + "epoch": 92.93956043956044, + "grad_norm": 13.040872573852539, + "learning_rate": 3.5302197802197803e-06, + "loss": 0.3403, + "step": 33830 + }, + { + "epoch": 92.9423076923077, + "grad_norm": 13.180291175842285, + "learning_rate": 3.528846153846154e-06, + "loss": 0.2851, + "step": 33831 + }, + { + "epoch": 92.94505494505495, + "grad_norm": 19.072065353393555, + "learning_rate": 3.527472527472528e-06, + "loss": 0.4883, + "step": 33832 + }, + { + "epoch": 92.9478021978022, + "grad_norm": 25.797523498535156, + "learning_rate": 3.526098901098901e-06, + "loss": 0.4012, + "step": 33833 + }, + { + "epoch": 92.95054945054945, + "grad_norm": 6.354639053344727, + "learning_rate": 3.524725274725275e-06, + "loss": 0.1626, + "step": 33834 + }, + { + "epoch": 92.9532967032967, + "grad_norm": 11.895795822143555, + "learning_rate": 3.5233516483516486e-06, + "loss": 0.1813, + "step": 33835 + }, + { + "epoch": 92.95604395604396, + "grad_norm": 22.44076156616211, + "learning_rate": 3.5219780219780225e-06, + "loss": 0.6763, + "step": 33836 + }, + { + "epoch": 92.95879120879121, + "grad_norm": 21.349443435668945, + "learning_rate": 3.520604395604396e-06, + "loss": 0.3718, + "step": 33837 + }, + { + "epoch": 92.96153846153847, + "grad_norm": 12.211625099182129, + "learning_rate": 3.5192307692307693e-06, + "loss": 0.1516, + "step": 33838 + }, + { + "epoch": 92.96428571428571, + "grad_norm": 10.521848678588867, + "learning_rate": 3.5178571428571427e-06, + "loss": 0.1777, + "step": 33839 + }, + { + "epoch": 92.96703296703296, + "grad_norm": 12.705227851867676, + "learning_rate": 3.5164835164835165e-06, + "loss": 0.1447, + "step": 33840 + }, + { + "epoch": 92.96978021978022, + "grad_norm": 2.0257582664489746, + "learning_rate": 3.5151098901098904e-06, + "loss": 0.0124, + "step": 33841 + }, + { + "epoch": 92.97252747252747, + "grad_norm": 17.99190330505371, + "learning_rate": 3.513736263736264e-06, + "loss": 0.3989, + "step": 33842 + }, + { + "epoch": 92.97527472527473, + "grad_norm": 9.769952774047852, + "learning_rate": 3.512362637362637e-06, + "loss": 0.1522, + "step": 33843 + }, + { + "epoch": 92.97802197802197, + "grad_norm": 7.476321697235107, + "learning_rate": 3.510989010989011e-06, + "loss": 0.155, + "step": 33844 + }, + { + "epoch": 92.98076923076923, + "grad_norm": 15.699140548706055, + "learning_rate": 3.509615384615385e-06, + "loss": 0.1677, + "step": 33845 + }, + { + "epoch": 92.98351648351648, + "grad_norm": 12.086487770080566, + "learning_rate": 3.5082417582417587e-06, + "loss": 0.1392, + "step": 33846 + }, + { + "epoch": 92.98626373626374, + "grad_norm": 17.709369659423828, + "learning_rate": 3.5068681318681325e-06, + "loss": 0.2149, + "step": 33847 + }, + { + "epoch": 92.98901098901099, + "grad_norm": 8.445099830627441, + "learning_rate": 3.5054945054945055e-06, + "loss": 0.126, + "step": 33848 + }, + { + "epoch": 92.99175824175825, + "grad_norm": 20.380273818969727, + "learning_rate": 3.5041208791208793e-06, + "loss": 0.3548, + "step": 33849 + }, + { + "epoch": 92.99450549450549, + "grad_norm": 14.634843826293945, + "learning_rate": 3.502747252747253e-06, + "loss": 0.2077, + "step": 33850 + }, + { + "epoch": 92.99725274725274, + "grad_norm": 25.889554977416992, + "learning_rate": 3.5013736263736265e-06, + "loss": 0.4788, + "step": 33851 + }, + { + "epoch": 93.0, + "grad_norm": 2.107576608657837, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0272, + "step": 33852 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.6584022038567493, + "eval_f1": 0.6521472088789172, + "eval_f1_DuraRiadoRio_64x64": 0.6927374301675978, + "eval_f1_Mole_64x64": 0.7472527472527473, + "eval_f1_Quebrado_64x64": 0.6488888888888888, + "eval_f1_RiadoRio_64x64": 0.5051903114186851, + "eval_f1_RioFechado_64x64": 0.6666666666666666, + "eval_loss": 1.6523793935775757, + "eval_precision": 0.7209350653441883, + "eval_precision_DuraRiadoRio_64x64": 0.5794392523364486, + "eval_precision_Mole_64x64": 0.6181818181818182, + "eval_precision_Quebrado_64x64": 0.9012345679012346, + "eval_precision_RiadoRio_64x64": 0.5328467153284672, + "eval_precision_RioFechado_64x64": 0.972972972972973, + "eval_recall": 0.6599610822831726, + "eval_recall_DuraRiadoRio_64x64": 0.8611111111111112, + "eval_recall_Mole_64x64": 0.9444444444444444, + "eval_recall_Quebrado_64x64": 0.5069444444444444, + "eval_recall_RiadoRio_64x64": 0.48026315789473684, + "eval_recall_RioFechado_64x64": 0.5070422535211268, + "eval_runtime": 1.7177, + "eval_samples_per_second": 422.669, + "eval_steps_per_second": 26.781, + "step": 33852 + }, + { + "epoch": 93.00274725274726, + "grad_norm": 9.902599334716797, + "learning_rate": 3.4986263736263738e-06, + "loss": 0.1896, + "step": 33853 + }, + { + "epoch": 93.00549450549451, + "grad_norm": 4.735142707824707, + "learning_rate": 3.497252747252747e-06, + "loss": 0.0736, + "step": 33854 + }, + { + "epoch": 93.00824175824175, + "grad_norm": 13.804834365844727, + "learning_rate": 3.495879120879121e-06, + "loss": 0.2586, + "step": 33855 + }, + { + "epoch": 93.01098901098901, + "grad_norm": 5.46846866607666, + "learning_rate": 3.494505494505495e-06, + "loss": 0.078, + "step": 33856 + }, + { + "epoch": 93.01373626373626, + "grad_norm": 1.0927356481552124, + "learning_rate": 3.493131868131868e-06, + "loss": 0.0106, + "step": 33857 + }, + { + "epoch": 93.01648351648352, + "grad_norm": 29.28314971923828, + "learning_rate": 3.4917582417582417e-06, + "loss": 0.9143, + "step": 33858 + }, + { + "epoch": 93.01923076923077, + "grad_norm": 13.462967872619629, + "learning_rate": 3.4903846153846155e-06, + "loss": 0.5035, + "step": 33859 + }, + { + "epoch": 93.02197802197803, + "grad_norm": 15.98214054107666, + "learning_rate": 3.4890109890109893e-06, + "loss": 0.2344, + "step": 33860 + }, + { + "epoch": 93.02472527472527, + "grad_norm": 8.9445219039917, + "learning_rate": 3.487637362637363e-06, + "loss": 0.1309, + "step": 33861 + }, + { + "epoch": 93.02747252747253, + "grad_norm": 23.938386917114258, + "learning_rate": 3.486263736263736e-06, + "loss": 0.6003, + "step": 33862 + }, + { + "epoch": 93.03021978021978, + "grad_norm": 4.611644744873047, + "learning_rate": 3.48489010989011e-06, + "loss": 0.0824, + "step": 33863 + }, + { + "epoch": 93.03296703296704, + "grad_norm": 17.280866622924805, + "learning_rate": 3.483516483516484e-06, + "loss": 0.4011, + "step": 33864 + }, + { + "epoch": 93.03571428571429, + "grad_norm": 2.520610809326172, + "learning_rate": 3.4821428571428576e-06, + "loss": 0.0272, + "step": 33865 + }, + { + "epoch": 93.03846153846153, + "grad_norm": 17.432910919189453, + "learning_rate": 3.480769230769231e-06, + "loss": 0.4408, + "step": 33866 + }, + { + "epoch": 93.04120879120879, + "grad_norm": 11.207335472106934, + "learning_rate": 3.4793956043956044e-06, + "loss": 0.1705, + "step": 33867 + }, + { + "epoch": 93.04395604395604, + "grad_norm": 16.27705192565918, + "learning_rate": 3.478021978021978e-06, + "loss": 0.4516, + "step": 33868 + }, + { + "epoch": 93.0467032967033, + "grad_norm": 12.666078567504883, + "learning_rate": 3.4766483516483517e-06, + "loss": 0.2657, + "step": 33869 + }, + { + "epoch": 93.04945054945055, + "grad_norm": 10.317267417907715, + "learning_rate": 3.4752747252747255e-06, + "loss": 0.1394, + "step": 33870 + }, + { + "epoch": 93.0521978021978, + "grad_norm": 12.142415046691895, + "learning_rate": 3.4739010989010993e-06, + "loss": 0.1587, + "step": 33871 + }, + { + "epoch": 93.05494505494505, + "grad_norm": 19.85165786743164, + "learning_rate": 3.4725274725274723e-06, + "loss": 0.3716, + "step": 33872 + }, + { + "epoch": 93.0576923076923, + "grad_norm": 18.359914779663086, + "learning_rate": 3.471153846153846e-06, + "loss": 0.2825, + "step": 33873 + }, + { + "epoch": 93.06043956043956, + "grad_norm": 3.5352959632873535, + "learning_rate": 3.46978021978022e-06, + "loss": 0.0407, + "step": 33874 + }, + { + "epoch": 93.06318681318682, + "grad_norm": 5.809824466705322, + "learning_rate": 3.468406593406594e-06, + "loss": 0.0541, + "step": 33875 + }, + { + "epoch": 93.06593406593407, + "grad_norm": 13.955901145935059, + "learning_rate": 3.4670329670329676e-06, + "loss": 0.2185, + "step": 33876 + }, + { + "epoch": 93.06868131868131, + "grad_norm": 23.43008804321289, + "learning_rate": 3.4656593406593406e-06, + "loss": 0.5002, + "step": 33877 + }, + { + "epoch": 93.07142857142857, + "grad_norm": 7.166587829589844, + "learning_rate": 3.4642857142857145e-06, + "loss": 0.0493, + "step": 33878 + }, + { + "epoch": 93.07417582417582, + "grad_norm": 6.030412673950195, + "learning_rate": 3.4629120879120883e-06, + "loss": 0.0876, + "step": 33879 + }, + { + "epoch": 93.07692307692308, + "grad_norm": 13.28099536895752, + "learning_rate": 3.4615384615384617e-06, + "loss": 0.1775, + "step": 33880 + }, + { + "epoch": 93.07967032967034, + "grad_norm": 7.250370025634766, + "learning_rate": 3.4601648351648355e-06, + "loss": 0.0854, + "step": 33881 + }, + { + "epoch": 93.08241758241758, + "grad_norm": 3.3568506240844727, + "learning_rate": 3.458791208791209e-06, + "loss": 0.0448, + "step": 33882 + }, + { + "epoch": 93.08516483516483, + "grad_norm": 3.0765163898468018, + "learning_rate": 3.4574175824175823e-06, + "loss": 0.0699, + "step": 33883 + }, + { + "epoch": 93.08791208791209, + "grad_norm": 5.223150253295898, + "learning_rate": 3.456043956043956e-06, + "loss": 0.169, + "step": 33884 + }, + { + "epoch": 93.09065934065934, + "grad_norm": 10.002888679504395, + "learning_rate": 3.45467032967033e-06, + "loss": 0.0835, + "step": 33885 + }, + { + "epoch": 93.0934065934066, + "grad_norm": 5.202966213226318, + "learning_rate": 3.453296703296703e-06, + "loss": 0.1523, + "step": 33886 + }, + { + "epoch": 93.09615384615384, + "grad_norm": 1.9306730031967163, + "learning_rate": 3.451923076923077e-06, + "loss": 0.0322, + "step": 33887 + }, + { + "epoch": 93.0989010989011, + "grad_norm": 6.066153049468994, + "learning_rate": 3.4505494505494506e-06, + "loss": 0.0686, + "step": 33888 + }, + { + "epoch": 93.10164835164835, + "grad_norm": 7.638674259185791, + "learning_rate": 3.4491758241758245e-06, + "loss": 0.1013, + "step": 33889 + }, + { + "epoch": 93.1043956043956, + "grad_norm": 7.385224342346191, + "learning_rate": 3.4478021978021983e-06, + "loss": 0.1354, + "step": 33890 + }, + { + "epoch": 93.10714285714286, + "grad_norm": 14.148497581481934, + "learning_rate": 3.4464285714285713e-06, + "loss": 0.409, + "step": 33891 + }, + { + "epoch": 93.10989010989012, + "grad_norm": 10.496993064880371, + "learning_rate": 3.445054945054945e-06, + "loss": 0.1423, + "step": 33892 + }, + { + "epoch": 93.11263736263736, + "grad_norm": 5.237281799316406, + "learning_rate": 3.443681318681319e-06, + "loss": 0.0531, + "step": 33893 + }, + { + "epoch": 93.11538461538461, + "grad_norm": 4.493145942687988, + "learning_rate": 3.4423076923076928e-06, + "loss": 0.0447, + "step": 33894 + }, + { + "epoch": 93.11813186813187, + "grad_norm": 6.685409069061279, + "learning_rate": 3.440934065934066e-06, + "loss": 0.193, + "step": 33895 + }, + { + "epoch": 93.12087912087912, + "grad_norm": 17.33175277709961, + "learning_rate": 3.4395604395604396e-06, + "loss": 0.227, + "step": 33896 + }, + { + "epoch": 93.12362637362638, + "grad_norm": 10.144715309143066, + "learning_rate": 3.4381868131868134e-06, + "loss": 0.1786, + "step": 33897 + }, + { + "epoch": 93.12637362637362, + "grad_norm": 7.421053409576416, + "learning_rate": 3.436813186813187e-06, + "loss": 0.1295, + "step": 33898 + }, + { + "epoch": 93.12912087912088, + "grad_norm": 10.170790672302246, + "learning_rate": 3.4354395604395607e-06, + "loss": 0.1701, + "step": 33899 + }, + { + "epoch": 93.13186813186813, + "grad_norm": 19.795299530029297, + "learning_rate": 3.4340659340659345e-06, + "loss": 0.6767, + "step": 33900 + }, + { + "epoch": 93.13461538461539, + "grad_norm": 13.916583061218262, + "learning_rate": 3.4326923076923075e-06, + "loss": 0.296, + "step": 33901 + }, + { + "epoch": 93.13736263736264, + "grad_norm": 16.741474151611328, + "learning_rate": 3.4313186813186813e-06, + "loss": 0.3786, + "step": 33902 + }, + { + "epoch": 93.14010989010988, + "grad_norm": 0.640027642250061, + "learning_rate": 3.429945054945055e-06, + "loss": 0.0068, + "step": 33903 + }, + { + "epoch": 93.14285714285714, + "grad_norm": 8.772405624389648, + "learning_rate": 3.428571428571429e-06, + "loss": 0.1352, + "step": 33904 + }, + { + "epoch": 93.1456043956044, + "grad_norm": 24.759597778320312, + "learning_rate": 3.427197802197803e-06, + "loss": 0.5761, + "step": 33905 + }, + { + "epoch": 93.14835164835165, + "grad_norm": 11.715144157409668, + "learning_rate": 3.4258241758241758e-06, + "loss": 0.2158, + "step": 33906 + }, + { + "epoch": 93.1510989010989, + "grad_norm": 14.944184303283691, + "learning_rate": 3.4244505494505496e-06, + "loss": 0.1645, + "step": 33907 + }, + { + "epoch": 93.15384615384616, + "grad_norm": 10.38272476196289, + "learning_rate": 3.4230769230769234e-06, + "loss": 0.1654, + "step": 33908 + }, + { + "epoch": 93.1565934065934, + "grad_norm": 3.787101984024048, + "learning_rate": 3.4217032967032973e-06, + "loss": 0.0349, + "step": 33909 + }, + { + "epoch": 93.15934065934066, + "grad_norm": 16.162601470947266, + "learning_rate": 3.4203296703296707e-06, + "loss": 0.2511, + "step": 33910 + }, + { + "epoch": 93.16208791208791, + "grad_norm": 3.3732352256774902, + "learning_rate": 3.418956043956044e-06, + "loss": 0.0338, + "step": 33911 + }, + { + "epoch": 93.16483516483517, + "grad_norm": 7.8659772872924805, + "learning_rate": 3.4175824175824175e-06, + "loss": 0.1079, + "step": 33912 + }, + { + "epoch": 93.16758241758242, + "grad_norm": 14.804373741149902, + "learning_rate": 3.4162087912087913e-06, + "loss": 0.6227, + "step": 33913 + }, + { + "epoch": 93.17032967032966, + "grad_norm": 5.371644020080566, + "learning_rate": 3.414835164835165e-06, + "loss": 0.0903, + "step": 33914 + }, + { + "epoch": 93.17307692307692, + "grad_norm": 16.82941246032715, + "learning_rate": 3.413461538461538e-06, + "loss": 0.3756, + "step": 33915 + }, + { + "epoch": 93.17582417582418, + "grad_norm": 14.817280769348145, + "learning_rate": 3.412087912087912e-06, + "loss": 0.2186, + "step": 33916 + }, + { + "epoch": 93.17857142857143, + "grad_norm": 10.147693634033203, + "learning_rate": 3.410714285714286e-06, + "loss": 0.0572, + "step": 33917 + }, + { + "epoch": 93.18131868131869, + "grad_norm": 11.00812816619873, + "learning_rate": 3.4093406593406596e-06, + "loss": 0.2086, + "step": 33918 + }, + { + "epoch": 93.18406593406593, + "grad_norm": 2.265364646911621, + "learning_rate": 3.4079670329670335e-06, + "loss": 0.0305, + "step": 33919 + }, + { + "epoch": 93.18681318681318, + "grad_norm": 0.8214425444602966, + "learning_rate": 3.4065934065934064e-06, + "loss": 0.0093, + "step": 33920 + }, + { + "epoch": 93.18956043956044, + "grad_norm": 21.535411834716797, + "learning_rate": 3.4052197802197803e-06, + "loss": 0.3339, + "step": 33921 + }, + { + "epoch": 93.1923076923077, + "grad_norm": 18.59518051147461, + "learning_rate": 3.403846153846154e-06, + "loss": 0.5167, + "step": 33922 + }, + { + "epoch": 93.19505494505495, + "grad_norm": 20.792505264282227, + "learning_rate": 3.402472527472528e-06, + "loss": 0.415, + "step": 33923 + }, + { + "epoch": 93.1978021978022, + "grad_norm": 9.869061470031738, + "learning_rate": 3.4010989010989013e-06, + "loss": 0.2157, + "step": 33924 + }, + { + "epoch": 93.20054945054945, + "grad_norm": 12.895002365112305, + "learning_rate": 3.3997252747252747e-06, + "loss": 0.1706, + "step": 33925 + }, + { + "epoch": 93.2032967032967, + "grad_norm": 7.25859260559082, + "learning_rate": 3.3983516483516486e-06, + "loss": 0.1933, + "step": 33926 + }, + { + "epoch": 93.20604395604396, + "grad_norm": 5.965913772583008, + "learning_rate": 3.396978021978022e-06, + "loss": 0.0744, + "step": 33927 + }, + { + "epoch": 93.20879120879121, + "grad_norm": 3.8814327716827393, + "learning_rate": 3.395604395604396e-06, + "loss": 0.0582, + "step": 33928 + }, + { + "epoch": 93.21153846153847, + "grad_norm": 31.271608352661133, + "learning_rate": 3.3942307692307696e-06, + "loss": 0.5897, + "step": 33929 + }, + { + "epoch": 93.21428571428571, + "grad_norm": 9.089728355407715, + "learning_rate": 3.3928571428571426e-06, + "loss": 0.1803, + "step": 33930 + }, + { + "epoch": 93.21703296703296, + "grad_norm": 21.78220558166504, + "learning_rate": 3.3914835164835165e-06, + "loss": 0.2828, + "step": 33931 + }, + { + "epoch": 93.21978021978022, + "grad_norm": 14.506855964660645, + "learning_rate": 3.3901098901098903e-06, + "loss": 0.2676, + "step": 33932 + }, + { + "epoch": 93.22252747252747, + "grad_norm": 10.137555122375488, + "learning_rate": 3.388736263736264e-06, + "loss": 0.2321, + "step": 33933 + }, + { + "epoch": 93.22527472527473, + "grad_norm": 26.954524993896484, + "learning_rate": 3.387362637362638e-06, + "loss": 0.4815, + "step": 33934 + }, + { + "epoch": 93.22802197802197, + "grad_norm": 6.780938148498535, + "learning_rate": 3.385989010989011e-06, + "loss": 0.1085, + "step": 33935 + }, + { + "epoch": 93.23076923076923, + "grad_norm": 22.94454002380371, + "learning_rate": 3.3846153846153848e-06, + "loss": 0.3361, + "step": 33936 + }, + { + "epoch": 93.23351648351648, + "grad_norm": 19.964452743530273, + "learning_rate": 3.3832417582417586e-06, + "loss": 0.644, + "step": 33937 + }, + { + "epoch": 93.23626373626374, + "grad_norm": 10.18983268737793, + "learning_rate": 3.3818681318681324e-06, + "loss": 0.0903, + "step": 33938 + }, + { + "epoch": 93.23901098901099, + "grad_norm": 8.54074478149414, + "learning_rate": 3.380494505494506e-06, + "loss": 0.1472, + "step": 33939 + }, + { + "epoch": 93.24175824175825, + "grad_norm": 13.198299407958984, + "learning_rate": 3.3791208791208792e-06, + "loss": 0.2866, + "step": 33940 + }, + { + "epoch": 93.24450549450549, + "grad_norm": 2.4069876670837402, + "learning_rate": 3.3777472527472526e-06, + "loss": 0.0272, + "step": 33941 + }, + { + "epoch": 93.24725274725274, + "grad_norm": 6.586469650268555, + "learning_rate": 3.3763736263736265e-06, + "loss": 0.2022, + "step": 33942 + }, + { + "epoch": 93.25, + "grad_norm": 14.74952220916748, + "learning_rate": 3.3750000000000003e-06, + "loss": 0.4316, + "step": 33943 + }, + { + "epoch": 93.25274725274726, + "grad_norm": 3.2815310955047607, + "learning_rate": 3.3736263736263733e-06, + "loss": 0.0291, + "step": 33944 + }, + { + "epoch": 93.25549450549451, + "grad_norm": 11.523869514465332, + "learning_rate": 3.372252747252747e-06, + "loss": 0.2394, + "step": 33945 + }, + { + "epoch": 93.25824175824175, + "grad_norm": 17.674217224121094, + "learning_rate": 3.370879120879121e-06, + "loss": 0.3167, + "step": 33946 + }, + { + "epoch": 93.26098901098901, + "grad_norm": 21.06426429748535, + "learning_rate": 3.3695054945054948e-06, + "loss": 0.228, + "step": 33947 + }, + { + "epoch": 93.26373626373626, + "grad_norm": 13.984464645385742, + "learning_rate": 3.3681318681318686e-06, + "loss": 0.2988, + "step": 33948 + }, + { + "epoch": 93.26648351648352, + "grad_norm": 8.057914733886719, + "learning_rate": 3.3667582417582416e-06, + "loss": 0.0772, + "step": 33949 + }, + { + "epoch": 93.26923076923077, + "grad_norm": 22.098703384399414, + "learning_rate": 3.3653846153846154e-06, + "loss": 0.4868, + "step": 33950 + }, + { + "epoch": 93.27197802197803, + "grad_norm": 11.365021705627441, + "learning_rate": 3.3640109890109892e-06, + "loss": 0.1609, + "step": 33951 + }, + { + "epoch": 93.27472527472527, + "grad_norm": 23.747142791748047, + "learning_rate": 3.362637362637363e-06, + "loss": 0.3483, + "step": 33952 + }, + { + "epoch": 93.27747252747253, + "grad_norm": 7.818618297576904, + "learning_rate": 3.361263736263737e-06, + "loss": 0.1205, + "step": 33953 + }, + { + "epoch": 93.28021978021978, + "grad_norm": 8.884261131286621, + "learning_rate": 3.35989010989011e-06, + "loss": 0.2452, + "step": 33954 + }, + { + "epoch": 93.28296703296704, + "grad_norm": 19.24001693725586, + "learning_rate": 3.3585164835164837e-06, + "loss": 0.4945, + "step": 33955 + }, + { + "epoch": 93.28571428571429, + "grad_norm": 20.178789138793945, + "learning_rate": 3.357142857142857e-06, + "loss": 0.3015, + "step": 33956 + }, + { + "epoch": 93.28846153846153, + "grad_norm": 11.216858863830566, + "learning_rate": 3.355769230769231e-06, + "loss": 0.2429, + "step": 33957 + }, + { + "epoch": 93.29120879120879, + "grad_norm": 12.094014167785645, + "learning_rate": 3.3543956043956048e-06, + "loss": 0.1233, + "step": 33958 + }, + { + "epoch": 93.29395604395604, + "grad_norm": 15.04111385345459, + "learning_rate": 3.3530219780219778e-06, + "loss": 0.2257, + "step": 33959 + }, + { + "epoch": 93.2967032967033, + "grad_norm": 6.0763258934021, + "learning_rate": 3.3516483516483516e-06, + "loss": 0.0973, + "step": 33960 + }, + { + "epoch": 93.29945054945055, + "grad_norm": 11.19100284576416, + "learning_rate": 3.3502747252747254e-06, + "loss": 0.2868, + "step": 33961 + }, + { + "epoch": 93.3021978021978, + "grad_norm": 14.819060325622559, + "learning_rate": 3.3489010989010993e-06, + "loss": 0.3103, + "step": 33962 + }, + { + "epoch": 93.30494505494505, + "grad_norm": 14.860429763793945, + "learning_rate": 3.347527472527473e-06, + "loss": 0.3167, + "step": 33963 + }, + { + "epoch": 93.3076923076923, + "grad_norm": 12.560860633850098, + "learning_rate": 3.346153846153846e-06, + "loss": 0.1818, + "step": 33964 + }, + { + "epoch": 93.31043956043956, + "grad_norm": 8.161516189575195, + "learning_rate": 3.34478021978022e-06, + "loss": 0.2137, + "step": 33965 + }, + { + "epoch": 93.31318681318682, + "grad_norm": 15.943541526794434, + "learning_rate": 3.3434065934065937e-06, + "loss": 0.3008, + "step": 33966 + }, + { + "epoch": 93.31593406593407, + "grad_norm": 9.41614818572998, + "learning_rate": 3.3420329670329676e-06, + "loss": 0.1455, + "step": 33967 + }, + { + "epoch": 93.31868131868131, + "grad_norm": 19.409618377685547, + "learning_rate": 3.340659340659341e-06, + "loss": 0.3669, + "step": 33968 + }, + { + "epoch": 93.32142857142857, + "grad_norm": 8.60294246673584, + "learning_rate": 3.3392857142857144e-06, + "loss": 0.0857, + "step": 33969 + }, + { + "epoch": 93.32417582417582, + "grad_norm": 26.657726287841797, + "learning_rate": 3.337912087912088e-06, + "loss": 0.5822, + "step": 33970 + }, + { + "epoch": 93.32692307692308, + "grad_norm": 17.02410888671875, + "learning_rate": 3.3365384615384616e-06, + "loss": 0.341, + "step": 33971 + }, + { + "epoch": 93.32967032967034, + "grad_norm": 5.526324272155762, + "learning_rate": 3.3351648351648354e-06, + "loss": 0.0568, + "step": 33972 + }, + { + "epoch": 93.33241758241758, + "grad_norm": 13.034111022949219, + "learning_rate": 3.3337912087912084e-06, + "loss": 0.329, + "step": 33973 + }, + { + "epoch": 93.33516483516483, + "grad_norm": 13.061631202697754, + "learning_rate": 3.3324175824175823e-06, + "loss": 0.2358, + "step": 33974 + }, + { + "epoch": 93.33791208791209, + "grad_norm": 16.947603225708008, + "learning_rate": 3.331043956043956e-06, + "loss": 0.1295, + "step": 33975 + }, + { + "epoch": 93.34065934065934, + "grad_norm": 7.584382057189941, + "learning_rate": 3.32967032967033e-06, + "loss": 0.1487, + "step": 33976 + }, + { + "epoch": 93.3434065934066, + "grad_norm": 1.7745651006698608, + "learning_rate": 3.3282967032967038e-06, + "loss": 0.0569, + "step": 33977 + }, + { + "epoch": 93.34615384615384, + "grad_norm": 0.9633851647377014, + "learning_rate": 3.3269230769230767e-06, + "loss": 0.0137, + "step": 33978 + }, + { + "epoch": 93.3489010989011, + "grad_norm": 16.764266967773438, + "learning_rate": 3.3255494505494506e-06, + "loss": 0.3171, + "step": 33979 + }, + { + "epoch": 93.35164835164835, + "grad_norm": 12.856193542480469, + "learning_rate": 3.3241758241758244e-06, + "loss": 0.2348, + "step": 33980 + }, + { + "epoch": 93.3543956043956, + "grad_norm": 3.899090051651001, + "learning_rate": 3.3228021978021982e-06, + "loss": 0.0412, + "step": 33981 + }, + { + "epoch": 93.35714285714286, + "grad_norm": 25.412446975708008, + "learning_rate": 3.321428571428572e-06, + "loss": 0.3031, + "step": 33982 + }, + { + "epoch": 93.35989010989012, + "grad_norm": 10.017178535461426, + "learning_rate": 3.320054945054945e-06, + "loss": 0.1913, + "step": 33983 + }, + { + "epoch": 93.36263736263736, + "grad_norm": 6.995263576507568, + "learning_rate": 3.318681318681319e-06, + "loss": 0.081, + "step": 33984 + }, + { + "epoch": 93.36538461538461, + "grad_norm": 15.5072660446167, + "learning_rate": 3.3173076923076923e-06, + "loss": 0.3428, + "step": 33985 + }, + { + "epoch": 93.36813186813187, + "grad_norm": 8.780302047729492, + "learning_rate": 3.315934065934066e-06, + "loss": 0.276, + "step": 33986 + }, + { + "epoch": 93.37087912087912, + "grad_norm": 10.544304847717285, + "learning_rate": 3.31456043956044e-06, + "loss": 0.1577, + "step": 33987 + }, + { + "epoch": 93.37362637362638, + "grad_norm": 6.977169036865234, + "learning_rate": 3.313186813186813e-06, + "loss": 0.1222, + "step": 33988 + }, + { + "epoch": 93.37637362637362, + "grad_norm": 6.096549987792969, + "learning_rate": 3.3118131868131868e-06, + "loss": 0.0374, + "step": 33989 + }, + { + "epoch": 93.37912087912088, + "grad_norm": 16.055015563964844, + "learning_rate": 3.3104395604395606e-06, + "loss": 0.1915, + "step": 33990 + }, + { + "epoch": 93.38186813186813, + "grad_norm": 26.94059944152832, + "learning_rate": 3.3090659340659344e-06, + "loss": 0.2801, + "step": 33991 + }, + { + "epoch": 93.38461538461539, + "grad_norm": 9.101493835449219, + "learning_rate": 3.3076923076923082e-06, + "loss": 0.1023, + "step": 33992 + }, + { + "epoch": 93.38736263736264, + "grad_norm": 13.065641403198242, + "learning_rate": 3.3063186813186812e-06, + "loss": 0.2726, + "step": 33993 + }, + { + "epoch": 93.39010989010988, + "grad_norm": 14.158985137939453, + "learning_rate": 3.304945054945055e-06, + "loss": 0.4529, + "step": 33994 + }, + { + "epoch": 93.39285714285714, + "grad_norm": 18.023193359375, + "learning_rate": 3.303571428571429e-06, + "loss": 0.2694, + "step": 33995 + }, + { + "epoch": 93.3956043956044, + "grad_norm": 5.218045711517334, + "learning_rate": 3.3021978021978027e-06, + "loss": 0.0964, + "step": 33996 + }, + { + "epoch": 93.39835164835165, + "grad_norm": 9.646881103515625, + "learning_rate": 3.300824175824176e-06, + "loss": 0.142, + "step": 33997 + }, + { + "epoch": 93.4010989010989, + "grad_norm": 13.80170726776123, + "learning_rate": 3.2994505494505495e-06, + "loss": 0.2262, + "step": 33998 + }, + { + "epoch": 93.40384615384616, + "grad_norm": 5.1143412590026855, + "learning_rate": 3.2980769230769234e-06, + "loss": 0.0776, + "step": 33999 + }, + { + "epoch": 93.4065934065934, + "grad_norm": 14.13006591796875, + "learning_rate": 3.2967032967032968e-06, + "loss": 0.2273, + "step": 34000 + }, + { + "epoch": 93.40934065934066, + "grad_norm": 5.6267828941345215, + "learning_rate": 3.2953296703296706e-06, + "loss": 0.0673, + "step": 34001 + }, + { + "epoch": 93.41208791208791, + "grad_norm": 12.883501052856445, + "learning_rate": 3.2939560439560436e-06, + "loss": 0.346, + "step": 34002 + }, + { + "epoch": 93.41483516483517, + "grad_norm": 15.49288272857666, + "learning_rate": 3.2925824175824174e-06, + "loss": 0.412, + "step": 34003 + }, + { + "epoch": 93.41758241758242, + "grad_norm": 5.471570014953613, + "learning_rate": 3.2912087912087912e-06, + "loss": 0.0828, + "step": 34004 + }, + { + "epoch": 93.42032967032966, + "grad_norm": 23.069995880126953, + "learning_rate": 3.289835164835165e-06, + "loss": 0.4786, + "step": 34005 + }, + { + "epoch": 93.42307692307692, + "grad_norm": 18.272186279296875, + "learning_rate": 3.288461538461539e-06, + "loss": 0.424, + "step": 34006 + }, + { + "epoch": 93.42582417582418, + "grad_norm": 8.83928394317627, + "learning_rate": 3.287087912087912e-06, + "loss": 0.1869, + "step": 34007 + }, + { + "epoch": 93.42857142857143, + "grad_norm": 7.290126323699951, + "learning_rate": 3.2857142857142857e-06, + "loss": 0.1526, + "step": 34008 + }, + { + "epoch": 93.43131868131869, + "grad_norm": 8.852479934692383, + "learning_rate": 3.2843406593406595e-06, + "loss": 0.087, + "step": 34009 + }, + { + "epoch": 93.43406593406593, + "grad_norm": 13.037134170532227, + "learning_rate": 3.2829670329670334e-06, + "loss": 0.2157, + "step": 34010 + }, + { + "epoch": 93.43681318681318, + "grad_norm": 14.180449485778809, + "learning_rate": 3.281593406593407e-06, + "loss": 0.2453, + "step": 34011 + }, + { + "epoch": 93.43956043956044, + "grad_norm": 2.6350595951080322, + "learning_rate": 3.28021978021978e-06, + "loss": 0.0254, + "step": 34012 + }, + { + "epoch": 93.4423076923077, + "grad_norm": 9.094305992126465, + "learning_rate": 3.278846153846154e-06, + "loss": 0.1509, + "step": 34013 + }, + { + "epoch": 93.44505494505495, + "grad_norm": 3.525308847427368, + "learning_rate": 3.277472527472528e-06, + "loss": 0.0512, + "step": 34014 + }, + { + "epoch": 93.4478021978022, + "grad_norm": 8.339938163757324, + "learning_rate": 3.2760989010989013e-06, + "loss": 0.1773, + "step": 34015 + }, + { + "epoch": 93.45054945054945, + "grad_norm": 22.848604202270508, + "learning_rate": 3.274725274725275e-06, + "loss": 0.408, + "step": 34016 + }, + { + "epoch": 93.4532967032967, + "grad_norm": 17.271310806274414, + "learning_rate": 3.273351648351648e-06, + "loss": 0.191, + "step": 34017 + }, + { + "epoch": 93.45604395604396, + "grad_norm": 7.944374084472656, + "learning_rate": 3.271978021978022e-06, + "loss": 0.2322, + "step": 34018 + }, + { + "epoch": 93.45879120879121, + "grad_norm": 17.207454681396484, + "learning_rate": 3.2706043956043957e-06, + "loss": 0.397, + "step": 34019 + }, + { + "epoch": 93.46153846153847, + "grad_norm": 10.379681587219238, + "learning_rate": 3.2692307692307696e-06, + "loss": 0.1703, + "step": 34020 + }, + { + "epoch": 93.46428571428571, + "grad_norm": 10.059797286987305, + "learning_rate": 3.2678571428571434e-06, + "loss": 0.2521, + "step": 34021 + }, + { + "epoch": 93.46703296703296, + "grad_norm": 12.497597694396973, + "learning_rate": 3.2664835164835164e-06, + "loss": 0.2828, + "step": 34022 + }, + { + "epoch": 93.46978021978022, + "grad_norm": 2.8175225257873535, + "learning_rate": 3.26510989010989e-06, + "loss": 0.0456, + "step": 34023 + }, + { + "epoch": 93.47252747252747, + "grad_norm": 9.57441520690918, + "learning_rate": 3.263736263736264e-06, + "loss": 0.0637, + "step": 34024 + }, + { + "epoch": 93.47527472527473, + "grad_norm": 12.908161163330078, + "learning_rate": 3.262362637362638e-06, + "loss": 0.2309, + "step": 34025 + }, + { + "epoch": 93.47802197802197, + "grad_norm": 2.9141364097595215, + "learning_rate": 3.260989010989011e-06, + "loss": 0.0383, + "step": 34026 + }, + { + "epoch": 93.48076923076923, + "grad_norm": 20.60453987121582, + "learning_rate": 3.2596153846153847e-06, + "loss": 0.6217, + "step": 34027 + }, + { + "epoch": 93.48351648351648, + "grad_norm": 4.017669677734375, + "learning_rate": 3.2582417582417585e-06, + "loss": 0.0729, + "step": 34028 + }, + { + "epoch": 93.48626373626374, + "grad_norm": 10.163372993469238, + "learning_rate": 3.256868131868132e-06, + "loss": 0.1388, + "step": 34029 + }, + { + "epoch": 93.48901098901099, + "grad_norm": 2.159468412399292, + "learning_rate": 3.2554945054945057e-06, + "loss": 0.0315, + "step": 34030 + }, + { + "epoch": 93.49175824175825, + "grad_norm": 7.754044055938721, + "learning_rate": 3.254120879120879e-06, + "loss": 0.0862, + "step": 34031 + }, + { + "epoch": 93.49450549450549, + "grad_norm": 2.5903799533843994, + "learning_rate": 3.2527472527472526e-06, + "loss": 0.03, + "step": 34032 + }, + { + "epoch": 93.49725274725274, + "grad_norm": 19.82501792907715, + "learning_rate": 3.2513736263736264e-06, + "loss": 0.2836, + "step": 34033 + }, + { + "epoch": 93.5, + "grad_norm": 9.731225967407227, + "learning_rate": 3.2500000000000002e-06, + "loss": 0.1636, + "step": 34034 + }, + { + "epoch": 93.50274725274726, + "grad_norm": 3.610159158706665, + "learning_rate": 3.248626373626374e-06, + "loss": 0.0469, + "step": 34035 + }, + { + "epoch": 93.50549450549451, + "grad_norm": 15.5515775680542, + "learning_rate": 3.247252747252747e-06, + "loss": 0.3219, + "step": 34036 + }, + { + "epoch": 93.50824175824175, + "grad_norm": 3.152519464492798, + "learning_rate": 3.245879120879121e-06, + "loss": 0.03, + "step": 34037 + }, + { + "epoch": 93.51098901098901, + "grad_norm": 1.727903127670288, + "learning_rate": 3.2445054945054947e-06, + "loss": 0.0191, + "step": 34038 + }, + { + "epoch": 93.51373626373626, + "grad_norm": 9.672348022460938, + "learning_rate": 3.2431318681318685e-06, + "loss": 0.1852, + "step": 34039 + }, + { + "epoch": 93.51648351648352, + "grad_norm": 2.922332525253296, + "learning_rate": 3.2417582417582424e-06, + "loss": 0.0879, + "step": 34040 + }, + { + "epoch": 93.51923076923077, + "grad_norm": 10.36174488067627, + "learning_rate": 3.2403846153846153e-06, + "loss": 0.1295, + "step": 34041 + }, + { + "epoch": 93.52197802197803, + "grad_norm": 2.3205418586730957, + "learning_rate": 3.239010989010989e-06, + "loss": 0.0266, + "step": 34042 + }, + { + "epoch": 93.52472527472527, + "grad_norm": 6.916192054748535, + "learning_rate": 3.237637362637363e-06, + "loss": 0.0817, + "step": 34043 + }, + { + "epoch": 93.52747252747253, + "grad_norm": 22.870004653930664, + "learning_rate": 3.2362637362637364e-06, + "loss": 0.579, + "step": 34044 + }, + { + "epoch": 93.53021978021978, + "grad_norm": 11.607619285583496, + "learning_rate": 3.2348901098901102e-06, + "loss": 0.1279, + "step": 34045 + }, + { + "epoch": 93.53296703296704, + "grad_norm": 12.907920837402344, + "learning_rate": 3.2335164835164832e-06, + "loss": 0.1461, + "step": 34046 + }, + { + "epoch": 93.53571428571429, + "grad_norm": 17.495548248291016, + "learning_rate": 3.232142857142857e-06, + "loss": 0.2661, + "step": 34047 + }, + { + "epoch": 93.53846153846153, + "grad_norm": 19.64861297607422, + "learning_rate": 3.230769230769231e-06, + "loss": 0.2168, + "step": 34048 + }, + { + "epoch": 93.54120879120879, + "grad_norm": 5.349157810211182, + "learning_rate": 3.2293956043956047e-06, + "loss": 0.0573, + "step": 34049 + }, + { + "epoch": 93.54395604395604, + "grad_norm": 15.658514976501465, + "learning_rate": 3.2280219780219785e-06, + "loss": 0.3013, + "step": 34050 + }, + { + "epoch": 93.5467032967033, + "grad_norm": 6.279860019683838, + "learning_rate": 3.2266483516483515e-06, + "loss": 0.0757, + "step": 34051 + }, + { + "epoch": 93.54945054945055, + "grad_norm": 20.910903930664062, + "learning_rate": 3.2252747252747254e-06, + "loss": 0.2657, + "step": 34052 + }, + { + "epoch": 93.5521978021978, + "grad_norm": 4.731681823730469, + "learning_rate": 3.223901098901099e-06, + "loss": 0.0508, + "step": 34053 + }, + { + "epoch": 93.55494505494505, + "grad_norm": 12.177278518676758, + "learning_rate": 3.222527472527473e-06, + "loss": 0.2265, + "step": 34054 + }, + { + "epoch": 93.5576923076923, + "grad_norm": 3.9391934871673584, + "learning_rate": 3.221153846153846e-06, + "loss": 0.083, + "step": 34055 + }, + { + "epoch": 93.56043956043956, + "grad_norm": 3.9749960899353027, + "learning_rate": 3.21978021978022e-06, + "loss": 0.0656, + "step": 34056 + }, + { + "epoch": 93.56318681318682, + "grad_norm": 24.95150375366211, + "learning_rate": 3.2184065934065937e-06, + "loss": 0.3846, + "step": 34057 + }, + { + "epoch": 93.56593406593407, + "grad_norm": 2.803952693939209, + "learning_rate": 3.217032967032967e-06, + "loss": 0.0288, + "step": 34058 + }, + { + "epoch": 93.56868131868131, + "grad_norm": 11.987786293029785, + "learning_rate": 3.215659340659341e-06, + "loss": 0.2463, + "step": 34059 + }, + { + "epoch": 93.57142857142857, + "grad_norm": 11.466401100158691, + "learning_rate": 3.2142857142857143e-06, + "loss": 0.2414, + "step": 34060 + }, + { + "epoch": 93.57417582417582, + "grad_norm": 21.702159881591797, + "learning_rate": 3.2129120879120877e-06, + "loss": 0.3752, + "step": 34061 + }, + { + "epoch": 93.57692307692308, + "grad_norm": 15.967110633850098, + "learning_rate": 3.2115384615384615e-06, + "loss": 0.5365, + "step": 34062 + }, + { + "epoch": 93.57967032967034, + "grad_norm": 21.363962173461914, + "learning_rate": 3.2101648351648354e-06, + "loss": 0.3493, + "step": 34063 + }, + { + "epoch": 93.58241758241758, + "grad_norm": 12.564382553100586, + "learning_rate": 3.208791208791209e-06, + "loss": 0.2319, + "step": 34064 + }, + { + "epoch": 93.58516483516483, + "grad_norm": 14.211000442504883, + "learning_rate": 3.207417582417582e-06, + "loss": 0.2066, + "step": 34065 + }, + { + "epoch": 93.58791208791209, + "grad_norm": 10.044784545898438, + "learning_rate": 3.206043956043956e-06, + "loss": 0.1937, + "step": 34066 + }, + { + "epoch": 93.59065934065934, + "grad_norm": 7.249892234802246, + "learning_rate": 3.20467032967033e-06, + "loss": 0.0827, + "step": 34067 + }, + { + "epoch": 93.5934065934066, + "grad_norm": 4.408767223358154, + "learning_rate": 3.2032967032967037e-06, + "loss": 0.0845, + "step": 34068 + }, + { + "epoch": 93.59615384615384, + "grad_norm": 4.0322089195251465, + "learning_rate": 3.2019230769230775e-06, + "loss": 0.0342, + "step": 34069 + }, + { + "epoch": 93.5989010989011, + "grad_norm": 3.5641167163848877, + "learning_rate": 3.2005494505494505e-06, + "loss": 0.0473, + "step": 34070 + }, + { + "epoch": 93.60164835164835, + "grad_norm": 28.812694549560547, + "learning_rate": 3.1991758241758243e-06, + "loss": 0.2194, + "step": 34071 + }, + { + "epoch": 93.6043956043956, + "grad_norm": 13.968437194824219, + "learning_rate": 3.197802197802198e-06, + "loss": 0.1629, + "step": 34072 + }, + { + "epoch": 93.60714285714286, + "grad_norm": 10.231779098510742, + "learning_rate": 3.1964285714285716e-06, + "loss": 0.1142, + "step": 34073 + }, + { + "epoch": 93.60989010989012, + "grad_norm": 10.919290542602539, + "learning_rate": 3.1950549450549454e-06, + "loss": 0.1725, + "step": 34074 + }, + { + "epoch": 93.61263736263736, + "grad_norm": 7.147034168243408, + "learning_rate": 3.193681318681319e-06, + "loss": 0.09, + "step": 34075 + }, + { + "epoch": 93.61538461538461, + "grad_norm": 7.712208271026611, + "learning_rate": 3.192307692307692e-06, + "loss": 0.0642, + "step": 34076 + }, + { + "epoch": 93.61813186813187, + "grad_norm": 16.955669403076172, + "learning_rate": 3.190934065934066e-06, + "loss": 0.2359, + "step": 34077 + }, + { + "epoch": 93.62087912087912, + "grad_norm": 15.115232467651367, + "learning_rate": 3.18956043956044e-06, + "loss": 0.2331, + "step": 34078 + }, + { + "epoch": 93.62362637362638, + "grad_norm": 3.2668020725250244, + "learning_rate": 3.1881868131868137e-06, + "loss": 0.0244, + "step": 34079 + }, + { + "epoch": 93.62637362637362, + "grad_norm": 0.9472829103469849, + "learning_rate": 3.1868131868131867e-06, + "loss": 0.0098, + "step": 34080 + }, + { + "epoch": 93.62912087912088, + "grad_norm": 12.373936653137207, + "learning_rate": 3.1854395604395605e-06, + "loss": 0.1369, + "step": 34081 + }, + { + "epoch": 93.63186813186813, + "grad_norm": 0.631915807723999, + "learning_rate": 3.1840659340659343e-06, + "loss": 0.0092, + "step": 34082 + }, + { + "epoch": 93.63461538461539, + "grad_norm": 6.090509414672852, + "learning_rate": 3.182692307692308e-06, + "loss": 0.0604, + "step": 34083 + }, + { + "epoch": 93.63736263736264, + "grad_norm": 7.8159027099609375, + "learning_rate": 3.181318681318681e-06, + "loss": 0.1347, + "step": 34084 + }, + { + "epoch": 93.64010989010988, + "grad_norm": 4.861199855804443, + "learning_rate": 3.179945054945055e-06, + "loss": 0.0865, + "step": 34085 + }, + { + "epoch": 93.64285714285714, + "grad_norm": 15.01660442352295, + "learning_rate": 3.178571428571429e-06, + "loss": 0.4503, + "step": 34086 + }, + { + "epoch": 93.6456043956044, + "grad_norm": 20.477041244506836, + "learning_rate": 3.1771978021978026e-06, + "loss": 0.2716, + "step": 34087 + }, + { + "epoch": 93.64835164835165, + "grad_norm": 15.470067024230957, + "learning_rate": 3.175824175824176e-06, + "loss": 0.2618, + "step": 34088 + }, + { + "epoch": 93.6510989010989, + "grad_norm": 9.316514015197754, + "learning_rate": 3.1744505494505495e-06, + "loss": 0.0664, + "step": 34089 + }, + { + "epoch": 93.65384615384616, + "grad_norm": 1.8948606252670288, + "learning_rate": 3.173076923076923e-06, + "loss": 0.0233, + "step": 34090 + }, + { + "epoch": 93.6565934065934, + "grad_norm": 2.713254451751709, + "learning_rate": 3.1717032967032967e-06, + "loss": 0.031, + "step": 34091 + }, + { + "epoch": 93.65934065934066, + "grad_norm": 13.358033180236816, + "learning_rate": 3.1703296703296705e-06, + "loss": 0.268, + "step": 34092 + }, + { + "epoch": 93.66208791208791, + "grad_norm": 8.114697456359863, + "learning_rate": 3.1689560439560444e-06, + "loss": 0.0502, + "step": 34093 + }, + { + "epoch": 93.66483516483517, + "grad_norm": 16.681791305541992, + "learning_rate": 3.1675824175824173e-06, + "loss": 0.3296, + "step": 34094 + }, + { + "epoch": 93.66758241758242, + "grad_norm": 7.020773410797119, + "learning_rate": 3.166208791208791e-06, + "loss": 0.0868, + "step": 34095 + }, + { + "epoch": 93.67032967032966, + "grad_norm": 8.460990905761719, + "learning_rate": 3.164835164835165e-06, + "loss": 0.1346, + "step": 34096 + }, + { + "epoch": 93.67307692307692, + "grad_norm": 9.254685401916504, + "learning_rate": 3.163461538461539e-06, + "loss": 0.1406, + "step": 34097 + }, + { + "epoch": 93.67582417582418, + "grad_norm": 3.5916125774383545, + "learning_rate": 3.1620879120879127e-06, + "loss": 0.0532, + "step": 34098 + }, + { + "epoch": 93.67857142857143, + "grad_norm": 2.2430179119110107, + "learning_rate": 3.1607142857142856e-06, + "loss": 0.0371, + "step": 34099 + }, + { + "epoch": 93.68131868131869, + "grad_norm": 7.230537414550781, + "learning_rate": 3.1593406593406595e-06, + "loss": 0.0806, + "step": 34100 + }, + { + "epoch": 93.68406593406593, + "grad_norm": 11.128450393676758, + "learning_rate": 3.1579670329670333e-06, + "loss": 0.2034, + "step": 34101 + }, + { + "epoch": 93.68681318681318, + "grad_norm": 9.10734748840332, + "learning_rate": 3.1565934065934067e-06, + "loss": 0.207, + "step": 34102 + }, + { + "epoch": 93.68956043956044, + "grad_norm": 11.328324317932129, + "learning_rate": 3.1552197802197805e-06, + "loss": 0.1509, + "step": 34103 + }, + { + "epoch": 93.6923076923077, + "grad_norm": 8.310629844665527, + "learning_rate": 3.153846153846154e-06, + "loss": 0.0685, + "step": 34104 + }, + { + "epoch": 93.69505494505495, + "grad_norm": 6.6207709312438965, + "learning_rate": 3.1524725274725274e-06, + "loss": 0.0876, + "step": 34105 + }, + { + "epoch": 93.6978021978022, + "grad_norm": 6.368810176849365, + "learning_rate": 3.151098901098901e-06, + "loss": 0.1302, + "step": 34106 + }, + { + "epoch": 93.70054945054945, + "grad_norm": 14.901516914367676, + "learning_rate": 3.149725274725275e-06, + "loss": 0.1755, + "step": 34107 + }, + { + "epoch": 93.7032967032967, + "grad_norm": 12.458403587341309, + "learning_rate": 3.148351648351649e-06, + "loss": 0.245, + "step": 34108 + }, + { + "epoch": 93.70604395604396, + "grad_norm": 11.380322456359863, + "learning_rate": 3.146978021978022e-06, + "loss": 0.2305, + "step": 34109 + }, + { + "epoch": 93.70879120879121, + "grad_norm": 8.488770484924316, + "learning_rate": 3.1456043956043957e-06, + "loss": 0.1307, + "step": 34110 + }, + { + "epoch": 93.71153846153847, + "grad_norm": 15.048187255859375, + "learning_rate": 3.1442307692307695e-06, + "loss": 0.3061, + "step": 34111 + }, + { + "epoch": 93.71428571428571, + "grad_norm": 9.558395385742188, + "learning_rate": 3.1428571428571433e-06, + "loss": 0.1808, + "step": 34112 + }, + { + "epoch": 93.71703296703296, + "grad_norm": 9.483413696289062, + "learning_rate": 3.1414835164835163e-06, + "loss": 0.1523, + "step": 34113 + }, + { + "epoch": 93.71978021978022, + "grad_norm": 11.889250755310059, + "learning_rate": 3.14010989010989e-06, + "loss": 0.2961, + "step": 34114 + }, + { + "epoch": 93.72252747252747, + "grad_norm": 8.435158729553223, + "learning_rate": 3.138736263736264e-06, + "loss": 0.196, + "step": 34115 + }, + { + "epoch": 93.72527472527473, + "grad_norm": 8.163484573364258, + "learning_rate": 3.137362637362638e-06, + "loss": 0.1201, + "step": 34116 + }, + { + "epoch": 93.72802197802197, + "grad_norm": 12.103007316589355, + "learning_rate": 3.135989010989011e-06, + "loss": 0.1774, + "step": 34117 + }, + { + "epoch": 93.73076923076923, + "grad_norm": 10.963723182678223, + "learning_rate": 3.1346153846153846e-06, + "loss": 0.1851, + "step": 34118 + }, + { + "epoch": 93.73351648351648, + "grad_norm": 14.010080337524414, + "learning_rate": 3.133241758241758e-06, + "loss": 0.489, + "step": 34119 + }, + { + "epoch": 93.73626373626374, + "grad_norm": 13.081284523010254, + "learning_rate": 3.131868131868132e-06, + "loss": 0.271, + "step": 34120 + }, + { + "epoch": 93.73901098901099, + "grad_norm": 17.60838508605957, + "learning_rate": 3.1304945054945057e-06, + "loss": 0.4137, + "step": 34121 + }, + { + "epoch": 93.74175824175825, + "grad_norm": 13.332315444946289, + "learning_rate": 3.1291208791208795e-06, + "loss": 0.1281, + "step": 34122 + }, + { + "epoch": 93.74450549450549, + "grad_norm": 10.72232723236084, + "learning_rate": 3.1277472527472525e-06, + "loss": 0.3462, + "step": 34123 + }, + { + "epoch": 93.74725274725274, + "grad_norm": 16.728734970092773, + "learning_rate": 3.1263736263736263e-06, + "loss": 0.3964, + "step": 34124 + }, + { + "epoch": 93.75, + "grad_norm": 12.23200798034668, + "learning_rate": 3.125e-06, + "loss": 0.2567, + "step": 34125 + }, + { + "epoch": 93.75274725274726, + "grad_norm": 9.941712379455566, + "learning_rate": 3.123626373626374e-06, + "loss": 0.1503, + "step": 34126 + }, + { + "epoch": 93.75549450549451, + "grad_norm": 14.49917984008789, + "learning_rate": 3.1222527472527474e-06, + "loss": 0.4351, + "step": 34127 + }, + { + "epoch": 93.75824175824175, + "grad_norm": 8.6052827835083, + "learning_rate": 3.1208791208791212e-06, + "loss": 0.0868, + "step": 34128 + }, + { + "epoch": 93.76098901098901, + "grad_norm": 12.489288330078125, + "learning_rate": 3.1195054945054946e-06, + "loss": 0.2074, + "step": 34129 + }, + { + "epoch": 93.76373626373626, + "grad_norm": 1.5534673929214478, + "learning_rate": 3.1181318681318685e-06, + "loss": 0.0193, + "step": 34130 + }, + { + "epoch": 93.76648351648352, + "grad_norm": 4.6636528968811035, + "learning_rate": 3.1167582417582423e-06, + "loss": 0.0942, + "step": 34131 + }, + { + "epoch": 93.76923076923077, + "grad_norm": 11.233844757080078, + "learning_rate": 3.1153846153846157e-06, + "loss": 0.1963, + "step": 34132 + }, + { + "epoch": 93.77197802197803, + "grad_norm": 16.298954010009766, + "learning_rate": 3.114010989010989e-06, + "loss": 0.3649, + "step": 34133 + }, + { + "epoch": 93.77472527472527, + "grad_norm": 11.428364753723145, + "learning_rate": 3.1126373626373625e-06, + "loss": 0.1772, + "step": 34134 + }, + { + "epoch": 93.77747252747253, + "grad_norm": 21.538869857788086, + "learning_rate": 3.1112637362637363e-06, + "loss": 0.3091, + "step": 34135 + }, + { + "epoch": 93.78021978021978, + "grad_norm": 8.120044708251953, + "learning_rate": 3.1098901098901097e-06, + "loss": 0.2722, + "step": 34136 + }, + { + "epoch": 93.78296703296704, + "grad_norm": 4.818157196044922, + "learning_rate": 3.1085164835164836e-06, + "loss": 0.0486, + "step": 34137 + }, + { + "epoch": 93.78571428571429, + "grad_norm": 6.940460205078125, + "learning_rate": 3.1071428571428574e-06, + "loss": 0.0767, + "step": 34138 + }, + { + "epoch": 93.78846153846153, + "grad_norm": 2.082609176635742, + "learning_rate": 3.105769230769231e-06, + "loss": 0.0258, + "step": 34139 + }, + { + "epoch": 93.79120879120879, + "grad_norm": 12.856024742126465, + "learning_rate": 3.1043956043956046e-06, + "loss": 0.354, + "step": 34140 + }, + { + "epoch": 93.79395604395604, + "grad_norm": 10.782904624938965, + "learning_rate": 3.103021978021978e-06, + "loss": 0.1299, + "step": 34141 + }, + { + "epoch": 93.7967032967033, + "grad_norm": 17.101543426513672, + "learning_rate": 3.101648351648352e-06, + "loss": 0.3642, + "step": 34142 + }, + { + "epoch": 93.79945054945055, + "grad_norm": 11.636411666870117, + "learning_rate": 3.1002747252747257e-06, + "loss": 0.3201, + "step": 34143 + }, + { + "epoch": 93.8021978021978, + "grad_norm": 6.048196315765381, + "learning_rate": 3.098901098901099e-06, + "loss": 0.1791, + "step": 34144 + }, + { + "epoch": 93.80494505494505, + "grad_norm": 15.01859188079834, + "learning_rate": 3.097527472527473e-06, + "loss": 0.2791, + "step": 34145 + }, + { + "epoch": 93.8076923076923, + "grad_norm": 5.1291279792785645, + "learning_rate": 3.0961538461538464e-06, + "loss": 0.0955, + "step": 34146 + }, + { + "epoch": 93.81043956043956, + "grad_norm": 13.197467803955078, + "learning_rate": 3.0947802197802198e-06, + "loss": 0.2099, + "step": 34147 + }, + { + "epoch": 93.81318681318682, + "grad_norm": 19.78537940979004, + "learning_rate": 3.0934065934065936e-06, + "loss": 0.3748, + "step": 34148 + }, + { + "epoch": 93.81593406593407, + "grad_norm": 17.576440811157227, + "learning_rate": 3.092032967032967e-06, + "loss": 0.5008, + "step": 34149 + }, + { + "epoch": 93.81868131868131, + "grad_norm": 12.411788940429688, + "learning_rate": 3.090659340659341e-06, + "loss": 0.34, + "step": 34150 + }, + { + "epoch": 93.82142857142857, + "grad_norm": 18.593000411987305, + "learning_rate": 3.0892857142857142e-06, + "loss": 0.2567, + "step": 34151 + }, + { + "epoch": 93.82417582417582, + "grad_norm": 8.085281372070312, + "learning_rate": 3.087912087912088e-06, + "loss": 0.1022, + "step": 34152 + }, + { + "epoch": 93.82692307692308, + "grad_norm": 6.436769485473633, + "learning_rate": 3.0865384615384615e-06, + "loss": 0.0551, + "step": 34153 + }, + { + "epoch": 93.82967032967034, + "grad_norm": 10.16942310333252, + "learning_rate": 3.0851648351648353e-06, + "loss": 0.1805, + "step": 34154 + }, + { + "epoch": 93.83241758241758, + "grad_norm": 12.225364685058594, + "learning_rate": 3.083791208791209e-06, + "loss": 0.1752, + "step": 34155 + }, + { + "epoch": 93.83516483516483, + "grad_norm": 10.24769115447998, + "learning_rate": 3.0824175824175825e-06, + "loss": 0.1629, + "step": 34156 + }, + { + "epoch": 93.83791208791209, + "grad_norm": 5.475946426391602, + "learning_rate": 3.0810439560439564e-06, + "loss": 0.0634, + "step": 34157 + }, + { + "epoch": 93.84065934065934, + "grad_norm": 11.113526344299316, + "learning_rate": 3.0796703296703298e-06, + "loss": 0.1147, + "step": 34158 + }, + { + "epoch": 93.8434065934066, + "grad_norm": 1.8214114904403687, + "learning_rate": 3.0782967032967036e-06, + "loss": 0.0207, + "step": 34159 + }, + { + "epoch": 93.84615384615384, + "grad_norm": 20.000102996826172, + "learning_rate": 3.0769230769230774e-06, + "loss": 0.3852, + "step": 34160 + }, + { + "epoch": 93.8489010989011, + "grad_norm": 16.225482940673828, + "learning_rate": 3.075549450549451e-06, + "loss": 0.2298, + "step": 34161 + }, + { + "epoch": 93.85164835164835, + "grad_norm": 2.114341974258423, + "learning_rate": 3.0741758241758242e-06, + "loss": 0.0375, + "step": 34162 + }, + { + "epoch": 93.8543956043956, + "grad_norm": 8.200448989868164, + "learning_rate": 3.0728021978021977e-06, + "loss": 0.1796, + "step": 34163 + }, + { + "epoch": 93.85714285714286, + "grad_norm": 11.324660301208496, + "learning_rate": 3.0714285714285715e-06, + "loss": 0.1594, + "step": 34164 + }, + { + "epoch": 93.85989010989012, + "grad_norm": 8.790487289428711, + "learning_rate": 3.070054945054945e-06, + "loss": 0.0985, + "step": 34165 + }, + { + "epoch": 93.86263736263736, + "grad_norm": 9.040017127990723, + "learning_rate": 3.0686813186813187e-06, + "loss": 0.2081, + "step": 34166 + }, + { + "epoch": 93.86538461538461, + "grad_norm": 22.021434783935547, + "learning_rate": 3.0673076923076926e-06, + "loss": 0.3649, + "step": 34167 + }, + { + "epoch": 93.86813186813187, + "grad_norm": 4.1844353675842285, + "learning_rate": 3.065934065934066e-06, + "loss": 0.0736, + "step": 34168 + }, + { + "epoch": 93.87087912087912, + "grad_norm": 8.507991790771484, + "learning_rate": 3.06456043956044e-06, + "loss": 0.065, + "step": 34169 + }, + { + "epoch": 93.87362637362638, + "grad_norm": 20.49230194091797, + "learning_rate": 3.063186813186813e-06, + "loss": 0.3688, + "step": 34170 + }, + { + "epoch": 93.87637362637362, + "grad_norm": 6.764710426330566, + "learning_rate": 3.061813186813187e-06, + "loss": 0.0667, + "step": 34171 + }, + { + "epoch": 93.87912087912088, + "grad_norm": 5.225215435028076, + "learning_rate": 3.060439560439561e-06, + "loss": 0.0392, + "step": 34172 + }, + { + "epoch": 93.88186813186813, + "grad_norm": 18.048006057739258, + "learning_rate": 3.0590659340659343e-06, + "loss": 0.3258, + "step": 34173 + }, + { + "epoch": 93.88461538461539, + "grad_norm": 15.703941345214844, + "learning_rate": 3.057692307692308e-06, + "loss": 0.3352, + "step": 34174 + }, + { + "epoch": 93.88736263736264, + "grad_norm": 1.6476808786392212, + "learning_rate": 3.0563186813186815e-06, + "loss": 0.0239, + "step": 34175 + }, + { + "epoch": 93.89010989010988, + "grad_norm": 3.239809274673462, + "learning_rate": 3.0549450549450553e-06, + "loss": 0.0379, + "step": 34176 + }, + { + "epoch": 93.89285714285714, + "grad_norm": 19.056730270385742, + "learning_rate": 3.0535714285714287e-06, + "loss": 0.2023, + "step": 34177 + }, + { + "epoch": 93.8956043956044, + "grad_norm": 4.5955891609191895, + "learning_rate": 3.052197802197802e-06, + "loss": 0.0392, + "step": 34178 + }, + { + "epoch": 93.89835164835165, + "grad_norm": 12.334065437316895, + "learning_rate": 3.050824175824176e-06, + "loss": 0.1503, + "step": 34179 + }, + { + "epoch": 93.9010989010989, + "grad_norm": 8.906085968017578, + "learning_rate": 3.0494505494505494e-06, + "loss": 0.1706, + "step": 34180 + }, + { + "epoch": 93.90384615384616, + "grad_norm": 7.502687931060791, + "learning_rate": 3.0480769230769232e-06, + "loss": 0.1213, + "step": 34181 + }, + { + "epoch": 93.9065934065934, + "grad_norm": 12.582526206970215, + "learning_rate": 3.0467032967032966e-06, + "loss": 0.1149, + "step": 34182 + }, + { + "epoch": 93.90934065934066, + "grad_norm": 7.671681880950928, + "learning_rate": 3.0453296703296705e-06, + "loss": 0.0782, + "step": 34183 + }, + { + "epoch": 93.91208791208791, + "grad_norm": 6.572871208190918, + "learning_rate": 3.0439560439560443e-06, + "loss": 0.147, + "step": 34184 + }, + { + "epoch": 93.91483516483517, + "grad_norm": 15.740899085998535, + "learning_rate": 3.0425824175824177e-06, + "loss": 0.1975, + "step": 34185 + }, + { + "epoch": 93.91758241758242, + "grad_norm": 15.60435676574707, + "learning_rate": 3.0412087912087915e-06, + "loss": 0.2725, + "step": 34186 + }, + { + "epoch": 93.92032967032966, + "grad_norm": 8.554686546325684, + "learning_rate": 3.039835164835165e-06, + "loss": 0.1196, + "step": 34187 + }, + { + "epoch": 93.92307692307692, + "grad_norm": 8.617207527160645, + "learning_rate": 3.0384615384615388e-06, + "loss": 0.1084, + "step": 34188 + }, + { + "epoch": 93.92582417582418, + "grad_norm": 7.767120361328125, + "learning_rate": 3.037087912087912e-06, + "loss": 0.0992, + "step": 34189 + }, + { + "epoch": 93.92857142857143, + "grad_norm": 10.186954498291016, + "learning_rate": 3.035714285714286e-06, + "loss": 0.1434, + "step": 34190 + }, + { + "epoch": 93.93131868131869, + "grad_norm": 9.451701164245605, + "learning_rate": 3.0343406593406594e-06, + "loss": 0.1512, + "step": 34191 + }, + { + "epoch": 93.93406593406593, + "grad_norm": 9.146875381469727, + "learning_rate": 3.0329670329670332e-06, + "loss": 0.0896, + "step": 34192 + }, + { + "epoch": 93.93681318681318, + "grad_norm": 3.137655258178711, + "learning_rate": 3.0315934065934066e-06, + "loss": 0.0241, + "step": 34193 + }, + { + "epoch": 93.93956043956044, + "grad_norm": 5.780648708343506, + "learning_rate": 3.03021978021978e-06, + "loss": 0.1695, + "step": 34194 + }, + { + "epoch": 93.9423076923077, + "grad_norm": 11.480375289916992, + "learning_rate": 3.028846153846154e-06, + "loss": 0.1472, + "step": 34195 + }, + { + "epoch": 93.94505494505495, + "grad_norm": 7.252110958099365, + "learning_rate": 3.0274725274725277e-06, + "loss": 0.0673, + "step": 34196 + }, + { + "epoch": 93.9478021978022, + "grad_norm": 3.1581764221191406, + "learning_rate": 3.026098901098901e-06, + "loss": 0.0495, + "step": 34197 + }, + { + "epoch": 93.95054945054945, + "grad_norm": 6.560963153839111, + "learning_rate": 3.024725274725275e-06, + "loss": 0.0464, + "step": 34198 + }, + { + "epoch": 93.9532967032967, + "grad_norm": 7.8264546394348145, + "learning_rate": 3.0233516483516483e-06, + "loss": 0.1006, + "step": 34199 + }, + { + "epoch": 93.95604395604396, + "grad_norm": 15.738551139831543, + "learning_rate": 3.021978021978022e-06, + "loss": 0.5074, + "step": 34200 + }, + { + "epoch": 93.95879120879121, + "grad_norm": 13.401800155639648, + "learning_rate": 3.020604395604396e-06, + "loss": 0.1971, + "step": 34201 + }, + { + "epoch": 93.96153846153847, + "grad_norm": 7.739703178405762, + "learning_rate": 3.0192307692307694e-06, + "loss": 0.2493, + "step": 34202 + }, + { + "epoch": 93.96428571428571, + "grad_norm": 9.671610832214355, + "learning_rate": 3.0178571428571432e-06, + "loss": 0.0877, + "step": 34203 + }, + { + "epoch": 93.96703296703296, + "grad_norm": 7.1518635749816895, + "learning_rate": 3.0164835164835167e-06, + "loss": 0.0696, + "step": 34204 + }, + { + "epoch": 93.96978021978022, + "grad_norm": 12.727483749389648, + "learning_rate": 3.0151098901098905e-06, + "loss": 0.1933, + "step": 34205 + }, + { + "epoch": 93.97252747252747, + "grad_norm": 1.9544914960861206, + "learning_rate": 3.013736263736264e-06, + "loss": 0.0274, + "step": 34206 + }, + { + "epoch": 93.97527472527473, + "grad_norm": 3.381398916244507, + "learning_rate": 3.0123626373626373e-06, + "loss": 0.0527, + "step": 34207 + }, + { + "epoch": 93.97802197802197, + "grad_norm": 3.345287561416626, + "learning_rate": 3.010989010989011e-06, + "loss": 0.0462, + "step": 34208 + }, + { + "epoch": 93.98076923076923, + "grad_norm": 16.633953094482422, + "learning_rate": 3.0096153846153845e-06, + "loss": 0.2878, + "step": 34209 + }, + { + "epoch": 93.98351648351648, + "grad_norm": 14.440014839172363, + "learning_rate": 3.0082417582417584e-06, + "loss": 0.2097, + "step": 34210 + }, + { + "epoch": 93.98626373626374, + "grad_norm": 8.292436599731445, + "learning_rate": 3.0068681318681318e-06, + "loss": 0.109, + "step": 34211 + }, + { + "epoch": 93.98901098901099, + "grad_norm": 5.999154567718506, + "learning_rate": 3.0054945054945056e-06, + "loss": 0.0961, + "step": 34212 + }, + { + "epoch": 93.99175824175825, + "grad_norm": 17.62164878845215, + "learning_rate": 3.0041208791208794e-06, + "loss": 0.4264, + "step": 34213 + }, + { + "epoch": 93.99450549450549, + "grad_norm": 3.018610715866089, + "learning_rate": 3.002747252747253e-06, + "loss": 0.0289, + "step": 34214 + }, + { + "epoch": 93.99725274725274, + "grad_norm": 14.931524276733398, + "learning_rate": 3.0013736263736267e-06, + "loss": 0.2125, + "step": 34215 + }, + { + "epoch": 94.0, + "grad_norm": 13.688676834106445, + "learning_rate": 3e-06, + "loss": 0.0859, + "step": 34216 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.6528925619834711, + "eval_f1": 0.6715006192086717, + "eval_f1_DuraRiadoRio_64x64": 0.75, + "eval_f1_Mole_64x64": 0.542713567839196, + "eval_f1_Quebrado_64x64": 0.8292682926829268, + "eval_f1_RiadoRio_64x64": 0.5238095238095238, + "eval_f1_RioFechado_64x64": 0.7117117117117117, + "eval_loss": 1.7387961149215698, + "eval_precision": 0.8227972027972028, + "eval_precision_DuraRiadoRio_64x64": 0.9375, + "eval_precision_Mole_64x64": 0.9818181818181818, + "eval_precision_Quebrado_64x64": 0.8321678321678322, + "eval_precision_RiadoRio_64x64": 0.375, + "eval_precision_RioFechado_64x64": 0.9875, + "eval_recall": 0.6502295939378964, + "eval_recall_DuraRiadoRio_64x64": 0.625, + "eval_recall_Mole_64x64": 0.375, + "eval_recall_Quebrado_64x64": 0.8263888888888888, + "eval_recall_RiadoRio_64x64": 0.868421052631579, + "eval_recall_RioFechado_64x64": 0.5563380281690141, + "eval_runtime": 1.7411, + "eval_samples_per_second": 416.989, + "eval_steps_per_second": 26.421, + "step": 34216 + }, + { + "epoch": 94.00274725274726, + "grad_norm": 20.819326400756836, + "learning_rate": 2.998626373626374e-06, + "loss": 0.3732, + "step": 34217 + }, + { + "epoch": 94.00549450549451, + "grad_norm": 18.552947998046875, + "learning_rate": 2.9972527472527473e-06, + "loss": 0.2621, + "step": 34218 + }, + { + "epoch": 94.00824175824175, + "grad_norm": 6.959201335906982, + "learning_rate": 2.995879120879121e-06, + "loss": 0.1143, + "step": 34219 + }, + { + "epoch": 94.01098901098901, + "grad_norm": 16.301063537597656, + "learning_rate": 2.994505494505495e-06, + "loss": 0.3792, + "step": 34220 + }, + { + "epoch": 94.01373626373626, + "grad_norm": 10.764328002929688, + "learning_rate": 2.9931318681318684e-06, + "loss": 0.1634, + "step": 34221 + }, + { + "epoch": 94.01648351648352, + "grad_norm": 6.982840538024902, + "learning_rate": 2.9917582417582418e-06, + "loss": 0.0663, + "step": 34222 + }, + { + "epoch": 94.01923076923077, + "grad_norm": 12.839695930480957, + "learning_rate": 2.990384615384615e-06, + "loss": 0.245, + "step": 34223 + }, + { + "epoch": 94.02197802197803, + "grad_norm": 24.366943359375, + "learning_rate": 2.989010989010989e-06, + "loss": 0.8331, + "step": 34224 + }, + { + "epoch": 94.02472527472527, + "grad_norm": 2.9291932582855225, + "learning_rate": 2.987637362637363e-06, + "loss": 0.0309, + "step": 34225 + }, + { + "epoch": 94.02747252747253, + "grad_norm": 17.287551879882812, + "learning_rate": 2.9862637362637363e-06, + "loss": 0.2529, + "step": 34226 + }, + { + "epoch": 94.03021978021978, + "grad_norm": 11.269757270812988, + "learning_rate": 2.98489010989011e-06, + "loss": 0.3853, + "step": 34227 + }, + { + "epoch": 94.03296703296704, + "grad_norm": 20.071453094482422, + "learning_rate": 2.9835164835164835e-06, + "loss": 0.4892, + "step": 34228 + }, + { + "epoch": 94.03571428571429, + "grad_norm": 15.671064376831055, + "learning_rate": 2.9821428571428573e-06, + "loss": 0.524, + "step": 34229 + }, + { + "epoch": 94.03846153846153, + "grad_norm": 6.7226738929748535, + "learning_rate": 2.980769230769231e-06, + "loss": 0.1371, + "step": 34230 + }, + { + "epoch": 94.04120879120879, + "grad_norm": 13.268900871276855, + "learning_rate": 2.9793956043956046e-06, + "loss": 0.2347, + "step": 34231 + }, + { + "epoch": 94.04395604395604, + "grad_norm": 10.441678047180176, + "learning_rate": 2.9780219780219784e-06, + "loss": 0.2044, + "step": 34232 + }, + { + "epoch": 94.0467032967033, + "grad_norm": 30.491113662719727, + "learning_rate": 2.976648351648352e-06, + "loss": 0.513, + "step": 34233 + }, + { + "epoch": 94.04945054945055, + "grad_norm": 5.0388569831848145, + "learning_rate": 2.9752747252747256e-06, + "loss": 0.0731, + "step": 34234 + }, + { + "epoch": 94.0521978021978, + "grad_norm": 12.0374174118042, + "learning_rate": 2.973901098901099e-06, + "loss": 0.2535, + "step": 34235 + }, + { + "epoch": 94.05494505494505, + "grad_norm": 1.6123541593551636, + "learning_rate": 2.9725274725274724e-06, + "loss": 0.0149, + "step": 34236 + }, + { + "epoch": 94.0576923076923, + "grad_norm": 4.0617451667785645, + "learning_rate": 2.9711538461538463e-06, + "loss": 0.0803, + "step": 34237 + }, + { + "epoch": 94.06043956043956, + "grad_norm": 22.949581146240234, + "learning_rate": 2.9697802197802197e-06, + "loss": 0.5037, + "step": 34238 + }, + { + "epoch": 94.06318681318682, + "grad_norm": 8.302781105041504, + "learning_rate": 2.9684065934065935e-06, + "loss": 0.2147, + "step": 34239 + }, + { + "epoch": 94.06593406593407, + "grad_norm": 13.514394760131836, + "learning_rate": 2.967032967032967e-06, + "loss": 0.1949, + "step": 34240 + }, + { + "epoch": 94.06868131868131, + "grad_norm": 5.7907280921936035, + "learning_rate": 2.9656593406593408e-06, + "loss": 0.1164, + "step": 34241 + }, + { + "epoch": 94.07142857142857, + "grad_norm": 6.888185977935791, + "learning_rate": 2.9642857142857146e-06, + "loss": 0.0823, + "step": 34242 + }, + { + "epoch": 94.07417582417582, + "grad_norm": 11.812897682189941, + "learning_rate": 2.962912087912088e-06, + "loss": 0.3465, + "step": 34243 + }, + { + "epoch": 94.07692307692308, + "grad_norm": 2.435835838317871, + "learning_rate": 2.961538461538462e-06, + "loss": 0.0641, + "step": 34244 + }, + { + "epoch": 94.07967032967034, + "grad_norm": 7.330104827880859, + "learning_rate": 2.9601648351648352e-06, + "loss": 0.1362, + "step": 34245 + }, + { + "epoch": 94.08241758241758, + "grad_norm": 8.591336250305176, + "learning_rate": 2.958791208791209e-06, + "loss": 0.0927, + "step": 34246 + }, + { + "epoch": 94.08516483516483, + "grad_norm": 11.195691108703613, + "learning_rate": 2.9574175824175825e-06, + "loss": 0.1542, + "step": 34247 + }, + { + "epoch": 94.08791208791209, + "grad_norm": 16.316194534301758, + "learning_rate": 2.9560439560439563e-06, + "loss": 0.2195, + "step": 34248 + }, + { + "epoch": 94.09065934065934, + "grad_norm": 23.22013282775879, + "learning_rate": 2.95467032967033e-06, + "loss": 0.47, + "step": 34249 + }, + { + "epoch": 94.0934065934066, + "grad_norm": 12.609522819519043, + "learning_rate": 2.9532967032967035e-06, + "loss": 0.1545, + "step": 34250 + }, + { + "epoch": 94.09615384615384, + "grad_norm": 23.652141571044922, + "learning_rate": 2.951923076923077e-06, + "loss": 0.5245, + "step": 34251 + }, + { + "epoch": 94.0989010989011, + "grad_norm": 6.373142242431641, + "learning_rate": 2.9505494505494503e-06, + "loss": 0.0686, + "step": 34252 + }, + { + "epoch": 94.10164835164835, + "grad_norm": 4.061751365661621, + "learning_rate": 2.949175824175824e-06, + "loss": 0.0431, + "step": 34253 + }, + { + "epoch": 94.1043956043956, + "grad_norm": 20.225143432617188, + "learning_rate": 2.947802197802198e-06, + "loss": 0.49, + "step": 34254 + }, + { + "epoch": 94.10714285714286, + "grad_norm": 0.7197092771530151, + "learning_rate": 2.9464285714285714e-06, + "loss": 0.0077, + "step": 34255 + }, + { + "epoch": 94.10989010989012, + "grad_norm": 6.426702976226807, + "learning_rate": 2.9450549450549452e-06, + "loss": 0.1606, + "step": 34256 + }, + { + "epoch": 94.11263736263736, + "grad_norm": 3.608361005783081, + "learning_rate": 2.9436813186813186e-06, + "loss": 0.0436, + "step": 34257 + }, + { + "epoch": 94.11538461538461, + "grad_norm": 10.948073387145996, + "learning_rate": 2.9423076923076925e-06, + "loss": 0.1597, + "step": 34258 + }, + { + "epoch": 94.11813186813187, + "grad_norm": 18.07007598876953, + "learning_rate": 2.9409340659340663e-06, + "loss": 0.234, + "step": 34259 + }, + { + "epoch": 94.12087912087912, + "grad_norm": 32.568233489990234, + "learning_rate": 2.9395604395604397e-06, + "loss": 0.7909, + "step": 34260 + }, + { + "epoch": 94.12362637362638, + "grad_norm": 18.50083351135254, + "learning_rate": 2.9381868131868135e-06, + "loss": 0.5234, + "step": 34261 + }, + { + "epoch": 94.12637362637362, + "grad_norm": 8.2356595993042, + "learning_rate": 2.936813186813187e-06, + "loss": 0.1131, + "step": 34262 + }, + { + "epoch": 94.12912087912088, + "grad_norm": 19.99119758605957, + "learning_rate": 2.9354395604395608e-06, + "loss": 0.3994, + "step": 34263 + }, + { + "epoch": 94.13186813186813, + "grad_norm": 11.73039436340332, + "learning_rate": 2.934065934065934e-06, + "loss": 0.1788, + "step": 34264 + }, + { + "epoch": 94.13461538461539, + "grad_norm": 11.55433177947998, + "learning_rate": 2.932692307692308e-06, + "loss": 0.2013, + "step": 34265 + }, + { + "epoch": 94.13736263736264, + "grad_norm": 11.981405258178711, + "learning_rate": 2.9313186813186814e-06, + "loss": 0.1065, + "step": 34266 + }, + { + "epoch": 94.14010989010988, + "grad_norm": 24.099205017089844, + "learning_rate": 2.929945054945055e-06, + "loss": 0.5111, + "step": 34267 + }, + { + "epoch": 94.14285714285714, + "grad_norm": 4.642506122589111, + "learning_rate": 2.9285714285714287e-06, + "loss": 0.0601, + "step": 34268 + }, + { + "epoch": 94.1456043956044, + "grad_norm": 5.193229675292969, + "learning_rate": 2.927197802197802e-06, + "loss": 0.0601, + "step": 34269 + }, + { + "epoch": 94.14835164835165, + "grad_norm": 12.489354133605957, + "learning_rate": 2.925824175824176e-06, + "loss": 0.1614, + "step": 34270 + }, + { + "epoch": 94.1510989010989, + "grad_norm": 14.575986862182617, + "learning_rate": 2.9244505494505497e-06, + "loss": 0.3121, + "step": 34271 + }, + { + "epoch": 94.15384615384616, + "grad_norm": 18.739097595214844, + "learning_rate": 2.923076923076923e-06, + "loss": 0.2341, + "step": 34272 + }, + { + "epoch": 94.1565934065934, + "grad_norm": 8.995349884033203, + "learning_rate": 2.921703296703297e-06, + "loss": 0.1402, + "step": 34273 + }, + { + "epoch": 94.15934065934066, + "grad_norm": 5.699477672576904, + "learning_rate": 2.9203296703296704e-06, + "loss": 0.1, + "step": 34274 + }, + { + "epoch": 94.16208791208791, + "grad_norm": 24.725738525390625, + "learning_rate": 2.918956043956044e-06, + "loss": 0.5987, + "step": 34275 + }, + { + "epoch": 94.16483516483517, + "grad_norm": 9.612393379211426, + "learning_rate": 2.9175824175824176e-06, + "loss": 0.0945, + "step": 34276 + }, + { + "epoch": 94.16758241758242, + "grad_norm": 6.6388726234436035, + "learning_rate": 2.9162087912087914e-06, + "loss": 0.0933, + "step": 34277 + }, + { + "epoch": 94.17032967032966, + "grad_norm": 18.129709243774414, + "learning_rate": 2.9148351648351653e-06, + "loss": 0.5945, + "step": 34278 + }, + { + "epoch": 94.17307692307692, + "grad_norm": 3.733464002609253, + "learning_rate": 2.9134615384615387e-06, + "loss": 0.0375, + "step": 34279 + }, + { + "epoch": 94.17582417582418, + "grad_norm": 8.651130676269531, + "learning_rate": 2.912087912087912e-06, + "loss": 0.1827, + "step": 34280 + }, + { + "epoch": 94.17857142857143, + "grad_norm": 14.68792724609375, + "learning_rate": 2.910714285714286e-06, + "loss": 0.4952, + "step": 34281 + }, + { + "epoch": 94.18131868131869, + "grad_norm": 15.782428741455078, + "learning_rate": 2.9093406593406593e-06, + "loss": 0.2617, + "step": 34282 + }, + { + "epoch": 94.18406593406593, + "grad_norm": 8.498263359069824, + "learning_rate": 2.907967032967033e-06, + "loss": 0.117, + "step": 34283 + }, + { + "epoch": 94.18681318681318, + "grad_norm": 10.95975112915039, + "learning_rate": 2.9065934065934066e-06, + "loss": 0.2311, + "step": 34284 + }, + { + "epoch": 94.18956043956044, + "grad_norm": 18.206743240356445, + "learning_rate": 2.9052197802197804e-06, + "loss": 0.2828, + "step": 34285 + }, + { + "epoch": 94.1923076923077, + "grad_norm": 11.701417922973633, + "learning_rate": 2.903846153846154e-06, + "loss": 0.1553, + "step": 34286 + }, + { + "epoch": 94.19505494505495, + "grad_norm": 2.0934340953826904, + "learning_rate": 2.9024725274725276e-06, + "loss": 0.0222, + "step": 34287 + }, + { + "epoch": 94.1978021978022, + "grad_norm": 10.369315147399902, + "learning_rate": 2.9010989010989015e-06, + "loss": 0.1826, + "step": 34288 + }, + { + "epoch": 94.20054945054945, + "grad_norm": 11.278313636779785, + "learning_rate": 2.899725274725275e-06, + "loss": 0.0847, + "step": 34289 + }, + { + "epoch": 94.2032967032967, + "grad_norm": 6.699732303619385, + "learning_rate": 2.8983516483516487e-06, + "loss": 0.1018, + "step": 34290 + }, + { + "epoch": 94.20604395604396, + "grad_norm": 2.2422561645507812, + "learning_rate": 2.896978021978022e-06, + "loss": 0.0345, + "step": 34291 + }, + { + "epoch": 94.20879120879121, + "grad_norm": 3.9629104137420654, + "learning_rate": 2.895604395604396e-06, + "loss": 0.0626, + "step": 34292 + }, + { + "epoch": 94.21153846153847, + "grad_norm": 12.603867530822754, + "learning_rate": 2.8942307692307693e-06, + "loss": 0.3148, + "step": 34293 + }, + { + "epoch": 94.21428571428571, + "grad_norm": 17.968002319335938, + "learning_rate": 2.892857142857143e-06, + "loss": 0.1874, + "step": 34294 + }, + { + "epoch": 94.21703296703296, + "grad_norm": 7.552913188934326, + "learning_rate": 2.8914835164835166e-06, + "loss": 0.1916, + "step": 34295 + }, + { + "epoch": 94.21978021978022, + "grad_norm": 11.907549858093262, + "learning_rate": 2.89010989010989e-06, + "loss": 0.1624, + "step": 34296 + }, + { + "epoch": 94.22252747252747, + "grad_norm": 3.94004225730896, + "learning_rate": 2.888736263736264e-06, + "loss": 0.0883, + "step": 34297 + }, + { + "epoch": 94.22527472527473, + "grad_norm": 13.59617805480957, + "learning_rate": 2.8873626373626372e-06, + "loss": 0.2752, + "step": 34298 + }, + { + "epoch": 94.22802197802197, + "grad_norm": 10.052452087402344, + "learning_rate": 2.885989010989011e-06, + "loss": 0.1494, + "step": 34299 + }, + { + "epoch": 94.23076923076923, + "grad_norm": 20.933679580688477, + "learning_rate": 2.884615384615385e-06, + "loss": 0.6987, + "step": 34300 + }, + { + "epoch": 94.23351648351648, + "grad_norm": 11.271974563598633, + "learning_rate": 2.8832417582417583e-06, + "loss": 0.1441, + "step": 34301 + }, + { + "epoch": 94.23626373626374, + "grad_norm": 15.48542594909668, + "learning_rate": 2.881868131868132e-06, + "loss": 0.2962, + "step": 34302 + }, + { + "epoch": 94.23901098901099, + "grad_norm": 5.649853229522705, + "learning_rate": 2.8804945054945055e-06, + "loss": 0.0701, + "step": 34303 + }, + { + "epoch": 94.24175824175825, + "grad_norm": 4.341416835784912, + "learning_rate": 2.8791208791208794e-06, + "loss": 0.0342, + "step": 34304 + }, + { + "epoch": 94.24450549450549, + "grad_norm": 18.35984992980957, + "learning_rate": 2.8777472527472528e-06, + "loss": 0.2687, + "step": 34305 + }, + { + "epoch": 94.24725274725274, + "grad_norm": 13.985955238342285, + "learning_rate": 2.8763736263736266e-06, + "loss": 0.2341, + "step": 34306 + }, + { + "epoch": 94.25, + "grad_norm": 13.145545959472656, + "learning_rate": 2.8750000000000004e-06, + "loss": 0.2459, + "step": 34307 + }, + { + "epoch": 94.25274725274726, + "grad_norm": 11.827306747436523, + "learning_rate": 2.873626373626374e-06, + "loss": 0.1261, + "step": 34308 + }, + { + "epoch": 94.25549450549451, + "grad_norm": 16.4581356048584, + "learning_rate": 2.8722527472527477e-06, + "loss": 0.5725, + "step": 34309 + }, + { + "epoch": 94.25824175824175, + "grad_norm": 2.425879716873169, + "learning_rate": 2.870879120879121e-06, + "loss": 0.0223, + "step": 34310 + }, + { + "epoch": 94.26098901098901, + "grad_norm": 2.6993448734283447, + "learning_rate": 2.8695054945054945e-06, + "loss": 0.0312, + "step": 34311 + }, + { + "epoch": 94.26373626373626, + "grad_norm": 8.885306358337402, + "learning_rate": 2.8681318681318683e-06, + "loss": 0.2581, + "step": 34312 + }, + { + "epoch": 94.26648351648352, + "grad_norm": 13.150257110595703, + "learning_rate": 2.8667582417582417e-06, + "loss": 0.1213, + "step": 34313 + }, + { + "epoch": 94.26923076923077, + "grad_norm": 8.380393028259277, + "learning_rate": 2.8653846153846155e-06, + "loss": 0.1507, + "step": 34314 + }, + { + "epoch": 94.27197802197803, + "grad_norm": 19.66679573059082, + "learning_rate": 2.864010989010989e-06, + "loss": 0.5208, + "step": 34315 + }, + { + "epoch": 94.27472527472527, + "grad_norm": 5.472107410430908, + "learning_rate": 2.8626373626373628e-06, + "loss": 0.1037, + "step": 34316 + }, + { + "epoch": 94.27747252747253, + "grad_norm": 11.791616439819336, + "learning_rate": 2.861263736263736e-06, + "loss": 0.1423, + "step": 34317 + }, + { + "epoch": 94.28021978021978, + "grad_norm": 17.860816955566406, + "learning_rate": 2.85989010989011e-06, + "loss": 0.2323, + "step": 34318 + }, + { + "epoch": 94.28296703296704, + "grad_norm": 1.8581185340881348, + "learning_rate": 2.858516483516484e-06, + "loss": 0.0173, + "step": 34319 + }, + { + "epoch": 94.28571428571429, + "grad_norm": 8.545958518981934, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.1163, + "step": 34320 + }, + { + "epoch": 94.28846153846153, + "grad_norm": 8.184865951538086, + "learning_rate": 2.855769230769231e-06, + "loss": 0.1069, + "step": 34321 + }, + { + "epoch": 94.29120879120879, + "grad_norm": 11.478516578674316, + "learning_rate": 2.8543956043956045e-06, + "loss": 0.1048, + "step": 34322 + }, + { + "epoch": 94.29395604395604, + "grad_norm": 12.946598052978516, + "learning_rate": 2.8530219780219783e-06, + "loss": 0.1412, + "step": 34323 + }, + { + "epoch": 94.2967032967033, + "grad_norm": 18.727859497070312, + "learning_rate": 2.8516483516483517e-06, + "loss": 0.3366, + "step": 34324 + }, + { + "epoch": 94.29945054945055, + "grad_norm": 3.7996647357940674, + "learning_rate": 2.850274725274725e-06, + "loss": 0.0588, + "step": 34325 + }, + { + "epoch": 94.3021978021978, + "grad_norm": 0.7728369235992432, + "learning_rate": 2.848901098901099e-06, + "loss": 0.0076, + "step": 34326 + }, + { + "epoch": 94.30494505494505, + "grad_norm": 2.0998446941375732, + "learning_rate": 2.8475274725274724e-06, + "loss": 0.0328, + "step": 34327 + }, + { + "epoch": 94.3076923076923, + "grad_norm": 11.900880813598633, + "learning_rate": 2.846153846153846e-06, + "loss": 0.2635, + "step": 34328 + }, + { + "epoch": 94.31043956043956, + "grad_norm": 3.076565742492676, + "learning_rate": 2.84478021978022e-06, + "loss": 0.0363, + "step": 34329 + }, + { + "epoch": 94.31318681318682, + "grad_norm": 11.863539695739746, + "learning_rate": 2.8434065934065934e-06, + "loss": 0.2032, + "step": 34330 + }, + { + "epoch": 94.31593406593407, + "grad_norm": 11.023002624511719, + "learning_rate": 2.8420329670329673e-06, + "loss": 0.1037, + "step": 34331 + }, + { + "epoch": 94.31868131868131, + "grad_norm": 15.770744323730469, + "learning_rate": 2.8406593406593407e-06, + "loss": 0.2756, + "step": 34332 + }, + { + "epoch": 94.32142857142857, + "grad_norm": 21.111026763916016, + "learning_rate": 2.8392857142857145e-06, + "loss": 0.3107, + "step": 34333 + }, + { + "epoch": 94.32417582417582, + "grad_norm": 5.612756729125977, + "learning_rate": 2.837912087912088e-06, + "loss": 0.0772, + "step": 34334 + }, + { + "epoch": 94.32692307692308, + "grad_norm": 12.371315956115723, + "learning_rate": 2.8365384615384617e-06, + "loss": 0.2857, + "step": 34335 + }, + { + "epoch": 94.32967032967034, + "grad_norm": 9.045243263244629, + "learning_rate": 2.8351648351648356e-06, + "loss": 0.0872, + "step": 34336 + }, + { + "epoch": 94.33241758241758, + "grad_norm": 18.40250015258789, + "learning_rate": 2.833791208791209e-06, + "loss": 0.3674, + "step": 34337 + }, + { + "epoch": 94.33516483516483, + "grad_norm": 5.86257791519165, + "learning_rate": 2.832417582417583e-06, + "loss": 0.089, + "step": 34338 + }, + { + "epoch": 94.33791208791209, + "grad_norm": 1.738336443901062, + "learning_rate": 2.8310439560439562e-06, + "loss": 0.0174, + "step": 34339 + }, + { + "epoch": 94.34065934065934, + "grad_norm": 8.579707145690918, + "learning_rate": 2.8296703296703296e-06, + "loss": 0.1521, + "step": 34340 + }, + { + "epoch": 94.3434065934066, + "grad_norm": 7.121431350708008, + "learning_rate": 2.8282967032967035e-06, + "loss": 0.0843, + "step": 34341 + }, + { + "epoch": 94.34615384615384, + "grad_norm": 7.629800796508789, + "learning_rate": 2.826923076923077e-06, + "loss": 0.1595, + "step": 34342 + }, + { + "epoch": 94.3489010989011, + "grad_norm": 11.968170166015625, + "learning_rate": 2.8255494505494507e-06, + "loss": 0.1772, + "step": 34343 + }, + { + "epoch": 94.35164835164835, + "grad_norm": 3.953761339187622, + "learning_rate": 2.824175824175824e-06, + "loss": 0.0537, + "step": 34344 + }, + { + "epoch": 94.3543956043956, + "grad_norm": 8.459793090820312, + "learning_rate": 2.822802197802198e-06, + "loss": 0.0987, + "step": 34345 + }, + { + "epoch": 94.35714285714286, + "grad_norm": 16.063613891601562, + "learning_rate": 2.8214285714285713e-06, + "loss": 0.2283, + "step": 34346 + }, + { + "epoch": 94.35989010989012, + "grad_norm": 13.834860801696777, + "learning_rate": 2.820054945054945e-06, + "loss": 0.1848, + "step": 34347 + }, + { + "epoch": 94.36263736263736, + "grad_norm": 10.339462280273438, + "learning_rate": 2.818681318681319e-06, + "loss": 0.1124, + "step": 34348 + }, + { + "epoch": 94.36538461538461, + "grad_norm": 10.763503074645996, + "learning_rate": 2.8173076923076924e-06, + "loss": 0.1925, + "step": 34349 + }, + { + "epoch": 94.36813186813187, + "grad_norm": 4.340409278869629, + "learning_rate": 2.8159340659340662e-06, + "loss": 0.0325, + "step": 34350 + }, + { + "epoch": 94.37087912087912, + "grad_norm": 3.551666498184204, + "learning_rate": 2.8145604395604396e-06, + "loss": 0.0299, + "step": 34351 + }, + { + "epoch": 94.37362637362638, + "grad_norm": 4.423816204071045, + "learning_rate": 2.8131868131868135e-06, + "loss": 0.0543, + "step": 34352 + }, + { + "epoch": 94.37637362637362, + "grad_norm": 11.127679824829102, + "learning_rate": 2.811813186813187e-06, + "loss": 0.1303, + "step": 34353 + }, + { + "epoch": 94.37912087912088, + "grad_norm": 12.62485408782959, + "learning_rate": 2.8104395604395607e-06, + "loss": 0.1897, + "step": 34354 + }, + { + "epoch": 94.38186813186813, + "grad_norm": 22.7596378326416, + "learning_rate": 2.809065934065934e-06, + "loss": 0.3252, + "step": 34355 + }, + { + "epoch": 94.38461538461539, + "grad_norm": 25.462162017822266, + "learning_rate": 2.8076923076923075e-06, + "loss": 0.4731, + "step": 34356 + }, + { + "epoch": 94.38736263736264, + "grad_norm": 10.965869903564453, + "learning_rate": 2.8063186813186814e-06, + "loss": 0.1648, + "step": 34357 + }, + { + "epoch": 94.39010989010988, + "grad_norm": 12.042008399963379, + "learning_rate": 2.804945054945055e-06, + "loss": 0.1752, + "step": 34358 + }, + { + "epoch": 94.39285714285714, + "grad_norm": 10.509224891662598, + "learning_rate": 2.8035714285714286e-06, + "loss": 0.1375, + "step": 34359 + }, + { + "epoch": 94.3956043956044, + "grad_norm": 15.450013160705566, + "learning_rate": 2.8021978021978024e-06, + "loss": 0.2838, + "step": 34360 + }, + { + "epoch": 94.39835164835165, + "grad_norm": 18.29914665222168, + "learning_rate": 2.800824175824176e-06, + "loss": 0.2537, + "step": 34361 + }, + { + "epoch": 94.4010989010989, + "grad_norm": 10.744154930114746, + "learning_rate": 2.7994505494505497e-06, + "loss": 0.144, + "step": 34362 + }, + { + "epoch": 94.40384615384616, + "grad_norm": 17.3110294342041, + "learning_rate": 2.798076923076923e-06, + "loss": 0.3382, + "step": 34363 + }, + { + "epoch": 94.4065934065934, + "grad_norm": 16.745525360107422, + "learning_rate": 2.796703296703297e-06, + "loss": 0.261, + "step": 34364 + }, + { + "epoch": 94.40934065934066, + "grad_norm": 20.63475227355957, + "learning_rate": 2.7953296703296707e-06, + "loss": 0.5816, + "step": 34365 + }, + { + "epoch": 94.41208791208791, + "grad_norm": 6.807863235473633, + "learning_rate": 2.793956043956044e-06, + "loss": 0.0578, + "step": 34366 + }, + { + "epoch": 94.41483516483517, + "grad_norm": 18.030805587768555, + "learning_rate": 2.792582417582418e-06, + "loss": 0.383, + "step": 34367 + }, + { + "epoch": 94.41758241758242, + "grad_norm": 13.829936981201172, + "learning_rate": 2.7912087912087914e-06, + "loss": 0.2435, + "step": 34368 + }, + { + "epoch": 94.42032967032966, + "grad_norm": 2.018446922302246, + "learning_rate": 2.7898351648351648e-06, + "loss": 0.0308, + "step": 34369 + }, + { + "epoch": 94.42307692307692, + "grad_norm": 19.712854385375977, + "learning_rate": 2.7884615384615386e-06, + "loss": 0.3402, + "step": 34370 + }, + { + "epoch": 94.42582417582418, + "grad_norm": 15.35212230682373, + "learning_rate": 2.787087912087912e-06, + "loss": 0.1817, + "step": 34371 + }, + { + "epoch": 94.42857142857143, + "grad_norm": 1.015308141708374, + "learning_rate": 2.785714285714286e-06, + "loss": 0.0101, + "step": 34372 + }, + { + "epoch": 94.43131868131869, + "grad_norm": 9.278854370117188, + "learning_rate": 2.7843406593406593e-06, + "loss": 0.1971, + "step": 34373 + }, + { + "epoch": 94.43406593406593, + "grad_norm": 11.344803810119629, + "learning_rate": 2.782967032967033e-06, + "loss": 0.1998, + "step": 34374 + }, + { + "epoch": 94.43681318681318, + "grad_norm": 12.826360702514648, + "learning_rate": 2.7815934065934065e-06, + "loss": 0.2009, + "step": 34375 + }, + { + "epoch": 94.43956043956044, + "grad_norm": 2.1350765228271484, + "learning_rate": 2.7802197802197803e-06, + "loss": 0.0264, + "step": 34376 + }, + { + "epoch": 94.4423076923077, + "grad_norm": 21.551414489746094, + "learning_rate": 2.778846153846154e-06, + "loss": 0.3938, + "step": 34377 + }, + { + "epoch": 94.44505494505495, + "grad_norm": 10.000853538513184, + "learning_rate": 2.7774725274725276e-06, + "loss": 0.1261, + "step": 34378 + }, + { + "epoch": 94.4478021978022, + "grad_norm": 3.3255531787872314, + "learning_rate": 2.7760989010989014e-06, + "loss": 0.0553, + "step": 34379 + }, + { + "epoch": 94.45054945054945, + "grad_norm": 10.487348556518555, + "learning_rate": 2.774725274725275e-06, + "loss": 0.1404, + "step": 34380 + }, + { + "epoch": 94.4532967032967, + "grad_norm": 10.589315414428711, + "learning_rate": 2.7733516483516486e-06, + "loss": 0.2257, + "step": 34381 + }, + { + "epoch": 94.45604395604396, + "grad_norm": 0.5263291597366333, + "learning_rate": 2.7719780219780225e-06, + "loss": 0.0067, + "step": 34382 + }, + { + "epoch": 94.45879120879121, + "grad_norm": 10.458473205566406, + "learning_rate": 2.770604395604396e-06, + "loss": 0.1567, + "step": 34383 + }, + { + "epoch": 94.46153846153847, + "grad_norm": 12.567994117736816, + "learning_rate": 2.7692307692307693e-06, + "loss": 0.5677, + "step": 34384 + }, + { + "epoch": 94.46428571428571, + "grad_norm": 21.900054931640625, + "learning_rate": 2.7678571428571427e-06, + "loss": 0.4748, + "step": 34385 + }, + { + "epoch": 94.46703296703296, + "grad_norm": 9.320055961608887, + "learning_rate": 2.7664835164835165e-06, + "loss": 0.1268, + "step": 34386 + }, + { + "epoch": 94.46978021978022, + "grad_norm": 18.421987533569336, + "learning_rate": 2.7651098901098903e-06, + "loss": 0.4671, + "step": 34387 + }, + { + "epoch": 94.47252747252747, + "grad_norm": 14.139436721801758, + "learning_rate": 2.7637362637362637e-06, + "loss": 0.1349, + "step": 34388 + }, + { + "epoch": 94.47527472527473, + "grad_norm": 19.570619583129883, + "learning_rate": 2.7623626373626376e-06, + "loss": 0.3403, + "step": 34389 + }, + { + "epoch": 94.47802197802197, + "grad_norm": 22.02610206604004, + "learning_rate": 2.760989010989011e-06, + "loss": 0.6297, + "step": 34390 + }, + { + "epoch": 94.48076923076923, + "grad_norm": 15.270689010620117, + "learning_rate": 2.759615384615385e-06, + "loss": 0.172, + "step": 34391 + }, + { + "epoch": 94.48351648351648, + "grad_norm": 13.979816436767578, + "learning_rate": 2.7582417582417582e-06, + "loss": 0.1986, + "step": 34392 + }, + { + "epoch": 94.48626373626374, + "grad_norm": 14.650300979614258, + "learning_rate": 2.756868131868132e-06, + "loss": 0.1827, + "step": 34393 + }, + { + "epoch": 94.48901098901099, + "grad_norm": 15.532096862792969, + "learning_rate": 2.755494505494506e-06, + "loss": 0.2015, + "step": 34394 + }, + { + "epoch": 94.49175824175825, + "grad_norm": 9.340054512023926, + "learning_rate": 2.7541208791208793e-06, + "loss": 0.1802, + "step": 34395 + }, + { + "epoch": 94.49450549450549, + "grad_norm": 5.199159622192383, + "learning_rate": 2.752747252747253e-06, + "loss": 0.0833, + "step": 34396 + }, + { + "epoch": 94.49725274725274, + "grad_norm": 8.383417129516602, + "learning_rate": 2.7513736263736265e-06, + "loss": 0.149, + "step": 34397 + }, + { + "epoch": 94.5, + "grad_norm": 6.6442437171936035, + "learning_rate": 2.7500000000000004e-06, + "loss": 0.1037, + "step": 34398 + }, + { + "epoch": 94.50274725274726, + "grad_norm": 8.594508171081543, + "learning_rate": 2.7486263736263738e-06, + "loss": 0.0868, + "step": 34399 + }, + { + "epoch": 94.50549450549451, + "grad_norm": 9.334135055541992, + "learning_rate": 2.747252747252747e-06, + "loss": 0.0788, + "step": 34400 + }, + { + "epoch": 94.50824175824175, + "grad_norm": 11.010344505310059, + "learning_rate": 2.745879120879121e-06, + "loss": 0.1564, + "step": 34401 + }, + { + "epoch": 94.51098901098901, + "grad_norm": 3.9182143211364746, + "learning_rate": 2.7445054945054944e-06, + "loss": 0.0853, + "step": 34402 + }, + { + "epoch": 94.51373626373626, + "grad_norm": 11.58601188659668, + "learning_rate": 2.7431318681318682e-06, + "loss": 0.2469, + "step": 34403 + }, + { + "epoch": 94.51648351648352, + "grad_norm": 10.967241287231445, + "learning_rate": 2.7417582417582416e-06, + "loss": 0.1708, + "step": 34404 + }, + { + "epoch": 94.51923076923077, + "grad_norm": 9.410277366638184, + "learning_rate": 2.7403846153846155e-06, + "loss": 0.2664, + "step": 34405 + }, + { + "epoch": 94.52197802197803, + "grad_norm": 18.523103713989258, + "learning_rate": 2.7390109890109893e-06, + "loss": 0.3596, + "step": 34406 + }, + { + "epoch": 94.52472527472527, + "grad_norm": 4.678365707397461, + "learning_rate": 2.7376373626373627e-06, + "loss": 0.0672, + "step": 34407 + }, + { + "epoch": 94.52747252747253, + "grad_norm": 6.715355396270752, + "learning_rate": 2.7362637362637365e-06, + "loss": 0.0831, + "step": 34408 + }, + { + "epoch": 94.53021978021978, + "grad_norm": 15.372743606567383, + "learning_rate": 2.73489010989011e-06, + "loss": 0.4275, + "step": 34409 + }, + { + "epoch": 94.53296703296704, + "grad_norm": 14.201993942260742, + "learning_rate": 2.7335164835164838e-06, + "loss": 0.2154, + "step": 34410 + }, + { + "epoch": 94.53571428571429, + "grad_norm": 2.742581367492676, + "learning_rate": 2.7321428571428576e-06, + "loss": 0.0173, + "step": 34411 + }, + { + "epoch": 94.53846153846153, + "grad_norm": 12.938309669494629, + "learning_rate": 2.730769230769231e-06, + "loss": 0.2419, + "step": 34412 + }, + { + "epoch": 94.54120879120879, + "grad_norm": 14.826370239257812, + "learning_rate": 2.7293956043956044e-06, + "loss": 0.3477, + "step": 34413 + }, + { + "epoch": 94.54395604395604, + "grad_norm": 7.676454067230225, + "learning_rate": 2.728021978021978e-06, + "loss": 0.0954, + "step": 34414 + }, + { + "epoch": 94.5467032967033, + "grad_norm": 11.98776912689209, + "learning_rate": 2.7266483516483517e-06, + "loss": 0.2579, + "step": 34415 + }, + { + "epoch": 94.54945054945055, + "grad_norm": 14.990873336791992, + "learning_rate": 2.7252747252747255e-06, + "loss": 0.3253, + "step": 34416 + }, + { + "epoch": 94.5521978021978, + "grad_norm": 5.197876453399658, + "learning_rate": 2.723901098901099e-06, + "loss": 0.0609, + "step": 34417 + }, + { + "epoch": 94.55494505494505, + "grad_norm": 12.486214637756348, + "learning_rate": 2.7225274725274727e-06, + "loss": 0.2692, + "step": 34418 + }, + { + "epoch": 94.5576923076923, + "grad_norm": 15.376872062683105, + "learning_rate": 2.721153846153846e-06, + "loss": 0.349, + "step": 34419 + }, + { + "epoch": 94.56043956043956, + "grad_norm": 16.368131637573242, + "learning_rate": 2.71978021978022e-06, + "loss": 0.4512, + "step": 34420 + }, + { + "epoch": 94.56318681318682, + "grad_norm": 14.408885955810547, + "learning_rate": 2.7184065934065934e-06, + "loss": 0.196, + "step": 34421 + }, + { + "epoch": 94.56593406593407, + "grad_norm": 7.503598690032959, + "learning_rate": 2.717032967032967e-06, + "loss": 0.0511, + "step": 34422 + }, + { + "epoch": 94.56868131868131, + "grad_norm": 9.037841796875, + "learning_rate": 2.715659340659341e-06, + "loss": 0.177, + "step": 34423 + }, + { + "epoch": 94.57142857142857, + "grad_norm": 14.13089370727539, + "learning_rate": 2.7142857142857144e-06, + "loss": 0.2153, + "step": 34424 + }, + { + "epoch": 94.57417582417582, + "grad_norm": 14.731012344360352, + "learning_rate": 2.7129120879120883e-06, + "loss": 0.3065, + "step": 34425 + }, + { + "epoch": 94.57692307692308, + "grad_norm": 14.393275260925293, + "learning_rate": 2.7115384615384617e-06, + "loss": 0.3884, + "step": 34426 + }, + { + "epoch": 94.57967032967034, + "grad_norm": 10.281900405883789, + "learning_rate": 2.7101648351648355e-06, + "loss": 0.2268, + "step": 34427 + }, + { + "epoch": 94.58241758241758, + "grad_norm": 13.90970230102539, + "learning_rate": 2.708791208791209e-06, + "loss": 0.208, + "step": 34428 + }, + { + "epoch": 94.58516483516483, + "grad_norm": 20.558273315429688, + "learning_rate": 2.7074175824175823e-06, + "loss": 0.3555, + "step": 34429 + }, + { + "epoch": 94.58791208791209, + "grad_norm": 3.7944514751434326, + "learning_rate": 2.706043956043956e-06, + "loss": 0.0512, + "step": 34430 + }, + { + "epoch": 94.59065934065934, + "grad_norm": 9.739734649658203, + "learning_rate": 2.7046703296703296e-06, + "loss": 0.1234, + "step": 34431 + }, + { + "epoch": 94.5934065934066, + "grad_norm": 14.791040420532227, + "learning_rate": 2.7032967032967034e-06, + "loss": 0.1825, + "step": 34432 + }, + { + "epoch": 94.59615384615384, + "grad_norm": 4.9412007331848145, + "learning_rate": 2.701923076923077e-06, + "loss": 0.0451, + "step": 34433 + }, + { + "epoch": 94.5989010989011, + "grad_norm": 8.92892837524414, + "learning_rate": 2.7005494505494506e-06, + "loss": 0.1723, + "step": 34434 + }, + { + "epoch": 94.60164835164835, + "grad_norm": 9.072806358337402, + "learning_rate": 2.6991758241758245e-06, + "loss": 0.1374, + "step": 34435 + }, + { + "epoch": 94.6043956043956, + "grad_norm": 8.203057289123535, + "learning_rate": 2.697802197802198e-06, + "loss": 0.1585, + "step": 34436 + }, + { + "epoch": 94.60714285714286, + "grad_norm": 2.2971246242523193, + "learning_rate": 2.6964285714285717e-06, + "loss": 0.0238, + "step": 34437 + }, + { + "epoch": 94.60989010989012, + "grad_norm": 22.234739303588867, + "learning_rate": 2.695054945054945e-06, + "loss": 0.2524, + "step": 34438 + }, + { + "epoch": 94.61263736263736, + "grad_norm": 27.31049346923828, + "learning_rate": 2.693681318681319e-06, + "loss": 0.8104, + "step": 34439 + }, + { + "epoch": 94.61538461538461, + "grad_norm": 13.477924346923828, + "learning_rate": 2.6923076923076928e-06, + "loss": 0.1596, + "step": 34440 + }, + { + "epoch": 94.61813186813187, + "grad_norm": 14.668845176696777, + "learning_rate": 2.690934065934066e-06, + "loss": 0.2209, + "step": 34441 + }, + { + "epoch": 94.62087912087912, + "grad_norm": 6.307083606719971, + "learning_rate": 2.6895604395604396e-06, + "loss": 0.0776, + "step": 34442 + }, + { + "epoch": 94.62362637362638, + "grad_norm": 8.91076374053955, + "learning_rate": 2.6881868131868134e-06, + "loss": 0.1683, + "step": 34443 + }, + { + "epoch": 94.62637362637362, + "grad_norm": 21.6261043548584, + "learning_rate": 2.686813186813187e-06, + "loss": 0.2754, + "step": 34444 + }, + { + "epoch": 94.62912087912088, + "grad_norm": 19.498254776000977, + "learning_rate": 2.6854395604395602e-06, + "loss": 0.2946, + "step": 34445 + }, + { + "epoch": 94.63186813186813, + "grad_norm": 5.743504524230957, + "learning_rate": 2.684065934065934e-06, + "loss": 0.0464, + "step": 34446 + }, + { + "epoch": 94.63461538461539, + "grad_norm": 42.3255615234375, + "learning_rate": 2.682692307692308e-06, + "loss": 0.718, + "step": 34447 + }, + { + "epoch": 94.63736263736264, + "grad_norm": 15.639254570007324, + "learning_rate": 2.6813186813186813e-06, + "loss": 0.3442, + "step": 34448 + }, + { + "epoch": 94.64010989010988, + "grad_norm": 12.549710273742676, + "learning_rate": 2.679945054945055e-06, + "loss": 0.2358, + "step": 34449 + }, + { + "epoch": 94.64285714285714, + "grad_norm": 9.484692573547363, + "learning_rate": 2.6785714285714285e-06, + "loss": 0.0848, + "step": 34450 + }, + { + "epoch": 94.6456043956044, + "grad_norm": 9.99130630493164, + "learning_rate": 2.6771978021978023e-06, + "loss": 0.1404, + "step": 34451 + }, + { + "epoch": 94.64835164835165, + "grad_norm": 1.6043378114700317, + "learning_rate": 2.675824175824176e-06, + "loss": 0.0231, + "step": 34452 + }, + { + "epoch": 94.6510989010989, + "grad_norm": 21.896533966064453, + "learning_rate": 2.6744505494505496e-06, + "loss": 0.5436, + "step": 34453 + }, + { + "epoch": 94.65384615384616, + "grad_norm": 5.894654750823975, + "learning_rate": 2.6730769230769234e-06, + "loss": 0.0547, + "step": 34454 + }, + { + "epoch": 94.6565934065934, + "grad_norm": 20.14461326599121, + "learning_rate": 2.671703296703297e-06, + "loss": 0.287, + "step": 34455 + }, + { + "epoch": 94.65934065934066, + "grad_norm": 5.7891974449157715, + "learning_rate": 2.6703296703296707e-06, + "loss": 0.0697, + "step": 34456 + }, + { + "epoch": 94.66208791208791, + "grad_norm": 15.63628101348877, + "learning_rate": 2.668956043956044e-06, + "loss": 0.4111, + "step": 34457 + }, + { + "epoch": 94.66483516483517, + "grad_norm": 5.550860404968262, + "learning_rate": 2.6675824175824175e-06, + "loss": 0.0953, + "step": 34458 + }, + { + "epoch": 94.66758241758242, + "grad_norm": 10.518214225769043, + "learning_rate": 2.6662087912087913e-06, + "loss": 0.2268, + "step": 34459 + }, + { + "epoch": 94.67032967032966, + "grad_norm": 19.07260513305664, + "learning_rate": 2.6648351648351647e-06, + "loss": 0.3237, + "step": 34460 + }, + { + "epoch": 94.67307692307692, + "grad_norm": 18.284603118896484, + "learning_rate": 2.6634615384615385e-06, + "loss": 0.3384, + "step": 34461 + }, + { + "epoch": 94.67582417582418, + "grad_norm": 15.062812805175781, + "learning_rate": 2.662087912087912e-06, + "loss": 0.3515, + "step": 34462 + }, + { + "epoch": 94.67857142857143, + "grad_norm": 10.089606285095215, + "learning_rate": 2.6607142857142858e-06, + "loss": 0.1784, + "step": 34463 + }, + { + "epoch": 94.68131868131869, + "grad_norm": 12.284428596496582, + "learning_rate": 2.6593406593406596e-06, + "loss": 0.1644, + "step": 34464 + }, + { + "epoch": 94.68406593406593, + "grad_norm": 13.178044319152832, + "learning_rate": 2.657967032967033e-06, + "loss": 0.2167, + "step": 34465 + }, + { + "epoch": 94.68681318681318, + "grad_norm": 8.619281768798828, + "learning_rate": 2.656593406593407e-06, + "loss": 0.1581, + "step": 34466 + }, + { + "epoch": 94.68956043956044, + "grad_norm": 13.54969596862793, + "learning_rate": 2.6552197802197802e-06, + "loss": 0.2512, + "step": 34467 + }, + { + "epoch": 94.6923076923077, + "grad_norm": 21.91468048095703, + "learning_rate": 2.653846153846154e-06, + "loss": 0.4198, + "step": 34468 + }, + { + "epoch": 94.69505494505495, + "grad_norm": 15.187707901000977, + "learning_rate": 2.652472527472528e-06, + "loss": 0.1992, + "step": 34469 + }, + { + "epoch": 94.6978021978022, + "grad_norm": 7.9277663230896, + "learning_rate": 2.6510989010989013e-06, + "loss": 0.1143, + "step": 34470 + }, + { + "epoch": 94.70054945054945, + "grad_norm": 1.2256015539169312, + "learning_rate": 2.649725274725275e-06, + "loss": 0.0136, + "step": 34471 + }, + { + "epoch": 94.7032967032967, + "grad_norm": 13.057095527648926, + "learning_rate": 2.6483516483516486e-06, + "loss": 0.2018, + "step": 34472 + }, + { + "epoch": 94.70604395604396, + "grad_norm": 14.648640632629395, + "learning_rate": 2.646978021978022e-06, + "loss": 0.2792, + "step": 34473 + }, + { + "epoch": 94.70879120879121, + "grad_norm": 4.86968994140625, + "learning_rate": 2.6456043956043954e-06, + "loss": 0.0473, + "step": 34474 + }, + { + "epoch": 94.71153846153847, + "grad_norm": 5.451611518859863, + "learning_rate": 2.644230769230769e-06, + "loss": 0.083, + "step": 34475 + }, + { + "epoch": 94.71428571428571, + "grad_norm": 8.278289794921875, + "learning_rate": 2.642857142857143e-06, + "loss": 0.1142, + "step": 34476 + }, + { + "epoch": 94.71703296703296, + "grad_norm": 11.443942070007324, + "learning_rate": 2.6414835164835164e-06, + "loss": 0.1981, + "step": 34477 + }, + { + "epoch": 94.71978021978022, + "grad_norm": 11.245654106140137, + "learning_rate": 2.6401098901098903e-06, + "loss": 0.2085, + "step": 34478 + }, + { + "epoch": 94.72252747252747, + "grad_norm": 6.986364841461182, + "learning_rate": 2.6387362637362637e-06, + "loss": 0.0581, + "step": 34479 + }, + { + "epoch": 94.72527472527473, + "grad_norm": 2.485038995742798, + "learning_rate": 2.6373626373626375e-06, + "loss": 0.0271, + "step": 34480 + }, + { + "epoch": 94.72802197802197, + "grad_norm": 13.948646545410156, + "learning_rate": 2.6359890109890113e-06, + "loss": 0.2175, + "step": 34481 + }, + { + "epoch": 94.73076923076923, + "grad_norm": 14.374518394470215, + "learning_rate": 2.6346153846153847e-06, + "loss": 0.2341, + "step": 34482 + }, + { + "epoch": 94.73351648351648, + "grad_norm": 10.652059555053711, + "learning_rate": 2.6332417582417586e-06, + "loss": 0.0775, + "step": 34483 + }, + { + "epoch": 94.73626373626374, + "grad_norm": 10.697515487670898, + "learning_rate": 2.631868131868132e-06, + "loss": 0.2467, + "step": 34484 + }, + { + "epoch": 94.73901098901099, + "grad_norm": 5.243539333343506, + "learning_rate": 2.630494505494506e-06, + "loss": 0.0465, + "step": 34485 + }, + { + "epoch": 94.74175824175825, + "grad_norm": 17.717857360839844, + "learning_rate": 2.629120879120879e-06, + "loss": 0.422, + "step": 34486 + }, + { + "epoch": 94.74450549450549, + "grad_norm": 10.757843971252441, + "learning_rate": 2.627747252747253e-06, + "loss": 0.0716, + "step": 34487 + }, + { + "epoch": 94.74725274725274, + "grad_norm": 26.321651458740234, + "learning_rate": 2.6263736263736264e-06, + "loss": 0.5824, + "step": 34488 + }, + { + "epoch": 94.75, + "grad_norm": 13.460076332092285, + "learning_rate": 2.625e-06, + "loss": 0.2622, + "step": 34489 + }, + { + "epoch": 94.75274725274726, + "grad_norm": 10.641026496887207, + "learning_rate": 2.6236263736263737e-06, + "loss": 0.3737, + "step": 34490 + }, + { + "epoch": 94.75549450549451, + "grad_norm": 9.049924850463867, + "learning_rate": 2.622252747252747e-06, + "loss": 0.1317, + "step": 34491 + }, + { + "epoch": 94.75824175824175, + "grad_norm": 1.309668779373169, + "learning_rate": 2.620879120879121e-06, + "loss": 0.0108, + "step": 34492 + }, + { + "epoch": 94.76098901098901, + "grad_norm": 17.927762985229492, + "learning_rate": 2.6195054945054948e-06, + "loss": 0.4445, + "step": 34493 + }, + { + "epoch": 94.76373626373626, + "grad_norm": 16.16843032836914, + "learning_rate": 2.618131868131868e-06, + "loss": 0.3068, + "step": 34494 + }, + { + "epoch": 94.76648351648352, + "grad_norm": 21.722198486328125, + "learning_rate": 2.616758241758242e-06, + "loss": 0.4013, + "step": 34495 + }, + { + "epoch": 94.76923076923077, + "grad_norm": 20.769872665405273, + "learning_rate": 2.6153846153846154e-06, + "loss": 0.547, + "step": 34496 + }, + { + "epoch": 94.77197802197803, + "grad_norm": 16.642330169677734, + "learning_rate": 2.6140109890109892e-06, + "loss": 0.2466, + "step": 34497 + }, + { + "epoch": 94.77472527472527, + "grad_norm": 12.01979923248291, + "learning_rate": 2.612637362637363e-06, + "loss": 0.2684, + "step": 34498 + }, + { + "epoch": 94.77747252747253, + "grad_norm": 7.821021556854248, + "learning_rate": 2.6112637362637365e-06, + "loss": 0.093, + "step": 34499 + }, + { + "epoch": 94.78021978021978, + "grad_norm": 9.845542907714844, + "learning_rate": 2.6098901098901103e-06, + "loss": 0.1089, + "step": 34500 + }, + { + "epoch": 94.78296703296704, + "grad_norm": 10.726997375488281, + "learning_rate": 2.6085164835164837e-06, + "loss": 0.1577, + "step": 34501 + }, + { + "epoch": 94.78571428571429, + "grad_norm": 1.5530797243118286, + "learning_rate": 2.607142857142857e-06, + "loss": 0.0197, + "step": 34502 + }, + { + "epoch": 94.78846153846153, + "grad_norm": 14.173249244689941, + "learning_rate": 2.6057692307692305e-06, + "loss": 0.1821, + "step": 34503 + }, + { + "epoch": 94.79120879120879, + "grad_norm": 17.498340606689453, + "learning_rate": 2.6043956043956043e-06, + "loss": 0.357, + "step": 34504 + }, + { + "epoch": 94.79395604395604, + "grad_norm": 6.868772506713867, + "learning_rate": 2.603021978021978e-06, + "loss": 0.0419, + "step": 34505 + }, + { + "epoch": 94.7967032967033, + "grad_norm": 14.569931983947754, + "learning_rate": 2.6016483516483516e-06, + "loss": 0.3522, + "step": 34506 + }, + { + "epoch": 94.79945054945055, + "grad_norm": 7.127756595611572, + "learning_rate": 2.6002747252747254e-06, + "loss": 0.0903, + "step": 34507 + }, + { + "epoch": 94.8021978021978, + "grad_norm": 12.068087577819824, + "learning_rate": 2.598901098901099e-06, + "loss": 0.1559, + "step": 34508 + }, + { + "epoch": 94.80494505494505, + "grad_norm": 10.440079689025879, + "learning_rate": 2.5975274725274726e-06, + "loss": 0.147, + "step": 34509 + }, + { + "epoch": 94.8076923076923, + "grad_norm": 14.453628540039062, + "learning_rate": 2.5961538461538465e-06, + "loss": 0.3145, + "step": 34510 + }, + { + "epoch": 94.81043956043956, + "grad_norm": 13.04960823059082, + "learning_rate": 2.59478021978022e-06, + "loss": 0.19, + "step": 34511 + }, + { + "epoch": 94.81318681318682, + "grad_norm": 7.578349590301514, + "learning_rate": 2.5934065934065937e-06, + "loss": 0.0659, + "step": 34512 + }, + { + "epoch": 94.81593406593407, + "grad_norm": 7.674729347229004, + "learning_rate": 2.592032967032967e-06, + "loss": 0.109, + "step": 34513 + }, + { + "epoch": 94.81868131868131, + "grad_norm": 9.084528923034668, + "learning_rate": 2.590659340659341e-06, + "loss": 0.2341, + "step": 34514 + }, + { + "epoch": 94.82142857142857, + "grad_norm": 16.089487075805664, + "learning_rate": 2.5892857142857148e-06, + "loss": 0.2642, + "step": 34515 + }, + { + "epoch": 94.82417582417582, + "grad_norm": 8.403870582580566, + "learning_rate": 2.587912087912088e-06, + "loss": 0.1534, + "step": 34516 + }, + { + "epoch": 94.82692307692308, + "grad_norm": 10.576120376586914, + "learning_rate": 2.5865384615384616e-06, + "loss": 0.1177, + "step": 34517 + }, + { + "epoch": 94.82967032967034, + "grad_norm": 7.504264831542969, + "learning_rate": 2.585164835164835e-06, + "loss": 0.1293, + "step": 34518 + }, + { + "epoch": 94.83241758241758, + "grad_norm": 15.132843971252441, + "learning_rate": 2.583791208791209e-06, + "loss": 0.2005, + "step": 34519 + }, + { + "epoch": 94.83516483516483, + "grad_norm": 11.992897987365723, + "learning_rate": 2.5824175824175822e-06, + "loss": 0.2661, + "step": 34520 + }, + { + "epoch": 94.83791208791209, + "grad_norm": 12.082706451416016, + "learning_rate": 2.581043956043956e-06, + "loss": 0.1502, + "step": 34521 + }, + { + "epoch": 94.84065934065934, + "grad_norm": 14.515700340270996, + "learning_rate": 2.57967032967033e-06, + "loss": 0.1975, + "step": 34522 + }, + { + "epoch": 94.8434065934066, + "grad_norm": 7.127151012420654, + "learning_rate": 2.5782967032967033e-06, + "loss": 0.09, + "step": 34523 + }, + { + "epoch": 94.84615384615384, + "grad_norm": 10.962567329406738, + "learning_rate": 2.576923076923077e-06, + "loss": 0.1824, + "step": 34524 + }, + { + "epoch": 94.8489010989011, + "grad_norm": 12.069741249084473, + "learning_rate": 2.5755494505494505e-06, + "loss": 0.2448, + "step": 34525 + }, + { + "epoch": 94.85164835164835, + "grad_norm": 16.719846725463867, + "learning_rate": 2.5741758241758244e-06, + "loss": 0.4741, + "step": 34526 + }, + { + "epoch": 94.8543956043956, + "grad_norm": 19.086061477661133, + "learning_rate": 2.572802197802198e-06, + "loss": 0.2191, + "step": 34527 + }, + { + "epoch": 94.85714285714286, + "grad_norm": 23.012454986572266, + "learning_rate": 2.5714285714285716e-06, + "loss": 0.4181, + "step": 34528 + }, + { + "epoch": 94.85989010989012, + "grad_norm": 17.33097267150879, + "learning_rate": 2.5700549450549454e-06, + "loss": 0.1664, + "step": 34529 + }, + { + "epoch": 94.86263736263736, + "grad_norm": 2.223193645477295, + "learning_rate": 2.568681318681319e-06, + "loss": 0.0293, + "step": 34530 + }, + { + "epoch": 94.86538461538461, + "grad_norm": 10.693047523498535, + "learning_rate": 2.5673076923076923e-06, + "loss": 0.1713, + "step": 34531 + }, + { + "epoch": 94.86813186813187, + "grad_norm": 12.994413375854492, + "learning_rate": 2.565934065934066e-06, + "loss": 0.1635, + "step": 34532 + }, + { + "epoch": 94.87087912087912, + "grad_norm": 8.660268783569336, + "learning_rate": 2.5645604395604395e-06, + "loss": 0.1744, + "step": 34533 + }, + { + "epoch": 94.87362637362638, + "grad_norm": 4.7224225997924805, + "learning_rate": 2.5631868131868133e-06, + "loss": 0.053, + "step": 34534 + }, + { + "epoch": 94.87637362637362, + "grad_norm": 9.925812721252441, + "learning_rate": 2.5618131868131867e-06, + "loss": 0.138, + "step": 34535 + }, + { + "epoch": 94.87912087912088, + "grad_norm": 7.608709812164307, + "learning_rate": 2.5604395604395606e-06, + "loss": 0.1141, + "step": 34536 + }, + { + "epoch": 94.88186813186813, + "grad_norm": 5.1005072593688965, + "learning_rate": 2.559065934065934e-06, + "loss": 0.0723, + "step": 34537 + }, + { + "epoch": 94.88461538461539, + "grad_norm": 15.265100479125977, + "learning_rate": 2.557692307692308e-06, + "loss": 0.2362, + "step": 34538 + }, + { + "epoch": 94.88736263736264, + "grad_norm": 5.479675769805908, + "learning_rate": 2.5563186813186816e-06, + "loss": 0.0384, + "step": 34539 + }, + { + "epoch": 94.89010989010988, + "grad_norm": 30.828060150146484, + "learning_rate": 2.554945054945055e-06, + "loss": 0.7531, + "step": 34540 + }, + { + "epoch": 94.89285714285714, + "grad_norm": 12.83785629272461, + "learning_rate": 2.553571428571429e-06, + "loss": 0.3235, + "step": 34541 + }, + { + "epoch": 94.8956043956044, + "grad_norm": 16.88099479675293, + "learning_rate": 2.5521978021978023e-06, + "loss": 0.3879, + "step": 34542 + }, + { + "epoch": 94.89835164835165, + "grad_norm": 6.509127616882324, + "learning_rate": 2.550824175824176e-06, + "loss": 0.0884, + "step": 34543 + }, + { + "epoch": 94.9010989010989, + "grad_norm": 14.232526779174805, + "learning_rate": 2.54945054945055e-06, + "loss": 0.2546, + "step": 34544 + }, + { + "epoch": 94.90384615384616, + "grad_norm": 20.015954971313477, + "learning_rate": 2.5480769230769233e-06, + "loss": 0.3144, + "step": 34545 + }, + { + "epoch": 94.9065934065934, + "grad_norm": 4.6570210456848145, + "learning_rate": 2.5467032967032967e-06, + "loss": 0.0783, + "step": 34546 + }, + { + "epoch": 94.90934065934066, + "grad_norm": 3.7488155364990234, + "learning_rate": 2.54532967032967e-06, + "loss": 0.0598, + "step": 34547 + }, + { + "epoch": 94.91208791208791, + "grad_norm": 9.705658912658691, + "learning_rate": 2.543956043956044e-06, + "loss": 0.2127, + "step": 34548 + }, + { + "epoch": 94.91483516483517, + "grad_norm": 1.8647233247756958, + "learning_rate": 2.5425824175824174e-06, + "loss": 0.0261, + "step": 34549 + }, + { + "epoch": 94.91758241758242, + "grad_norm": 2.1443779468536377, + "learning_rate": 2.5412087912087912e-06, + "loss": 0.0263, + "step": 34550 + }, + { + "epoch": 94.92032967032966, + "grad_norm": 2.107011079788208, + "learning_rate": 2.539835164835165e-06, + "loss": 0.0201, + "step": 34551 + }, + { + "epoch": 94.92307692307692, + "grad_norm": 8.127513885498047, + "learning_rate": 2.5384615384615385e-06, + "loss": 0.1204, + "step": 34552 + }, + { + "epoch": 94.92582417582418, + "grad_norm": 18.619823455810547, + "learning_rate": 2.5370879120879123e-06, + "loss": 0.5796, + "step": 34553 + }, + { + "epoch": 94.92857142857143, + "grad_norm": 20.81243896484375, + "learning_rate": 2.5357142857142857e-06, + "loss": 0.6773, + "step": 34554 + }, + { + "epoch": 94.93131868131869, + "grad_norm": 8.318992614746094, + "learning_rate": 2.5343406593406595e-06, + "loss": 0.0651, + "step": 34555 + }, + { + "epoch": 94.93406593406593, + "grad_norm": 2.0453810691833496, + "learning_rate": 2.5329670329670334e-06, + "loss": 0.0226, + "step": 34556 + }, + { + "epoch": 94.93681318681318, + "grad_norm": 16.03838348388672, + "learning_rate": 2.5315934065934068e-06, + "loss": 0.3165, + "step": 34557 + }, + { + "epoch": 94.93956043956044, + "grad_norm": 11.982400894165039, + "learning_rate": 2.5302197802197806e-06, + "loss": 0.0982, + "step": 34558 + }, + { + "epoch": 94.9423076923077, + "grad_norm": 17.251516342163086, + "learning_rate": 2.528846153846154e-06, + "loss": 0.2482, + "step": 34559 + }, + { + "epoch": 94.94505494505495, + "grad_norm": 7.113117694854736, + "learning_rate": 2.527472527472528e-06, + "loss": 0.0874, + "step": 34560 + }, + { + "epoch": 94.9478021978022, + "grad_norm": 3.6708853244781494, + "learning_rate": 2.5260989010989012e-06, + "loss": 0.0267, + "step": 34561 + }, + { + "epoch": 94.95054945054945, + "grad_norm": 7.151129722595215, + "learning_rate": 2.5247252747252746e-06, + "loss": 0.1027, + "step": 34562 + }, + { + "epoch": 94.9532967032967, + "grad_norm": 4.895940780639648, + "learning_rate": 2.5233516483516485e-06, + "loss": 0.0624, + "step": 34563 + }, + { + "epoch": 94.95604395604396, + "grad_norm": 21.79584312438965, + "learning_rate": 2.521978021978022e-06, + "loss": 0.6858, + "step": 34564 + }, + { + "epoch": 94.95879120879121, + "grad_norm": 6.258606433868408, + "learning_rate": 2.5206043956043957e-06, + "loss": 0.0765, + "step": 34565 + }, + { + "epoch": 94.96153846153847, + "grad_norm": 7.17580509185791, + "learning_rate": 2.519230769230769e-06, + "loss": 0.096, + "step": 34566 + }, + { + "epoch": 94.96428571428571, + "grad_norm": 7.080141544342041, + "learning_rate": 2.517857142857143e-06, + "loss": 0.1881, + "step": 34567 + }, + { + "epoch": 94.96703296703296, + "grad_norm": 29.195194244384766, + "learning_rate": 2.5164835164835168e-06, + "loss": 0.6419, + "step": 34568 + }, + { + "epoch": 94.96978021978022, + "grad_norm": 13.649665832519531, + "learning_rate": 2.51510989010989e-06, + "loss": 0.161, + "step": 34569 + }, + { + "epoch": 94.97252747252747, + "grad_norm": 22.20528793334961, + "learning_rate": 2.513736263736264e-06, + "loss": 0.3803, + "step": 34570 + }, + { + "epoch": 94.97527472527473, + "grad_norm": 13.798640251159668, + "learning_rate": 2.5123626373626374e-06, + "loss": 0.2522, + "step": 34571 + }, + { + "epoch": 94.97802197802197, + "grad_norm": 16.825885772705078, + "learning_rate": 2.5109890109890113e-06, + "loss": 0.3239, + "step": 34572 + }, + { + "epoch": 94.98076923076923, + "grad_norm": 19.675121307373047, + "learning_rate": 2.5096153846153847e-06, + "loss": 0.4264, + "step": 34573 + }, + { + "epoch": 94.98351648351648, + "grad_norm": 4.274972915649414, + "learning_rate": 2.5082417582417585e-06, + "loss": 0.075, + "step": 34574 + }, + { + "epoch": 94.98626373626374, + "grad_norm": 6.074391841888428, + "learning_rate": 2.506868131868132e-06, + "loss": 0.0691, + "step": 34575 + }, + { + "epoch": 94.98901098901099, + "grad_norm": 8.927213668823242, + "learning_rate": 2.5054945054945057e-06, + "loss": 0.1328, + "step": 34576 + }, + { + "epoch": 94.99175824175825, + "grad_norm": 11.555932998657227, + "learning_rate": 2.504120879120879e-06, + "loss": 0.2408, + "step": 34577 + }, + { + "epoch": 94.99450549450549, + "grad_norm": 6.671682834625244, + "learning_rate": 2.5027472527472525e-06, + "loss": 0.1418, + "step": 34578 + }, + { + "epoch": 94.99725274725274, + "grad_norm": 21.790842056274414, + "learning_rate": 2.5013736263736264e-06, + "loss": 0.5749, + "step": 34579 + }, + { + "epoch": 95.0, + "grad_norm": 64.79170989990234, + "learning_rate": 2.5e-06, + "loss": 0.6228, + "step": 34580 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.6570247933884298, + "eval_f1": 0.6708682968567975, + "eval_f1_DuraRiadoRio_64x64": 0.7888446215139442, + "eval_f1_Mole_64x64": 0.8837209302325582, + "eval_f1_Quebrado_64x64": 0.6431718061674009, + "eval_f1_RiadoRio_64x64": 0.5492424242424242, + "eval_f1_RioFechado_64x64": 0.48936170212765956, + "eval_loss": 1.6017143726348877, + "eval_precision": 0.8380780030042618, + "eval_precision_DuraRiadoRio_64x64": 0.9252336448598131, + "eval_precision_Mole_64x64": 1.0, + "eval_precision_Quebrado_64x64": 0.8795180722891566, + "eval_precision_RiadoRio_64x64": 0.38563829787234044, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.652800428300799, + "eval_recall_DuraRiadoRio_64x64": 0.6875, + "eval_recall_Mole_64x64": 0.7916666666666666, + "eval_recall_Quebrado_64x64": 0.5069444444444444, + "eval_recall_RiadoRio_64x64": 0.9539473684210527, + "eval_recall_RioFechado_64x64": 0.323943661971831, + "eval_runtime": 1.8462, + "eval_samples_per_second": 393.243, + "eval_steps_per_second": 24.916, + "step": 34580 + }, + { + "epoch": 95.00274725274726, + "grad_norm": 3.114504814147949, + "learning_rate": 2.4986263736263736e-06, + "loss": 0.0496, + "step": 34581 + }, + { + "epoch": 95.00549450549451, + "grad_norm": 19.524337768554688, + "learning_rate": 2.4972527472527474e-06, + "loss": 0.3417, + "step": 34582 + }, + { + "epoch": 95.00824175824175, + "grad_norm": 25.136995315551758, + "learning_rate": 2.495879120879121e-06, + "loss": 0.3568, + "step": 34583 + }, + { + "epoch": 95.01098901098901, + "grad_norm": 3.324002981185913, + "learning_rate": 2.4945054945054947e-06, + "loss": 0.08, + "step": 34584 + }, + { + "epoch": 95.01373626373626, + "grad_norm": 26.89771842956543, + "learning_rate": 2.4931318681318685e-06, + "loss": 0.2361, + "step": 34585 + }, + { + "epoch": 95.01648351648352, + "grad_norm": 4.135047435760498, + "learning_rate": 2.491758241758242e-06, + "loss": 0.0311, + "step": 34586 + }, + { + "epoch": 95.01923076923077, + "grad_norm": 5.25806999206543, + "learning_rate": 2.4903846153846157e-06, + "loss": 0.062, + "step": 34587 + }, + { + "epoch": 95.02197802197803, + "grad_norm": 13.372461318969727, + "learning_rate": 2.489010989010989e-06, + "loss": 0.1842, + "step": 34588 + }, + { + "epoch": 95.02472527472527, + "grad_norm": 8.829931259155273, + "learning_rate": 2.487637362637363e-06, + "loss": 0.1827, + "step": 34589 + }, + { + "epoch": 95.02747252747253, + "grad_norm": 13.030951499938965, + "learning_rate": 2.4862637362637364e-06, + "loss": 0.2844, + "step": 34590 + }, + { + "epoch": 95.03021978021978, + "grad_norm": 10.863699913024902, + "learning_rate": 2.48489010989011e-06, + "loss": 0.1836, + "step": 34591 + }, + { + "epoch": 95.03296703296704, + "grad_norm": 9.627970695495605, + "learning_rate": 2.4835164835164836e-06, + "loss": 0.2381, + "step": 34592 + }, + { + "epoch": 95.03571428571429, + "grad_norm": 8.881664276123047, + "learning_rate": 2.482142857142857e-06, + "loss": 0.0934, + "step": 34593 + }, + { + "epoch": 95.03846153846153, + "grad_norm": 11.803573608398438, + "learning_rate": 2.480769230769231e-06, + "loss": 0.161, + "step": 34594 + }, + { + "epoch": 95.04120879120879, + "grad_norm": 15.179757118225098, + "learning_rate": 2.4793956043956043e-06, + "loss": 0.2684, + "step": 34595 + }, + { + "epoch": 95.04395604395604, + "grad_norm": 12.061724662780762, + "learning_rate": 2.478021978021978e-06, + "loss": 0.2171, + "step": 34596 + }, + { + "epoch": 95.0467032967033, + "grad_norm": 20.040489196777344, + "learning_rate": 2.476648351648352e-06, + "loss": 0.3343, + "step": 34597 + }, + { + "epoch": 95.04945054945055, + "grad_norm": 9.658561706542969, + "learning_rate": 2.4752747252747253e-06, + "loss": 0.1453, + "step": 34598 + }, + { + "epoch": 95.0521978021978, + "grad_norm": 13.482603073120117, + "learning_rate": 2.473901098901099e-06, + "loss": 0.2522, + "step": 34599 + }, + { + "epoch": 95.05494505494505, + "grad_norm": 9.412081718444824, + "learning_rate": 2.4725274725274726e-06, + "loss": 0.1222, + "step": 34600 + }, + { + "epoch": 95.0576923076923, + "grad_norm": 8.889372825622559, + "learning_rate": 2.4711538461538464e-06, + "loss": 0.1719, + "step": 34601 + }, + { + "epoch": 95.06043956043956, + "grad_norm": 21.500905990600586, + "learning_rate": 2.46978021978022e-06, + "loss": 0.441, + "step": 34602 + }, + { + "epoch": 95.06318681318682, + "grad_norm": 10.835671424865723, + "learning_rate": 2.4684065934065936e-06, + "loss": 0.1888, + "step": 34603 + }, + { + "epoch": 95.06593406593407, + "grad_norm": 5.01451301574707, + "learning_rate": 2.4670329670329675e-06, + "loss": 0.1143, + "step": 34604 + }, + { + "epoch": 95.06868131868131, + "grad_norm": 6.150638580322266, + "learning_rate": 2.465659340659341e-06, + "loss": 0.0967, + "step": 34605 + }, + { + "epoch": 95.07142857142857, + "grad_norm": 7.702830791473389, + "learning_rate": 2.4642857142857143e-06, + "loss": 0.0859, + "step": 34606 + }, + { + "epoch": 95.07417582417582, + "grad_norm": 14.157854080200195, + "learning_rate": 2.4629120879120877e-06, + "loss": 0.2197, + "step": 34607 + }, + { + "epoch": 95.07692307692308, + "grad_norm": 10.276741981506348, + "learning_rate": 2.4615384615384615e-06, + "loss": 0.1389, + "step": 34608 + }, + { + "epoch": 95.07967032967034, + "grad_norm": 8.944218635559082, + "learning_rate": 2.4601648351648354e-06, + "loss": 0.1859, + "step": 34609 + }, + { + "epoch": 95.08241758241758, + "grad_norm": 7.49994421005249, + "learning_rate": 2.4587912087912088e-06, + "loss": 0.093, + "step": 34610 + }, + { + "epoch": 95.08516483516483, + "grad_norm": 6.545137882232666, + "learning_rate": 2.4574175824175826e-06, + "loss": 0.1179, + "step": 34611 + }, + { + "epoch": 95.08791208791209, + "grad_norm": 2.157790422439575, + "learning_rate": 2.456043956043956e-06, + "loss": 0.0319, + "step": 34612 + }, + { + "epoch": 95.09065934065934, + "grad_norm": 14.738290786743164, + "learning_rate": 2.45467032967033e-06, + "loss": 0.2218, + "step": 34613 + }, + { + "epoch": 95.0934065934066, + "grad_norm": 5.647954940795898, + "learning_rate": 2.4532967032967037e-06, + "loss": 0.0746, + "step": 34614 + }, + { + "epoch": 95.09615384615384, + "grad_norm": 28.293466567993164, + "learning_rate": 2.451923076923077e-06, + "loss": 0.5785, + "step": 34615 + }, + { + "epoch": 95.0989010989011, + "grad_norm": 9.715702056884766, + "learning_rate": 2.450549450549451e-06, + "loss": 0.1223, + "step": 34616 + }, + { + "epoch": 95.10164835164835, + "grad_norm": 12.603979110717773, + "learning_rate": 2.4491758241758243e-06, + "loss": 0.1775, + "step": 34617 + }, + { + "epoch": 95.1043956043956, + "grad_norm": 15.809046745300293, + "learning_rate": 2.447802197802198e-06, + "loss": 0.2267, + "step": 34618 + }, + { + "epoch": 95.10714285714286, + "grad_norm": 2.195162296295166, + "learning_rate": 2.4464285714285715e-06, + "loss": 0.0347, + "step": 34619 + }, + { + "epoch": 95.10989010989012, + "grad_norm": 27.803688049316406, + "learning_rate": 2.445054945054945e-06, + "loss": 0.4756, + "step": 34620 + }, + { + "epoch": 95.11263736263736, + "grad_norm": 7.938302993774414, + "learning_rate": 2.4436813186813188e-06, + "loss": 0.1303, + "step": 34621 + }, + { + "epoch": 95.11538461538461, + "grad_norm": 6.227798938751221, + "learning_rate": 2.442307692307692e-06, + "loss": 0.0971, + "step": 34622 + }, + { + "epoch": 95.11813186813187, + "grad_norm": 13.180306434631348, + "learning_rate": 2.440934065934066e-06, + "loss": 0.2067, + "step": 34623 + }, + { + "epoch": 95.12087912087912, + "grad_norm": 23.782087326049805, + "learning_rate": 2.4395604395604394e-06, + "loss": 0.5263, + "step": 34624 + }, + { + "epoch": 95.12362637362638, + "grad_norm": 2.644235134124756, + "learning_rate": 2.4381868131868133e-06, + "loss": 0.0144, + "step": 34625 + }, + { + "epoch": 95.12637362637362, + "grad_norm": 13.75612735748291, + "learning_rate": 2.436813186813187e-06, + "loss": 0.1667, + "step": 34626 + }, + { + "epoch": 95.12912087912088, + "grad_norm": 10.250359535217285, + "learning_rate": 2.4354395604395605e-06, + "loss": 0.2154, + "step": 34627 + }, + { + "epoch": 95.13186813186813, + "grad_norm": 12.481351852416992, + "learning_rate": 2.4340659340659343e-06, + "loss": 0.3547, + "step": 34628 + }, + { + "epoch": 95.13461538461539, + "grad_norm": 18.920562744140625, + "learning_rate": 2.4326923076923077e-06, + "loss": 0.2127, + "step": 34629 + }, + { + "epoch": 95.13736263736264, + "grad_norm": 5.132762908935547, + "learning_rate": 2.4313186813186816e-06, + "loss": 0.0834, + "step": 34630 + }, + { + "epoch": 95.14010989010988, + "grad_norm": 16.303417205810547, + "learning_rate": 2.429945054945055e-06, + "loss": 0.2276, + "step": 34631 + }, + { + "epoch": 95.14285714285714, + "grad_norm": 13.760462760925293, + "learning_rate": 2.428571428571429e-06, + "loss": 0.1893, + "step": 34632 + }, + { + "epoch": 95.1456043956044, + "grad_norm": 21.31751251220703, + "learning_rate": 2.4271978021978026e-06, + "loss": 0.2314, + "step": 34633 + }, + { + "epoch": 95.14835164835165, + "grad_norm": 18.352659225463867, + "learning_rate": 2.425824175824176e-06, + "loss": 0.2653, + "step": 34634 + }, + { + "epoch": 95.1510989010989, + "grad_norm": 13.408977508544922, + "learning_rate": 2.4244505494505494e-06, + "loss": 0.144, + "step": 34635 + }, + { + "epoch": 95.15384615384616, + "grad_norm": 12.694528579711914, + "learning_rate": 2.423076923076923e-06, + "loss": 0.1882, + "step": 34636 + }, + { + "epoch": 95.1565934065934, + "grad_norm": 5.655028820037842, + "learning_rate": 2.4217032967032967e-06, + "loss": 0.0717, + "step": 34637 + }, + { + "epoch": 95.15934065934066, + "grad_norm": 10.2180757522583, + "learning_rate": 2.4203296703296705e-06, + "loss": 0.2586, + "step": 34638 + }, + { + "epoch": 95.16208791208791, + "grad_norm": 14.345746994018555, + "learning_rate": 2.418956043956044e-06, + "loss": 0.2054, + "step": 34639 + }, + { + "epoch": 95.16483516483517, + "grad_norm": 14.89650821685791, + "learning_rate": 2.4175824175824177e-06, + "loss": 0.1734, + "step": 34640 + }, + { + "epoch": 95.16758241758242, + "grad_norm": 9.641487121582031, + "learning_rate": 2.416208791208791e-06, + "loss": 0.1952, + "step": 34641 + }, + { + "epoch": 95.17032967032966, + "grad_norm": 1.4234097003936768, + "learning_rate": 2.414835164835165e-06, + "loss": 0.0162, + "step": 34642 + }, + { + "epoch": 95.17307692307692, + "grad_norm": 10.042984008789062, + "learning_rate": 2.413461538461539e-06, + "loss": 0.0616, + "step": 34643 + }, + { + "epoch": 95.17582417582418, + "grad_norm": 15.825162887573242, + "learning_rate": 2.4120879120879122e-06, + "loss": 0.2558, + "step": 34644 + }, + { + "epoch": 95.17857142857143, + "grad_norm": 18.6180477142334, + "learning_rate": 2.410714285714286e-06, + "loss": 0.3815, + "step": 34645 + }, + { + "epoch": 95.18131868131869, + "grad_norm": 12.714881896972656, + "learning_rate": 2.4093406593406595e-06, + "loss": 0.267, + "step": 34646 + }, + { + "epoch": 95.18406593406593, + "grad_norm": 4.006378650665283, + "learning_rate": 2.4079670329670333e-06, + "loss": 0.0944, + "step": 34647 + }, + { + "epoch": 95.18681318681318, + "grad_norm": 4.4865288734436035, + "learning_rate": 2.4065934065934067e-06, + "loss": 0.0706, + "step": 34648 + }, + { + "epoch": 95.18956043956044, + "grad_norm": 23.40318489074707, + "learning_rate": 2.4052197802197805e-06, + "loss": 0.306, + "step": 34649 + }, + { + "epoch": 95.1923076923077, + "grad_norm": 12.172121047973633, + "learning_rate": 2.403846153846154e-06, + "loss": 0.3187, + "step": 34650 + }, + { + "epoch": 95.19505494505495, + "grad_norm": 16.96407699584961, + "learning_rate": 2.4024725274725273e-06, + "loss": 0.475, + "step": 34651 + }, + { + "epoch": 95.1978021978022, + "grad_norm": 7.942366123199463, + "learning_rate": 2.401098901098901e-06, + "loss": 0.1308, + "step": 34652 + }, + { + "epoch": 95.20054945054945, + "grad_norm": 23.8763370513916, + "learning_rate": 2.3997252747252746e-06, + "loss": 0.5029, + "step": 34653 + }, + { + "epoch": 95.2032967032967, + "grad_norm": 9.543118476867676, + "learning_rate": 2.3983516483516484e-06, + "loss": 0.1618, + "step": 34654 + }, + { + "epoch": 95.20604395604396, + "grad_norm": 15.49433708190918, + "learning_rate": 2.3969780219780222e-06, + "loss": 0.4743, + "step": 34655 + }, + { + "epoch": 95.20879120879121, + "grad_norm": 3.6070656776428223, + "learning_rate": 2.3956043956043956e-06, + "loss": 0.0403, + "step": 34656 + }, + { + "epoch": 95.21153846153847, + "grad_norm": 10.256877899169922, + "learning_rate": 2.3942307692307695e-06, + "loss": 0.2156, + "step": 34657 + }, + { + "epoch": 95.21428571428571, + "grad_norm": 21.182985305786133, + "learning_rate": 2.392857142857143e-06, + "loss": 0.4647, + "step": 34658 + }, + { + "epoch": 95.21703296703296, + "grad_norm": 2.750896453857422, + "learning_rate": 2.3914835164835167e-06, + "loss": 0.0377, + "step": 34659 + }, + { + "epoch": 95.21978021978022, + "grad_norm": 18.10002326965332, + "learning_rate": 2.39010989010989e-06, + "loss": 0.2812, + "step": 34660 + }, + { + "epoch": 95.22252747252747, + "grad_norm": 10.746209144592285, + "learning_rate": 2.388736263736264e-06, + "loss": 0.0854, + "step": 34661 + }, + { + "epoch": 95.22527472527473, + "grad_norm": 15.379034996032715, + "learning_rate": 2.3873626373626378e-06, + "loss": 0.391, + "step": 34662 + }, + { + "epoch": 95.22802197802197, + "grad_norm": 13.455546379089355, + "learning_rate": 2.385989010989011e-06, + "loss": 0.3157, + "step": 34663 + }, + { + "epoch": 95.23076923076923, + "grad_norm": 14.757545471191406, + "learning_rate": 2.3846153846153846e-06, + "loss": 0.2632, + "step": 34664 + }, + { + "epoch": 95.23351648351648, + "grad_norm": 5.636791706085205, + "learning_rate": 2.3832417582417584e-06, + "loss": 0.12, + "step": 34665 + }, + { + "epoch": 95.23626373626374, + "grad_norm": 13.073802947998047, + "learning_rate": 2.381868131868132e-06, + "loss": 0.1477, + "step": 34666 + }, + { + "epoch": 95.23901098901099, + "grad_norm": 5.430861949920654, + "learning_rate": 2.3804945054945057e-06, + "loss": 0.0464, + "step": 34667 + }, + { + "epoch": 95.24175824175825, + "grad_norm": 24.05756950378418, + "learning_rate": 2.379120879120879e-06, + "loss": 0.7755, + "step": 34668 + }, + { + "epoch": 95.24450549450549, + "grad_norm": 6.2687835693359375, + "learning_rate": 2.377747252747253e-06, + "loss": 0.0953, + "step": 34669 + }, + { + "epoch": 95.24725274725274, + "grad_norm": 23.71895408630371, + "learning_rate": 2.3763736263736263e-06, + "loss": 0.4384, + "step": 34670 + }, + { + "epoch": 95.25, + "grad_norm": 19.291723251342773, + "learning_rate": 2.375e-06, + "loss": 0.3591, + "step": 34671 + }, + { + "epoch": 95.25274725274726, + "grad_norm": 12.984404563903809, + "learning_rate": 2.373626373626374e-06, + "loss": 0.3591, + "step": 34672 + }, + { + "epoch": 95.25549450549451, + "grad_norm": 20.391183853149414, + "learning_rate": 2.3722527472527474e-06, + "loss": 0.5389, + "step": 34673 + }, + { + "epoch": 95.25824175824175, + "grad_norm": 10.534941673278809, + "learning_rate": 2.370879120879121e-06, + "loss": 0.2662, + "step": 34674 + }, + { + "epoch": 95.26098901098901, + "grad_norm": 13.822104454040527, + "learning_rate": 2.3695054945054946e-06, + "loss": 0.1816, + "step": 34675 + }, + { + "epoch": 95.26373626373626, + "grad_norm": 17.381669998168945, + "learning_rate": 2.3681318681318684e-06, + "loss": 0.3312, + "step": 34676 + }, + { + "epoch": 95.26648351648352, + "grad_norm": 9.711411476135254, + "learning_rate": 2.366758241758242e-06, + "loss": 0.1861, + "step": 34677 + }, + { + "epoch": 95.26923076923077, + "grad_norm": 9.196488380432129, + "learning_rate": 2.3653846153846157e-06, + "loss": 0.2952, + "step": 34678 + }, + { + "epoch": 95.27197802197803, + "grad_norm": 13.758681297302246, + "learning_rate": 2.364010989010989e-06, + "loss": 0.4304, + "step": 34679 + }, + { + "epoch": 95.27472527472527, + "grad_norm": 9.168633460998535, + "learning_rate": 2.3626373626373625e-06, + "loss": 0.08, + "step": 34680 + }, + { + "epoch": 95.27747252747253, + "grad_norm": 15.247084617614746, + "learning_rate": 2.3612637362637363e-06, + "loss": 0.2108, + "step": 34681 + }, + { + "epoch": 95.28021978021978, + "grad_norm": 20.412860870361328, + "learning_rate": 2.3598901098901097e-06, + "loss": 0.3207, + "step": 34682 + }, + { + "epoch": 95.28296703296704, + "grad_norm": 13.475935935974121, + "learning_rate": 2.3585164835164836e-06, + "loss": 0.3475, + "step": 34683 + }, + { + "epoch": 95.28571428571429, + "grad_norm": 3.7676565647125244, + "learning_rate": 2.3571428571428574e-06, + "loss": 0.0526, + "step": 34684 + }, + { + "epoch": 95.28846153846153, + "grad_norm": 15.412328720092773, + "learning_rate": 2.355769230769231e-06, + "loss": 0.2482, + "step": 34685 + }, + { + "epoch": 95.29120879120879, + "grad_norm": 6.126090049743652, + "learning_rate": 2.3543956043956046e-06, + "loss": 0.0697, + "step": 34686 + }, + { + "epoch": 95.29395604395604, + "grad_norm": 12.075347900390625, + "learning_rate": 2.353021978021978e-06, + "loss": 0.2541, + "step": 34687 + }, + { + "epoch": 95.2967032967033, + "grad_norm": 0.8751955628395081, + "learning_rate": 2.351648351648352e-06, + "loss": 0.0091, + "step": 34688 + }, + { + "epoch": 95.29945054945055, + "grad_norm": 1.7356199026107788, + "learning_rate": 2.3502747252747253e-06, + "loss": 0.0212, + "step": 34689 + }, + { + "epoch": 95.3021978021978, + "grad_norm": 2.1131680011749268, + "learning_rate": 2.348901098901099e-06, + "loss": 0.0282, + "step": 34690 + }, + { + "epoch": 95.30494505494505, + "grad_norm": 7.788144111633301, + "learning_rate": 2.347527472527473e-06, + "loss": 0.0975, + "step": 34691 + }, + { + "epoch": 95.3076923076923, + "grad_norm": 7.24920129776001, + "learning_rate": 2.3461538461538463e-06, + "loss": 0.0701, + "step": 34692 + }, + { + "epoch": 95.31043956043956, + "grad_norm": 8.322183609008789, + "learning_rate": 2.34478021978022e-06, + "loss": 0.1173, + "step": 34693 + }, + { + "epoch": 95.31318681318682, + "grad_norm": 18.063430786132812, + "learning_rate": 2.3434065934065936e-06, + "loss": 0.3677, + "step": 34694 + }, + { + "epoch": 95.31593406593407, + "grad_norm": 5.021759033203125, + "learning_rate": 2.342032967032967e-06, + "loss": 0.0518, + "step": 34695 + }, + { + "epoch": 95.31868131868131, + "grad_norm": 15.92878246307373, + "learning_rate": 2.340659340659341e-06, + "loss": 0.3649, + "step": 34696 + }, + { + "epoch": 95.32142857142857, + "grad_norm": 14.658013343811035, + "learning_rate": 2.3392857142857142e-06, + "loss": 0.3113, + "step": 34697 + }, + { + "epoch": 95.32417582417582, + "grad_norm": 2.6994998455047607, + "learning_rate": 2.337912087912088e-06, + "loss": 0.0275, + "step": 34698 + }, + { + "epoch": 95.32692307692308, + "grad_norm": 15.748318672180176, + "learning_rate": 2.3365384615384615e-06, + "loss": 0.4435, + "step": 34699 + }, + { + "epoch": 95.32967032967034, + "grad_norm": 2.5524911880493164, + "learning_rate": 2.3351648351648353e-06, + "loss": 0.0283, + "step": 34700 + }, + { + "epoch": 95.33241758241758, + "grad_norm": 11.73157024383545, + "learning_rate": 2.3337912087912087e-06, + "loss": 0.2976, + "step": 34701 + }, + { + "epoch": 95.33516483516483, + "grad_norm": 8.506024360656738, + "learning_rate": 2.3324175824175825e-06, + "loss": 0.0885, + "step": 34702 + }, + { + "epoch": 95.33791208791209, + "grad_norm": 7.703587532043457, + "learning_rate": 2.3310439560439563e-06, + "loss": 0.0662, + "step": 34703 + }, + { + "epoch": 95.34065934065934, + "grad_norm": 7.91494607925415, + "learning_rate": 2.3296703296703298e-06, + "loss": 0.1306, + "step": 34704 + }, + { + "epoch": 95.3434065934066, + "grad_norm": 17.316503524780273, + "learning_rate": 2.3282967032967036e-06, + "loss": 0.6148, + "step": 34705 + }, + { + "epoch": 95.34615384615384, + "grad_norm": 20.29517364501953, + "learning_rate": 2.326923076923077e-06, + "loss": 0.5827, + "step": 34706 + }, + { + "epoch": 95.3489010989011, + "grad_norm": 14.337204933166504, + "learning_rate": 2.325549450549451e-06, + "loss": 0.2185, + "step": 34707 + }, + { + "epoch": 95.35164835164835, + "grad_norm": 14.424417495727539, + "learning_rate": 2.3241758241758242e-06, + "loss": 0.3739, + "step": 34708 + }, + { + "epoch": 95.3543956043956, + "grad_norm": 7.7047600746154785, + "learning_rate": 2.322802197802198e-06, + "loss": 0.1228, + "step": 34709 + }, + { + "epoch": 95.35714285714286, + "grad_norm": 19.43988609313965, + "learning_rate": 2.3214285714285715e-06, + "loss": 0.3919, + "step": 34710 + }, + { + "epoch": 95.35989010989012, + "grad_norm": 11.722697257995605, + "learning_rate": 2.320054945054945e-06, + "loss": 0.1619, + "step": 34711 + }, + { + "epoch": 95.36263736263736, + "grad_norm": 24.208457946777344, + "learning_rate": 2.3186813186813187e-06, + "loss": 0.5861, + "step": 34712 + }, + { + "epoch": 95.36538461538461, + "grad_norm": 14.256412506103516, + "learning_rate": 2.3173076923076925e-06, + "loss": 0.4948, + "step": 34713 + }, + { + "epoch": 95.36813186813187, + "grad_norm": 11.863527297973633, + "learning_rate": 2.315934065934066e-06, + "loss": 0.2605, + "step": 34714 + }, + { + "epoch": 95.37087912087912, + "grad_norm": 6.9523515701293945, + "learning_rate": 2.3145604395604398e-06, + "loss": 0.1116, + "step": 34715 + }, + { + "epoch": 95.37362637362638, + "grad_norm": 10.143196105957031, + "learning_rate": 2.313186813186813e-06, + "loss": 0.1473, + "step": 34716 + }, + { + "epoch": 95.37637362637362, + "grad_norm": 14.441344261169434, + "learning_rate": 2.311813186813187e-06, + "loss": 0.1636, + "step": 34717 + }, + { + "epoch": 95.37912087912088, + "grad_norm": 21.73223876953125, + "learning_rate": 2.3104395604395604e-06, + "loss": 0.6462, + "step": 34718 + }, + { + "epoch": 95.38186813186813, + "grad_norm": 1.8140870332717896, + "learning_rate": 2.3090659340659342e-06, + "loss": 0.0237, + "step": 34719 + }, + { + "epoch": 95.38461538461539, + "grad_norm": 8.337210655212402, + "learning_rate": 2.307692307692308e-06, + "loss": 0.0755, + "step": 34720 + }, + { + "epoch": 95.38736263736264, + "grad_norm": 18.769956588745117, + "learning_rate": 2.3063186813186815e-06, + "loss": 0.3557, + "step": 34721 + }, + { + "epoch": 95.39010989010988, + "grad_norm": 22.913787841796875, + "learning_rate": 2.3049450549450553e-06, + "loss": 0.8629, + "step": 34722 + }, + { + "epoch": 95.39285714285714, + "grad_norm": 15.862815856933594, + "learning_rate": 2.3035714285714287e-06, + "loss": 0.4205, + "step": 34723 + }, + { + "epoch": 95.3956043956044, + "grad_norm": 5.66319465637207, + "learning_rate": 2.302197802197802e-06, + "loss": 0.0792, + "step": 34724 + }, + { + "epoch": 95.39835164835165, + "grad_norm": 12.877760887145996, + "learning_rate": 2.300824175824176e-06, + "loss": 0.2547, + "step": 34725 + }, + { + "epoch": 95.4010989010989, + "grad_norm": 1.0814872980117798, + "learning_rate": 2.2994505494505494e-06, + "loss": 0.0089, + "step": 34726 + }, + { + "epoch": 95.40384615384616, + "grad_norm": 5.3569207191467285, + "learning_rate": 2.298076923076923e-06, + "loss": 0.0856, + "step": 34727 + }, + { + "epoch": 95.4065934065934, + "grad_norm": 5.900999546051025, + "learning_rate": 2.2967032967032966e-06, + "loss": 0.0874, + "step": 34728 + }, + { + "epoch": 95.40934065934066, + "grad_norm": 1.482164740562439, + "learning_rate": 2.2953296703296704e-06, + "loss": 0.0232, + "step": 34729 + }, + { + "epoch": 95.41208791208791, + "grad_norm": 13.256006240844727, + "learning_rate": 2.293956043956044e-06, + "loss": 0.1645, + "step": 34730 + }, + { + "epoch": 95.41483516483517, + "grad_norm": 11.406281471252441, + "learning_rate": 2.2925824175824177e-06, + "loss": 0.2357, + "step": 34731 + }, + { + "epoch": 95.41758241758242, + "grad_norm": 23.57375717163086, + "learning_rate": 2.2912087912087915e-06, + "loss": 0.5305, + "step": 34732 + }, + { + "epoch": 95.42032967032966, + "grad_norm": 2.501603603363037, + "learning_rate": 2.289835164835165e-06, + "loss": 0.0399, + "step": 34733 + }, + { + "epoch": 95.42307692307692, + "grad_norm": 6.388299465179443, + "learning_rate": 2.2884615384615387e-06, + "loss": 0.1003, + "step": 34734 + }, + { + "epoch": 95.42582417582418, + "grad_norm": 17.276575088500977, + "learning_rate": 2.287087912087912e-06, + "loss": 0.1361, + "step": 34735 + }, + { + "epoch": 95.42857142857143, + "grad_norm": 12.754171371459961, + "learning_rate": 2.285714285714286e-06, + "loss": 0.1978, + "step": 34736 + }, + { + "epoch": 95.43131868131869, + "grad_norm": 19.895793914794922, + "learning_rate": 2.2843406593406594e-06, + "loss": 0.3251, + "step": 34737 + }, + { + "epoch": 95.43406593406593, + "grad_norm": 10.898101806640625, + "learning_rate": 2.282967032967033e-06, + "loss": 0.077, + "step": 34738 + }, + { + "epoch": 95.43681318681318, + "grad_norm": 7.627315521240234, + "learning_rate": 2.2815934065934066e-06, + "loss": 0.0962, + "step": 34739 + }, + { + "epoch": 95.43956043956044, + "grad_norm": 16.701799392700195, + "learning_rate": 2.28021978021978e-06, + "loss": 0.3484, + "step": 34740 + }, + { + "epoch": 95.4423076923077, + "grad_norm": 23.531099319458008, + "learning_rate": 2.278846153846154e-06, + "loss": 1.0482, + "step": 34741 + }, + { + "epoch": 95.44505494505495, + "grad_norm": 3.3827972412109375, + "learning_rate": 2.2774725274725277e-06, + "loss": 0.0359, + "step": 34742 + }, + { + "epoch": 95.4478021978022, + "grad_norm": 11.766274452209473, + "learning_rate": 2.276098901098901e-06, + "loss": 0.243, + "step": 34743 + }, + { + "epoch": 95.45054945054945, + "grad_norm": 11.436826705932617, + "learning_rate": 2.274725274725275e-06, + "loss": 0.1772, + "step": 34744 + }, + { + "epoch": 95.4532967032967, + "grad_norm": 15.53626537322998, + "learning_rate": 2.2733516483516483e-06, + "loss": 0.4923, + "step": 34745 + }, + { + "epoch": 95.45604395604396, + "grad_norm": 13.687894821166992, + "learning_rate": 2.271978021978022e-06, + "loss": 0.2, + "step": 34746 + }, + { + "epoch": 95.45879120879121, + "grad_norm": 16.610355377197266, + "learning_rate": 2.2706043956043956e-06, + "loss": 0.2999, + "step": 34747 + }, + { + "epoch": 95.46153846153847, + "grad_norm": 11.747941970825195, + "learning_rate": 2.2692307692307694e-06, + "loss": 0.1903, + "step": 34748 + }, + { + "epoch": 95.46428571428571, + "grad_norm": 10.98857593536377, + "learning_rate": 2.2678571428571432e-06, + "loss": 0.128, + "step": 34749 + }, + { + "epoch": 95.46703296703296, + "grad_norm": 24.557218551635742, + "learning_rate": 2.2664835164835166e-06, + "loss": 0.8653, + "step": 34750 + }, + { + "epoch": 95.46978021978022, + "grad_norm": 18.652090072631836, + "learning_rate": 2.2651098901098905e-06, + "loss": 0.2783, + "step": 34751 + }, + { + "epoch": 95.47252747252747, + "grad_norm": 2.6727938652038574, + "learning_rate": 2.263736263736264e-06, + "loss": 0.0363, + "step": 34752 + }, + { + "epoch": 95.47527472527473, + "grad_norm": 5.924737453460693, + "learning_rate": 2.2623626373626373e-06, + "loss": 0.0896, + "step": 34753 + }, + { + "epoch": 95.47802197802197, + "grad_norm": 15.772093772888184, + "learning_rate": 2.260989010989011e-06, + "loss": 0.2256, + "step": 34754 + }, + { + "epoch": 95.48076923076923, + "grad_norm": 17.175874710083008, + "learning_rate": 2.2596153846153845e-06, + "loss": 0.3687, + "step": 34755 + }, + { + "epoch": 95.48351648351648, + "grad_norm": 8.89777946472168, + "learning_rate": 2.2582417582417583e-06, + "loss": 0.1513, + "step": 34756 + }, + { + "epoch": 95.48626373626374, + "grad_norm": 11.413308143615723, + "learning_rate": 2.2568681318681318e-06, + "loss": 0.2525, + "step": 34757 + }, + { + "epoch": 95.48901098901099, + "grad_norm": 12.69174575805664, + "learning_rate": 2.2554945054945056e-06, + "loss": 0.2287, + "step": 34758 + }, + { + "epoch": 95.49175824175825, + "grad_norm": 12.708637237548828, + "learning_rate": 2.254120879120879e-06, + "loss": 0.1519, + "step": 34759 + }, + { + "epoch": 95.49450549450549, + "grad_norm": 10.948906898498535, + "learning_rate": 2.252747252747253e-06, + "loss": 0.1678, + "step": 34760 + }, + { + "epoch": 95.49725274725274, + "grad_norm": 15.678946495056152, + "learning_rate": 2.2513736263736267e-06, + "loss": 0.2755, + "step": 34761 + }, + { + "epoch": 95.5, + "grad_norm": 8.824912071228027, + "learning_rate": 2.25e-06, + "loss": 0.1001, + "step": 34762 + }, + { + "epoch": 95.50274725274726, + "grad_norm": 5.3924760818481445, + "learning_rate": 2.248626373626374e-06, + "loss": 0.0867, + "step": 34763 + }, + { + "epoch": 95.50549450549451, + "grad_norm": 6.120877265930176, + "learning_rate": 2.2472527472527473e-06, + "loss": 0.0728, + "step": 34764 + }, + { + "epoch": 95.50824175824175, + "grad_norm": 24.118202209472656, + "learning_rate": 2.245879120879121e-06, + "loss": 0.4607, + "step": 34765 + }, + { + "epoch": 95.51098901098901, + "grad_norm": 21.81807518005371, + "learning_rate": 2.244505494505495e-06, + "loss": 0.3874, + "step": 34766 + }, + { + "epoch": 95.51373626373626, + "grad_norm": 2.7078607082366943, + "learning_rate": 2.2431318681318684e-06, + "loss": 0.0371, + "step": 34767 + }, + { + "epoch": 95.51648351648352, + "grad_norm": 8.777885437011719, + "learning_rate": 2.2417582417582418e-06, + "loss": 0.0968, + "step": 34768 + }, + { + "epoch": 95.51923076923077, + "grad_norm": 17.83206558227539, + "learning_rate": 2.240384615384615e-06, + "loss": 0.3583, + "step": 34769 + }, + { + "epoch": 95.52197802197803, + "grad_norm": 10.420341491699219, + "learning_rate": 2.239010989010989e-06, + "loss": 0.1541, + "step": 34770 + }, + { + "epoch": 95.52472527472527, + "grad_norm": 7.276554584503174, + "learning_rate": 2.237637362637363e-06, + "loss": 0.1143, + "step": 34771 + }, + { + "epoch": 95.52747252747253, + "grad_norm": 9.104288101196289, + "learning_rate": 2.2362637362637362e-06, + "loss": 0.1599, + "step": 34772 + }, + { + "epoch": 95.53021978021978, + "grad_norm": 13.797361373901367, + "learning_rate": 2.23489010989011e-06, + "loss": 0.228, + "step": 34773 + }, + { + "epoch": 95.53296703296704, + "grad_norm": 6.347996234893799, + "learning_rate": 2.2335164835164835e-06, + "loss": 0.1117, + "step": 34774 + }, + { + "epoch": 95.53571428571429, + "grad_norm": 14.089594841003418, + "learning_rate": 2.2321428571428573e-06, + "loss": 0.2179, + "step": 34775 + }, + { + "epoch": 95.53846153846153, + "grad_norm": 13.88701057434082, + "learning_rate": 2.2307692307692307e-06, + "loss": 0.1935, + "step": 34776 + }, + { + "epoch": 95.54120879120879, + "grad_norm": 24.1041259765625, + "learning_rate": 2.2293956043956045e-06, + "loss": 0.6677, + "step": 34777 + }, + { + "epoch": 95.54395604395604, + "grad_norm": 15.200531005859375, + "learning_rate": 2.2280219780219784e-06, + "loss": 0.2399, + "step": 34778 + }, + { + "epoch": 95.5467032967033, + "grad_norm": 3.5970723628997803, + "learning_rate": 2.2266483516483518e-06, + "loss": 0.0523, + "step": 34779 + }, + { + "epoch": 95.54945054945055, + "grad_norm": 6.511169910430908, + "learning_rate": 2.2252747252747256e-06, + "loss": 0.1775, + "step": 34780 + }, + { + "epoch": 95.5521978021978, + "grad_norm": 8.221328735351562, + "learning_rate": 2.223901098901099e-06, + "loss": 0.1066, + "step": 34781 + }, + { + "epoch": 95.55494505494505, + "grad_norm": 2.1695072650909424, + "learning_rate": 2.222527472527473e-06, + "loss": 0.0337, + "step": 34782 + }, + { + "epoch": 95.5576923076923, + "grad_norm": 17.218067169189453, + "learning_rate": 2.2211538461538463e-06, + "loss": 0.3529, + "step": 34783 + }, + { + "epoch": 95.56043956043956, + "grad_norm": 12.422236442565918, + "learning_rate": 2.2197802197802197e-06, + "loss": 0.3505, + "step": 34784 + }, + { + "epoch": 95.56318681318682, + "grad_norm": 7.192598342895508, + "learning_rate": 2.2184065934065935e-06, + "loss": 0.1193, + "step": 34785 + }, + { + "epoch": 95.56593406593407, + "grad_norm": 23.847492218017578, + "learning_rate": 2.217032967032967e-06, + "loss": 0.7988, + "step": 34786 + }, + { + "epoch": 95.56868131868131, + "grad_norm": 10.957526206970215, + "learning_rate": 2.2156593406593407e-06, + "loss": 0.1847, + "step": 34787 + }, + { + "epoch": 95.57142857142857, + "grad_norm": 1.3396095037460327, + "learning_rate": 2.214285714285714e-06, + "loss": 0.0201, + "step": 34788 + }, + { + "epoch": 95.57417582417582, + "grad_norm": 3.2176854610443115, + "learning_rate": 2.212912087912088e-06, + "loss": 0.0384, + "step": 34789 + }, + { + "epoch": 95.57692307692308, + "grad_norm": 9.022958755493164, + "learning_rate": 2.211538461538462e-06, + "loss": 0.0704, + "step": 34790 + }, + { + "epoch": 95.57967032967034, + "grad_norm": 1.2589980363845825, + "learning_rate": 2.210164835164835e-06, + "loss": 0.0127, + "step": 34791 + }, + { + "epoch": 95.58241758241758, + "grad_norm": 10.947334289550781, + "learning_rate": 2.208791208791209e-06, + "loss": 0.1243, + "step": 34792 + }, + { + "epoch": 95.58516483516483, + "grad_norm": 12.183614730834961, + "learning_rate": 2.2074175824175824e-06, + "loss": 0.2214, + "step": 34793 + }, + { + "epoch": 95.58791208791209, + "grad_norm": 12.803443908691406, + "learning_rate": 2.2060439560439563e-06, + "loss": 0.2089, + "step": 34794 + }, + { + "epoch": 95.59065934065934, + "grad_norm": 12.245413780212402, + "learning_rate": 2.20467032967033e-06, + "loss": 0.2712, + "step": 34795 + }, + { + "epoch": 95.5934065934066, + "grad_norm": 4.354969501495361, + "learning_rate": 2.2032967032967035e-06, + "loss": 0.0654, + "step": 34796 + }, + { + "epoch": 95.59615384615384, + "grad_norm": 14.649582862854004, + "learning_rate": 2.201923076923077e-06, + "loss": 0.2597, + "step": 34797 + }, + { + "epoch": 95.5989010989011, + "grad_norm": 10.97911262512207, + "learning_rate": 2.2005494505494507e-06, + "loss": 0.1646, + "step": 34798 + }, + { + "epoch": 95.60164835164835, + "grad_norm": 11.874845504760742, + "learning_rate": 2.199175824175824e-06, + "loss": 0.1435, + "step": 34799 + }, + { + "epoch": 95.6043956043956, + "grad_norm": 8.840694427490234, + "learning_rate": 2.197802197802198e-06, + "loss": 0.1005, + "step": 34800 + }, + { + "epoch": 95.60714285714286, + "grad_norm": 3.921149253845215, + "learning_rate": 2.1964285714285714e-06, + "loss": 0.0354, + "step": 34801 + }, + { + "epoch": 95.60989010989012, + "grad_norm": 7.05897855758667, + "learning_rate": 2.1950549450549452e-06, + "loss": 0.108, + "step": 34802 + }, + { + "epoch": 95.61263736263736, + "grad_norm": 5.790294170379639, + "learning_rate": 2.1936813186813186e-06, + "loss": 0.1123, + "step": 34803 + }, + { + "epoch": 95.61538461538461, + "grad_norm": 15.284306526184082, + "learning_rate": 2.1923076923076925e-06, + "loss": 0.2313, + "step": 34804 + }, + { + "epoch": 95.61813186813187, + "grad_norm": 3.2997329235076904, + "learning_rate": 2.190934065934066e-06, + "loss": 0.0747, + "step": 34805 + }, + { + "epoch": 95.62087912087912, + "grad_norm": 15.736566543579102, + "learning_rate": 2.1895604395604397e-06, + "loss": 0.3267, + "step": 34806 + }, + { + "epoch": 95.62362637362638, + "grad_norm": 2.5621044635772705, + "learning_rate": 2.1881868131868135e-06, + "loss": 0.0164, + "step": 34807 + }, + { + "epoch": 95.62637362637362, + "grad_norm": 6.5034379959106445, + "learning_rate": 2.186813186813187e-06, + "loss": 0.1342, + "step": 34808 + }, + { + "epoch": 95.62912087912088, + "grad_norm": 11.67647647857666, + "learning_rate": 2.1854395604395608e-06, + "loss": 0.1333, + "step": 34809 + }, + { + "epoch": 95.63186813186813, + "grad_norm": 7.2807183265686035, + "learning_rate": 2.184065934065934e-06, + "loss": 0.0962, + "step": 34810 + }, + { + "epoch": 95.63461538461539, + "grad_norm": 19.637657165527344, + "learning_rate": 2.182692307692308e-06, + "loss": 0.3797, + "step": 34811 + }, + { + "epoch": 95.63736263736264, + "grad_norm": 4.465491771697998, + "learning_rate": 2.1813186813186814e-06, + "loss": 0.0655, + "step": 34812 + }, + { + "epoch": 95.64010989010988, + "grad_norm": 15.342875480651855, + "learning_rate": 2.179945054945055e-06, + "loss": 0.3321, + "step": 34813 + }, + { + "epoch": 95.64285714285714, + "grad_norm": 12.33679485321045, + "learning_rate": 2.1785714285714286e-06, + "loss": 0.1588, + "step": 34814 + }, + { + "epoch": 95.6456043956044, + "grad_norm": 26.65512466430664, + "learning_rate": 2.177197802197802e-06, + "loss": 0.7094, + "step": 34815 + }, + { + "epoch": 95.64835164835165, + "grad_norm": 11.076390266418457, + "learning_rate": 2.175824175824176e-06, + "loss": 0.2551, + "step": 34816 + }, + { + "epoch": 95.6510989010989, + "grad_norm": 15.135108947753906, + "learning_rate": 2.1744505494505493e-06, + "loss": 0.1937, + "step": 34817 + }, + { + "epoch": 95.65384615384616, + "grad_norm": 8.194833755493164, + "learning_rate": 2.173076923076923e-06, + "loss": 0.1276, + "step": 34818 + }, + { + "epoch": 95.6565934065934, + "grad_norm": 16.485105514526367, + "learning_rate": 2.171703296703297e-06, + "loss": 0.2573, + "step": 34819 + }, + { + "epoch": 95.65934065934066, + "grad_norm": 5.468986988067627, + "learning_rate": 2.1703296703296704e-06, + "loss": 0.0659, + "step": 34820 + }, + { + "epoch": 95.66208791208791, + "grad_norm": 15.593303680419922, + "learning_rate": 2.168956043956044e-06, + "loss": 0.1546, + "step": 34821 + }, + { + "epoch": 95.66483516483517, + "grad_norm": 17.485454559326172, + "learning_rate": 2.1675824175824176e-06, + "loss": 0.4077, + "step": 34822 + }, + { + "epoch": 95.66758241758242, + "grad_norm": 11.588269233703613, + "learning_rate": 2.1662087912087914e-06, + "loss": 0.0713, + "step": 34823 + }, + { + "epoch": 95.67032967032966, + "grad_norm": 10.654352188110352, + "learning_rate": 2.1648351648351653e-06, + "loss": 0.3637, + "step": 34824 + }, + { + "epoch": 95.67307692307692, + "grad_norm": 13.191635131835938, + "learning_rate": 2.1634615384615387e-06, + "loss": 0.1806, + "step": 34825 + }, + { + "epoch": 95.67582417582418, + "grad_norm": 15.807766914367676, + "learning_rate": 2.1620879120879125e-06, + "loss": 0.2574, + "step": 34826 + }, + { + "epoch": 95.67857142857143, + "grad_norm": 4.234955787658691, + "learning_rate": 2.160714285714286e-06, + "loss": 0.036, + "step": 34827 + }, + { + "epoch": 95.68131868131869, + "grad_norm": 32.06184387207031, + "learning_rate": 2.1593406593406593e-06, + "loss": 0.4699, + "step": 34828 + }, + { + "epoch": 95.68406593406593, + "grad_norm": 6.431604385375977, + "learning_rate": 2.1579670329670327e-06, + "loss": 0.0969, + "step": 34829 + }, + { + "epoch": 95.68681318681318, + "grad_norm": 20.45132827758789, + "learning_rate": 2.1565934065934065e-06, + "loss": 0.1651, + "step": 34830 + }, + { + "epoch": 95.68956043956044, + "grad_norm": 11.305545806884766, + "learning_rate": 2.1552197802197804e-06, + "loss": 0.1741, + "step": 34831 + }, + { + "epoch": 95.6923076923077, + "grad_norm": 9.623075485229492, + "learning_rate": 2.1538461538461538e-06, + "loss": 0.1618, + "step": 34832 + }, + { + "epoch": 95.69505494505495, + "grad_norm": 8.914571762084961, + "learning_rate": 2.1524725274725276e-06, + "loss": 0.1684, + "step": 34833 + }, + { + "epoch": 95.6978021978022, + "grad_norm": 21.358545303344727, + "learning_rate": 2.151098901098901e-06, + "loss": 0.2722, + "step": 34834 + }, + { + "epoch": 95.70054945054945, + "grad_norm": 6.463935852050781, + "learning_rate": 2.149725274725275e-06, + "loss": 0.0645, + "step": 34835 + }, + { + "epoch": 95.7032967032967, + "grad_norm": 17.850595474243164, + "learning_rate": 2.1483516483516487e-06, + "loss": 0.4785, + "step": 34836 + }, + { + "epoch": 95.70604395604396, + "grad_norm": 2.2140798568725586, + "learning_rate": 2.146978021978022e-06, + "loss": 0.0271, + "step": 34837 + }, + { + "epoch": 95.70879120879121, + "grad_norm": 11.391518592834473, + "learning_rate": 2.145604395604396e-06, + "loss": 0.2589, + "step": 34838 + }, + { + "epoch": 95.71153846153847, + "grad_norm": 4.849063873291016, + "learning_rate": 2.1442307692307693e-06, + "loss": 0.0565, + "step": 34839 + }, + { + "epoch": 95.71428571428571, + "grad_norm": 12.564872741699219, + "learning_rate": 2.142857142857143e-06, + "loss": 0.1941, + "step": 34840 + }, + { + "epoch": 95.71703296703296, + "grad_norm": 1.5358762741088867, + "learning_rate": 2.1414835164835166e-06, + "loss": 0.0246, + "step": 34841 + }, + { + "epoch": 95.71978021978022, + "grad_norm": 7.341864109039307, + "learning_rate": 2.14010989010989e-06, + "loss": 0.0741, + "step": 34842 + }, + { + "epoch": 95.72252747252747, + "grad_norm": 12.65658950805664, + "learning_rate": 2.138736263736264e-06, + "loss": 0.3602, + "step": 34843 + }, + { + "epoch": 95.72527472527473, + "grad_norm": 11.291845321655273, + "learning_rate": 2.137362637362637e-06, + "loss": 0.2005, + "step": 34844 + }, + { + "epoch": 95.72802197802197, + "grad_norm": 2.922793388366699, + "learning_rate": 2.135989010989011e-06, + "loss": 0.0426, + "step": 34845 + }, + { + "epoch": 95.73076923076923, + "grad_norm": 6.161616325378418, + "learning_rate": 2.1346153846153844e-06, + "loss": 0.1038, + "step": 34846 + }, + { + "epoch": 95.73351648351648, + "grad_norm": 11.385066032409668, + "learning_rate": 2.1332417582417583e-06, + "loss": 0.2565, + "step": 34847 + }, + { + "epoch": 95.73626373626374, + "grad_norm": 15.12462329864502, + "learning_rate": 2.131868131868132e-06, + "loss": 0.1451, + "step": 34848 + }, + { + "epoch": 95.73901098901099, + "grad_norm": 9.716530799865723, + "learning_rate": 2.1304945054945055e-06, + "loss": 0.1216, + "step": 34849 + }, + { + "epoch": 95.74175824175825, + "grad_norm": 8.636463165283203, + "learning_rate": 2.1291208791208793e-06, + "loss": 0.1472, + "step": 34850 + }, + { + "epoch": 95.74450549450549, + "grad_norm": 9.35327434539795, + "learning_rate": 2.1277472527472527e-06, + "loss": 0.2492, + "step": 34851 + }, + { + "epoch": 95.74725274725274, + "grad_norm": 5.958645343780518, + "learning_rate": 2.1263736263736266e-06, + "loss": 0.0399, + "step": 34852 + }, + { + "epoch": 95.75, + "grad_norm": 10.193475723266602, + "learning_rate": 2.1250000000000004e-06, + "loss": 0.2421, + "step": 34853 + }, + { + "epoch": 95.75274725274726, + "grad_norm": 1.0068634748458862, + "learning_rate": 2.123626373626374e-06, + "loss": 0.0095, + "step": 34854 + }, + { + "epoch": 95.75549450549451, + "grad_norm": 17.091135025024414, + "learning_rate": 2.1222527472527476e-06, + "loss": 0.2631, + "step": 34855 + }, + { + "epoch": 95.75824175824175, + "grad_norm": 14.713912963867188, + "learning_rate": 2.120879120879121e-06, + "loss": 0.2542, + "step": 34856 + }, + { + "epoch": 95.76098901098901, + "grad_norm": 3.609971284866333, + "learning_rate": 2.1195054945054945e-06, + "loss": 0.0274, + "step": 34857 + }, + { + "epoch": 95.76373626373626, + "grad_norm": 19.61491584777832, + "learning_rate": 2.118131868131868e-06, + "loss": 0.5042, + "step": 34858 + }, + { + "epoch": 95.76648351648352, + "grad_norm": 12.723082542419434, + "learning_rate": 2.1167582417582417e-06, + "loss": 0.2325, + "step": 34859 + }, + { + "epoch": 95.76923076923077, + "grad_norm": 14.261704444885254, + "learning_rate": 2.1153846153846155e-06, + "loss": 0.2739, + "step": 34860 + }, + { + "epoch": 95.77197802197803, + "grad_norm": 10.78623104095459, + "learning_rate": 2.114010989010989e-06, + "loss": 0.2455, + "step": 34861 + }, + { + "epoch": 95.77472527472527, + "grad_norm": 4.512066841125488, + "learning_rate": 2.1126373626373628e-06, + "loss": 0.0658, + "step": 34862 + }, + { + "epoch": 95.77747252747253, + "grad_norm": 24.024585723876953, + "learning_rate": 2.111263736263736e-06, + "loss": 0.287, + "step": 34863 + }, + { + "epoch": 95.78021978021978, + "grad_norm": 18.190845489501953, + "learning_rate": 2.10989010989011e-06, + "loss": 0.2667, + "step": 34864 + }, + { + "epoch": 95.78296703296704, + "grad_norm": 21.424829483032227, + "learning_rate": 2.108516483516484e-06, + "loss": 0.272, + "step": 34865 + }, + { + "epoch": 95.78571428571429, + "grad_norm": 12.81600570678711, + "learning_rate": 2.1071428571428572e-06, + "loss": 0.1756, + "step": 34866 + }, + { + "epoch": 95.78846153846153, + "grad_norm": 6.02952766418457, + "learning_rate": 2.105769230769231e-06, + "loss": 0.0458, + "step": 34867 + }, + { + "epoch": 95.79120879120879, + "grad_norm": 5.01469612121582, + "learning_rate": 2.1043956043956045e-06, + "loss": 0.0348, + "step": 34868 + }, + { + "epoch": 95.79395604395604, + "grad_norm": 1.0421870946884155, + "learning_rate": 2.1030219780219783e-06, + "loss": 0.0079, + "step": 34869 + }, + { + "epoch": 95.7967032967033, + "grad_norm": 12.668217658996582, + "learning_rate": 2.1016483516483517e-06, + "loss": 0.1035, + "step": 34870 + }, + { + "epoch": 95.79945054945055, + "grad_norm": 8.616228103637695, + "learning_rate": 2.1002747252747255e-06, + "loss": 0.1392, + "step": 34871 + }, + { + "epoch": 95.8021978021978, + "grad_norm": 6.146887302398682, + "learning_rate": 2.098901098901099e-06, + "loss": 0.0563, + "step": 34872 + }, + { + "epoch": 95.80494505494505, + "grad_norm": 6.567018508911133, + "learning_rate": 2.0975274725274724e-06, + "loss": 0.0667, + "step": 34873 + }, + { + "epoch": 95.8076923076923, + "grad_norm": 13.620314598083496, + "learning_rate": 2.096153846153846e-06, + "loss": 0.4397, + "step": 34874 + }, + { + "epoch": 95.81043956043956, + "grad_norm": 16.946958541870117, + "learning_rate": 2.0947802197802196e-06, + "loss": 0.2829, + "step": 34875 + }, + { + "epoch": 95.81318681318682, + "grad_norm": 13.590364456176758, + "learning_rate": 2.0934065934065934e-06, + "loss": 0.2193, + "step": 34876 + }, + { + "epoch": 95.81593406593407, + "grad_norm": 17.161211013793945, + "learning_rate": 2.0920329670329673e-06, + "loss": 0.4679, + "step": 34877 + }, + { + "epoch": 95.81868131868131, + "grad_norm": 22.18272590637207, + "learning_rate": 2.0906593406593407e-06, + "loss": 0.9249, + "step": 34878 + }, + { + "epoch": 95.82142857142857, + "grad_norm": 4.710088729858398, + "learning_rate": 2.0892857142857145e-06, + "loss": 0.0673, + "step": 34879 + }, + { + "epoch": 95.82417582417582, + "grad_norm": 10.457962036132812, + "learning_rate": 2.087912087912088e-06, + "loss": 0.1457, + "step": 34880 + }, + { + "epoch": 95.82692307692308, + "grad_norm": 12.523054122924805, + "learning_rate": 2.0865384615384617e-06, + "loss": 0.194, + "step": 34881 + }, + { + "epoch": 95.82967032967034, + "grad_norm": 13.751335144042969, + "learning_rate": 2.0851648351648356e-06, + "loss": 0.3813, + "step": 34882 + }, + { + "epoch": 95.83241758241758, + "grad_norm": 13.632258415222168, + "learning_rate": 2.083791208791209e-06, + "loss": 0.169, + "step": 34883 + }, + { + "epoch": 95.83516483516483, + "grad_norm": 24.790529251098633, + "learning_rate": 2.082417582417583e-06, + "loss": 0.2562, + "step": 34884 + }, + { + "epoch": 95.83791208791209, + "grad_norm": 21.044231414794922, + "learning_rate": 2.081043956043956e-06, + "loss": 0.5979, + "step": 34885 + }, + { + "epoch": 95.84065934065934, + "grad_norm": 10.693687438964844, + "learning_rate": 2.0796703296703296e-06, + "loss": 0.1877, + "step": 34886 + }, + { + "epoch": 95.8434065934066, + "grad_norm": 4.164669513702393, + "learning_rate": 2.0782967032967034e-06, + "loss": 0.0447, + "step": 34887 + }, + { + "epoch": 95.84615384615384, + "grad_norm": 8.114274978637695, + "learning_rate": 2.076923076923077e-06, + "loss": 0.0871, + "step": 34888 + }, + { + "epoch": 95.8489010989011, + "grad_norm": 11.400716781616211, + "learning_rate": 2.0755494505494507e-06, + "loss": 0.1532, + "step": 34889 + }, + { + "epoch": 95.85164835164835, + "grad_norm": 9.901318550109863, + "learning_rate": 2.074175824175824e-06, + "loss": 0.3004, + "step": 34890 + }, + { + "epoch": 95.8543956043956, + "grad_norm": 9.606199264526367, + "learning_rate": 2.072802197802198e-06, + "loss": 0.1364, + "step": 34891 + }, + { + "epoch": 95.85714285714286, + "grad_norm": 5.6570234298706055, + "learning_rate": 2.0714285714285713e-06, + "loss": 0.0497, + "step": 34892 + }, + { + "epoch": 95.85989010989012, + "grad_norm": 1.358880877494812, + "learning_rate": 2.070054945054945e-06, + "loss": 0.014, + "step": 34893 + }, + { + "epoch": 95.86263736263736, + "grad_norm": 9.087517738342285, + "learning_rate": 2.068681318681319e-06, + "loss": 0.1394, + "step": 34894 + }, + { + "epoch": 95.86538461538461, + "grad_norm": 7.386605739593506, + "learning_rate": 2.0673076923076924e-06, + "loss": 0.1453, + "step": 34895 + }, + { + "epoch": 95.86813186813187, + "grad_norm": 6.849757194519043, + "learning_rate": 2.0659340659340662e-06, + "loss": 0.1126, + "step": 34896 + }, + { + "epoch": 95.87087912087912, + "grad_norm": 7.444779396057129, + "learning_rate": 2.0645604395604396e-06, + "loss": 0.1083, + "step": 34897 + }, + { + "epoch": 95.87362637362638, + "grad_norm": 9.384753227233887, + "learning_rate": 2.0631868131868135e-06, + "loss": 0.151, + "step": 34898 + }, + { + "epoch": 95.87637362637362, + "grad_norm": 9.777482032775879, + "learning_rate": 2.0618131868131873e-06, + "loss": 0.1217, + "step": 34899 + }, + { + "epoch": 95.87912087912088, + "grad_norm": 8.345490455627441, + "learning_rate": 2.0604395604395607e-06, + "loss": 0.144, + "step": 34900 + }, + { + "epoch": 95.88186813186813, + "grad_norm": 20.572317123413086, + "learning_rate": 2.059065934065934e-06, + "loss": 0.5287, + "step": 34901 + }, + { + "epoch": 95.88461538461539, + "grad_norm": 3.6002750396728516, + "learning_rate": 2.0576923076923075e-06, + "loss": 0.0395, + "step": 34902 + }, + { + "epoch": 95.88736263736264, + "grad_norm": 14.286964416503906, + "learning_rate": 2.0563186813186813e-06, + "loss": 0.3308, + "step": 34903 + }, + { + "epoch": 95.89010989010988, + "grad_norm": 2.5092740058898926, + "learning_rate": 2.0549450549450547e-06, + "loss": 0.0289, + "step": 34904 + }, + { + "epoch": 95.89285714285714, + "grad_norm": 14.4132661819458, + "learning_rate": 2.0535714285714286e-06, + "loss": 0.303, + "step": 34905 + }, + { + "epoch": 95.8956043956044, + "grad_norm": 9.917731285095215, + "learning_rate": 2.0521978021978024e-06, + "loss": 0.1211, + "step": 34906 + }, + { + "epoch": 95.89835164835165, + "grad_norm": 12.38727855682373, + "learning_rate": 2.050824175824176e-06, + "loss": 0.2565, + "step": 34907 + }, + { + "epoch": 95.9010989010989, + "grad_norm": 8.507058143615723, + "learning_rate": 2.0494505494505496e-06, + "loss": 0.1552, + "step": 34908 + }, + { + "epoch": 95.90384615384616, + "grad_norm": 6.859165668487549, + "learning_rate": 2.048076923076923e-06, + "loss": 0.056, + "step": 34909 + }, + { + "epoch": 95.9065934065934, + "grad_norm": 18.387290954589844, + "learning_rate": 2.046703296703297e-06, + "loss": 0.267, + "step": 34910 + }, + { + "epoch": 95.90934065934066, + "grad_norm": 7.84644079208374, + "learning_rate": 2.0453296703296707e-06, + "loss": 0.1045, + "step": 34911 + }, + { + "epoch": 95.91208791208791, + "grad_norm": 12.044062614440918, + "learning_rate": 2.043956043956044e-06, + "loss": 0.277, + "step": 34912 + }, + { + "epoch": 95.91483516483517, + "grad_norm": 9.971237182617188, + "learning_rate": 2.042582417582418e-06, + "loss": 0.1708, + "step": 34913 + }, + { + "epoch": 95.91758241758242, + "grad_norm": 10.050692558288574, + "learning_rate": 2.0412087912087914e-06, + "loss": 0.145, + "step": 34914 + }, + { + "epoch": 95.92032967032966, + "grad_norm": 6.914986610412598, + "learning_rate": 2.039835164835165e-06, + "loss": 0.0942, + "step": 34915 + }, + { + "epoch": 95.92307692307692, + "grad_norm": 15.148327827453613, + "learning_rate": 2.0384615384615386e-06, + "loss": 0.252, + "step": 34916 + }, + { + "epoch": 95.92582417582418, + "grad_norm": 11.045915603637695, + "learning_rate": 2.037087912087912e-06, + "loss": 0.1149, + "step": 34917 + }, + { + "epoch": 95.92857142857143, + "grad_norm": 18.137977600097656, + "learning_rate": 2.035714285714286e-06, + "loss": 0.2816, + "step": 34918 + }, + { + "epoch": 95.93131868131869, + "grad_norm": 10.302145957946777, + "learning_rate": 2.0343406593406592e-06, + "loss": 0.1345, + "step": 34919 + }, + { + "epoch": 95.93406593406593, + "grad_norm": 12.920409202575684, + "learning_rate": 2.032967032967033e-06, + "loss": 0.1137, + "step": 34920 + }, + { + "epoch": 95.93681318681318, + "grad_norm": 1.8416179418563843, + "learning_rate": 2.0315934065934065e-06, + "loss": 0.0326, + "step": 34921 + }, + { + "epoch": 95.93956043956044, + "grad_norm": 10.086447715759277, + "learning_rate": 2.0302197802197803e-06, + "loss": 0.1221, + "step": 34922 + }, + { + "epoch": 95.9423076923077, + "grad_norm": 7.648046016693115, + "learning_rate": 2.028846153846154e-06, + "loss": 0.1212, + "step": 34923 + }, + { + "epoch": 95.94505494505495, + "grad_norm": 6.582411289215088, + "learning_rate": 2.0274725274725275e-06, + "loss": 0.0944, + "step": 34924 + }, + { + "epoch": 95.9478021978022, + "grad_norm": 18.97147560119629, + "learning_rate": 2.0260989010989014e-06, + "loss": 0.34, + "step": 34925 + }, + { + "epoch": 95.95054945054945, + "grad_norm": 11.62173080444336, + "learning_rate": 2.0247252747252748e-06, + "loss": 0.1536, + "step": 34926 + }, + { + "epoch": 95.9532967032967, + "grad_norm": 5.1450514793396, + "learning_rate": 2.0233516483516486e-06, + "loss": 0.1122, + "step": 34927 + }, + { + "epoch": 95.95604395604396, + "grad_norm": 13.266648292541504, + "learning_rate": 2.0219780219780224e-06, + "loss": 0.2265, + "step": 34928 + }, + { + "epoch": 95.95879120879121, + "grad_norm": 13.039347648620605, + "learning_rate": 2.020604395604396e-06, + "loss": 0.251, + "step": 34929 + }, + { + "epoch": 95.96153846153847, + "grad_norm": 17.099639892578125, + "learning_rate": 2.0192307692307692e-06, + "loss": 0.3095, + "step": 34930 + }, + { + "epoch": 95.96428571428571, + "grad_norm": 8.282767295837402, + "learning_rate": 2.0178571428571427e-06, + "loss": 0.189, + "step": 34931 + }, + { + "epoch": 95.96703296703296, + "grad_norm": 12.550715446472168, + "learning_rate": 2.0164835164835165e-06, + "loss": 0.2812, + "step": 34932 + }, + { + "epoch": 95.96978021978022, + "grad_norm": 7.18637752532959, + "learning_rate": 2.01510989010989e-06, + "loss": 0.1357, + "step": 34933 + }, + { + "epoch": 95.97252747252747, + "grad_norm": 23.758508682250977, + "learning_rate": 2.0137362637362637e-06, + "loss": 0.3702, + "step": 34934 + }, + { + "epoch": 95.97527472527473, + "grad_norm": 17.268213272094727, + "learning_rate": 2.0123626373626376e-06, + "loss": 0.1384, + "step": 34935 + }, + { + "epoch": 95.97802197802197, + "grad_norm": 19.01134490966797, + "learning_rate": 2.010989010989011e-06, + "loss": 0.5164, + "step": 34936 + }, + { + "epoch": 95.98076923076923, + "grad_norm": 19.745615005493164, + "learning_rate": 2.009615384615385e-06, + "loss": 0.4674, + "step": 34937 + }, + { + "epoch": 95.98351648351648, + "grad_norm": 25.033279418945312, + "learning_rate": 2.008241758241758e-06, + "loss": 0.3612, + "step": 34938 + }, + { + "epoch": 95.98626373626374, + "grad_norm": 14.20668888092041, + "learning_rate": 2.006868131868132e-06, + "loss": 0.2397, + "step": 34939 + }, + { + "epoch": 95.98901098901099, + "grad_norm": 15.252764701843262, + "learning_rate": 2.005494505494506e-06, + "loss": 0.1954, + "step": 34940 + }, + { + "epoch": 95.99175824175825, + "grad_norm": 5.937333106994629, + "learning_rate": 2.0041208791208793e-06, + "loss": 0.0854, + "step": 34941 + }, + { + "epoch": 95.99450549450549, + "grad_norm": 18.36890411376953, + "learning_rate": 2.002747252747253e-06, + "loss": 0.4848, + "step": 34942 + }, + { + "epoch": 95.99725274725274, + "grad_norm": 5.252399444580078, + "learning_rate": 2.0013736263736265e-06, + "loss": 0.1226, + "step": 34943 + }, + { + "epoch": 96.0, + "grad_norm": 29.527313232421875, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.3703, + "step": 34944 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.696969696969697, + "eval_f1": 0.6883039110577829, + "eval_f1_DuraRiadoRio_64x64": 0.6952141057934509, + "eval_f1_Mole_64x64": 0.5922330097087378, + "eval_f1_Quebrado_64x64": 0.6486486486486487, + "eval_f1_RiadoRio_64x64": 0.7027027027027027, + "eval_f1_RioFechado_64x64": 0.8027210884353742, + "eval_loss": 1.7468606233596802, + "eval_precision": 0.7750254131052188, + "eval_precision_DuraRiadoRio_64x64": 0.5454545454545454, + "eval_precision_Mole_64x64": 0.9838709677419355, + "eval_precision_Quebrado_64x64": 0.9230769230769231, + "eval_precision_RiadoRio_64x64": 0.6464088397790055, + "eval_precision_RioFechado_64x64": 0.7763157894736842, + "eval_recall": 0.696533440408533, + "eval_recall_DuraRiadoRio_64x64": 0.9583333333333334, + "eval_recall_Mole_64x64": 0.4236111111111111, + "eval_recall_Quebrado_64x64": 0.5, + "eval_recall_RiadoRio_64x64": 0.7697368421052632, + "eval_recall_RioFechado_64x64": 0.8309859154929577, + "eval_runtime": 1.7081, + "eval_samples_per_second": 425.038, + "eval_steps_per_second": 26.931, + "step": 34944 + }, + { + "epoch": 96.00274725274726, + "grad_norm": 16.895797729492188, + "learning_rate": 1.9986263736263737e-06, + "loss": 0.2282, + "step": 34945 + }, + { + "epoch": 96.00549450549451, + "grad_norm": 5.777506351470947, + "learning_rate": 1.997252747252747e-06, + "loss": 0.0866, + "step": 34946 + }, + { + "epoch": 96.00824175824175, + "grad_norm": 9.621326446533203, + "learning_rate": 1.995879120879121e-06, + "loss": 0.1149, + "step": 34947 + }, + { + "epoch": 96.01098901098901, + "grad_norm": 20.5129451751709, + "learning_rate": 1.9945054945054944e-06, + "loss": 0.5318, + "step": 34948 + }, + { + "epoch": 96.01373626373626, + "grad_norm": 11.819576263427734, + "learning_rate": 1.9931318681318682e-06, + "loss": 0.1501, + "step": 34949 + }, + { + "epoch": 96.01648351648352, + "grad_norm": 17.131893157958984, + "learning_rate": 1.9917582417582416e-06, + "loss": 0.3147, + "step": 34950 + }, + { + "epoch": 96.01923076923077, + "grad_norm": 13.462008476257324, + "learning_rate": 1.9903846153846155e-06, + "loss": 0.2754, + "step": 34951 + }, + { + "epoch": 96.02197802197803, + "grad_norm": 14.012907981872559, + "learning_rate": 1.9890109890109893e-06, + "loss": 0.3815, + "step": 34952 + }, + { + "epoch": 96.02472527472527, + "grad_norm": 20.134197235107422, + "learning_rate": 1.9876373626373627e-06, + "loss": 0.4066, + "step": 34953 + }, + { + "epoch": 96.02747252747253, + "grad_norm": 21.04575538635254, + "learning_rate": 1.9862637362637365e-06, + "loss": 0.4672, + "step": 34954 + }, + { + "epoch": 96.03021978021978, + "grad_norm": 21.034761428833008, + "learning_rate": 1.98489010989011e-06, + "loss": 0.6356, + "step": 34955 + }, + { + "epoch": 96.03296703296704, + "grad_norm": 31.932458877563477, + "learning_rate": 1.9835164835164838e-06, + "loss": 1.1248, + "step": 34956 + }, + { + "epoch": 96.03571428571429, + "grad_norm": 13.511056900024414, + "learning_rate": 1.982142857142857e-06, + "loss": 0.2736, + "step": 34957 + }, + { + "epoch": 96.03846153846153, + "grad_norm": 5.577303409576416, + "learning_rate": 1.980769230769231e-06, + "loss": 0.1074, + "step": 34958 + }, + { + "epoch": 96.04120879120879, + "grad_norm": 6.18470573425293, + "learning_rate": 1.9793956043956044e-06, + "loss": 0.0863, + "step": 34959 + }, + { + "epoch": 96.04395604395604, + "grad_norm": 3.486121654510498, + "learning_rate": 1.9780219780219782e-06, + "loss": 0.0347, + "step": 34960 + }, + { + "epoch": 96.0467032967033, + "grad_norm": 13.794644355773926, + "learning_rate": 1.9766483516483516e-06, + "loss": 0.3486, + "step": 34961 + }, + { + "epoch": 96.04945054945055, + "grad_norm": 20.01028823852539, + "learning_rate": 1.975274725274725e-06, + "loss": 0.4406, + "step": 34962 + }, + { + "epoch": 96.0521978021978, + "grad_norm": 16.00251579284668, + "learning_rate": 1.973901098901099e-06, + "loss": 0.1504, + "step": 34963 + }, + { + "epoch": 96.05494505494505, + "grad_norm": 5.9626145362854, + "learning_rate": 1.9725274725274727e-06, + "loss": 0.1235, + "step": 34964 + }, + { + "epoch": 96.0576923076923, + "grad_norm": 13.163703918457031, + "learning_rate": 1.971153846153846e-06, + "loss": 0.169, + "step": 34965 + }, + { + "epoch": 96.06043956043956, + "grad_norm": 17.720674514770508, + "learning_rate": 1.96978021978022e-06, + "loss": 0.3047, + "step": 34966 + }, + { + "epoch": 96.06318681318682, + "grad_norm": 10.697011947631836, + "learning_rate": 1.9684065934065933e-06, + "loss": 0.2417, + "step": 34967 + }, + { + "epoch": 96.06593406593407, + "grad_norm": 10.785560607910156, + "learning_rate": 1.967032967032967e-06, + "loss": 0.2049, + "step": 34968 + }, + { + "epoch": 96.06868131868131, + "grad_norm": 12.629258155822754, + "learning_rate": 1.965659340659341e-06, + "loss": 0.1369, + "step": 34969 + }, + { + "epoch": 96.07142857142857, + "grad_norm": 9.977320671081543, + "learning_rate": 1.9642857142857144e-06, + "loss": 0.1517, + "step": 34970 + }, + { + "epoch": 96.07417582417582, + "grad_norm": 10.126372337341309, + "learning_rate": 1.9629120879120882e-06, + "loss": 0.2116, + "step": 34971 + }, + { + "epoch": 96.07692307692308, + "grad_norm": 13.512564659118652, + "learning_rate": 1.9615384615384617e-06, + "loss": 0.2294, + "step": 34972 + }, + { + "epoch": 96.07967032967034, + "grad_norm": 17.72117042541504, + "learning_rate": 1.9601648351648355e-06, + "loss": 0.452, + "step": 34973 + }, + { + "epoch": 96.08241758241758, + "grad_norm": 13.868208885192871, + "learning_rate": 1.958791208791209e-06, + "loss": 0.231, + "step": 34974 + }, + { + "epoch": 96.08516483516483, + "grad_norm": 13.120858192443848, + "learning_rate": 1.9574175824175823e-06, + "loss": 0.2842, + "step": 34975 + }, + { + "epoch": 96.08791208791209, + "grad_norm": 10.76705551147461, + "learning_rate": 1.956043956043956e-06, + "loss": 0.2384, + "step": 34976 + }, + { + "epoch": 96.09065934065934, + "grad_norm": 18.4564151763916, + "learning_rate": 1.9546703296703295e-06, + "loss": 0.3181, + "step": 34977 + }, + { + "epoch": 96.0934065934066, + "grad_norm": 8.271793365478516, + "learning_rate": 1.9532967032967034e-06, + "loss": 0.1368, + "step": 34978 + }, + { + "epoch": 96.09615384615384, + "grad_norm": 13.46271800994873, + "learning_rate": 1.9519230769230768e-06, + "loss": 0.3175, + "step": 34979 + }, + { + "epoch": 96.0989010989011, + "grad_norm": 1.9634954929351807, + "learning_rate": 1.9505494505494506e-06, + "loss": 0.0291, + "step": 34980 + }, + { + "epoch": 96.10164835164835, + "grad_norm": 15.567618370056152, + "learning_rate": 1.9491758241758244e-06, + "loss": 0.1849, + "step": 34981 + }, + { + "epoch": 96.1043956043956, + "grad_norm": 10.821146011352539, + "learning_rate": 1.947802197802198e-06, + "loss": 0.1856, + "step": 34982 + }, + { + "epoch": 96.10714285714286, + "grad_norm": 1.407159686088562, + "learning_rate": 1.9464285714285717e-06, + "loss": 0.0186, + "step": 34983 + }, + { + "epoch": 96.10989010989012, + "grad_norm": 7.4144673347473145, + "learning_rate": 1.945054945054945e-06, + "loss": 0.1066, + "step": 34984 + }, + { + "epoch": 96.11263736263736, + "grad_norm": 8.126395225524902, + "learning_rate": 1.943681318681319e-06, + "loss": 0.1468, + "step": 34985 + }, + { + "epoch": 96.11538461538461, + "grad_norm": 27.85803985595703, + "learning_rate": 1.9423076923076923e-06, + "loss": 0.43, + "step": 34986 + }, + { + "epoch": 96.11813186813187, + "grad_norm": 9.025816917419434, + "learning_rate": 1.940934065934066e-06, + "loss": 0.1008, + "step": 34987 + }, + { + "epoch": 96.12087912087912, + "grad_norm": 24.16258430480957, + "learning_rate": 1.93956043956044e-06, + "loss": 0.1738, + "step": 34988 + }, + { + "epoch": 96.12362637362638, + "grad_norm": 3.5844967365264893, + "learning_rate": 1.9381868131868134e-06, + "loss": 0.0345, + "step": 34989 + }, + { + "epoch": 96.12637362637362, + "grad_norm": 14.647581100463867, + "learning_rate": 1.9368131868131868e-06, + "loss": 0.294, + "step": 34990 + }, + { + "epoch": 96.12912087912088, + "grad_norm": 10.539766311645508, + "learning_rate": 1.93543956043956e-06, + "loss": 0.1964, + "step": 34991 + }, + { + "epoch": 96.13186813186813, + "grad_norm": 15.809798240661621, + "learning_rate": 1.934065934065934e-06, + "loss": 0.2654, + "step": 34992 + }, + { + "epoch": 96.13461538461539, + "grad_norm": 9.654975891113281, + "learning_rate": 1.932692307692308e-06, + "loss": 0.3797, + "step": 34993 + }, + { + "epoch": 96.13736263736264, + "grad_norm": 13.685291290283203, + "learning_rate": 1.9313186813186813e-06, + "loss": 0.3106, + "step": 34994 + }, + { + "epoch": 96.14010989010988, + "grad_norm": 8.200736999511719, + "learning_rate": 1.929945054945055e-06, + "loss": 0.1165, + "step": 34995 + }, + { + "epoch": 96.14285714285714, + "grad_norm": 20.622833251953125, + "learning_rate": 1.9285714285714285e-06, + "loss": 0.2519, + "step": 34996 + }, + { + "epoch": 96.1456043956044, + "grad_norm": 15.041736602783203, + "learning_rate": 1.9271978021978023e-06, + "loss": 0.231, + "step": 34997 + }, + { + "epoch": 96.14835164835165, + "grad_norm": 6.306576728820801, + "learning_rate": 1.925824175824176e-06, + "loss": 0.1017, + "step": 34998 + }, + { + "epoch": 96.1510989010989, + "grad_norm": 8.056005477905273, + "learning_rate": 1.9244505494505496e-06, + "loss": 0.0992, + "step": 34999 + }, + { + "epoch": 96.15384615384616, + "grad_norm": 5.8580241203308105, + "learning_rate": 1.9230769230769234e-06, + "loss": 0.0743, + "step": 35000 + }, + { + "epoch": 96.1565934065934, + "grad_norm": 2.510680675506592, + "learning_rate": 1.921703296703297e-06, + "loss": 0.0354, + "step": 35001 + }, + { + "epoch": 96.15934065934066, + "grad_norm": 14.319323539733887, + "learning_rate": 1.9203296703296706e-06, + "loss": 0.2762, + "step": 35002 + }, + { + "epoch": 96.16208791208791, + "grad_norm": 2.602003812789917, + "learning_rate": 1.918956043956044e-06, + "loss": 0.0402, + "step": 35003 + }, + { + "epoch": 96.16483516483517, + "grad_norm": 10.143223762512207, + "learning_rate": 1.917582417582418e-06, + "loss": 0.1149, + "step": 35004 + }, + { + "epoch": 96.16758241758242, + "grad_norm": 12.732650756835938, + "learning_rate": 1.9162087912087913e-06, + "loss": 0.2893, + "step": 35005 + }, + { + "epoch": 96.17032967032966, + "grad_norm": 16.72901725769043, + "learning_rate": 1.9148351648351647e-06, + "loss": 0.308, + "step": 35006 + }, + { + "epoch": 96.17307692307692, + "grad_norm": 10.549657821655273, + "learning_rate": 1.9134615384615385e-06, + "loss": 0.2614, + "step": 35007 + }, + { + "epoch": 96.17582417582418, + "grad_norm": 11.133407592773438, + "learning_rate": 1.912087912087912e-06, + "loss": 0.1445, + "step": 35008 + }, + { + "epoch": 96.17857142857143, + "grad_norm": 8.540998458862305, + "learning_rate": 1.9107142857142858e-06, + "loss": 0.1479, + "step": 35009 + }, + { + "epoch": 96.18131868131869, + "grad_norm": 6.473060131072998, + "learning_rate": 1.9093406593406596e-06, + "loss": 0.0538, + "step": 35010 + }, + { + "epoch": 96.18406593406593, + "grad_norm": 11.693901062011719, + "learning_rate": 1.907967032967033e-06, + "loss": 0.2692, + "step": 35011 + }, + { + "epoch": 96.18681318681318, + "grad_norm": 7.672796726226807, + "learning_rate": 1.9065934065934068e-06, + "loss": 0.0833, + "step": 35012 + }, + { + "epoch": 96.18956043956044, + "grad_norm": 13.678311347961426, + "learning_rate": 1.9052197802197802e-06, + "loss": 0.1788, + "step": 35013 + }, + { + "epoch": 96.1923076923077, + "grad_norm": 18.099849700927734, + "learning_rate": 1.903846153846154e-06, + "loss": 0.3392, + "step": 35014 + }, + { + "epoch": 96.19505494505495, + "grad_norm": 14.512646675109863, + "learning_rate": 1.9024725274725275e-06, + "loss": 0.2522, + "step": 35015 + }, + { + "epoch": 96.1978021978022, + "grad_norm": 2.307546854019165, + "learning_rate": 1.901098901098901e-06, + "loss": 0.03, + "step": 35016 + }, + { + "epoch": 96.20054945054945, + "grad_norm": 7.521539688110352, + "learning_rate": 1.899725274725275e-06, + "loss": 0.0852, + "step": 35017 + }, + { + "epoch": 96.2032967032967, + "grad_norm": 14.267984390258789, + "learning_rate": 1.8983516483516483e-06, + "loss": 0.2986, + "step": 35018 + }, + { + "epoch": 96.20604395604396, + "grad_norm": 5.389867305755615, + "learning_rate": 1.8969780219780222e-06, + "loss": 0.0524, + "step": 35019 + }, + { + "epoch": 96.20879120879121, + "grad_norm": 16.887866973876953, + "learning_rate": 1.8956043956043956e-06, + "loss": 0.2231, + "step": 35020 + }, + { + "epoch": 96.21153846153847, + "grad_norm": 8.9320707321167, + "learning_rate": 1.8942307692307694e-06, + "loss": 0.1388, + "step": 35021 + }, + { + "epoch": 96.21428571428571, + "grad_norm": 18.76171112060547, + "learning_rate": 1.892857142857143e-06, + "loss": 0.2186, + "step": 35022 + }, + { + "epoch": 96.21703296703296, + "grad_norm": 2.7142505645751953, + "learning_rate": 1.8914835164835164e-06, + "loss": 0.0318, + "step": 35023 + }, + { + "epoch": 96.21978021978022, + "grad_norm": 2.714104652404785, + "learning_rate": 1.8901098901098902e-06, + "loss": 0.0178, + "step": 35024 + }, + { + "epoch": 96.22252747252747, + "grad_norm": 5.425991058349609, + "learning_rate": 1.8887362637362636e-06, + "loss": 0.0289, + "step": 35025 + }, + { + "epoch": 96.22527472527473, + "grad_norm": 4.523996829986572, + "learning_rate": 1.8873626373626375e-06, + "loss": 0.0743, + "step": 35026 + }, + { + "epoch": 96.22802197802197, + "grad_norm": 7.62962007522583, + "learning_rate": 1.8859890109890113e-06, + "loss": 0.112, + "step": 35027 + }, + { + "epoch": 96.23076923076923, + "grad_norm": 13.46627426147461, + "learning_rate": 1.8846153846153847e-06, + "loss": 0.2217, + "step": 35028 + }, + { + "epoch": 96.23351648351648, + "grad_norm": 3.0698840618133545, + "learning_rate": 1.8832417582417583e-06, + "loss": 0.0263, + "step": 35029 + }, + { + "epoch": 96.23626373626374, + "grad_norm": 1.3029447793960571, + "learning_rate": 1.881868131868132e-06, + "loss": 0.0183, + "step": 35030 + }, + { + "epoch": 96.23901098901099, + "grad_norm": 10.326644897460938, + "learning_rate": 1.8804945054945056e-06, + "loss": 0.3087, + "step": 35031 + }, + { + "epoch": 96.24175824175825, + "grad_norm": 6.673448085784912, + "learning_rate": 1.879120879120879e-06, + "loss": 0.0876, + "step": 35032 + }, + { + "epoch": 96.24450549450549, + "grad_norm": 18.380218505859375, + "learning_rate": 1.8777472527472528e-06, + "loss": 0.187, + "step": 35033 + }, + { + "epoch": 96.24725274725274, + "grad_norm": 3.675279140472412, + "learning_rate": 1.8763736263736266e-06, + "loss": 0.0637, + "step": 35034 + }, + { + "epoch": 96.25, + "grad_norm": 5.628438472747803, + "learning_rate": 1.875e-06, + "loss": 0.0752, + "step": 35035 + }, + { + "epoch": 96.25274725274726, + "grad_norm": 8.620987892150879, + "learning_rate": 1.8736263736263739e-06, + "loss": 0.1887, + "step": 35036 + }, + { + "epoch": 96.25549450549451, + "grad_norm": 16.547237396240234, + "learning_rate": 1.8722527472527473e-06, + "loss": 0.3569, + "step": 35037 + }, + { + "epoch": 96.25824175824175, + "grad_norm": 10.03154468536377, + "learning_rate": 1.870879120879121e-06, + "loss": 0.1918, + "step": 35038 + }, + { + "epoch": 96.26098901098901, + "grad_norm": 5.190057277679443, + "learning_rate": 1.8695054945054947e-06, + "loss": 0.1017, + "step": 35039 + }, + { + "epoch": 96.26373626373626, + "grad_norm": 6.480632305145264, + "learning_rate": 1.8681318681318681e-06, + "loss": 0.1835, + "step": 35040 + }, + { + "epoch": 96.26648351648352, + "grad_norm": 14.32051944732666, + "learning_rate": 1.866758241758242e-06, + "loss": 0.1367, + "step": 35041 + }, + { + "epoch": 96.26923076923077, + "grad_norm": 11.856884002685547, + "learning_rate": 1.8653846153846154e-06, + "loss": 0.2706, + "step": 35042 + }, + { + "epoch": 96.27197802197803, + "grad_norm": 6.72518253326416, + "learning_rate": 1.8640109890109892e-06, + "loss": 0.0977, + "step": 35043 + }, + { + "epoch": 96.27472527472527, + "grad_norm": 9.975485801696777, + "learning_rate": 1.8626373626373626e-06, + "loss": 0.2447, + "step": 35044 + }, + { + "epoch": 96.27747252747253, + "grad_norm": 5.650332450866699, + "learning_rate": 1.8612637362637362e-06, + "loss": 0.092, + "step": 35045 + }, + { + "epoch": 96.28021978021978, + "grad_norm": 10.952089309692383, + "learning_rate": 1.85989010989011e-06, + "loss": 0.197, + "step": 35046 + }, + { + "epoch": 96.28296703296704, + "grad_norm": 7.6979451179504395, + "learning_rate": 1.8585164835164835e-06, + "loss": 0.149, + "step": 35047 + }, + { + "epoch": 96.28571428571429, + "grad_norm": 4.5012898445129395, + "learning_rate": 1.8571428571428573e-06, + "loss": 0.0663, + "step": 35048 + }, + { + "epoch": 96.28846153846153, + "grad_norm": 21.5616455078125, + "learning_rate": 1.8557692307692307e-06, + "loss": 0.4697, + "step": 35049 + }, + { + "epoch": 96.29120879120879, + "grad_norm": 8.890922546386719, + "learning_rate": 1.8543956043956045e-06, + "loss": 0.143, + "step": 35050 + }, + { + "epoch": 96.29395604395604, + "grad_norm": 1.9622305631637573, + "learning_rate": 1.8530219780219782e-06, + "loss": 0.0147, + "step": 35051 + }, + { + "epoch": 96.2967032967033, + "grad_norm": 10.616893768310547, + "learning_rate": 1.8516483516483518e-06, + "loss": 0.1535, + "step": 35052 + }, + { + "epoch": 96.29945054945055, + "grad_norm": 14.94515609741211, + "learning_rate": 1.8502747252747254e-06, + "loss": 0.2395, + "step": 35053 + }, + { + "epoch": 96.3021978021978, + "grad_norm": 4.561728000640869, + "learning_rate": 1.8489010989010988e-06, + "loss": 0.0481, + "step": 35054 + }, + { + "epoch": 96.30494505494505, + "grad_norm": 17.324678421020508, + "learning_rate": 1.8475274725274726e-06, + "loss": 0.1655, + "step": 35055 + }, + { + "epoch": 96.3076923076923, + "grad_norm": 5.257697582244873, + "learning_rate": 1.8461538461538465e-06, + "loss": 0.0717, + "step": 35056 + }, + { + "epoch": 96.31043956043956, + "grad_norm": 5.318710803985596, + "learning_rate": 1.8447802197802199e-06, + "loss": 0.1316, + "step": 35057 + }, + { + "epoch": 96.31318681318682, + "grad_norm": 9.593087196350098, + "learning_rate": 1.8434065934065937e-06, + "loss": 0.2462, + "step": 35058 + }, + { + "epoch": 96.31593406593407, + "grad_norm": 13.046818733215332, + "learning_rate": 1.842032967032967e-06, + "loss": 0.1904, + "step": 35059 + }, + { + "epoch": 96.31868131868131, + "grad_norm": 9.737322807312012, + "learning_rate": 1.8406593406593407e-06, + "loss": 0.1954, + "step": 35060 + }, + { + "epoch": 96.32142857142857, + "grad_norm": 1.057909607887268, + "learning_rate": 1.8392857142857141e-06, + "loss": 0.0068, + "step": 35061 + }, + { + "epoch": 96.32417582417582, + "grad_norm": 9.155146598815918, + "learning_rate": 1.837912087912088e-06, + "loss": 0.2097, + "step": 35062 + }, + { + "epoch": 96.32692307692308, + "grad_norm": 5.368391036987305, + "learning_rate": 1.8365384615384618e-06, + "loss": 0.0799, + "step": 35063 + }, + { + "epoch": 96.32967032967034, + "grad_norm": 20.904621124267578, + "learning_rate": 1.8351648351648352e-06, + "loss": 0.5678, + "step": 35064 + }, + { + "epoch": 96.33241758241758, + "grad_norm": 10.126054763793945, + "learning_rate": 1.833791208791209e-06, + "loss": 0.1264, + "step": 35065 + }, + { + "epoch": 96.33516483516483, + "grad_norm": 4.546227931976318, + "learning_rate": 1.8324175824175824e-06, + "loss": 0.0509, + "step": 35066 + }, + { + "epoch": 96.33791208791209, + "grad_norm": 17.643718719482422, + "learning_rate": 1.831043956043956e-06, + "loss": 0.3258, + "step": 35067 + }, + { + "epoch": 96.34065934065934, + "grad_norm": 11.268183708190918, + "learning_rate": 1.8296703296703299e-06, + "loss": 0.077, + "step": 35068 + }, + { + "epoch": 96.3434065934066, + "grad_norm": 10.353537559509277, + "learning_rate": 1.8282967032967033e-06, + "loss": 0.254, + "step": 35069 + }, + { + "epoch": 96.34615384615384, + "grad_norm": 19.14505386352539, + "learning_rate": 1.8269230769230771e-06, + "loss": 0.4294, + "step": 35070 + }, + { + "epoch": 96.3489010989011, + "grad_norm": 18.133005142211914, + "learning_rate": 1.8255494505494505e-06, + "loss": 0.2932, + "step": 35071 + }, + { + "epoch": 96.35164835164835, + "grad_norm": 21.70854949951172, + "learning_rate": 1.8241758241758244e-06, + "loss": 0.5393, + "step": 35072 + }, + { + "epoch": 96.3543956043956, + "grad_norm": 25.934356689453125, + "learning_rate": 1.8228021978021978e-06, + "loss": 0.6667, + "step": 35073 + }, + { + "epoch": 96.35714285714286, + "grad_norm": 12.937007904052734, + "learning_rate": 1.8214285714285716e-06, + "loss": 0.1258, + "step": 35074 + }, + { + "epoch": 96.35989010989012, + "grad_norm": 16.672529220581055, + "learning_rate": 1.8200549450549452e-06, + "loss": 0.2929, + "step": 35075 + }, + { + "epoch": 96.36263736263736, + "grad_norm": 8.397762298583984, + "learning_rate": 1.8186813186813186e-06, + "loss": 0.1155, + "step": 35076 + }, + { + "epoch": 96.36538461538461, + "grad_norm": 10.923741340637207, + "learning_rate": 1.8173076923076925e-06, + "loss": 0.0718, + "step": 35077 + }, + { + "epoch": 96.36813186813187, + "grad_norm": 11.059638977050781, + "learning_rate": 1.8159340659340659e-06, + "loss": 0.0981, + "step": 35078 + }, + { + "epoch": 96.37087912087912, + "grad_norm": 14.126886367797852, + "learning_rate": 1.8145604395604397e-06, + "loss": 0.4126, + "step": 35079 + }, + { + "epoch": 96.37362637362638, + "grad_norm": 14.473342895507812, + "learning_rate": 1.8131868131868135e-06, + "loss": 0.3369, + "step": 35080 + }, + { + "epoch": 96.37637362637362, + "grad_norm": 24.05013084411621, + "learning_rate": 1.811813186813187e-06, + "loss": 0.3171, + "step": 35081 + }, + { + "epoch": 96.37912087912088, + "grad_norm": 10.38802433013916, + "learning_rate": 1.8104395604395605e-06, + "loss": 0.175, + "step": 35082 + }, + { + "epoch": 96.38186813186813, + "grad_norm": 13.606204986572266, + "learning_rate": 1.809065934065934e-06, + "loss": 0.1363, + "step": 35083 + }, + { + "epoch": 96.38461538461539, + "grad_norm": 4.987763404846191, + "learning_rate": 1.8076923076923078e-06, + "loss": 0.0606, + "step": 35084 + }, + { + "epoch": 96.38736263736264, + "grad_norm": 7.325535297393799, + "learning_rate": 1.8063186813186812e-06, + "loss": 0.1329, + "step": 35085 + }, + { + "epoch": 96.39010989010988, + "grad_norm": 25.552282333374023, + "learning_rate": 1.804945054945055e-06, + "loss": 0.9161, + "step": 35086 + }, + { + "epoch": 96.39285714285714, + "grad_norm": 8.165956497192383, + "learning_rate": 1.8035714285714288e-06, + "loss": 0.1126, + "step": 35087 + }, + { + "epoch": 96.3956043956044, + "grad_norm": 9.937461853027344, + "learning_rate": 1.8021978021978023e-06, + "loss": 0.0773, + "step": 35088 + }, + { + "epoch": 96.39835164835165, + "grad_norm": 13.328043937683105, + "learning_rate": 1.8008241758241759e-06, + "loss": 0.2179, + "step": 35089 + }, + { + "epoch": 96.4010989010989, + "grad_norm": 21.14877700805664, + "learning_rate": 1.7994505494505495e-06, + "loss": 0.3268, + "step": 35090 + }, + { + "epoch": 96.40384615384616, + "grad_norm": 7.240856170654297, + "learning_rate": 1.7980769230769231e-06, + "loss": 0.151, + "step": 35091 + }, + { + "epoch": 96.4065934065934, + "grad_norm": 16.551708221435547, + "learning_rate": 1.796703296703297e-06, + "loss": 0.2604, + "step": 35092 + }, + { + "epoch": 96.40934065934066, + "grad_norm": 17.30012321472168, + "learning_rate": 1.7953296703296703e-06, + "loss": 0.3073, + "step": 35093 + }, + { + "epoch": 96.41208791208791, + "grad_norm": 15.820499420166016, + "learning_rate": 1.7939560439560442e-06, + "loss": 0.4724, + "step": 35094 + }, + { + "epoch": 96.41483516483517, + "grad_norm": 22.262012481689453, + "learning_rate": 1.7925824175824176e-06, + "loss": 1.0081, + "step": 35095 + }, + { + "epoch": 96.41758241758242, + "grad_norm": 19.47930145263672, + "learning_rate": 1.7912087912087914e-06, + "loss": 0.5619, + "step": 35096 + }, + { + "epoch": 96.42032967032966, + "grad_norm": 17.19125747680664, + "learning_rate": 1.789835164835165e-06, + "loss": 0.4364, + "step": 35097 + }, + { + "epoch": 96.42307692307692, + "grad_norm": 0.9078928828239441, + "learning_rate": 1.7884615384615384e-06, + "loss": 0.0125, + "step": 35098 + }, + { + "epoch": 96.42582417582418, + "grad_norm": 13.493926048278809, + "learning_rate": 1.7870879120879123e-06, + "loss": 0.1672, + "step": 35099 + }, + { + "epoch": 96.42857142857143, + "grad_norm": 12.01185417175293, + "learning_rate": 1.7857142857142857e-06, + "loss": 0.3181, + "step": 35100 + }, + { + "epoch": 96.43131868131869, + "grad_norm": 19.352489471435547, + "learning_rate": 1.7843406593406595e-06, + "loss": 0.5557, + "step": 35101 + }, + { + "epoch": 96.43406593406593, + "grad_norm": 8.556859970092773, + "learning_rate": 1.782967032967033e-06, + "loss": 0.0773, + "step": 35102 + }, + { + "epoch": 96.43681318681318, + "grad_norm": 8.357794761657715, + "learning_rate": 1.7815934065934067e-06, + "loss": 0.1352, + "step": 35103 + }, + { + "epoch": 96.43956043956044, + "grad_norm": 11.798213958740234, + "learning_rate": 1.7802197802197804e-06, + "loss": 0.1612, + "step": 35104 + }, + { + "epoch": 96.4423076923077, + "grad_norm": 4.8814496994018555, + "learning_rate": 1.7788461538461538e-06, + "loss": 0.095, + "step": 35105 + }, + { + "epoch": 96.44505494505495, + "grad_norm": 20.417444229125977, + "learning_rate": 1.7774725274725276e-06, + "loss": 0.4367, + "step": 35106 + }, + { + "epoch": 96.4478021978022, + "grad_norm": 17.175037384033203, + "learning_rate": 1.776098901098901e-06, + "loss": 0.2682, + "step": 35107 + }, + { + "epoch": 96.45054945054945, + "grad_norm": 3.7806129455566406, + "learning_rate": 1.7747252747252748e-06, + "loss": 0.0512, + "step": 35108 + }, + { + "epoch": 96.4532967032967, + "grad_norm": 0.48230302333831787, + "learning_rate": 1.7733516483516487e-06, + "loss": 0.0057, + "step": 35109 + }, + { + "epoch": 96.45604395604396, + "grad_norm": 2.804352045059204, + "learning_rate": 1.771978021978022e-06, + "loss": 0.0643, + "step": 35110 + }, + { + "epoch": 96.45879120879121, + "grad_norm": 19.243301391601562, + "learning_rate": 1.7706043956043957e-06, + "loss": 0.4424, + "step": 35111 + }, + { + "epoch": 96.46153846153847, + "grad_norm": 14.622413635253906, + "learning_rate": 1.769230769230769e-06, + "loss": 0.204, + "step": 35112 + }, + { + "epoch": 96.46428571428571, + "grad_norm": 11.856476783752441, + "learning_rate": 1.767857142857143e-06, + "loss": 0.3668, + "step": 35113 + }, + { + "epoch": 96.46703296703296, + "grad_norm": 11.215583801269531, + "learning_rate": 1.7664835164835163e-06, + "loss": 0.1776, + "step": 35114 + }, + { + "epoch": 96.46978021978022, + "grad_norm": 8.643506050109863, + "learning_rate": 1.7651098901098902e-06, + "loss": 0.0993, + "step": 35115 + }, + { + "epoch": 96.47252747252747, + "grad_norm": 17.001554489135742, + "learning_rate": 1.763736263736264e-06, + "loss": 0.3786, + "step": 35116 + }, + { + "epoch": 96.47527472527473, + "grad_norm": 8.545741081237793, + "learning_rate": 1.7623626373626374e-06, + "loss": 0.1257, + "step": 35117 + }, + { + "epoch": 96.47802197802197, + "grad_norm": 11.961407661437988, + "learning_rate": 1.7609890109890112e-06, + "loss": 0.2416, + "step": 35118 + }, + { + "epoch": 96.48076923076923, + "grad_norm": 6.997363090515137, + "learning_rate": 1.7596153846153846e-06, + "loss": 0.1011, + "step": 35119 + }, + { + "epoch": 96.48351648351648, + "grad_norm": 20.54118537902832, + "learning_rate": 1.7582417582417583e-06, + "loss": 0.1179, + "step": 35120 + }, + { + "epoch": 96.48626373626374, + "grad_norm": 3.08978271484375, + "learning_rate": 1.756868131868132e-06, + "loss": 0.021, + "step": 35121 + }, + { + "epoch": 96.48901098901099, + "grad_norm": 26.312807083129883, + "learning_rate": 1.7554945054945055e-06, + "loss": 0.331, + "step": 35122 + }, + { + "epoch": 96.49175824175825, + "grad_norm": 13.455716133117676, + "learning_rate": 1.7541208791208793e-06, + "loss": 0.2446, + "step": 35123 + }, + { + "epoch": 96.49450549450549, + "grad_norm": 16.839275360107422, + "learning_rate": 1.7527472527472527e-06, + "loss": 0.3616, + "step": 35124 + }, + { + "epoch": 96.49725274725274, + "grad_norm": 1.5364371538162231, + "learning_rate": 1.7513736263736266e-06, + "loss": 0.0146, + "step": 35125 + }, + { + "epoch": 96.5, + "grad_norm": 12.916337966918945, + "learning_rate": 1.7500000000000002e-06, + "loss": 0.1272, + "step": 35126 + }, + { + "epoch": 96.50274725274726, + "grad_norm": 9.625659942626953, + "learning_rate": 1.7486263736263736e-06, + "loss": 0.0972, + "step": 35127 + }, + { + "epoch": 96.50549450549451, + "grad_norm": 9.900949478149414, + "learning_rate": 1.7472527472527474e-06, + "loss": 0.0847, + "step": 35128 + }, + { + "epoch": 96.50824175824175, + "grad_norm": 5.0894551277160645, + "learning_rate": 1.7458791208791208e-06, + "loss": 0.0642, + "step": 35129 + }, + { + "epoch": 96.51098901098901, + "grad_norm": 9.125957489013672, + "learning_rate": 1.7445054945054947e-06, + "loss": 0.2272, + "step": 35130 + }, + { + "epoch": 96.51373626373626, + "grad_norm": 29.208860397338867, + "learning_rate": 1.743131868131868e-06, + "loss": 0.6822, + "step": 35131 + }, + { + "epoch": 96.51648351648352, + "grad_norm": 0.8641111254692078, + "learning_rate": 1.741758241758242e-06, + "loss": 0.0129, + "step": 35132 + }, + { + "epoch": 96.51923076923077, + "grad_norm": 1.4029027223587036, + "learning_rate": 1.7403846153846155e-06, + "loss": 0.0136, + "step": 35133 + }, + { + "epoch": 96.52197802197803, + "grad_norm": 4.469919681549072, + "learning_rate": 1.739010989010989e-06, + "loss": 0.0656, + "step": 35134 + }, + { + "epoch": 96.52472527472527, + "grad_norm": 15.805482864379883, + "learning_rate": 1.7376373626373628e-06, + "loss": 0.2647, + "step": 35135 + }, + { + "epoch": 96.52747252747253, + "grad_norm": 6.044196605682373, + "learning_rate": 1.7362637362637362e-06, + "loss": 0.0736, + "step": 35136 + }, + { + "epoch": 96.53021978021978, + "grad_norm": 8.27637004852295, + "learning_rate": 1.73489010989011e-06, + "loss": 0.0995, + "step": 35137 + }, + { + "epoch": 96.53296703296704, + "grad_norm": 5.743278980255127, + "learning_rate": 1.7335164835164838e-06, + "loss": 0.0858, + "step": 35138 + }, + { + "epoch": 96.53571428571429, + "grad_norm": 12.564605712890625, + "learning_rate": 1.7321428571428572e-06, + "loss": 0.2091, + "step": 35139 + }, + { + "epoch": 96.53846153846153, + "grad_norm": 6.837008953094482, + "learning_rate": 1.7307692307692308e-06, + "loss": 0.0886, + "step": 35140 + }, + { + "epoch": 96.54120879120879, + "grad_norm": 9.405250549316406, + "learning_rate": 1.7293956043956045e-06, + "loss": 0.1917, + "step": 35141 + }, + { + "epoch": 96.54395604395604, + "grad_norm": 3.248244285583496, + "learning_rate": 1.728021978021978e-06, + "loss": 0.1081, + "step": 35142 + }, + { + "epoch": 96.5467032967033, + "grad_norm": 7.458176136016846, + "learning_rate": 1.7266483516483515e-06, + "loss": 0.1014, + "step": 35143 + }, + { + "epoch": 96.54945054945055, + "grad_norm": 9.239167213439941, + "learning_rate": 1.7252747252747253e-06, + "loss": 0.0738, + "step": 35144 + }, + { + "epoch": 96.5521978021978, + "grad_norm": 15.76105785369873, + "learning_rate": 1.7239010989010992e-06, + "loss": 0.2488, + "step": 35145 + }, + { + "epoch": 96.55494505494505, + "grad_norm": 25.445228576660156, + "learning_rate": 1.7225274725274726e-06, + "loss": 0.4597, + "step": 35146 + }, + { + "epoch": 96.5576923076923, + "grad_norm": 8.513289451599121, + "learning_rate": 1.7211538461538464e-06, + "loss": 0.069, + "step": 35147 + }, + { + "epoch": 96.56043956043956, + "grad_norm": 9.331833839416504, + "learning_rate": 1.7197802197802198e-06, + "loss": 0.1994, + "step": 35148 + }, + { + "epoch": 96.56318681318682, + "grad_norm": 6.08896541595459, + "learning_rate": 1.7184065934065934e-06, + "loss": 0.1258, + "step": 35149 + }, + { + "epoch": 96.56593406593407, + "grad_norm": 17.332054138183594, + "learning_rate": 1.7170329670329672e-06, + "loss": 0.5292, + "step": 35150 + }, + { + "epoch": 96.56868131868131, + "grad_norm": 2.1369853019714355, + "learning_rate": 1.7156593406593407e-06, + "loss": 0.0219, + "step": 35151 + }, + { + "epoch": 96.57142857142857, + "grad_norm": 19.80316734313965, + "learning_rate": 1.7142857142857145e-06, + "loss": 0.3767, + "step": 35152 + }, + { + "epoch": 96.57417582417582, + "grad_norm": 7.403489589691162, + "learning_rate": 1.7129120879120879e-06, + "loss": 0.1181, + "step": 35153 + }, + { + "epoch": 96.57692307692308, + "grad_norm": 10.569910049438477, + "learning_rate": 1.7115384615384617e-06, + "loss": 0.125, + "step": 35154 + }, + { + "epoch": 96.57967032967034, + "grad_norm": 5.9737348556518555, + "learning_rate": 1.7101648351648353e-06, + "loss": 0.1044, + "step": 35155 + }, + { + "epoch": 96.58241758241758, + "grad_norm": 2.866741180419922, + "learning_rate": 1.7087912087912087e-06, + "loss": 0.0343, + "step": 35156 + }, + { + "epoch": 96.58516483516483, + "grad_norm": 16.034908294677734, + "learning_rate": 1.7074175824175826e-06, + "loss": 0.187, + "step": 35157 + }, + { + "epoch": 96.58791208791209, + "grad_norm": 5.2183966636657715, + "learning_rate": 1.706043956043956e-06, + "loss": 0.0464, + "step": 35158 + }, + { + "epoch": 96.59065934065934, + "grad_norm": 16.21923828125, + "learning_rate": 1.7046703296703298e-06, + "loss": 0.2545, + "step": 35159 + }, + { + "epoch": 96.5934065934066, + "grad_norm": 5.643706798553467, + "learning_rate": 1.7032967032967032e-06, + "loss": 0.0606, + "step": 35160 + }, + { + "epoch": 96.59615384615384, + "grad_norm": 10.372241020202637, + "learning_rate": 1.701923076923077e-06, + "loss": 0.1056, + "step": 35161 + }, + { + "epoch": 96.5989010989011, + "grad_norm": 14.892528533935547, + "learning_rate": 1.7005494505494507e-06, + "loss": 0.4804, + "step": 35162 + }, + { + "epoch": 96.60164835164835, + "grad_norm": 6.687585830688477, + "learning_rate": 1.6991758241758243e-06, + "loss": 0.0611, + "step": 35163 + }, + { + "epoch": 96.6043956043956, + "grad_norm": 5.570314884185791, + "learning_rate": 1.697802197802198e-06, + "loss": 0.058, + "step": 35164 + }, + { + "epoch": 96.60714285714286, + "grad_norm": 29.22713279724121, + "learning_rate": 1.6964285714285713e-06, + "loss": 0.483, + "step": 35165 + }, + { + "epoch": 96.60989010989012, + "grad_norm": 15.82418155670166, + "learning_rate": 1.6950549450549451e-06, + "loss": 0.2506, + "step": 35166 + }, + { + "epoch": 96.61263736263736, + "grad_norm": 7.334375381469727, + "learning_rate": 1.693681318681319e-06, + "loss": 0.1191, + "step": 35167 + }, + { + "epoch": 96.61538461538461, + "grad_norm": 7.667205333709717, + "learning_rate": 1.6923076923076924e-06, + "loss": 0.0543, + "step": 35168 + }, + { + "epoch": 96.61813186813187, + "grad_norm": 18.872629165649414, + "learning_rate": 1.6909340659340662e-06, + "loss": 0.2698, + "step": 35169 + }, + { + "epoch": 96.62087912087912, + "grad_norm": 8.531120300292969, + "learning_rate": 1.6895604395604396e-06, + "loss": 0.1105, + "step": 35170 + }, + { + "epoch": 96.62362637362638, + "grad_norm": 14.919389724731445, + "learning_rate": 1.6881868131868132e-06, + "loss": 0.2152, + "step": 35171 + }, + { + "epoch": 96.62637362637362, + "grad_norm": 21.037723541259766, + "learning_rate": 1.6868131868131866e-06, + "loss": 0.5653, + "step": 35172 + }, + { + "epoch": 96.62912087912088, + "grad_norm": 9.268341064453125, + "learning_rate": 1.6854395604395605e-06, + "loss": 0.2208, + "step": 35173 + }, + { + "epoch": 96.63186813186813, + "grad_norm": 21.37349510192871, + "learning_rate": 1.6840659340659343e-06, + "loss": 0.4966, + "step": 35174 + }, + { + "epoch": 96.63461538461539, + "grad_norm": 6.901875019073486, + "learning_rate": 1.6826923076923077e-06, + "loss": 0.0988, + "step": 35175 + }, + { + "epoch": 96.63736263736264, + "grad_norm": 5.4869160652160645, + "learning_rate": 1.6813186813186815e-06, + "loss": 0.1036, + "step": 35176 + }, + { + "epoch": 96.64010989010988, + "grad_norm": 24.462528228759766, + "learning_rate": 1.679945054945055e-06, + "loss": 0.224, + "step": 35177 + }, + { + "epoch": 96.64285714285714, + "grad_norm": 9.466605186462402, + "learning_rate": 1.6785714285714286e-06, + "loss": 0.1366, + "step": 35178 + }, + { + "epoch": 96.6456043956044, + "grad_norm": 7.001800537109375, + "learning_rate": 1.6771978021978024e-06, + "loss": 0.0778, + "step": 35179 + }, + { + "epoch": 96.64835164835165, + "grad_norm": 9.798270225524902, + "learning_rate": 1.6758241758241758e-06, + "loss": 0.2004, + "step": 35180 + }, + { + "epoch": 96.6510989010989, + "grad_norm": 8.343587875366211, + "learning_rate": 1.6744505494505496e-06, + "loss": 0.1241, + "step": 35181 + }, + { + "epoch": 96.65384615384616, + "grad_norm": 5.125022888183594, + "learning_rate": 1.673076923076923e-06, + "loss": 0.0673, + "step": 35182 + }, + { + "epoch": 96.6565934065934, + "grad_norm": 8.257365226745605, + "learning_rate": 1.6717032967032969e-06, + "loss": 0.1091, + "step": 35183 + }, + { + "epoch": 96.65934065934066, + "grad_norm": 1.6315428018569946, + "learning_rate": 1.6703296703296705e-06, + "loss": 0.018, + "step": 35184 + }, + { + "epoch": 96.66208791208791, + "grad_norm": 12.91743278503418, + "learning_rate": 1.668956043956044e-06, + "loss": 0.1838, + "step": 35185 + }, + { + "epoch": 96.66483516483517, + "grad_norm": 2.013932466506958, + "learning_rate": 1.6675824175824177e-06, + "loss": 0.0261, + "step": 35186 + }, + { + "epoch": 96.66758241758242, + "grad_norm": 6.752343654632568, + "learning_rate": 1.6662087912087911e-06, + "loss": 0.0996, + "step": 35187 + }, + { + "epoch": 96.67032967032966, + "grad_norm": 8.137910842895508, + "learning_rate": 1.664835164835165e-06, + "loss": 0.1347, + "step": 35188 + }, + { + "epoch": 96.67307692307692, + "grad_norm": 5.988504886627197, + "learning_rate": 1.6634615384615384e-06, + "loss": 0.1048, + "step": 35189 + }, + { + "epoch": 96.67582417582418, + "grad_norm": 11.55612850189209, + "learning_rate": 1.6620879120879122e-06, + "loss": 0.1175, + "step": 35190 + }, + { + "epoch": 96.67857142857143, + "grad_norm": 10.469618797302246, + "learning_rate": 1.660714285714286e-06, + "loss": 0.1682, + "step": 35191 + }, + { + "epoch": 96.68131868131869, + "grad_norm": 14.532567977905273, + "learning_rate": 1.6593406593406594e-06, + "loss": 0.2614, + "step": 35192 + }, + { + "epoch": 96.68406593406593, + "grad_norm": 1.2064787149429321, + "learning_rate": 1.657967032967033e-06, + "loss": 0.0141, + "step": 35193 + }, + { + "epoch": 96.68681318681318, + "grad_norm": 15.593862533569336, + "learning_rate": 1.6565934065934065e-06, + "loss": 0.3425, + "step": 35194 + }, + { + "epoch": 96.68956043956044, + "grad_norm": 20.940845489501953, + "learning_rate": 1.6552197802197803e-06, + "loss": 0.4572, + "step": 35195 + }, + { + "epoch": 96.6923076923077, + "grad_norm": 6.463608741760254, + "learning_rate": 1.6538461538461541e-06, + "loss": 0.1092, + "step": 35196 + }, + { + "epoch": 96.69505494505495, + "grad_norm": 2.8315930366516113, + "learning_rate": 1.6524725274725275e-06, + "loss": 0.0493, + "step": 35197 + }, + { + "epoch": 96.6978021978022, + "grad_norm": 1.6725218296051025, + "learning_rate": 1.6510989010989014e-06, + "loss": 0.0157, + "step": 35198 + }, + { + "epoch": 96.70054945054945, + "grad_norm": 14.945333480834961, + "learning_rate": 1.6497252747252748e-06, + "loss": 0.238, + "step": 35199 + }, + { + "epoch": 96.7032967032967, + "grad_norm": 2.0777785778045654, + "learning_rate": 1.6483516483516484e-06, + "loss": 0.0301, + "step": 35200 + }, + { + "epoch": 96.70604395604396, + "grad_norm": 11.053043365478516, + "learning_rate": 1.6469780219780218e-06, + "loss": 0.1334, + "step": 35201 + }, + { + "epoch": 96.70879120879121, + "grad_norm": 1.4235836267471313, + "learning_rate": 1.6456043956043956e-06, + "loss": 0.0178, + "step": 35202 + }, + { + "epoch": 96.71153846153847, + "grad_norm": 9.328828811645508, + "learning_rate": 1.6442307692307695e-06, + "loss": 0.1453, + "step": 35203 + }, + { + "epoch": 96.71428571428571, + "grad_norm": 16.50547981262207, + "learning_rate": 1.6428571428571429e-06, + "loss": 0.479, + "step": 35204 + }, + { + "epoch": 96.71703296703296, + "grad_norm": 13.84426212310791, + "learning_rate": 1.6414835164835167e-06, + "loss": 0.207, + "step": 35205 + }, + { + "epoch": 96.71978021978022, + "grad_norm": 2.990382671356201, + "learning_rate": 1.64010989010989e-06, + "loss": 0.0301, + "step": 35206 + }, + { + "epoch": 96.72252747252747, + "grad_norm": 21.7225284576416, + "learning_rate": 1.638736263736264e-06, + "loss": 0.5828, + "step": 35207 + }, + { + "epoch": 96.72527472527473, + "grad_norm": 22.12575912475586, + "learning_rate": 1.6373626373626375e-06, + "loss": 0.3815, + "step": 35208 + }, + { + "epoch": 96.72802197802197, + "grad_norm": 14.02383041381836, + "learning_rate": 1.635989010989011e-06, + "loss": 0.2899, + "step": 35209 + }, + { + "epoch": 96.73076923076923, + "grad_norm": 3.4588420391082764, + "learning_rate": 1.6346153846153848e-06, + "loss": 0.0558, + "step": 35210 + }, + { + "epoch": 96.73351648351648, + "grad_norm": 21.11509132385254, + "learning_rate": 1.6332417582417582e-06, + "loss": 0.1909, + "step": 35211 + }, + { + "epoch": 96.73626373626374, + "grad_norm": 9.53576374053955, + "learning_rate": 1.631868131868132e-06, + "loss": 0.1527, + "step": 35212 + }, + { + "epoch": 96.73901098901099, + "grad_norm": 8.083700180053711, + "learning_rate": 1.6304945054945054e-06, + "loss": 0.0983, + "step": 35213 + }, + { + "epoch": 96.74175824175825, + "grad_norm": 16.177967071533203, + "learning_rate": 1.6291208791208793e-06, + "loss": 0.1923, + "step": 35214 + }, + { + "epoch": 96.74450549450549, + "grad_norm": 4.498669624328613, + "learning_rate": 1.6277472527472529e-06, + "loss": 0.0482, + "step": 35215 + }, + { + "epoch": 96.74725274725274, + "grad_norm": 21.831607818603516, + "learning_rate": 1.6263736263736263e-06, + "loss": 0.3916, + "step": 35216 + }, + { + "epoch": 96.75, + "grad_norm": 7.768584728240967, + "learning_rate": 1.6250000000000001e-06, + "loss": 0.1084, + "step": 35217 + }, + { + "epoch": 96.75274725274726, + "grad_norm": 22.132566452026367, + "learning_rate": 1.6236263736263735e-06, + "loss": 0.5664, + "step": 35218 + }, + { + "epoch": 96.75549450549451, + "grad_norm": 5.672501087188721, + "learning_rate": 1.6222527472527473e-06, + "loss": 0.0496, + "step": 35219 + }, + { + "epoch": 96.75824175824175, + "grad_norm": 5.210914611816406, + "learning_rate": 1.6208791208791212e-06, + "loss": 0.06, + "step": 35220 + }, + { + "epoch": 96.76098901098901, + "grad_norm": 20.542943954467773, + "learning_rate": 1.6195054945054946e-06, + "loss": 0.362, + "step": 35221 + }, + { + "epoch": 96.76373626373626, + "grad_norm": 25.223403930664062, + "learning_rate": 1.6181318681318682e-06, + "loss": 0.5031, + "step": 35222 + }, + { + "epoch": 96.76648351648352, + "grad_norm": 21.31747817993164, + "learning_rate": 1.6167582417582416e-06, + "loss": 0.377, + "step": 35223 + }, + { + "epoch": 96.76923076923077, + "grad_norm": 23.894533157348633, + "learning_rate": 1.6153846153846154e-06, + "loss": 0.3969, + "step": 35224 + }, + { + "epoch": 96.77197802197803, + "grad_norm": 2.3664627075195312, + "learning_rate": 1.6140109890109893e-06, + "loss": 0.0324, + "step": 35225 + }, + { + "epoch": 96.77472527472527, + "grad_norm": 1.7747920751571655, + "learning_rate": 1.6126373626373627e-06, + "loss": 0.0243, + "step": 35226 + }, + { + "epoch": 96.77747252747253, + "grad_norm": 21.645042419433594, + "learning_rate": 1.6112637362637365e-06, + "loss": 0.3582, + "step": 35227 + }, + { + "epoch": 96.78021978021978, + "grad_norm": 4.3051910400390625, + "learning_rate": 1.60989010989011e-06, + "loss": 0.0474, + "step": 35228 + }, + { + "epoch": 96.78296703296704, + "grad_norm": 4.367342472076416, + "learning_rate": 1.6085164835164835e-06, + "loss": 0.0451, + "step": 35229 + }, + { + "epoch": 96.78571428571429, + "grad_norm": 6.274427890777588, + "learning_rate": 1.6071428571428572e-06, + "loss": 0.0734, + "step": 35230 + }, + { + "epoch": 96.78846153846153, + "grad_norm": 5.36728048324585, + "learning_rate": 1.6057692307692308e-06, + "loss": 0.0624, + "step": 35231 + }, + { + "epoch": 96.79120879120879, + "grad_norm": 23.272579193115234, + "learning_rate": 1.6043956043956046e-06, + "loss": 0.3164, + "step": 35232 + }, + { + "epoch": 96.79395604395604, + "grad_norm": 6.215569972991943, + "learning_rate": 1.603021978021978e-06, + "loss": 0.0862, + "step": 35233 + }, + { + "epoch": 96.7967032967033, + "grad_norm": 4.233935356140137, + "learning_rate": 1.6016483516483518e-06, + "loss": 0.0375, + "step": 35234 + }, + { + "epoch": 96.79945054945055, + "grad_norm": 10.124747276306152, + "learning_rate": 1.6002747252747252e-06, + "loss": 0.1782, + "step": 35235 + }, + { + "epoch": 96.8021978021978, + "grad_norm": 11.320977210998535, + "learning_rate": 1.598901098901099e-06, + "loss": 0.1085, + "step": 35236 + }, + { + "epoch": 96.80494505494505, + "grad_norm": 5.44706392288208, + "learning_rate": 1.5975274725274727e-06, + "loss": 0.0497, + "step": 35237 + }, + { + "epoch": 96.8076923076923, + "grad_norm": 22.150108337402344, + "learning_rate": 1.596153846153846e-06, + "loss": 0.3347, + "step": 35238 + }, + { + "epoch": 96.81043956043956, + "grad_norm": 14.220800399780273, + "learning_rate": 1.59478021978022e-06, + "loss": 0.4707, + "step": 35239 + }, + { + "epoch": 96.81318681318682, + "grad_norm": 12.085850715637207, + "learning_rate": 1.5934065934065933e-06, + "loss": 0.2546, + "step": 35240 + }, + { + "epoch": 96.81593406593407, + "grad_norm": 13.688178062438965, + "learning_rate": 1.5920329670329672e-06, + "loss": 0.3432, + "step": 35241 + }, + { + "epoch": 96.81868131868131, + "grad_norm": 8.017855644226074, + "learning_rate": 1.5906593406593406e-06, + "loss": 0.0953, + "step": 35242 + }, + { + "epoch": 96.82142857142857, + "grad_norm": 8.25187873840332, + "learning_rate": 1.5892857142857144e-06, + "loss": 0.1023, + "step": 35243 + }, + { + "epoch": 96.82417582417582, + "grad_norm": 12.470974922180176, + "learning_rate": 1.587912087912088e-06, + "loss": 0.3119, + "step": 35244 + }, + { + "epoch": 96.82692307692308, + "grad_norm": 11.544760704040527, + "learning_rate": 1.5865384615384614e-06, + "loss": 0.2497, + "step": 35245 + }, + { + "epoch": 96.82967032967034, + "grad_norm": 7.142764568328857, + "learning_rate": 1.5851648351648353e-06, + "loss": 0.1168, + "step": 35246 + }, + { + "epoch": 96.83241758241758, + "grad_norm": 5.23486328125, + "learning_rate": 1.5837912087912087e-06, + "loss": 0.1039, + "step": 35247 + }, + { + "epoch": 96.83516483516483, + "grad_norm": 4.964739799499512, + "learning_rate": 1.5824175824175825e-06, + "loss": 0.0549, + "step": 35248 + }, + { + "epoch": 96.83791208791209, + "grad_norm": 6.991430282592773, + "learning_rate": 1.5810439560439563e-06, + "loss": 0.1054, + "step": 35249 + }, + { + "epoch": 96.84065934065934, + "grad_norm": 20.26136016845703, + "learning_rate": 1.5796703296703297e-06, + "loss": 0.2367, + "step": 35250 + }, + { + "epoch": 96.8434065934066, + "grad_norm": 25.817691802978516, + "learning_rate": 1.5782967032967034e-06, + "loss": 0.4999, + "step": 35251 + }, + { + "epoch": 96.84615384615384, + "grad_norm": 12.542179107666016, + "learning_rate": 1.576923076923077e-06, + "loss": 0.185, + "step": 35252 + }, + { + "epoch": 96.8489010989011, + "grad_norm": 13.42015266418457, + "learning_rate": 1.5755494505494506e-06, + "loss": 0.1242, + "step": 35253 + }, + { + "epoch": 96.85164835164835, + "grad_norm": 17.263099670410156, + "learning_rate": 1.5741758241758244e-06, + "loss": 0.4433, + "step": 35254 + }, + { + "epoch": 96.8543956043956, + "grad_norm": 12.743325233459473, + "learning_rate": 1.5728021978021978e-06, + "loss": 0.1719, + "step": 35255 + }, + { + "epoch": 96.85714285714286, + "grad_norm": 14.83581829071045, + "learning_rate": 1.5714285714285717e-06, + "loss": 0.2242, + "step": 35256 + }, + { + "epoch": 96.85989010989012, + "grad_norm": 7.168817043304443, + "learning_rate": 1.570054945054945e-06, + "loss": 0.0873, + "step": 35257 + }, + { + "epoch": 96.86263736263736, + "grad_norm": 11.948734283447266, + "learning_rate": 1.568681318681319e-06, + "loss": 0.1959, + "step": 35258 + }, + { + "epoch": 96.86538461538461, + "grad_norm": 17.790742874145508, + "learning_rate": 1.5673076923076923e-06, + "loss": 0.2257, + "step": 35259 + }, + { + "epoch": 96.86813186813187, + "grad_norm": 10.12228775024414, + "learning_rate": 1.565934065934066e-06, + "loss": 0.1359, + "step": 35260 + }, + { + "epoch": 96.87087912087912, + "grad_norm": 17.54292106628418, + "learning_rate": 1.5645604395604398e-06, + "loss": 0.2478, + "step": 35261 + }, + { + "epoch": 96.87362637362638, + "grad_norm": 10.32858657836914, + "learning_rate": 1.5631868131868132e-06, + "loss": 0.1603, + "step": 35262 + }, + { + "epoch": 96.87637362637362, + "grad_norm": 14.529809951782227, + "learning_rate": 1.561813186813187e-06, + "loss": 0.1311, + "step": 35263 + }, + { + "epoch": 96.87912087912088, + "grad_norm": 5.366388320922852, + "learning_rate": 1.5604395604395606e-06, + "loss": 0.1088, + "step": 35264 + }, + { + "epoch": 96.88186813186813, + "grad_norm": 5.838002681732178, + "learning_rate": 1.5590659340659342e-06, + "loss": 0.0896, + "step": 35265 + }, + { + "epoch": 96.88461538461539, + "grad_norm": 19.97150993347168, + "learning_rate": 1.5576923076923078e-06, + "loss": 0.2934, + "step": 35266 + }, + { + "epoch": 96.88736263736264, + "grad_norm": 15.865117073059082, + "learning_rate": 1.5563186813186813e-06, + "loss": 0.3462, + "step": 35267 + }, + { + "epoch": 96.89010989010988, + "grad_norm": 4.7379302978515625, + "learning_rate": 1.5549450549450549e-06, + "loss": 0.0684, + "step": 35268 + }, + { + "epoch": 96.89285714285714, + "grad_norm": 16.30522918701172, + "learning_rate": 1.5535714285714287e-06, + "loss": 0.2165, + "step": 35269 + }, + { + "epoch": 96.8956043956044, + "grad_norm": 18.473928451538086, + "learning_rate": 1.5521978021978023e-06, + "loss": 0.3446, + "step": 35270 + }, + { + "epoch": 96.89835164835165, + "grad_norm": 3.352808952331543, + "learning_rate": 1.550824175824176e-06, + "loss": 0.0413, + "step": 35271 + }, + { + "epoch": 96.9010989010989, + "grad_norm": 18.38095474243164, + "learning_rate": 1.5494505494505496e-06, + "loss": 0.2505, + "step": 35272 + }, + { + "epoch": 96.90384615384616, + "grad_norm": 4.886912822723389, + "learning_rate": 1.5480769230769232e-06, + "loss": 0.0441, + "step": 35273 + }, + { + "epoch": 96.9065934065934, + "grad_norm": 6.151547431945801, + "learning_rate": 1.5467032967032968e-06, + "loss": 0.0772, + "step": 35274 + }, + { + "epoch": 96.90934065934066, + "grad_norm": 12.767861366271973, + "learning_rate": 1.5453296703296704e-06, + "loss": 0.1922, + "step": 35275 + }, + { + "epoch": 96.91208791208791, + "grad_norm": 4.965900421142578, + "learning_rate": 1.543956043956044e-06, + "loss": 0.0481, + "step": 35276 + }, + { + "epoch": 96.91483516483517, + "grad_norm": 4.341781139373779, + "learning_rate": 1.5425824175824177e-06, + "loss": 0.0662, + "step": 35277 + }, + { + "epoch": 96.91758241758242, + "grad_norm": 1.2623615264892578, + "learning_rate": 1.5412087912087913e-06, + "loss": 0.013, + "step": 35278 + }, + { + "epoch": 96.92032967032966, + "grad_norm": 9.362556457519531, + "learning_rate": 1.5398351648351649e-06, + "loss": 0.1376, + "step": 35279 + }, + { + "epoch": 96.92307692307692, + "grad_norm": 7.2043914794921875, + "learning_rate": 1.5384615384615387e-06, + "loss": 0.1205, + "step": 35280 + }, + { + "epoch": 96.92582417582418, + "grad_norm": 13.171308517456055, + "learning_rate": 1.5370879120879121e-06, + "loss": 0.2857, + "step": 35281 + }, + { + "epoch": 96.92857142857143, + "grad_norm": 10.424554824829102, + "learning_rate": 1.5357142857142857e-06, + "loss": 0.1251, + "step": 35282 + }, + { + "epoch": 96.93131868131869, + "grad_norm": 11.509790420532227, + "learning_rate": 1.5343406593406594e-06, + "loss": 0.1484, + "step": 35283 + }, + { + "epoch": 96.93406593406593, + "grad_norm": 7.621973037719727, + "learning_rate": 1.532967032967033e-06, + "loss": 0.1684, + "step": 35284 + }, + { + "epoch": 96.93681318681318, + "grad_norm": 15.895637512207031, + "learning_rate": 1.5315934065934066e-06, + "loss": 0.3656, + "step": 35285 + }, + { + "epoch": 96.93956043956044, + "grad_norm": 8.0870361328125, + "learning_rate": 1.5302197802197804e-06, + "loss": 0.1334, + "step": 35286 + }, + { + "epoch": 96.9423076923077, + "grad_norm": 15.433774948120117, + "learning_rate": 1.528846153846154e-06, + "loss": 0.3805, + "step": 35287 + }, + { + "epoch": 96.94505494505495, + "grad_norm": 14.200844764709473, + "learning_rate": 1.5274725274725277e-06, + "loss": 0.2064, + "step": 35288 + }, + { + "epoch": 96.9478021978022, + "grad_norm": 9.928858757019043, + "learning_rate": 1.526098901098901e-06, + "loss": 0.1908, + "step": 35289 + }, + { + "epoch": 96.95054945054945, + "grad_norm": 4.791778087615967, + "learning_rate": 1.5247252747252747e-06, + "loss": 0.0383, + "step": 35290 + }, + { + "epoch": 96.9532967032967, + "grad_norm": 22.77447509765625, + "learning_rate": 1.5233516483516483e-06, + "loss": 0.454, + "step": 35291 + }, + { + "epoch": 96.95604395604396, + "grad_norm": 18.363412857055664, + "learning_rate": 1.5219780219780221e-06, + "loss": 0.4963, + "step": 35292 + }, + { + "epoch": 96.95879120879121, + "grad_norm": 10.849441528320312, + "learning_rate": 1.5206043956043958e-06, + "loss": 0.126, + "step": 35293 + }, + { + "epoch": 96.96153846153847, + "grad_norm": 3.2557032108306885, + "learning_rate": 1.5192307692307694e-06, + "loss": 0.0349, + "step": 35294 + }, + { + "epoch": 96.96428571428571, + "grad_norm": 8.13752555847168, + "learning_rate": 1.517857142857143e-06, + "loss": 0.1304, + "step": 35295 + }, + { + "epoch": 96.96703296703296, + "grad_norm": 7.708303451538086, + "learning_rate": 1.5164835164835166e-06, + "loss": 0.1519, + "step": 35296 + }, + { + "epoch": 96.96978021978022, + "grad_norm": 21.67194366455078, + "learning_rate": 1.51510989010989e-06, + "loss": 0.3762, + "step": 35297 + }, + { + "epoch": 96.97252747252747, + "grad_norm": 19.64863395690918, + "learning_rate": 1.5137362637362639e-06, + "loss": 0.2959, + "step": 35298 + }, + { + "epoch": 96.97527472527473, + "grad_norm": 11.446368217468262, + "learning_rate": 1.5123626373626375e-06, + "loss": 0.1466, + "step": 35299 + }, + { + "epoch": 96.97802197802197, + "grad_norm": 7.734022617340088, + "learning_rate": 1.510989010989011e-06, + "loss": 0.0785, + "step": 35300 + }, + { + "epoch": 96.98076923076923, + "grad_norm": 12.226238250732422, + "learning_rate": 1.5096153846153847e-06, + "loss": 0.2944, + "step": 35301 + }, + { + "epoch": 96.98351648351648, + "grad_norm": 12.067817687988281, + "learning_rate": 1.5082417582417583e-06, + "loss": 0.1195, + "step": 35302 + }, + { + "epoch": 96.98626373626374, + "grad_norm": 13.913267135620117, + "learning_rate": 1.506868131868132e-06, + "loss": 0.1793, + "step": 35303 + }, + { + "epoch": 96.98901098901099, + "grad_norm": 2.41888427734375, + "learning_rate": 1.5054945054945056e-06, + "loss": 0.0324, + "step": 35304 + }, + { + "epoch": 96.99175824175825, + "grad_norm": 15.586984634399414, + "learning_rate": 1.5041208791208792e-06, + "loss": 0.2279, + "step": 35305 + }, + { + "epoch": 96.99450549450549, + "grad_norm": 28.33803939819336, + "learning_rate": 1.5027472527472528e-06, + "loss": 0.2482, + "step": 35306 + }, + { + "epoch": 96.99725274725274, + "grad_norm": 7.119443416595459, + "learning_rate": 1.5013736263736264e-06, + "loss": 0.1581, + "step": 35307 + }, + { + "epoch": 97.0, + "grad_norm": 74.53857421875, + "learning_rate": 1.5e-06, + "loss": 2.6558, + "step": 35308 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.7024793388429752, + "eval_f1": 0.7179071450617005, + "eval_f1_DuraRiadoRio_64x64": 0.6782608695652174, + "eval_f1_Mole_64x64": 0.8529411764705882, + "eval_f1_Quebrado_64x64": 0.8, + "eval_f1_RiadoRio_64x64": 0.5610859728506787, + "eval_f1_RioFechado_64x64": 0.6972477064220184, + "eval_loss": 1.6193928718566895, + "eval_precision": 0.8070666998055607, + "eval_precision_DuraRiadoRio_64x64": 0.9069767441860465, + "eval_precision_Mole_64x64": 0.90625, + "eval_precision_Quebrado_64x64": 0.7945205479452054, + "eval_precision_RiadoRio_64x64": 0.42758620689655175, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.7007557038135245, + "eval_recall_DuraRiadoRio_64x64": 0.5416666666666666, + "eval_recall_Mole_64x64": 0.8055555555555556, + "eval_recall_Quebrado_64x64": 0.8055555555555556, + "eval_recall_RiadoRio_64x64": 0.8157894736842105, + "eval_recall_RioFechado_64x64": 0.5352112676056338, + "eval_runtime": 1.7517, + "eval_samples_per_second": 414.457, + "eval_steps_per_second": 26.26, + "step": 35308 + }, + { + "epoch": 97.00274725274726, + "grad_norm": 2.950761556625366, + "learning_rate": 1.4986263736263737e-06, + "loss": 0.061, + "step": 35309 + }, + { + "epoch": 97.00549450549451, + "grad_norm": 9.552480697631836, + "learning_rate": 1.4972527472527475e-06, + "loss": 0.1021, + "step": 35310 + }, + { + "epoch": 97.00824175824175, + "grad_norm": 2.7038581371307373, + "learning_rate": 1.4958791208791209e-06, + "loss": 0.0285, + "step": 35311 + }, + { + "epoch": 97.01098901098901, + "grad_norm": 12.743711471557617, + "learning_rate": 1.4945054945054945e-06, + "loss": 0.1544, + "step": 35312 + }, + { + "epoch": 97.01373626373626, + "grad_norm": 21.750661849975586, + "learning_rate": 1.4931318681318681e-06, + "loss": 0.3989, + "step": 35313 + }, + { + "epoch": 97.01648351648352, + "grad_norm": 5.55877685546875, + "learning_rate": 1.4917582417582417e-06, + "loss": 0.0779, + "step": 35314 + }, + { + "epoch": 97.01923076923077, + "grad_norm": 3.699233293533325, + "learning_rate": 1.4903846153846156e-06, + "loss": 0.0538, + "step": 35315 + }, + { + "epoch": 97.02197802197803, + "grad_norm": 20.923309326171875, + "learning_rate": 1.4890109890109892e-06, + "loss": 0.3192, + "step": 35316 + }, + { + "epoch": 97.02472527472527, + "grad_norm": 7.339940071105957, + "learning_rate": 1.4876373626373628e-06, + "loss": 0.1251, + "step": 35317 + }, + { + "epoch": 97.02747252747253, + "grad_norm": 7.496582508087158, + "learning_rate": 1.4862637362637362e-06, + "loss": 0.1509, + "step": 35318 + }, + { + "epoch": 97.03021978021978, + "grad_norm": 7.691093921661377, + "learning_rate": 1.4848901098901098e-06, + "loss": 0.0768, + "step": 35319 + }, + { + "epoch": 97.03296703296704, + "grad_norm": 11.843086242675781, + "learning_rate": 1.4835164835164835e-06, + "loss": 0.1246, + "step": 35320 + }, + { + "epoch": 97.03571428571429, + "grad_norm": 14.48559284210205, + "learning_rate": 1.4821428571428573e-06, + "loss": 0.1796, + "step": 35321 + }, + { + "epoch": 97.03846153846153, + "grad_norm": 12.126301765441895, + "learning_rate": 1.480769230769231e-06, + "loss": 0.2885, + "step": 35322 + }, + { + "epoch": 97.04120879120879, + "grad_norm": 13.694785118103027, + "learning_rate": 1.4793956043956045e-06, + "loss": 0.2457, + "step": 35323 + }, + { + "epoch": 97.04395604395604, + "grad_norm": 18.43748664855957, + "learning_rate": 1.4780219780219781e-06, + "loss": 0.6898, + "step": 35324 + }, + { + "epoch": 97.0467032967033, + "grad_norm": 7.501060485839844, + "learning_rate": 1.4766483516483518e-06, + "loss": 0.1066, + "step": 35325 + }, + { + "epoch": 97.04945054945055, + "grad_norm": 15.37631893157959, + "learning_rate": 1.4752747252747252e-06, + "loss": 0.3005, + "step": 35326 + }, + { + "epoch": 97.0521978021978, + "grad_norm": 4.985317230224609, + "learning_rate": 1.473901098901099e-06, + "loss": 0.0601, + "step": 35327 + }, + { + "epoch": 97.05494505494505, + "grad_norm": 4.046944618225098, + "learning_rate": 1.4725274725274726e-06, + "loss": 0.0752, + "step": 35328 + }, + { + "epoch": 97.0576923076923, + "grad_norm": 4.352238655090332, + "learning_rate": 1.4711538461538462e-06, + "loss": 0.0509, + "step": 35329 + }, + { + "epoch": 97.06043956043956, + "grad_norm": 8.384954452514648, + "learning_rate": 1.4697802197802199e-06, + "loss": 0.1212, + "step": 35330 + }, + { + "epoch": 97.06318681318682, + "grad_norm": 3.6398744583129883, + "learning_rate": 1.4684065934065935e-06, + "loss": 0.0433, + "step": 35331 + }, + { + "epoch": 97.06593406593407, + "grad_norm": 8.514318466186523, + "learning_rate": 1.467032967032967e-06, + "loss": 0.2639, + "step": 35332 + }, + { + "epoch": 97.06868131868131, + "grad_norm": 1.2871780395507812, + "learning_rate": 1.4656593406593407e-06, + "loss": 0.0172, + "step": 35333 + }, + { + "epoch": 97.07142857142857, + "grad_norm": 5.710649013519287, + "learning_rate": 1.4642857142857143e-06, + "loss": 0.0877, + "step": 35334 + }, + { + "epoch": 97.07417582417582, + "grad_norm": 6.15333890914917, + "learning_rate": 1.462912087912088e-06, + "loss": 0.0718, + "step": 35335 + }, + { + "epoch": 97.07692307692308, + "grad_norm": 22.217758178710938, + "learning_rate": 1.4615384615384616e-06, + "loss": 0.3439, + "step": 35336 + }, + { + "epoch": 97.07967032967034, + "grad_norm": 11.097816467285156, + "learning_rate": 1.4601648351648352e-06, + "loss": 0.0969, + "step": 35337 + }, + { + "epoch": 97.08241758241758, + "grad_norm": 7.633369445800781, + "learning_rate": 1.4587912087912088e-06, + "loss": 0.0913, + "step": 35338 + }, + { + "epoch": 97.08516483516483, + "grad_norm": 6.301124095916748, + "learning_rate": 1.4574175824175826e-06, + "loss": 0.0685, + "step": 35339 + }, + { + "epoch": 97.08791208791209, + "grad_norm": 4.344252586364746, + "learning_rate": 1.456043956043956e-06, + "loss": 0.0608, + "step": 35340 + }, + { + "epoch": 97.09065934065934, + "grad_norm": 12.66810417175293, + "learning_rate": 1.4546703296703297e-06, + "loss": 0.2314, + "step": 35341 + }, + { + "epoch": 97.0934065934066, + "grad_norm": 3.397095203399658, + "learning_rate": 1.4532967032967033e-06, + "loss": 0.0431, + "step": 35342 + }, + { + "epoch": 97.09615384615384, + "grad_norm": 31.83395004272461, + "learning_rate": 1.451923076923077e-06, + "loss": 0.4835, + "step": 35343 + }, + { + "epoch": 97.0989010989011, + "grad_norm": 12.205408096313477, + "learning_rate": 1.4505494505494507e-06, + "loss": 0.2085, + "step": 35344 + }, + { + "epoch": 97.10164835164835, + "grad_norm": 5.8700947761535645, + "learning_rate": 1.4491758241758243e-06, + "loss": 0.1227, + "step": 35345 + }, + { + "epoch": 97.1043956043956, + "grad_norm": 7.970654010772705, + "learning_rate": 1.447802197802198e-06, + "loss": 0.0878, + "step": 35346 + }, + { + "epoch": 97.10714285714286, + "grad_norm": 14.151189804077148, + "learning_rate": 1.4464285714285716e-06, + "loss": 0.276, + "step": 35347 + }, + { + "epoch": 97.10989010989012, + "grad_norm": 18.552040100097656, + "learning_rate": 1.445054945054945e-06, + "loss": 0.6106, + "step": 35348 + }, + { + "epoch": 97.11263736263736, + "grad_norm": 13.110773086547852, + "learning_rate": 1.4436813186813186e-06, + "loss": 0.1369, + "step": 35349 + }, + { + "epoch": 97.11538461538461, + "grad_norm": 5.838184833526611, + "learning_rate": 1.4423076923076924e-06, + "loss": 0.1213, + "step": 35350 + }, + { + "epoch": 97.11813186813187, + "grad_norm": 14.755071640014648, + "learning_rate": 1.440934065934066e-06, + "loss": 0.2819, + "step": 35351 + }, + { + "epoch": 97.12087912087912, + "grad_norm": 20.120405197143555, + "learning_rate": 1.4395604395604397e-06, + "loss": 0.2079, + "step": 35352 + }, + { + "epoch": 97.12362637362638, + "grad_norm": 22.5474910736084, + "learning_rate": 1.4381868131868133e-06, + "loss": 0.3355, + "step": 35353 + }, + { + "epoch": 97.12637362637362, + "grad_norm": 4.547063827514648, + "learning_rate": 1.436813186813187e-06, + "loss": 0.079, + "step": 35354 + }, + { + "epoch": 97.12912087912088, + "grad_norm": 10.483163833618164, + "learning_rate": 1.4354395604395605e-06, + "loss": 0.1208, + "step": 35355 + }, + { + "epoch": 97.13186813186813, + "grad_norm": 22.222610473632812, + "learning_rate": 1.4340659340659342e-06, + "loss": 0.7673, + "step": 35356 + }, + { + "epoch": 97.13461538461539, + "grad_norm": 18.12540054321289, + "learning_rate": 1.4326923076923078e-06, + "loss": 0.2521, + "step": 35357 + }, + { + "epoch": 97.13736263736264, + "grad_norm": 3.5755553245544434, + "learning_rate": 1.4313186813186814e-06, + "loss": 0.0493, + "step": 35358 + }, + { + "epoch": 97.14010989010988, + "grad_norm": 13.93982982635498, + "learning_rate": 1.429945054945055e-06, + "loss": 0.4379, + "step": 35359 + }, + { + "epoch": 97.14285714285714, + "grad_norm": 13.72155475616455, + "learning_rate": 1.4285714285714286e-06, + "loss": 0.2833, + "step": 35360 + }, + { + "epoch": 97.1456043956044, + "grad_norm": 24.84180450439453, + "learning_rate": 1.4271978021978022e-06, + "loss": 0.7927, + "step": 35361 + }, + { + "epoch": 97.14835164835165, + "grad_norm": 1.616833209991455, + "learning_rate": 1.4258241758241759e-06, + "loss": 0.0161, + "step": 35362 + }, + { + "epoch": 97.1510989010989, + "grad_norm": 19.026107788085938, + "learning_rate": 1.4244505494505495e-06, + "loss": 0.4615, + "step": 35363 + }, + { + "epoch": 97.15384615384616, + "grad_norm": 9.346263885498047, + "learning_rate": 1.423076923076923e-06, + "loss": 0.1388, + "step": 35364 + }, + { + "epoch": 97.1565934065934, + "grad_norm": 17.351734161376953, + "learning_rate": 1.4217032967032967e-06, + "loss": 0.303, + "step": 35365 + }, + { + "epoch": 97.15934065934066, + "grad_norm": 12.727481842041016, + "learning_rate": 1.4203296703296703e-06, + "loss": 0.2932, + "step": 35366 + }, + { + "epoch": 97.16208791208791, + "grad_norm": 12.946974754333496, + "learning_rate": 1.418956043956044e-06, + "loss": 0.2546, + "step": 35367 + }, + { + "epoch": 97.16483516483517, + "grad_norm": 13.63271427154541, + "learning_rate": 1.4175824175824178e-06, + "loss": 0.3707, + "step": 35368 + }, + { + "epoch": 97.16758241758242, + "grad_norm": 13.129077911376953, + "learning_rate": 1.4162087912087914e-06, + "loss": 0.1395, + "step": 35369 + }, + { + "epoch": 97.17032967032966, + "grad_norm": 13.103046417236328, + "learning_rate": 1.4148351648351648e-06, + "loss": 0.2236, + "step": 35370 + }, + { + "epoch": 97.17307692307692, + "grad_norm": 6.197707653045654, + "learning_rate": 1.4134615384615384e-06, + "loss": 0.0952, + "step": 35371 + }, + { + "epoch": 97.17582417582418, + "grad_norm": 15.698918342590332, + "learning_rate": 1.412087912087912e-06, + "loss": 0.2568, + "step": 35372 + }, + { + "epoch": 97.17857142857143, + "grad_norm": 4.20643949508667, + "learning_rate": 1.4107142857142857e-06, + "loss": 0.0617, + "step": 35373 + }, + { + "epoch": 97.18131868131869, + "grad_norm": 20.645360946655273, + "learning_rate": 1.4093406593406595e-06, + "loss": 0.427, + "step": 35374 + }, + { + "epoch": 97.18406593406593, + "grad_norm": 10.940051078796387, + "learning_rate": 1.4079670329670331e-06, + "loss": 0.1407, + "step": 35375 + }, + { + "epoch": 97.18681318681318, + "grad_norm": 17.21392059326172, + "learning_rate": 1.4065934065934067e-06, + "loss": 0.2572, + "step": 35376 + }, + { + "epoch": 97.18956043956044, + "grad_norm": 16.3627986907959, + "learning_rate": 1.4052197802197804e-06, + "loss": 0.3781, + "step": 35377 + }, + { + "epoch": 97.1923076923077, + "grad_norm": 18.245967864990234, + "learning_rate": 1.4038461538461538e-06, + "loss": 0.371, + "step": 35378 + }, + { + "epoch": 97.19505494505495, + "grad_norm": 4.5405354499816895, + "learning_rate": 1.4024725274725276e-06, + "loss": 0.0513, + "step": 35379 + }, + { + "epoch": 97.1978021978022, + "grad_norm": 8.909341812133789, + "learning_rate": 1.4010989010989012e-06, + "loss": 0.3212, + "step": 35380 + }, + { + "epoch": 97.20054945054945, + "grad_norm": 8.68595027923584, + "learning_rate": 1.3997252747252748e-06, + "loss": 0.1624, + "step": 35381 + }, + { + "epoch": 97.2032967032967, + "grad_norm": 8.427101135253906, + "learning_rate": 1.3983516483516484e-06, + "loss": 0.1667, + "step": 35382 + }, + { + "epoch": 97.20604395604396, + "grad_norm": 6.526493549346924, + "learning_rate": 1.396978021978022e-06, + "loss": 0.1001, + "step": 35383 + }, + { + "epoch": 97.20879120879121, + "grad_norm": 19.365161895751953, + "learning_rate": 1.3956043956043957e-06, + "loss": 0.1875, + "step": 35384 + }, + { + "epoch": 97.21153846153847, + "grad_norm": 12.66425609588623, + "learning_rate": 1.3942307692307693e-06, + "loss": 0.2245, + "step": 35385 + }, + { + "epoch": 97.21428571428571, + "grad_norm": 9.21137809753418, + "learning_rate": 1.392857142857143e-06, + "loss": 0.1069, + "step": 35386 + }, + { + "epoch": 97.21703296703296, + "grad_norm": 3.9971141815185547, + "learning_rate": 1.3914835164835165e-06, + "loss": 0.076, + "step": 35387 + }, + { + "epoch": 97.21978021978022, + "grad_norm": 0.5975073575973511, + "learning_rate": 1.3901098901098902e-06, + "loss": 0.0087, + "step": 35388 + }, + { + "epoch": 97.22252747252747, + "grad_norm": 6.8885016441345215, + "learning_rate": 1.3887362637362638e-06, + "loss": 0.1114, + "step": 35389 + }, + { + "epoch": 97.22527472527473, + "grad_norm": 14.849732398986816, + "learning_rate": 1.3873626373626374e-06, + "loss": 0.1649, + "step": 35390 + }, + { + "epoch": 97.22802197802197, + "grad_norm": 26.716739654541016, + "learning_rate": 1.3859890109890112e-06, + "loss": 0.5442, + "step": 35391 + }, + { + "epoch": 97.23076923076923, + "grad_norm": 7.58362340927124, + "learning_rate": 1.3846153846153846e-06, + "loss": 0.1543, + "step": 35392 + }, + { + "epoch": 97.23351648351648, + "grad_norm": 3.1541836261749268, + "learning_rate": 1.3832417582417583e-06, + "loss": 0.0459, + "step": 35393 + }, + { + "epoch": 97.23626373626374, + "grad_norm": 4.157817840576172, + "learning_rate": 1.3818681318681319e-06, + "loss": 0.0355, + "step": 35394 + }, + { + "epoch": 97.23901098901099, + "grad_norm": 15.063750267028809, + "learning_rate": 1.3804945054945055e-06, + "loss": 0.3282, + "step": 35395 + }, + { + "epoch": 97.24175824175825, + "grad_norm": 18.537202835083008, + "learning_rate": 1.3791208791208791e-06, + "loss": 0.302, + "step": 35396 + }, + { + "epoch": 97.24450549450549, + "grad_norm": 11.23630428314209, + "learning_rate": 1.377747252747253e-06, + "loss": 0.1539, + "step": 35397 + }, + { + "epoch": 97.24725274725274, + "grad_norm": 0.9831414222717285, + "learning_rate": 1.3763736263736266e-06, + "loss": 0.0112, + "step": 35398 + }, + { + "epoch": 97.25, + "grad_norm": 7.521181106567383, + "learning_rate": 1.3750000000000002e-06, + "loss": 0.0907, + "step": 35399 + }, + { + "epoch": 97.25274725274726, + "grad_norm": 16.64811134338379, + "learning_rate": 1.3736263736263736e-06, + "loss": 0.2112, + "step": 35400 + }, + { + "epoch": 97.25549450549451, + "grad_norm": 8.178776741027832, + "learning_rate": 1.3722527472527472e-06, + "loss": 0.0798, + "step": 35401 + }, + { + "epoch": 97.25824175824175, + "grad_norm": 22.610023498535156, + "learning_rate": 1.3708791208791208e-06, + "loss": 0.4584, + "step": 35402 + }, + { + "epoch": 97.26098901098901, + "grad_norm": 5.320784091949463, + "learning_rate": 1.3695054945054947e-06, + "loss": 0.062, + "step": 35403 + }, + { + "epoch": 97.26373626373626, + "grad_norm": 9.232995986938477, + "learning_rate": 1.3681318681318683e-06, + "loss": 0.1017, + "step": 35404 + }, + { + "epoch": 97.26648351648352, + "grad_norm": 6.208919048309326, + "learning_rate": 1.3667582417582419e-06, + "loss": 0.0914, + "step": 35405 + }, + { + "epoch": 97.26923076923077, + "grad_norm": 19.951459884643555, + "learning_rate": 1.3653846153846155e-06, + "loss": 0.4838, + "step": 35406 + }, + { + "epoch": 97.27197802197803, + "grad_norm": 6.929879665374756, + "learning_rate": 1.364010989010989e-06, + "loss": 0.0678, + "step": 35407 + }, + { + "epoch": 97.27472527472527, + "grad_norm": 3.938157320022583, + "learning_rate": 1.3626373626373627e-06, + "loss": 0.0438, + "step": 35408 + }, + { + "epoch": 97.27747252747253, + "grad_norm": 9.821428298950195, + "learning_rate": 1.3612637362637364e-06, + "loss": 0.1652, + "step": 35409 + }, + { + "epoch": 97.28021978021978, + "grad_norm": 2.141838788986206, + "learning_rate": 1.35989010989011e-06, + "loss": 0.0162, + "step": 35410 + }, + { + "epoch": 97.28296703296704, + "grad_norm": 6.045135021209717, + "learning_rate": 1.3585164835164836e-06, + "loss": 0.0921, + "step": 35411 + }, + { + "epoch": 97.28571428571429, + "grad_norm": 7.014040946960449, + "learning_rate": 1.3571428571428572e-06, + "loss": 0.1142, + "step": 35412 + }, + { + "epoch": 97.28846153846153, + "grad_norm": 9.270535469055176, + "learning_rate": 1.3557692307692308e-06, + "loss": 0.1161, + "step": 35413 + }, + { + "epoch": 97.29120879120879, + "grad_norm": 11.676614761352539, + "learning_rate": 1.3543956043956045e-06, + "loss": 0.1166, + "step": 35414 + }, + { + "epoch": 97.29395604395604, + "grad_norm": 22.93992042541504, + "learning_rate": 1.353021978021978e-06, + "loss": 0.3348, + "step": 35415 + }, + { + "epoch": 97.2967032967033, + "grad_norm": 24.94814109802246, + "learning_rate": 1.3516483516483517e-06, + "loss": 0.4075, + "step": 35416 + }, + { + "epoch": 97.29945054945055, + "grad_norm": 24.938865661621094, + "learning_rate": 1.3502747252747253e-06, + "loss": 0.3184, + "step": 35417 + }, + { + "epoch": 97.3021978021978, + "grad_norm": 12.228339195251465, + "learning_rate": 1.348901098901099e-06, + "loss": 0.2194, + "step": 35418 + }, + { + "epoch": 97.30494505494505, + "grad_norm": 2.8321263790130615, + "learning_rate": 1.3475274725274725e-06, + "loss": 0.0191, + "step": 35419 + }, + { + "epoch": 97.3076923076923, + "grad_norm": 1.013066291809082, + "learning_rate": 1.3461538461538464e-06, + "loss": 0.0102, + "step": 35420 + }, + { + "epoch": 97.31043956043956, + "grad_norm": 20.421104431152344, + "learning_rate": 1.3447802197802198e-06, + "loss": 0.279, + "step": 35421 + }, + { + "epoch": 97.31318681318682, + "grad_norm": 0.9387209415435791, + "learning_rate": 1.3434065934065934e-06, + "loss": 0.0099, + "step": 35422 + }, + { + "epoch": 97.31593406593407, + "grad_norm": 19.0089111328125, + "learning_rate": 1.342032967032967e-06, + "loss": 0.3392, + "step": 35423 + }, + { + "epoch": 97.31868131868131, + "grad_norm": 3.2169582843780518, + "learning_rate": 1.3406593406593406e-06, + "loss": 0.0433, + "step": 35424 + }, + { + "epoch": 97.32142857142857, + "grad_norm": 20.253149032592773, + "learning_rate": 1.3392857142857143e-06, + "loss": 0.5089, + "step": 35425 + }, + { + "epoch": 97.32417582417582, + "grad_norm": 1.0739723443984985, + "learning_rate": 1.337912087912088e-06, + "loss": 0.0111, + "step": 35426 + }, + { + "epoch": 97.32692307692308, + "grad_norm": 8.46070671081543, + "learning_rate": 1.3365384615384617e-06, + "loss": 0.0686, + "step": 35427 + }, + { + "epoch": 97.32967032967034, + "grad_norm": 10.727119445800781, + "learning_rate": 1.3351648351648353e-06, + "loss": 0.242, + "step": 35428 + }, + { + "epoch": 97.33241758241758, + "grad_norm": 19.265132904052734, + "learning_rate": 1.3337912087912087e-06, + "loss": 0.4621, + "step": 35429 + }, + { + "epoch": 97.33516483516483, + "grad_norm": 13.414203643798828, + "learning_rate": 1.3324175824175824e-06, + "loss": 0.1505, + "step": 35430 + }, + { + "epoch": 97.33791208791209, + "grad_norm": 11.134848594665527, + "learning_rate": 1.331043956043956e-06, + "loss": 0.196, + "step": 35431 + }, + { + "epoch": 97.34065934065934, + "grad_norm": 12.831854820251465, + "learning_rate": 1.3296703296703298e-06, + "loss": 0.2812, + "step": 35432 + }, + { + "epoch": 97.3434065934066, + "grad_norm": 1.4769794940948486, + "learning_rate": 1.3282967032967034e-06, + "loss": 0.0108, + "step": 35433 + }, + { + "epoch": 97.34615384615384, + "grad_norm": 19.654226303100586, + "learning_rate": 1.326923076923077e-06, + "loss": 0.4862, + "step": 35434 + }, + { + "epoch": 97.3489010989011, + "grad_norm": 4.709257125854492, + "learning_rate": 1.3255494505494507e-06, + "loss": 0.0365, + "step": 35435 + }, + { + "epoch": 97.35164835164835, + "grad_norm": 6.449209213256836, + "learning_rate": 1.3241758241758243e-06, + "loss": 0.1286, + "step": 35436 + }, + { + "epoch": 97.3543956043956, + "grad_norm": 9.242727279663086, + "learning_rate": 1.3228021978021977e-06, + "loss": 0.0976, + "step": 35437 + }, + { + "epoch": 97.35714285714286, + "grad_norm": 9.282313346862793, + "learning_rate": 1.3214285714285715e-06, + "loss": 0.1367, + "step": 35438 + }, + { + "epoch": 97.35989010989012, + "grad_norm": 23.357141494750977, + "learning_rate": 1.3200549450549451e-06, + "loss": 0.7486, + "step": 35439 + }, + { + "epoch": 97.36263736263736, + "grad_norm": 11.34442138671875, + "learning_rate": 1.3186813186813187e-06, + "loss": 0.2264, + "step": 35440 + }, + { + "epoch": 97.36538461538461, + "grad_norm": 2.9202170372009277, + "learning_rate": 1.3173076923076924e-06, + "loss": 0.0257, + "step": 35441 + }, + { + "epoch": 97.36813186813187, + "grad_norm": 4.485043525695801, + "learning_rate": 1.315934065934066e-06, + "loss": 0.0633, + "step": 35442 + }, + { + "epoch": 97.37087912087912, + "grad_norm": 13.575501441955566, + "learning_rate": 1.3145604395604396e-06, + "loss": 0.2993, + "step": 35443 + }, + { + "epoch": 97.37362637362638, + "grad_norm": 26.05365753173828, + "learning_rate": 1.3131868131868132e-06, + "loss": 0.7014, + "step": 35444 + }, + { + "epoch": 97.37637362637362, + "grad_norm": 7.099236965179443, + "learning_rate": 1.3118131868131868e-06, + "loss": 0.1826, + "step": 35445 + }, + { + "epoch": 97.37912087912088, + "grad_norm": 11.759085655212402, + "learning_rate": 1.3104395604395605e-06, + "loss": 0.0978, + "step": 35446 + }, + { + "epoch": 97.38186813186813, + "grad_norm": 12.828251838684082, + "learning_rate": 1.309065934065934e-06, + "loss": 0.3363, + "step": 35447 + }, + { + "epoch": 97.38461538461539, + "grad_norm": 7.6327290534973145, + "learning_rate": 1.3076923076923077e-06, + "loss": 0.1032, + "step": 35448 + }, + { + "epoch": 97.38736263736264, + "grad_norm": 10.726821899414062, + "learning_rate": 1.3063186813186815e-06, + "loss": 0.1498, + "step": 35449 + }, + { + "epoch": 97.39010989010988, + "grad_norm": 13.597997665405273, + "learning_rate": 1.3049450549450551e-06, + "loss": 0.3224, + "step": 35450 + }, + { + "epoch": 97.39285714285714, + "grad_norm": 19.38951301574707, + "learning_rate": 1.3035714285714286e-06, + "loss": 0.5108, + "step": 35451 + }, + { + "epoch": 97.3956043956044, + "grad_norm": 14.778510093688965, + "learning_rate": 1.3021978021978022e-06, + "loss": 0.3662, + "step": 35452 + }, + { + "epoch": 97.39835164835165, + "grad_norm": 5.413156509399414, + "learning_rate": 1.3008241758241758e-06, + "loss": 0.1211, + "step": 35453 + }, + { + "epoch": 97.4010989010989, + "grad_norm": 12.326811790466309, + "learning_rate": 1.2994505494505494e-06, + "loss": 0.2501, + "step": 35454 + }, + { + "epoch": 97.40384615384616, + "grad_norm": 5.112259387969971, + "learning_rate": 1.2980769230769232e-06, + "loss": 0.0421, + "step": 35455 + }, + { + "epoch": 97.4065934065934, + "grad_norm": 19.337871551513672, + "learning_rate": 1.2967032967032969e-06, + "loss": 0.5041, + "step": 35456 + }, + { + "epoch": 97.40934065934066, + "grad_norm": 9.96248722076416, + "learning_rate": 1.2953296703296705e-06, + "loss": 0.2913, + "step": 35457 + }, + { + "epoch": 97.41208791208791, + "grad_norm": 0.4335610568523407, + "learning_rate": 1.293956043956044e-06, + "loss": 0.0054, + "step": 35458 + }, + { + "epoch": 97.41483516483517, + "grad_norm": 16.911088943481445, + "learning_rate": 1.2925824175824175e-06, + "loss": 0.5208, + "step": 35459 + }, + { + "epoch": 97.41758241758242, + "grad_norm": 11.096712112426758, + "learning_rate": 1.2912087912087911e-06, + "loss": 0.1697, + "step": 35460 + }, + { + "epoch": 97.42032967032966, + "grad_norm": 11.701732635498047, + "learning_rate": 1.289835164835165e-06, + "loss": 0.2129, + "step": 35461 + }, + { + "epoch": 97.42307692307692, + "grad_norm": 2.9803969860076904, + "learning_rate": 1.2884615384615386e-06, + "loss": 0.0206, + "step": 35462 + }, + { + "epoch": 97.42582417582418, + "grad_norm": 8.573373794555664, + "learning_rate": 1.2870879120879122e-06, + "loss": 0.2126, + "step": 35463 + }, + { + "epoch": 97.42857142857143, + "grad_norm": 13.466543197631836, + "learning_rate": 1.2857142857142858e-06, + "loss": 0.1884, + "step": 35464 + }, + { + "epoch": 97.43131868131869, + "grad_norm": 16.007259368896484, + "learning_rate": 1.2843406593406594e-06, + "loss": 0.3435, + "step": 35465 + }, + { + "epoch": 97.43406593406593, + "grad_norm": 8.334561347961426, + "learning_rate": 1.282967032967033e-06, + "loss": 0.1087, + "step": 35466 + }, + { + "epoch": 97.43681318681318, + "grad_norm": 6.932962894439697, + "learning_rate": 1.2815934065934067e-06, + "loss": 0.1225, + "step": 35467 + }, + { + "epoch": 97.43956043956044, + "grad_norm": 21.495046615600586, + "learning_rate": 1.2802197802197803e-06, + "loss": 0.3677, + "step": 35468 + }, + { + "epoch": 97.4423076923077, + "grad_norm": 15.369077682495117, + "learning_rate": 1.278846153846154e-06, + "loss": 0.3857, + "step": 35469 + }, + { + "epoch": 97.44505494505495, + "grad_norm": 12.890035629272461, + "learning_rate": 1.2774725274725275e-06, + "loss": 0.193, + "step": 35470 + }, + { + "epoch": 97.4478021978022, + "grad_norm": 12.792795181274414, + "learning_rate": 1.2760989010989011e-06, + "loss": 0.2105, + "step": 35471 + }, + { + "epoch": 97.45054945054945, + "grad_norm": 20.041744232177734, + "learning_rate": 1.274725274725275e-06, + "loss": 0.5703, + "step": 35472 + }, + { + "epoch": 97.4532967032967, + "grad_norm": 15.393714904785156, + "learning_rate": 1.2733516483516484e-06, + "loss": 0.3106, + "step": 35473 + }, + { + "epoch": 97.45604395604396, + "grad_norm": 4.10469388961792, + "learning_rate": 1.271978021978022e-06, + "loss": 0.0468, + "step": 35474 + }, + { + "epoch": 97.45879120879121, + "grad_norm": 23.027240753173828, + "learning_rate": 1.2706043956043956e-06, + "loss": 0.4052, + "step": 35475 + }, + { + "epoch": 97.46153846153847, + "grad_norm": 10.712244033813477, + "learning_rate": 1.2692307692307692e-06, + "loss": 0.1409, + "step": 35476 + }, + { + "epoch": 97.46428571428571, + "grad_norm": 4.123257160186768, + "learning_rate": 1.2678571428571428e-06, + "loss": 0.0442, + "step": 35477 + }, + { + "epoch": 97.46703296703296, + "grad_norm": 5.425966262817383, + "learning_rate": 1.2664835164835167e-06, + "loss": 0.0519, + "step": 35478 + }, + { + "epoch": 97.46978021978022, + "grad_norm": 4.033812522888184, + "learning_rate": 1.2651098901098903e-06, + "loss": 0.027, + "step": 35479 + }, + { + "epoch": 97.47252747252747, + "grad_norm": 8.620545387268066, + "learning_rate": 1.263736263736264e-06, + "loss": 0.1201, + "step": 35480 + }, + { + "epoch": 97.47527472527473, + "grad_norm": 9.418375015258789, + "learning_rate": 1.2623626373626373e-06, + "loss": 0.1217, + "step": 35481 + }, + { + "epoch": 97.47802197802197, + "grad_norm": 8.544068336486816, + "learning_rate": 1.260989010989011e-06, + "loss": 0.1205, + "step": 35482 + }, + { + "epoch": 97.48076923076923, + "grad_norm": 22.54959487915039, + "learning_rate": 1.2596153846153846e-06, + "loss": 0.36, + "step": 35483 + }, + { + "epoch": 97.48351648351648, + "grad_norm": 6.325303077697754, + "learning_rate": 1.2582417582417584e-06, + "loss": 0.0742, + "step": 35484 + }, + { + "epoch": 97.48626373626374, + "grad_norm": 14.263612747192383, + "learning_rate": 1.256868131868132e-06, + "loss": 0.2127, + "step": 35485 + }, + { + "epoch": 97.48901098901099, + "grad_norm": 20.20797348022461, + "learning_rate": 1.2554945054945056e-06, + "loss": 0.3261, + "step": 35486 + }, + { + "epoch": 97.49175824175825, + "grad_norm": 9.73694133758545, + "learning_rate": 1.2541208791208792e-06, + "loss": 0.1439, + "step": 35487 + }, + { + "epoch": 97.49450549450549, + "grad_norm": 12.427133560180664, + "learning_rate": 1.2527472527472529e-06, + "loss": 0.2395, + "step": 35488 + }, + { + "epoch": 97.49725274725274, + "grad_norm": 17.837047576904297, + "learning_rate": 1.2513736263736263e-06, + "loss": 0.4346, + "step": 35489 + }, + { + "epoch": 97.5, + "grad_norm": 16.384977340698242, + "learning_rate": 1.25e-06, + "loss": 0.131, + "step": 35490 + }, + { + "epoch": 97.50274725274726, + "grad_norm": 10.741028785705566, + "learning_rate": 1.2486263736263737e-06, + "loss": 0.1004, + "step": 35491 + }, + { + "epoch": 97.50549450549451, + "grad_norm": 6.5940260887146, + "learning_rate": 1.2472527472527473e-06, + "loss": 0.0559, + "step": 35492 + }, + { + "epoch": 97.50824175824175, + "grad_norm": 16.91597557067871, + "learning_rate": 1.245879120879121e-06, + "loss": 0.3474, + "step": 35493 + }, + { + "epoch": 97.51098901098901, + "grad_norm": 19.49968147277832, + "learning_rate": 1.2445054945054946e-06, + "loss": 0.3281, + "step": 35494 + }, + { + "epoch": 97.51373626373626, + "grad_norm": 6.719967365264893, + "learning_rate": 1.2431318681318682e-06, + "loss": 0.0753, + "step": 35495 + }, + { + "epoch": 97.51648351648352, + "grad_norm": 17.246707916259766, + "learning_rate": 1.2417582417582418e-06, + "loss": 0.1767, + "step": 35496 + }, + { + "epoch": 97.51923076923077, + "grad_norm": 18.363014221191406, + "learning_rate": 1.2403846153846154e-06, + "loss": 0.2941, + "step": 35497 + }, + { + "epoch": 97.52197802197803, + "grad_norm": 4.537023544311523, + "learning_rate": 1.239010989010989e-06, + "loss": 0.0902, + "step": 35498 + }, + { + "epoch": 97.52472527472527, + "grad_norm": 2.898780584335327, + "learning_rate": 1.2376373626373627e-06, + "loss": 0.0485, + "step": 35499 + }, + { + "epoch": 97.52747252747253, + "grad_norm": 6.695751667022705, + "learning_rate": 1.2362637362637363e-06, + "loss": 0.0534, + "step": 35500 + }, + { + "epoch": 97.53021978021978, + "grad_norm": 9.717414855957031, + "learning_rate": 1.23489010989011e-06, + "loss": 0.2023, + "step": 35501 + }, + { + "epoch": 97.53296703296704, + "grad_norm": 5.4782891273498535, + "learning_rate": 1.2335164835164837e-06, + "loss": 0.0441, + "step": 35502 + }, + { + "epoch": 97.53571428571429, + "grad_norm": 12.861663818359375, + "learning_rate": 1.2321428571428571e-06, + "loss": 0.3193, + "step": 35503 + }, + { + "epoch": 97.53846153846153, + "grad_norm": 13.833584785461426, + "learning_rate": 1.2307692307692308e-06, + "loss": 0.2013, + "step": 35504 + }, + { + "epoch": 97.54120879120879, + "grad_norm": 11.953471183776855, + "learning_rate": 1.2293956043956044e-06, + "loss": 0.1753, + "step": 35505 + }, + { + "epoch": 97.54395604395604, + "grad_norm": 15.805522918701172, + "learning_rate": 1.228021978021978e-06, + "loss": 0.2481, + "step": 35506 + }, + { + "epoch": 97.5467032967033, + "grad_norm": 3.2268755435943604, + "learning_rate": 1.2266483516483518e-06, + "loss": 0.0385, + "step": 35507 + }, + { + "epoch": 97.54945054945055, + "grad_norm": 3.670185089111328, + "learning_rate": 1.2252747252747254e-06, + "loss": 0.0461, + "step": 35508 + }, + { + "epoch": 97.5521978021978, + "grad_norm": 4.651001930236816, + "learning_rate": 1.223901098901099e-06, + "loss": 0.0643, + "step": 35509 + }, + { + "epoch": 97.55494505494505, + "grad_norm": 12.583438873291016, + "learning_rate": 1.2225274725274725e-06, + "loss": 0.3426, + "step": 35510 + }, + { + "epoch": 97.5576923076923, + "grad_norm": 13.390267372131348, + "learning_rate": 1.221153846153846e-06, + "loss": 0.3641, + "step": 35511 + }, + { + "epoch": 97.56043956043956, + "grad_norm": 16.43551254272461, + "learning_rate": 1.2197802197802197e-06, + "loss": 0.3492, + "step": 35512 + }, + { + "epoch": 97.56318681318682, + "grad_norm": 23.66225814819336, + "learning_rate": 1.2184065934065935e-06, + "loss": 0.7701, + "step": 35513 + }, + { + "epoch": 97.56593406593407, + "grad_norm": 10.195069313049316, + "learning_rate": 1.2170329670329672e-06, + "loss": 0.1652, + "step": 35514 + }, + { + "epoch": 97.56868131868131, + "grad_norm": 6.177288055419922, + "learning_rate": 1.2156593406593408e-06, + "loss": 0.102, + "step": 35515 + }, + { + "epoch": 97.57142857142857, + "grad_norm": 11.68825626373291, + "learning_rate": 1.2142857142857144e-06, + "loss": 0.1474, + "step": 35516 + }, + { + "epoch": 97.57417582417582, + "grad_norm": 11.379854202270508, + "learning_rate": 1.212912087912088e-06, + "loss": 0.149, + "step": 35517 + }, + { + "epoch": 97.57692307692308, + "grad_norm": 16.82648277282715, + "learning_rate": 1.2115384615384614e-06, + "loss": 0.2705, + "step": 35518 + }, + { + "epoch": 97.57967032967034, + "grad_norm": 10.848699569702148, + "learning_rate": 1.2101648351648353e-06, + "loss": 0.1919, + "step": 35519 + }, + { + "epoch": 97.58241758241758, + "grad_norm": 11.567914962768555, + "learning_rate": 1.2087912087912089e-06, + "loss": 0.2681, + "step": 35520 + }, + { + "epoch": 97.58516483516483, + "grad_norm": 8.078672409057617, + "learning_rate": 1.2074175824175825e-06, + "loss": 0.1285, + "step": 35521 + }, + { + "epoch": 97.58791208791209, + "grad_norm": 5.593689441680908, + "learning_rate": 1.2060439560439561e-06, + "loss": 0.1602, + "step": 35522 + }, + { + "epoch": 97.59065934065934, + "grad_norm": 6.185614585876465, + "learning_rate": 1.2046703296703297e-06, + "loss": 0.1368, + "step": 35523 + }, + { + "epoch": 97.5934065934066, + "grad_norm": 2.2122480869293213, + "learning_rate": 1.2032967032967033e-06, + "loss": 0.0204, + "step": 35524 + }, + { + "epoch": 97.59615384615384, + "grad_norm": 7.7302680015563965, + "learning_rate": 1.201923076923077e-06, + "loss": 0.1053, + "step": 35525 + }, + { + "epoch": 97.5989010989011, + "grad_norm": 11.08216667175293, + "learning_rate": 1.2005494505494506e-06, + "loss": 0.115, + "step": 35526 + }, + { + "epoch": 97.60164835164835, + "grad_norm": 14.725481986999512, + "learning_rate": 1.1991758241758242e-06, + "loss": 0.256, + "step": 35527 + }, + { + "epoch": 97.6043956043956, + "grad_norm": 11.371620178222656, + "learning_rate": 1.1978021978021978e-06, + "loss": 0.108, + "step": 35528 + }, + { + "epoch": 97.60714285714286, + "grad_norm": 7.1650919914245605, + "learning_rate": 1.1964285714285714e-06, + "loss": 0.0483, + "step": 35529 + }, + { + "epoch": 97.60989010989012, + "grad_norm": 6.967748165130615, + "learning_rate": 1.195054945054945e-06, + "loss": 0.1146, + "step": 35530 + }, + { + "epoch": 97.61263736263736, + "grad_norm": 28.171127319335938, + "learning_rate": 1.1936813186813189e-06, + "loss": 0.6475, + "step": 35531 + }, + { + "epoch": 97.61538461538461, + "grad_norm": 21.598243713378906, + "learning_rate": 1.1923076923076923e-06, + "loss": 0.2772, + "step": 35532 + }, + { + "epoch": 97.61813186813187, + "grad_norm": 2.947904348373413, + "learning_rate": 1.190934065934066e-06, + "loss": 0.0467, + "step": 35533 + }, + { + "epoch": 97.62087912087912, + "grad_norm": 21.454439163208008, + "learning_rate": 1.1895604395604395e-06, + "loss": 0.4679, + "step": 35534 + }, + { + "epoch": 97.62362637362638, + "grad_norm": 27.065187454223633, + "learning_rate": 1.1881868131868132e-06, + "loss": 0.3151, + "step": 35535 + }, + { + "epoch": 97.62637362637362, + "grad_norm": 14.187403678894043, + "learning_rate": 1.186813186813187e-06, + "loss": 0.1652, + "step": 35536 + }, + { + "epoch": 97.62912087912088, + "grad_norm": 17.5563907623291, + "learning_rate": 1.1854395604395606e-06, + "loss": 0.3769, + "step": 35537 + }, + { + "epoch": 97.63186813186813, + "grad_norm": 7.66112756729126, + "learning_rate": 1.1840659340659342e-06, + "loss": 0.2161, + "step": 35538 + }, + { + "epoch": 97.63461538461539, + "grad_norm": 5.47547721862793, + "learning_rate": 1.1826923076923078e-06, + "loss": 0.0909, + "step": 35539 + }, + { + "epoch": 97.63736263736264, + "grad_norm": 3.85353684425354, + "learning_rate": 1.1813186813186812e-06, + "loss": 0.0345, + "step": 35540 + }, + { + "epoch": 97.64010989010988, + "grad_norm": 15.63048267364502, + "learning_rate": 1.1799450549450549e-06, + "loss": 0.258, + "step": 35541 + }, + { + "epoch": 97.64285714285714, + "grad_norm": 2.19939923286438, + "learning_rate": 1.1785714285714287e-06, + "loss": 0.0305, + "step": 35542 + }, + { + "epoch": 97.6456043956044, + "grad_norm": 12.428054809570312, + "learning_rate": 1.1771978021978023e-06, + "loss": 0.3542, + "step": 35543 + }, + { + "epoch": 97.64835164835165, + "grad_norm": 5.184631824493408, + "learning_rate": 1.175824175824176e-06, + "loss": 0.1001, + "step": 35544 + }, + { + "epoch": 97.6510989010989, + "grad_norm": 24.743539810180664, + "learning_rate": 1.1744505494505495e-06, + "loss": 0.5266, + "step": 35545 + }, + { + "epoch": 97.65384615384616, + "grad_norm": 6.571742534637451, + "learning_rate": 1.1730769230769232e-06, + "loss": 0.0585, + "step": 35546 + }, + { + "epoch": 97.6565934065934, + "grad_norm": 10.000574111938477, + "learning_rate": 1.1717032967032968e-06, + "loss": 0.2132, + "step": 35547 + }, + { + "epoch": 97.65934065934066, + "grad_norm": 7.066041946411133, + "learning_rate": 1.1703296703296704e-06, + "loss": 0.1861, + "step": 35548 + }, + { + "epoch": 97.66208791208791, + "grad_norm": 6.882715702056885, + "learning_rate": 1.168956043956044e-06, + "loss": 0.0734, + "step": 35549 + }, + { + "epoch": 97.66483516483517, + "grad_norm": 17.483448028564453, + "learning_rate": 1.1675824175824176e-06, + "loss": 0.4477, + "step": 35550 + }, + { + "epoch": 97.66758241758242, + "grad_norm": 14.390746116638184, + "learning_rate": 1.1662087912087913e-06, + "loss": 0.2419, + "step": 35551 + }, + { + "epoch": 97.67032967032966, + "grad_norm": 3.7054402828216553, + "learning_rate": 1.1648351648351649e-06, + "loss": 0.0383, + "step": 35552 + }, + { + "epoch": 97.67307692307692, + "grad_norm": 6.953535556793213, + "learning_rate": 1.1634615384615385e-06, + "loss": 0.1023, + "step": 35553 + }, + { + "epoch": 97.67582417582418, + "grad_norm": 0.8058640956878662, + "learning_rate": 1.1620879120879121e-06, + "loss": 0.009, + "step": 35554 + }, + { + "epoch": 97.67857142857143, + "grad_norm": 4.6155500411987305, + "learning_rate": 1.1607142857142857e-06, + "loss": 0.0561, + "step": 35555 + }, + { + "epoch": 97.68131868131869, + "grad_norm": 13.329988479614258, + "learning_rate": 1.1593406593406594e-06, + "loss": 0.2097, + "step": 35556 + }, + { + "epoch": 97.68406593406593, + "grad_norm": 15.501212120056152, + "learning_rate": 1.157967032967033e-06, + "loss": 0.4006, + "step": 35557 + }, + { + "epoch": 97.68681318681318, + "grad_norm": 32.08930587768555, + "learning_rate": 1.1565934065934066e-06, + "loss": 0.7096, + "step": 35558 + }, + { + "epoch": 97.68956043956044, + "grad_norm": 31.677734375, + "learning_rate": 1.1552197802197802e-06, + "loss": 1.0135, + "step": 35559 + }, + { + "epoch": 97.6923076923077, + "grad_norm": 5.619636535644531, + "learning_rate": 1.153846153846154e-06, + "loss": 0.1195, + "step": 35560 + }, + { + "epoch": 97.69505494505495, + "grad_norm": 15.459497451782227, + "learning_rate": 1.1524725274725277e-06, + "loss": 0.2351, + "step": 35561 + }, + { + "epoch": 97.6978021978022, + "grad_norm": 3.236599922180176, + "learning_rate": 1.151098901098901e-06, + "loss": 0.0337, + "step": 35562 + }, + { + "epoch": 97.70054945054945, + "grad_norm": 12.915900230407715, + "learning_rate": 1.1497252747252747e-06, + "loss": 0.0983, + "step": 35563 + }, + { + "epoch": 97.7032967032967, + "grad_norm": 16.549076080322266, + "learning_rate": 1.1483516483516483e-06, + "loss": 0.2694, + "step": 35564 + }, + { + "epoch": 97.70604395604396, + "grad_norm": 3.7434816360473633, + "learning_rate": 1.146978021978022e-06, + "loss": 0.0386, + "step": 35565 + }, + { + "epoch": 97.70879120879121, + "grad_norm": 4.733206748962402, + "learning_rate": 1.1456043956043957e-06, + "loss": 0.0341, + "step": 35566 + }, + { + "epoch": 97.71153846153847, + "grad_norm": 2.911738872528076, + "learning_rate": 1.1442307692307694e-06, + "loss": 0.0422, + "step": 35567 + }, + { + "epoch": 97.71428571428571, + "grad_norm": 4.194016456604004, + "learning_rate": 1.142857142857143e-06, + "loss": 0.0299, + "step": 35568 + }, + { + "epoch": 97.71703296703296, + "grad_norm": 3.4867961406707764, + "learning_rate": 1.1414835164835166e-06, + "loss": 0.0325, + "step": 35569 + }, + { + "epoch": 97.71978021978022, + "grad_norm": 4.855159759521484, + "learning_rate": 1.14010989010989e-06, + "loss": 0.0624, + "step": 35570 + }, + { + "epoch": 97.72252747252747, + "grad_norm": 5.126788139343262, + "learning_rate": 1.1387362637362638e-06, + "loss": 0.1204, + "step": 35571 + }, + { + "epoch": 97.72527472527473, + "grad_norm": 9.03407096862793, + "learning_rate": 1.1373626373626375e-06, + "loss": 0.1097, + "step": 35572 + }, + { + "epoch": 97.72802197802197, + "grad_norm": 14.713526725769043, + "learning_rate": 1.135989010989011e-06, + "loss": 0.2941, + "step": 35573 + }, + { + "epoch": 97.73076923076923, + "grad_norm": 25.22985076904297, + "learning_rate": 1.1346153846153847e-06, + "loss": 0.6299, + "step": 35574 + }, + { + "epoch": 97.73351648351648, + "grad_norm": 14.38640308380127, + "learning_rate": 1.1332417582417583e-06, + "loss": 0.1316, + "step": 35575 + }, + { + "epoch": 97.73626373626374, + "grad_norm": 5.992971897125244, + "learning_rate": 1.131868131868132e-06, + "loss": 0.0364, + "step": 35576 + }, + { + "epoch": 97.73901098901099, + "grad_norm": 12.016124725341797, + "learning_rate": 1.1304945054945056e-06, + "loss": 0.1245, + "step": 35577 + }, + { + "epoch": 97.74175824175825, + "grad_norm": 12.373406410217285, + "learning_rate": 1.1291208791208792e-06, + "loss": 0.1871, + "step": 35578 + }, + { + "epoch": 97.74450549450549, + "grad_norm": 5.709145545959473, + "learning_rate": 1.1277472527472528e-06, + "loss": 0.051, + "step": 35579 + }, + { + "epoch": 97.74725274725274, + "grad_norm": 13.223149299621582, + "learning_rate": 1.1263736263736264e-06, + "loss": 0.2258, + "step": 35580 + }, + { + "epoch": 97.75, + "grad_norm": 7.48258113861084, + "learning_rate": 1.125e-06, + "loss": 0.2134, + "step": 35581 + }, + { + "epoch": 97.75274725274726, + "grad_norm": 23.406049728393555, + "learning_rate": 1.1236263736263736e-06, + "loss": 0.4457, + "step": 35582 + }, + { + "epoch": 97.75549450549451, + "grad_norm": 2.2396035194396973, + "learning_rate": 1.1222527472527475e-06, + "loss": 0.013, + "step": 35583 + }, + { + "epoch": 97.75824175824175, + "grad_norm": 19.21963119506836, + "learning_rate": 1.1208791208791209e-06, + "loss": 0.7029, + "step": 35584 + }, + { + "epoch": 97.76098901098901, + "grad_norm": 19.22524070739746, + "learning_rate": 1.1195054945054945e-06, + "loss": 0.3343, + "step": 35585 + }, + { + "epoch": 97.76373626373626, + "grad_norm": 6.768002986907959, + "learning_rate": 1.1181318681318681e-06, + "loss": 0.1236, + "step": 35586 + }, + { + "epoch": 97.76648351648352, + "grad_norm": 13.721673011779785, + "learning_rate": 1.1167582417582417e-06, + "loss": 0.1329, + "step": 35587 + }, + { + "epoch": 97.76923076923077, + "grad_norm": 8.234033584594727, + "learning_rate": 1.1153846153846154e-06, + "loss": 0.1733, + "step": 35588 + }, + { + "epoch": 97.77197802197803, + "grad_norm": 10.422799110412598, + "learning_rate": 1.1140109890109892e-06, + "loss": 0.2028, + "step": 35589 + }, + { + "epoch": 97.77472527472527, + "grad_norm": 13.701458930969238, + "learning_rate": 1.1126373626373628e-06, + "loss": 0.2153, + "step": 35590 + }, + { + "epoch": 97.77747252747253, + "grad_norm": 12.416821479797363, + "learning_rate": 1.1112637362637364e-06, + "loss": 0.1598, + "step": 35591 + }, + { + "epoch": 97.78021978021978, + "grad_norm": 26.981285095214844, + "learning_rate": 1.1098901098901098e-06, + "loss": 0.467, + "step": 35592 + }, + { + "epoch": 97.78296703296704, + "grad_norm": 12.20963191986084, + "learning_rate": 1.1085164835164835e-06, + "loss": 0.2264, + "step": 35593 + }, + { + "epoch": 97.78571428571429, + "grad_norm": 6.373779296875, + "learning_rate": 1.107142857142857e-06, + "loss": 0.1502, + "step": 35594 + }, + { + "epoch": 97.78846153846153, + "grad_norm": 12.198905944824219, + "learning_rate": 1.105769230769231e-06, + "loss": 0.1502, + "step": 35595 + }, + { + "epoch": 97.79120879120879, + "grad_norm": 14.86524486541748, + "learning_rate": 1.1043956043956045e-06, + "loss": 0.0759, + "step": 35596 + }, + { + "epoch": 97.79395604395604, + "grad_norm": 14.035354614257812, + "learning_rate": 1.1030219780219781e-06, + "loss": 0.2503, + "step": 35597 + }, + { + "epoch": 97.7967032967033, + "grad_norm": 2.161133289337158, + "learning_rate": 1.1016483516483518e-06, + "loss": 0.0242, + "step": 35598 + }, + { + "epoch": 97.79945054945055, + "grad_norm": 0.30289891362190247, + "learning_rate": 1.1002747252747254e-06, + "loss": 0.0034, + "step": 35599 + }, + { + "epoch": 97.8021978021978, + "grad_norm": 5.182058334350586, + "learning_rate": 1.098901098901099e-06, + "loss": 0.0505, + "step": 35600 + }, + { + "epoch": 97.80494505494505, + "grad_norm": 9.68582534790039, + "learning_rate": 1.0975274725274726e-06, + "loss": 0.1337, + "step": 35601 + }, + { + "epoch": 97.8076923076923, + "grad_norm": 13.013919830322266, + "learning_rate": 1.0961538461538462e-06, + "loss": 0.3208, + "step": 35602 + }, + { + "epoch": 97.81043956043956, + "grad_norm": 4.504162788391113, + "learning_rate": 1.0947802197802198e-06, + "loss": 0.0627, + "step": 35603 + }, + { + "epoch": 97.81318681318682, + "grad_norm": 12.64108657836914, + "learning_rate": 1.0934065934065935e-06, + "loss": 0.2126, + "step": 35604 + }, + { + "epoch": 97.81593406593407, + "grad_norm": 13.668262481689453, + "learning_rate": 1.092032967032967e-06, + "loss": 0.1695, + "step": 35605 + }, + { + "epoch": 97.81868131868131, + "grad_norm": 3.6478095054626465, + "learning_rate": 1.0906593406593407e-06, + "loss": 0.0494, + "step": 35606 + }, + { + "epoch": 97.82142857142857, + "grad_norm": 15.332401275634766, + "learning_rate": 1.0892857142857143e-06, + "loss": 0.3048, + "step": 35607 + }, + { + "epoch": 97.82417582417582, + "grad_norm": 11.121574401855469, + "learning_rate": 1.087912087912088e-06, + "loss": 0.1249, + "step": 35608 + }, + { + "epoch": 97.82692307692308, + "grad_norm": 4.545644283294678, + "learning_rate": 1.0865384615384616e-06, + "loss": 0.0481, + "step": 35609 + }, + { + "epoch": 97.82967032967034, + "grad_norm": 12.578070640563965, + "learning_rate": 1.0851648351648352e-06, + "loss": 0.1942, + "step": 35610 + }, + { + "epoch": 97.83241758241758, + "grad_norm": 20.373550415039062, + "learning_rate": 1.0837912087912088e-06, + "loss": 0.2701, + "step": 35611 + }, + { + "epoch": 97.83516483516483, + "grad_norm": 5.73628044128418, + "learning_rate": 1.0824175824175826e-06, + "loss": 0.0716, + "step": 35612 + }, + { + "epoch": 97.83791208791209, + "grad_norm": 15.222789764404297, + "learning_rate": 1.0810439560439562e-06, + "loss": 0.2396, + "step": 35613 + }, + { + "epoch": 97.84065934065934, + "grad_norm": 16.516931533813477, + "learning_rate": 1.0796703296703297e-06, + "loss": 0.4485, + "step": 35614 + }, + { + "epoch": 97.8434065934066, + "grad_norm": 1.2510641813278198, + "learning_rate": 1.0782967032967033e-06, + "loss": 0.0104, + "step": 35615 + }, + { + "epoch": 97.84615384615384, + "grad_norm": 10.217774391174316, + "learning_rate": 1.0769230769230769e-06, + "loss": 0.1696, + "step": 35616 + }, + { + "epoch": 97.8489010989011, + "grad_norm": 2.534569263458252, + "learning_rate": 1.0755494505494505e-06, + "loss": 0.0275, + "step": 35617 + }, + { + "epoch": 97.85164835164835, + "grad_norm": 5.656667232513428, + "learning_rate": 1.0741758241758243e-06, + "loss": 0.1101, + "step": 35618 + }, + { + "epoch": 97.8543956043956, + "grad_norm": 10.653371810913086, + "learning_rate": 1.072802197802198e-06, + "loss": 0.3411, + "step": 35619 + }, + { + "epoch": 97.85714285714286, + "grad_norm": 8.620566368103027, + "learning_rate": 1.0714285714285716e-06, + "loss": 0.0893, + "step": 35620 + }, + { + "epoch": 97.85989010989012, + "grad_norm": 15.87067699432373, + "learning_rate": 1.070054945054945e-06, + "loss": 0.1338, + "step": 35621 + }, + { + "epoch": 97.86263736263736, + "grad_norm": 17.038267135620117, + "learning_rate": 1.0686813186813186e-06, + "loss": 0.4529, + "step": 35622 + }, + { + "epoch": 97.86538461538461, + "grad_norm": 0.9478601813316345, + "learning_rate": 1.0673076923076922e-06, + "loss": 0.0075, + "step": 35623 + }, + { + "epoch": 97.86813186813187, + "grad_norm": 15.905744552612305, + "learning_rate": 1.065934065934066e-06, + "loss": 0.2005, + "step": 35624 + }, + { + "epoch": 97.87087912087912, + "grad_norm": 12.60941219329834, + "learning_rate": 1.0645604395604397e-06, + "loss": 0.2532, + "step": 35625 + }, + { + "epoch": 97.87362637362638, + "grad_norm": 25.34882164001465, + "learning_rate": 1.0631868131868133e-06, + "loss": 0.4494, + "step": 35626 + }, + { + "epoch": 97.87637362637362, + "grad_norm": 7.058924674987793, + "learning_rate": 1.061813186813187e-06, + "loss": 0.1663, + "step": 35627 + }, + { + "epoch": 97.87912087912088, + "grad_norm": 11.000401496887207, + "learning_rate": 1.0604395604395605e-06, + "loss": 0.2046, + "step": 35628 + }, + { + "epoch": 97.88186813186813, + "grad_norm": 12.629637718200684, + "learning_rate": 1.059065934065934e-06, + "loss": 0.4272, + "step": 35629 + }, + { + "epoch": 97.88461538461539, + "grad_norm": 14.899882316589355, + "learning_rate": 1.0576923076923078e-06, + "loss": 0.1524, + "step": 35630 + }, + { + "epoch": 97.88736263736264, + "grad_norm": 1.6598000526428223, + "learning_rate": 1.0563186813186814e-06, + "loss": 0.0194, + "step": 35631 + }, + { + "epoch": 97.89010989010988, + "grad_norm": 16.61223793029785, + "learning_rate": 1.054945054945055e-06, + "loss": 0.3853, + "step": 35632 + }, + { + "epoch": 97.89285714285714, + "grad_norm": 18.05774688720703, + "learning_rate": 1.0535714285714286e-06, + "loss": 0.8545, + "step": 35633 + }, + { + "epoch": 97.8956043956044, + "grad_norm": 6.340512752532959, + "learning_rate": 1.0521978021978022e-06, + "loss": 0.0643, + "step": 35634 + }, + { + "epoch": 97.89835164835165, + "grad_norm": 8.213770866394043, + "learning_rate": 1.0508241758241759e-06, + "loss": 0.0764, + "step": 35635 + }, + { + "epoch": 97.9010989010989, + "grad_norm": 13.459314346313477, + "learning_rate": 1.0494505494505495e-06, + "loss": 0.2379, + "step": 35636 + }, + { + "epoch": 97.90384615384616, + "grad_norm": 11.018921852111816, + "learning_rate": 1.048076923076923e-06, + "loss": 0.1639, + "step": 35637 + }, + { + "epoch": 97.9065934065934, + "grad_norm": 15.061875343322754, + "learning_rate": 1.0467032967032967e-06, + "loss": 0.4429, + "step": 35638 + }, + { + "epoch": 97.90934065934066, + "grad_norm": 12.967190742492676, + "learning_rate": 1.0453296703296703e-06, + "loss": 0.2313, + "step": 35639 + }, + { + "epoch": 97.91208791208791, + "grad_norm": 25.67902946472168, + "learning_rate": 1.043956043956044e-06, + "loss": 0.4265, + "step": 35640 + }, + { + "epoch": 97.91483516483517, + "grad_norm": 9.231865882873535, + "learning_rate": 1.0425824175824178e-06, + "loss": 0.1354, + "step": 35641 + }, + { + "epoch": 97.91758241758242, + "grad_norm": 4.383686065673828, + "learning_rate": 1.0412087912087914e-06, + "loss": 0.0701, + "step": 35642 + }, + { + "epoch": 97.92032967032966, + "grad_norm": 12.837431907653809, + "learning_rate": 1.0398351648351648e-06, + "loss": 0.1391, + "step": 35643 + }, + { + "epoch": 97.92307692307692, + "grad_norm": 2.8699891567230225, + "learning_rate": 1.0384615384615384e-06, + "loss": 0.0367, + "step": 35644 + }, + { + "epoch": 97.92582417582418, + "grad_norm": 11.64415454864502, + "learning_rate": 1.037087912087912e-06, + "loss": 0.3711, + "step": 35645 + }, + { + "epoch": 97.92857142857143, + "grad_norm": 12.628307342529297, + "learning_rate": 1.0357142857142857e-06, + "loss": 0.2402, + "step": 35646 + }, + { + "epoch": 97.93131868131869, + "grad_norm": 13.580928802490234, + "learning_rate": 1.0343406593406595e-06, + "loss": 0.3764, + "step": 35647 + }, + { + "epoch": 97.93406593406593, + "grad_norm": 18.264177322387695, + "learning_rate": 1.0329670329670331e-06, + "loss": 0.7113, + "step": 35648 + }, + { + "epoch": 97.93681318681318, + "grad_norm": 13.29126262664795, + "learning_rate": 1.0315934065934067e-06, + "loss": 0.2459, + "step": 35649 + }, + { + "epoch": 97.93956043956044, + "grad_norm": 14.480448722839355, + "learning_rate": 1.0302197802197803e-06, + "loss": 0.4001, + "step": 35650 + }, + { + "epoch": 97.9423076923077, + "grad_norm": 12.04328441619873, + "learning_rate": 1.0288461538461538e-06, + "loss": 0.4711, + "step": 35651 + }, + { + "epoch": 97.94505494505495, + "grad_norm": 2.4415345191955566, + "learning_rate": 1.0274725274725274e-06, + "loss": 0.0254, + "step": 35652 + }, + { + "epoch": 97.9478021978022, + "grad_norm": 17.041200637817383, + "learning_rate": 1.0260989010989012e-06, + "loss": 0.38, + "step": 35653 + }, + { + "epoch": 97.95054945054945, + "grad_norm": 7.493197917938232, + "learning_rate": 1.0247252747252748e-06, + "loss": 0.0876, + "step": 35654 + }, + { + "epoch": 97.9532967032967, + "grad_norm": 8.796682357788086, + "learning_rate": 1.0233516483516484e-06, + "loss": 0.1014, + "step": 35655 + }, + { + "epoch": 97.95604395604396, + "grad_norm": 3.4898910522460938, + "learning_rate": 1.021978021978022e-06, + "loss": 0.029, + "step": 35656 + }, + { + "epoch": 97.95879120879121, + "grad_norm": 4.257445335388184, + "learning_rate": 1.0206043956043957e-06, + "loss": 0.0312, + "step": 35657 + }, + { + "epoch": 97.96153846153847, + "grad_norm": 13.049458503723145, + "learning_rate": 1.0192307692307693e-06, + "loss": 0.257, + "step": 35658 + }, + { + "epoch": 97.96428571428571, + "grad_norm": 11.340557098388672, + "learning_rate": 1.017857142857143e-06, + "loss": 0.1825, + "step": 35659 + }, + { + "epoch": 97.96703296703296, + "grad_norm": 15.65622615814209, + "learning_rate": 1.0164835164835165e-06, + "loss": 0.233, + "step": 35660 + }, + { + "epoch": 97.96978021978022, + "grad_norm": 17.189563751220703, + "learning_rate": 1.0151098901098902e-06, + "loss": 0.4775, + "step": 35661 + }, + { + "epoch": 97.97252747252747, + "grad_norm": 5.815486907958984, + "learning_rate": 1.0137362637362638e-06, + "loss": 0.0589, + "step": 35662 + }, + { + "epoch": 97.97527472527473, + "grad_norm": 8.39887523651123, + "learning_rate": 1.0123626373626374e-06, + "loss": 0.1486, + "step": 35663 + }, + { + "epoch": 97.97802197802197, + "grad_norm": 0.53550785779953, + "learning_rate": 1.0109890109890112e-06, + "loss": 0.0066, + "step": 35664 + }, + { + "epoch": 97.98076923076923, + "grad_norm": 16.141693115234375, + "learning_rate": 1.0096153846153846e-06, + "loss": 0.6409, + "step": 35665 + }, + { + "epoch": 97.98351648351648, + "grad_norm": 4.8649773597717285, + "learning_rate": 1.0082417582417582e-06, + "loss": 0.0969, + "step": 35666 + }, + { + "epoch": 97.98626373626374, + "grad_norm": 14.819328308105469, + "learning_rate": 1.0068681318681319e-06, + "loss": 0.1318, + "step": 35667 + }, + { + "epoch": 97.98901098901099, + "grad_norm": 24.470718383789062, + "learning_rate": 1.0054945054945055e-06, + "loss": 0.5244, + "step": 35668 + }, + { + "epoch": 97.99175824175825, + "grad_norm": 8.971163749694824, + "learning_rate": 1.004120879120879e-06, + "loss": 0.0966, + "step": 35669 + }, + { + "epoch": 97.99450549450549, + "grad_norm": 12.633484840393066, + "learning_rate": 1.002747252747253e-06, + "loss": 0.2394, + "step": 35670 + }, + { + "epoch": 97.99725274725274, + "grad_norm": 9.42464542388916, + "learning_rate": 1.0013736263736265e-06, + "loss": 0.1519, + "step": 35671 + }, + { + "epoch": 98.0, + "grad_norm": 59.79823684692383, + "learning_rate": 1.0000000000000002e-06, + "loss": 1.1283, + "step": 35672 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.7286501377410468, + "eval_f1": 0.7223404725889604, + "eval_f1_DuraRiadoRio_64x64": 0.7615062761506276, + "eval_f1_Mole_64x64": 0.8401253918495298, + "eval_f1_Quebrado_64x64": 0.8366013071895425, + "eval_f1_RiadoRio_64x64": 0.6224489795918368, + "eval_f1_RioFechado_64x64": 0.5510204081632653, + "eval_loss": 1.3465520143508911, + "eval_precision": 0.8044131625359696, + "eval_precision_DuraRiadoRio_64x64": 0.9578947368421052, + "eval_precision_Mole_64x64": 0.7657142857142857, + "eval_precision_Quebrado_64x64": 0.7901234567901234, + "eval_precision_RiadoRio_64x64": 0.5083333333333333, + "eval_precision_RioFechado_64x64": 1.0, + "eval_recall": 0.7268604315954205, + "eval_recall_DuraRiadoRio_64x64": 0.6319444444444444, + "eval_recall_Mole_64x64": 0.9305555555555556, + "eval_recall_Quebrado_64x64": 0.8888888888888888, + "eval_recall_RiadoRio_64x64": 0.8026315789473685, + "eval_recall_RioFechado_64x64": 0.38028169014084506, + "eval_runtime": 1.8322, + "eval_samples_per_second": 396.241, + "eval_steps_per_second": 25.106, + "step": 35672 + }, + { + "epoch": 98.00274725274726, + "grad_norm": 13.178536415100098, + "learning_rate": 9.986263736263736e-07, + "loss": 0.245, + "step": 35673 + }, + { + "epoch": 98.00549450549451, + "grad_norm": 9.439343452453613, + "learning_rate": 9.972527472527472e-07, + "loss": 0.1945, + "step": 35674 + }, + { + "epoch": 98.00824175824175, + "grad_norm": 16.292728424072266, + "learning_rate": 9.958791208791208e-07, + "loss": 0.2212, + "step": 35675 + }, + { + "epoch": 98.01098901098901, + "grad_norm": 15.331428527832031, + "learning_rate": 9.945054945054946e-07, + "loss": 0.2409, + "step": 35676 + }, + { + "epoch": 98.01373626373626, + "grad_norm": 12.139143943786621, + "learning_rate": 9.931318681318683e-07, + "loss": 0.2223, + "step": 35677 + }, + { + "epoch": 98.01648351648352, + "grad_norm": 24.664321899414062, + "learning_rate": 9.917582417582419e-07, + "loss": 0.6531, + "step": 35678 + }, + { + "epoch": 98.01923076923077, + "grad_norm": 2.525010347366333, + "learning_rate": 9.903846153846155e-07, + "loss": 0.0232, + "step": 35679 + }, + { + "epoch": 98.02197802197803, + "grad_norm": 19.78927230834961, + "learning_rate": 9.890109890109891e-07, + "loss": 0.3579, + "step": 35680 + }, + { + "epoch": 98.02472527472527, + "grad_norm": 19.501310348510742, + "learning_rate": 9.876373626373625e-07, + "loss": 0.2444, + "step": 35681 + }, + { + "epoch": 98.02747252747253, + "grad_norm": 8.733159065246582, + "learning_rate": 9.862637362637364e-07, + "loss": 0.0675, + "step": 35682 + }, + { + "epoch": 98.03021978021978, + "grad_norm": 3.9410221576690674, + "learning_rate": 9.8489010989011e-07, + "loss": 0.0617, + "step": 35683 + }, + { + "epoch": 98.03296703296704, + "grad_norm": 6.283053398132324, + "learning_rate": 9.835164835164836e-07, + "loss": 0.0875, + "step": 35684 + }, + { + "epoch": 98.03571428571429, + "grad_norm": 22.943620681762695, + "learning_rate": 9.821428571428572e-07, + "loss": 0.456, + "step": 35685 + }, + { + "epoch": 98.03846153846153, + "grad_norm": 12.097728729248047, + "learning_rate": 9.807692307692308e-07, + "loss": 0.2878, + "step": 35686 + }, + { + "epoch": 98.04120879120879, + "grad_norm": 8.378986358642578, + "learning_rate": 9.793956043956044e-07, + "loss": 0.2051, + "step": 35687 + }, + { + "epoch": 98.04395604395604, + "grad_norm": 16.519569396972656, + "learning_rate": 9.78021978021978e-07, + "loss": 0.2848, + "step": 35688 + }, + { + "epoch": 98.0467032967033, + "grad_norm": 6.121734142303467, + "learning_rate": 9.766483516483517e-07, + "loss": 0.188, + "step": 35689 + }, + { + "epoch": 98.04945054945055, + "grad_norm": 2.8064959049224854, + "learning_rate": 9.752747252747253e-07, + "loss": 0.031, + "step": 35690 + }, + { + "epoch": 98.0521978021978, + "grad_norm": 9.674988746643066, + "learning_rate": 9.73901098901099e-07, + "loss": 0.1143, + "step": 35691 + }, + { + "epoch": 98.05494505494505, + "grad_norm": 10.448546409606934, + "learning_rate": 9.725274725274725e-07, + "loss": 0.3787, + "step": 35692 + }, + { + "epoch": 98.0576923076923, + "grad_norm": 22.87947654724121, + "learning_rate": 9.711538461538462e-07, + "loss": 0.6105, + "step": 35693 + }, + { + "epoch": 98.06043956043956, + "grad_norm": 22.755708694458008, + "learning_rate": 9.6978021978022e-07, + "loss": 0.362, + "step": 35694 + }, + { + "epoch": 98.06318681318682, + "grad_norm": 17.662073135375977, + "learning_rate": 9.684065934065934e-07, + "loss": 0.4141, + "step": 35695 + }, + { + "epoch": 98.06593406593407, + "grad_norm": 3.3827974796295166, + "learning_rate": 9.67032967032967e-07, + "loss": 0.0405, + "step": 35696 + }, + { + "epoch": 98.06868131868131, + "grad_norm": 14.535648345947266, + "learning_rate": 9.656593406593406e-07, + "loss": 0.2982, + "step": 35697 + }, + { + "epoch": 98.07142857142857, + "grad_norm": 11.34339427947998, + "learning_rate": 9.642857142857142e-07, + "loss": 0.136, + "step": 35698 + }, + { + "epoch": 98.07417582417582, + "grad_norm": 6.700819969177246, + "learning_rate": 9.62912087912088e-07, + "loss": 0.0774, + "step": 35699 + }, + { + "epoch": 98.07692307692308, + "grad_norm": 9.247394561767578, + "learning_rate": 9.615384615384617e-07, + "loss": 0.1106, + "step": 35700 + }, + { + "epoch": 98.07967032967034, + "grad_norm": 10.807527542114258, + "learning_rate": 9.601648351648353e-07, + "loss": 0.0852, + "step": 35701 + }, + { + "epoch": 98.08241758241758, + "grad_norm": 4.1326422691345215, + "learning_rate": 9.58791208791209e-07, + "loss": 0.0679, + "step": 35702 + }, + { + "epoch": 98.08516483516483, + "grad_norm": 23.380159378051758, + "learning_rate": 9.574175824175823e-07, + "loss": 0.6705, + "step": 35703 + }, + { + "epoch": 98.08791208791209, + "grad_norm": 24.024343490600586, + "learning_rate": 9.56043956043956e-07, + "loss": 0.7185, + "step": 35704 + }, + { + "epoch": 98.09065934065934, + "grad_norm": 3.2771449089050293, + "learning_rate": 9.546703296703298e-07, + "loss": 0.0548, + "step": 35705 + }, + { + "epoch": 98.0934065934066, + "grad_norm": 3.0394279956817627, + "learning_rate": 9.532967032967034e-07, + "loss": 0.0344, + "step": 35706 + }, + { + "epoch": 98.09615384615384, + "grad_norm": 3.5743792057037354, + "learning_rate": 9.51923076923077e-07, + "loss": 0.0569, + "step": 35707 + }, + { + "epoch": 98.0989010989011, + "grad_norm": 14.55035400390625, + "learning_rate": 9.505494505494505e-07, + "loss": 0.159, + "step": 35708 + }, + { + "epoch": 98.10164835164835, + "grad_norm": 25.40459632873535, + "learning_rate": 9.491758241758242e-07, + "loss": 0.3647, + "step": 35709 + }, + { + "epoch": 98.1043956043956, + "grad_norm": 10.340005874633789, + "learning_rate": 9.478021978021978e-07, + "loss": 0.163, + "step": 35710 + }, + { + "epoch": 98.10714285714286, + "grad_norm": 8.142144203186035, + "learning_rate": 9.464285714285715e-07, + "loss": 0.2631, + "step": 35711 + }, + { + "epoch": 98.10989010989012, + "grad_norm": 15.502777099609375, + "learning_rate": 9.450549450549451e-07, + "loss": 0.2459, + "step": 35712 + }, + { + "epoch": 98.11263736263736, + "grad_norm": 16.348487854003906, + "learning_rate": 9.436813186813187e-07, + "loss": 0.1992, + "step": 35713 + }, + { + "epoch": 98.11538461538461, + "grad_norm": 3.8529279232025146, + "learning_rate": 9.423076923076924e-07, + "loss": 0.0395, + "step": 35714 + }, + { + "epoch": 98.11813186813187, + "grad_norm": 1.1260186433792114, + "learning_rate": 9.40934065934066e-07, + "loss": 0.0082, + "step": 35715 + }, + { + "epoch": 98.12087912087912, + "grad_norm": 13.308844566345215, + "learning_rate": 9.395604395604395e-07, + "loss": 0.4626, + "step": 35716 + }, + { + "epoch": 98.12362637362638, + "grad_norm": 19.231935501098633, + "learning_rate": 9.381868131868133e-07, + "loss": 0.3945, + "step": 35717 + }, + { + "epoch": 98.12637362637362, + "grad_norm": 14.327423095703125, + "learning_rate": 9.368131868131869e-07, + "loss": 0.2254, + "step": 35718 + }, + { + "epoch": 98.12912087912088, + "grad_norm": 25.79360580444336, + "learning_rate": 9.354395604395605e-07, + "loss": 0.375, + "step": 35719 + }, + { + "epoch": 98.13186813186813, + "grad_norm": 8.47325325012207, + "learning_rate": 9.340659340659341e-07, + "loss": 0.1072, + "step": 35720 + }, + { + "epoch": 98.13461538461539, + "grad_norm": 5.988062858581543, + "learning_rate": 9.326923076923077e-07, + "loss": 0.0621, + "step": 35721 + }, + { + "epoch": 98.13736263736264, + "grad_norm": 12.701996803283691, + "learning_rate": 9.313186813186813e-07, + "loss": 0.1739, + "step": 35722 + }, + { + "epoch": 98.14010989010988, + "grad_norm": 7.340224742889404, + "learning_rate": 9.29945054945055e-07, + "loss": 0.1473, + "step": 35723 + }, + { + "epoch": 98.14285714285714, + "grad_norm": 7.717600345611572, + "learning_rate": 9.285714285714287e-07, + "loss": 0.1095, + "step": 35724 + }, + { + "epoch": 98.1456043956044, + "grad_norm": 1.3970128297805786, + "learning_rate": 9.271978021978023e-07, + "loss": 0.0142, + "step": 35725 + }, + { + "epoch": 98.14835164835165, + "grad_norm": 17.377052307128906, + "learning_rate": 9.258241758241759e-07, + "loss": 0.3311, + "step": 35726 + }, + { + "epoch": 98.1510989010989, + "grad_norm": 12.151362419128418, + "learning_rate": 9.244505494505494e-07, + "loss": 0.1775, + "step": 35727 + }, + { + "epoch": 98.15384615384616, + "grad_norm": 8.89278793334961, + "learning_rate": 9.230769230769232e-07, + "loss": 0.1095, + "step": 35728 + }, + { + "epoch": 98.1565934065934, + "grad_norm": 23.826763153076172, + "learning_rate": 9.217032967032968e-07, + "loss": 0.4268, + "step": 35729 + }, + { + "epoch": 98.15934065934066, + "grad_norm": 3.197659969329834, + "learning_rate": 9.203296703296704e-07, + "loss": 0.0374, + "step": 35730 + }, + { + "epoch": 98.16208791208791, + "grad_norm": 13.866631507873535, + "learning_rate": 9.18956043956044e-07, + "loss": 0.1338, + "step": 35731 + }, + { + "epoch": 98.16483516483517, + "grad_norm": 11.266611099243164, + "learning_rate": 9.175824175824176e-07, + "loss": 0.3963, + "step": 35732 + }, + { + "epoch": 98.16758241758242, + "grad_norm": 4.8514323234558105, + "learning_rate": 9.162087912087912e-07, + "loss": 0.0543, + "step": 35733 + }, + { + "epoch": 98.17032967032966, + "grad_norm": 7.784177303314209, + "learning_rate": 9.148351648351649e-07, + "loss": 0.1253, + "step": 35734 + }, + { + "epoch": 98.17307692307692, + "grad_norm": 8.844452857971191, + "learning_rate": 9.134615384615386e-07, + "loss": 0.1218, + "step": 35735 + }, + { + "epoch": 98.17582417582418, + "grad_norm": 7.293329238891602, + "learning_rate": 9.120879120879122e-07, + "loss": 0.0712, + "step": 35736 + }, + { + "epoch": 98.17857142857143, + "grad_norm": 9.734432220458984, + "learning_rate": 9.107142857142858e-07, + "loss": 0.1509, + "step": 35737 + }, + { + "epoch": 98.18131868131869, + "grad_norm": 17.653226852416992, + "learning_rate": 9.093406593406593e-07, + "loss": 0.236, + "step": 35738 + }, + { + "epoch": 98.18406593406593, + "grad_norm": 2.321361541748047, + "learning_rate": 9.079670329670329e-07, + "loss": 0.0264, + "step": 35739 + }, + { + "epoch": 98.18681318681318, + "grad_norm": 6.46843957901001, + "learning_rate": 9.065934065934068e-07, + "loss": 0.0974, + "step": 35740 + }, + { + "epoch": 98.18956043956044, + "grad_norm": 5.474961757659912, + "learning_rate": 9.052197802197803e-07, + "loss": 0.0499, + "step": 35741 + }, + { + "epoch": 98.1923076923077, + "grad_norm": 9.121967315673828, + "learning_rate": 9.038461538461539e-07, + "loss": 0.1274, + "step": 35742 + }, + { + "epoch": 98.19505494505495, + "grad_norm": 11.364276885986328, + "learning_rate": 9.024725274725275e-07, + "loss": 0.1521, + "step": 35743 + }, + { + "epoch": 98.1978021978022, + "grad_norm": 9.89085578918457, + "learning_rate": 9.010989010989011e-07, + "loss": 0.147, + "step": 35744 + }, + { + "epoch": 98.20054945054945, + "grad_norm": 22.96795082092285, + "learning_rate": 8.997252747252747e-07, + "loss": 0.4339, + "step": 35745 + }, + { + "epoch": 98.2032967032967, + "grad_norm": 11.643241882324219, + "learning_rate": 8.983516483516485e-07, + "loss": 0.197, + "step": 35746 + }, + { + "epoch": 98.20604395604396, + "grad_norm": 13.281915664672852, + "learning_rate": 8.969780219780221e-07, + "loss": 0.1617, + "step": 35747 + }, + { + "epoch": 98.20879120879121, + "grad_norm": 19.062946319580078, + "learning_rate": 8.956043956043957e-07, + "loss": 0.4784, + "step": 35748 + }, + { + "epoch": 98.21153846153847, + "grad_norm": 5.858759880065918, + "learning_rate": 8.942307692307692e-07, + "loss": 0.0662, + "step": 35749 + }, + { + "epoch": 98.21428571428571, + "grad_norm": 18.83880043029785, + "learning_rate": 8.928571428571428e-07, + "loss": 0.2301, + "step": 35750 + }, + { + "epoch": 98.21703296703296, + "grad_norm": 10.906587600708008, + "learning_rate": 8.914835164835165e-07, + "loss": 0.1461, + "step": 35751 + }, + { + "epoch": 98.21978021978022, + "grad_norm": 11.844782829284668, + "learning_rate": 8.901098901098902e-07, + "loss": 0.3307, + "step": 35752 + }, + { + "epoch": 98.22252747252747, + "grad_norm": 8.451974868774414, + "learning_rate": 8.887362637362638e-07, + "loss": 0.0932, + "step": 35753 + }, + { + "epoch": 98.22527472527473, + "grad_norm": 6.209577560424805, + "learning_rate": 8.873626373626374e-07, + "loss": 0.0725, + "step": 35754 + }, + { + "epoch": 98.22802197802197, + "grad_norm": 11.017491340637207, + "learning_rate": 8.85989010989011e-07, + "loss": 0.1191, + "step": 35755 + }, + { + "epoch": 98.23076923076923, + "grad_norm": 11.460103034973145, + "learning_rate": 8.846153846153846e-07, + "loss": 0.1331, + "step": 35756 + }, + { + "epoch": 98.23351648351648, + "grad_norm": 11.537973403930664, + "learning_rate": 8.832417582417582e-07, + "loss": 0.339, + "step": 35757 + }, + { + "epoch": 98.23626373626374, + "grad_norm": 14.252634048461914, + "learning_rate": 8.81868131868132e-07, + "loss": 0.2725, + "step": 35758 + }, + { + "epoch": 98.23901098901099, + "grad_norm": 8.282692909240723, + "learning_rate": 8.804945054945056e-07, + "loss": 0.0705, + "step": 35759 + }, + { + "epoch": 98.24175824175825, + "grad_norm": 7.6186418533325195, + "learning_rate": 8.791208791208791e-07, + "loss": 0.1893, + "step": 35760 + }, + { + "epoch": 98.24450549450549, + "grad_norm": 8.580146789550781, + "learning_rate": 8.777472527472527e-07, + "loss": 0.1012, + "step": 35761 + }, + { + "epoch": 98.24725274725274, + "grad_norm": 15.746891975402832, + "learning_rate": 8.763736263736264e-07, + "loss": 0.1854, + "step": 35762 + }, + { + "epoch": 98.25, + "grad_norm": 18.042282104492188, + "learning_rate": 8.750000000000001e-07, + "loss": 0.1483, + "step": 35763 + }, + { + "epoch": 98.25274725274726, + "grad_norm": 15.938526153564453, + "learning_rate": 8.736263736263737e-07, + "loss": 0.1217, + "step": 35764 + }, + { + "epoch": 98.25549450549451, + "grad_norm": 13.372574806213379, + "learning_rate": 8.722527472527473e-07, + "loss": 0.2458, + "step": 35765 + }, + { + "epoch": 98.25824175824175, + "grad_norm": 10.13919734954834, + "learning_rate": 8.70879120879121e-07, + "loss": 0.3056, + "step": 35766 + }, + { + "epoch": 98.26098901098901, + "grad_norm": 6.080844879150391, + "learning_rate": 8.695054945054945e-07, + "loss": 0.0695, + "step": 35767 + }, + { + "epoch": 98.26373626373626, + "grad_norm": 3.08921480178833, + "learning_rate": 8.681318681318681e-07, + "loss": 0.03, + "step": 35768 + }, + { + "epoch": 98.26648351648352, + "grad_norm": 2.972520589828491, + "learning_rate": 8.667582417582419e-07, + "loss": 0.0365, + "step": 35769 + }, + { + "epoch": 98.26923076923077, + "grad_norm": 27.3626766204834, + "learning_rate": 8.653846153846154e-07, + "loss": 0.4712, + "step": 35770 + }, + { + "epoch": 98.27197802197803, + "grad_norm": 1.7320092916488647, + "learning_rate": 8.64010989010989e-07, + "loss": 0.0135, + "step": 35771 + }, + { + "epoch": 98.27472527472527, + "grad_norm": 7.0822978019714355, + "learning_rate": 8.626373626373627e-07, + "loss": 0.0818, + "step": 35772 + }, + { + "epoch": 98.27747252747253, + "grad_norm": 8.100615501403809, + "learning_rate": 8.612637362637363e-07, + "loss": 0.2095, + "step": 35773 + }, + { + "epoch": 98.28021978021978, + "grad_norm": 9.932270050048828, + "learning_rate": 8.598901098901099e-07, + "loss": 0.1172, + "step": 35774 + }, + { + "epoch": 98.28296703296704, + "grad_norm": 16.949386596679688, + "learning_rate": 8.585164835164836e-07, + "loss": 0.671, + "step": 35775 + }, + { + "epoch": 98.28571428571429, + "grad_norm": 10.518978118896484, + "learning_rate": 8.571428571428572e-07, + "loss": 0.1568, + "step": 35776 + }, + { + "epoch": 98.28846153846153, + "grad_norm": 5.233101844787598, + "learning_rate": 8.557692307692309e-07, + "loss": 0.0631, + "step": 35777 + }, + { + "epoch": 98.29120879120879, + "grad_norm": 9.656326293945312, + "learning_rate": 8.543956043956044e-07, + "loss": 0.0858, + "step": 35778 + }, + { + "epoch": 98.29395604395604, + "grad_norm": 4.864252090454102, + "learning_rate": 8.53021978021978e-07, + "loss": 0.0976, + "step": 35779 + }, + { + "epoch": 98.2967032967033, + "grad_norm": 14.578631401062012, + "learning_rate": 8.516483516483516e-07, + "loss": 0.3169, + "step": 35780 + }, + { + "epoch": 98.29945054945055, + "grad_norm": 7.040103912353516, + "learning_rate": 8.502747252747253e-07, + "loss": 0.1833, + "step": 35781 + }, + { + "epoch": 98.3021978021978, + "grad_norm": 12.938202857971191, + "learning_rate": 8.48901098901099e-07, + "loss": 0.1476, + "step": 35782 + }, + { + "epoch": 98.30494505494505, + "grad_norm": 11.436891555786133, + "learning_rate": 8.475274725274726e-07, + "loss": 0.2338, + "step": 35783 + }, + { + "epoch": 98.3076923076923, + "grad_norm": 6.422135829925537, + "learning_rate": 8.461538461538462e-07, + "loss": 0.132, + "step": 35784 + }, + { + "epoch": 98.31043956043956, + "grad_norm": 14.934494972229004, + "learning_rate": 8.447802197802198e-07, + "loss": 0.366, + "step": 35785 + }, + { + "epoch": 98.31318681318682, + "grad_norm": 16.19231605529785, + "learning_rate": 8.434065934065933e-07, + "loss": 0.3296, + "step": 35786 + }, + { + "epoch": 98.31593406593407, + "grad_norm": 16.68783187866211, + "learning_rate": 8.420329670329672e-07, + "loss": 0.1844, + "step": 35787 + }, + { + "epoch": 98.31868131868131, + "grad_norm": 12.770861625671387, + "learning_rate": 8.406593406593408e-07, + "loss": 0.3594, + "step": 35788 + }, + { + "epoch": 98.32142857142857, + "grad_norm": 5.837429523468018, + "learning_rate": 8.392857142857143e-07, + "loss": 0.0829, + "step": 35789 + }, + { + "epoch": 98.32417582417582, + "grad_norm": 13.55129337310791, + "learning_rate": 8.379120879120879e-07, + "loss": 0.1435, + "step": 35790 + }, + { + "epoch": 98.32692307692308, + "grad_norm": 16.960918426513672, + "learning_rate": 8.365384615384615e-07, + "loss": 0.5132, + "step": 35791 + }, + { + "epoch": 98.32967032967034, + "grad_norm": 10.129611015319824, + "learning_rate": 8.351648351648352e-07, + "loss": 0.2775, + "step": 35792 + }, + { + "epoch": 98.33241758241758, + "grad_norm": 11.562372207641602, + "learning_rate": 8.337912087912089e-07, + "loss": 0.1475, + "step": 35793 + }, + { + "epoch": 98.33516483516483, + "grad_norm": 6.370797157287598, + "learning_rate": 8.324175824175825e-07, + "loss": 0.0838, + "step": 35794 + }, + { + "epoch": 98.33791208791209, + "grad_norm": 7.084823131561279, + "learning_rate": 8.310439560439561e-07, + "loss": 0.1059, + "step": 35795 + }, + { + "epoch": 98.34065934065934, + "grad_norm": 11.167522430419922, + "learning_rate": 8.296703296703297e-07, + "loss": 0.2389, + "step": 35796 + }, + { + "epoch": 98.3434065934066, + "grad_norm": 18.286975860595703, + "learning_rate": 8.282967032967032e-07, + "loss": 0.3547, + "step": 35797 + }, + { + "epoch": 98.34615384615384, + "grad_norm": 2.5331814289093018, + "learning_rate": 8.269230769230771e-07, + "loss": 0.0624, + "step": 35798 + }, + { + "epoch": 98.3489010989011, + "grad_norm": 7.420425891876221, + "learning_rate": 8.255494505494507e-07, + "loss": 0.1303, + "step": 35799 + }, + { + "epoch": 98.35164835164835, + "grad_norm": 21.44512939453125, + "learning_rate": 8.241758241758242e-07, + "loss": 0.6694, + "step": 35800 + }, + { + "epoch": 98.3543956043956, + "grad_norm": 8.790353775024414, + "learning_rate": 8.228021978021978e-07, + "loss": 0.0826, + "step": 35801 + }, + { + "epoch": 98.35714285714286, + "grad_norm": 8.116013526916504, + "learning_rate": 8.214285714285714e-07, + "loss": 0.109, + "step": 35802 + }, + { + "epoch": 98.35989010989012, + "grad_norm": 13.084602355957031, + "learning_rate": 8.20054945054945e-07, + "loss": 0.2493, + "step": 35803 + }, + { + "epoch": 98.36263736263736, + "grad_norm": 8.710936546325684, + "learning_rate": 8.186813186813188e-07, + "loss": 0.0801, + "step": 35804 + }, + { + "epoch": 98.36538461538461, + "grad_norm": 21.060638427734375, + "learning_rate": 8.173076923076924e-07, + "loss": 0.4749, + "step": 35805 + }, + { + "epoch": 98.36813186813187, + "grad_norm": 3.584397554397583, + "learning_rate": 8.15934065934066e-07, + "loss": 0.0448, + "step": 35806 + }, + { + "epoch": 98.37087912087912, + "grad_norm": 10.46288776397705, + "learning_rate": 8.145604395604396e-07, + "loss": 0.2359, + "step": 35807 + }, + { + "epoch": 98.37362637362638, + "grad_norm": 14.285822868347168, + "learning_rate": 8.131868131868131e-07, + "loss": 0.1955, + "step": 35808 + }, + { + "epoch": 98.37637362637362, + "grad_norm": 15.154030799865723, + "learning_rate": 8.118131868131868e-07, + "loss": 0.3806, + "step": 35809 + }, + { + "epoch": 98.37912087912088, + "grad_norm": 12.086943626403809, + "learning_rate": 8.104395604395606e-07, + "loss": 0.2553, + "step": 35810 + }, + { + "epoch": 98.38186813186813, + "grad_norm": 21.29224395751953, + "learning_rate": 8.090659340659341e-07, + "loss": 0.2683, + "step": 35811 + }, + { + "epoch": 98.38461538461539, + "grad_norm": 13.768946647644043, + "learning_rate": 8.076923076923077e-07, + "loss": 0.4562, + "step": 35812 + }, + { + "epoch": 98.38736263736264, + "grad_norm": 10.263542175292969, + "learning_rate": 8.063186813186813e-07, + "loss": 0.2249, + "step": 35813 + }, + { + "epoch": 98.39010989010988, + "grad_norm": 18.31694984436035, + "learning_rate": 8.04945054945055e-07, + "loss": 0.2154, + "step": 35814 + }, + { + "epoch": 98.39285714285714, + "grad_norm": 10.89571762084961, + "learning_rate": 8.035714285714286e-07, + "loss": 0.2263, + "step": 35815 + }, + { + "epoch": 98.3956043956044, + "grad_norm": 12.657268524169922, + "learning_rate": 8.021978021978023e-07, + "loss": 0.1731, + "step": 35816 + }, + { + "epoch": 98.39835164835165, + "grad_norm": 22.747230529785156, + "learning_rate": 8.008241758241759e-07, + "loss": 0.4599, + "step": 35817 + }, + { + "epoch": 98.4010989010989, + "grad_norm": 22.276063919067383, + "learning_rate": 7.994505494505495e-07, + "loss": 0.4492, + "step": 35818 + }, + { + "epoch": 98.40384615384616, + "grad_norm": 11.786717414855957, + "learning_rate": 7.98076923076923e-07, + "loss": 0.2305, + "step": 35819 + }, + { + "epoch": 98.4065934065934, + "grad_norm": 26.285737991333008, + "learning_rate": 7.967032967032967e-07, + "loss": 0.7974, + "step": 35820 + }, + { + "epoch": 98.40934065934066, + "grad_norm": 7.649147987365723, + "learning_rate": 7.953296703296703e-07, + "loss": 0.1609, + "step": 35821 + }, + { + "epoch": 98.41208791208791, + "grad_norm": 19.168088912963867, + "learning_rate": 7.93956043956044e-07, + "loss": 0.4407, + "step": 35822 + }, + { + "epoch": 98.41483516483517, + "grad_norm": 21.97691535949707, + "learning_rate": 7.925824175824176e-07, + "loss": 0.3539, + "step": 35823 + }, + { + "epoch": 98.41758241758242, + "grad_norm": 7.039764881134033, + "learning_rate": 7.912087912087912e-07, + "loss": 0.0586, + "step": 35824 + }, + { + "epoch": 98.42032967032966, + "grad_norm": 2.9943172931671143, + "learning_rate": 7.898351648351649e-07, + "loss": 0.021, + "step": 35825 + }, + { + "epoch": 98.42307692307692, + "grad_norm": 21.15877914428711, + "learning_rate": 7.884615384615385e-07, + "loss": 0.8072, + "step": 35826 + }, + { + "epoch": 98.42582417582418, + "grad_norm": 26.238100051879883, + "learning_rate": 7.870879120879122e-07, + "loss": 0.1855, + "step": 35827 + }, + { + "epoch": 98.42857142857143, + "grad_norm": 9.450323104858398, + "learning_rate": 7.857142857142858e-07, + "loss": 0.1312, + "step": 35828 + }, + { + "epoch": 98.43131868131869, + "grad_norm": 19.653329849243164, + "learning_rate": 7.843406593406594e-07, + "loss": 0.3309, + "step": 35829 + }, + { + "epoch": 98.43406593406593, + "grad_norm": 11.637802124023438, + "learning_rate": 7.82967032967033e-07, + "loss": 0.2345, + "step": 35830 + }, + { + "epoch": 98.43681318681318, + "grad_norm": 16.06974220275879, + "learning_rate": 7.815934065934066e-07, + "loss": 0.2305, + "step": 35831 + }, + { + "epoch": 98.43956043956044, + "grad_norm": 3.3496274948120117, + "learning_rate": 7.802197802197803e-07, + "loss": 0.0319, + "step": 35832 + }, + { + "epoch": 98.4423076923077, + "grad_norm": 12.979341506958008, + "learning_rate": 7.788461538461539e-07, + "loss": 0.1019, + "step": 35833 + }, + { + "epoch": 98.44505494505495, + "grad_norm": 19.101516723632812, + "learning_rate": 7.774725274725274e-07, + "loss": 0.5867, + "step": 35834 + }, + { + "epoch": 98.4478021978022, + "grad_norm": 4.994112491607666, + "learning_rate": 7.760989010989012e-07, + "loss": 0.0536, + "step": 35835 + }, + { + "epoch": 98.45054945054945, + "grad_norm": 1.1797947883605957, + "learning_rate": 7.747252747252748e-07, + "loss": 0.0111, + "step": 35836 + }, + { + "epoch": 98.4532967032967, + "grad_norm": 16.181743621826172, + "learning_rate": 7.733516483516484e-07, + "loss": 0.2529, + "step": 35837 + }, + { + "epoch": 98.45604395604396, + "grad_norm": 14.176276206970215, + "learning_rate": 7.71978021978022e-07, + "loss": 0.3271, + "step": 35838 + }, + { + "epoch": 98.45879120879121, + "grad_norm": 6.275771141052246, + "learning_rate": 7.706043956043956e-07, + "loss": 0.1176, + "step": 35839 + }, + { + "epoch": 98.46153846153847, + "grad_norm": 3.6875991821289062, + "learning_rate": 7.692307692307694e-07, + "loss": 0.0307, + "step": 35840 + }, + { + "epoch": 98.46428571428571, + "grad_norm": 18.263206481933594, + "learning_rate": 7.678571428571429e-07, + "loss": 0.3361, + "step": 35841 + }, + { + "epoch": 98.46703296703296, + "grad_norm": 18.766006469726562, + "learning_rate": 7.664835164835165e-07, + "loss": 0.3426, + "step": 35842 + }, + { + "epoch": 98.46978021978022, + "grad_norm": 13.553159713745117, + "learning_rate": 7.651098901098902e-07, + "loss": 0.156, + "step": 35843 + }, + { + "epoch": 98.47252747252747, + "grad_norm": 4.392569541931152, + "learning_rate": 7.637362637362638e-07, + "loss": 0.0734, + "step": 35844 + }, + { + "epoch": 98.47527472527473, + "grad_norm": 24.11963653564453, + "learning_rate": 7.623626373626373e-07, + "loss": 0.3325, + "step": 35845 + }, + { + "epoch": 98.47802197802197, + "grad_norm": 11.513338088989258, + "learning_rate": 7.609890109890111e-07, + "loss": 0.1764, + "step": 35846 + }, + { + "epoch": 98.48076923076923, + "grad_norm": 11.469902038574219, + "learning_rate": 7.596153846153847e-07, + "loss": 0.1263, + "step": 35847 + }, + { + "epoch": 98.48351648351648, + "grad_norm": 8.782958030700684, + "learning_rate": 7.582417582417583e-07, + "loss": 0.1808, + "step": 35848 + }, + { + "epoch": 98.48626373626374, + "grad_norm": 10.77316951751709, + "learning_rate": 7.568681318681319e-07, + "loss": 0.2269, + "step": 35849 + }, + { + "epoch": 98.48901098901099, + "grad_norm": 14.201664924621582, + "learning_rate": 7.554945054945055e-07, + "loss": 0.2033, + "step": 35850 + }, + { + "epoch": 98.49175824175825, + "grad_norm": 13.783578872680664, + "learning_rate": 7.541208791208792e-07, + "loss": 0.1068, + "step": 35851 + }, + { + "epoch": 98.49450549450549, + "grad_norm": 9.467719078063965, + "learning_rate": 7.527472527472528e-07, + "loss": 0.1049, + "step": 35852 + }, + { + "epoch": 98.49725274725274, + "grad_norm": 10.396507263183594, + "learning_rate": 7.513736263736264e-07, + "loss": 0.1429, + "step": 35853 + }, + { + "epoch": 98.5, + "grad_norm": 13.129478454589844, + "learning_rate": 7.5e-07, + "loss": 0.1827, + "step": 35854 + }, + { + "epoch": 98.50274725274726, + "grad_norm": 15.646233558654785, + "learning_rate": 7.486263736263737e-07, + "loss": 0.408, + "step": 35855 + }, + { + "epoch": 98.50549450549451, + "grad_norm": 5.7127685546875, + "learning_rate": 7.472527472527473e-07, + "loss": 0.1293, + "step": 35856 + }, + { + "epoch": 98.50824175824175, + "grad_norm": 8.054533958435059, + "learning_rate": 7.458791208791209e-07, + "loss": 0.1356, + "step": 35857 + }, + { + "epoch": 98.51098901098901, + "grad_norm": 12.078338623046875, + "learning_rate": 7.445054945054946e-07, + "loss": 0.1483, + "step": 35858 + }, + { + "epoch": 98.51373626373626, + "grad_norm": 16.32024383544922, + "learning_rate": 7.431318681318681e-07, + "loss": 0.3208, + "step": 35859 + }, + { + "epoch": 98.51648351648352, + "grad_norm": 11.204719543457031, + "learning_rate": 7.417582417582417e-07, + "loss": 0.2814, + "step": 35860 + }, + { + "epoch": 98.51923076923077, + "grad_norm": 11.956953048706055, + "learning_rate": 7.403846153846155e-07, + "loss": 0.1786, + "step": 35861 + }, + { + "epoch": 98.52197802197803, + "grad_norm": 12.160540580749512, + "learning_rate": 7.390109890109891e-07, + "loss": 0.1365, + "step": 35862 + }, + { + "epoch": 98.52472527472527, + "grad_norm": 3.860686779022217, + "learning_rate": 7.376373626373626e-07, + "loss": 0.0311, + "step": 35863 + }, + { + "epoch": 98.52747252747253, + "grad_norm": 3.073179006576538, + "learning_rate": 7.362637362637363e-07, + "loss": 0.0444, + "step": 35864 + }, + { + "epoch": 98.53021978021978, + "grad_norm": 9.684117317199707, + "learning_rate": 7.348901098901099e-07, + "loss": 0.2591, + "step": 35865 + }, + { + "epoch": 98.53296703296704, + "grad_norm": 11.643766403198242, + "learning_rate": 7.335164835164835e-07, + "loss": 0.23, + "step": 35866 + }, + { + "epoch": 98.53571428571429, + "grad_norm": 21.919912338256836, + "learning_rate": 7.321428571428572e-07, + "loss": 0.579, + "step": 35867 + }, + { + "epoch": 98.53846153846153, + "grad_norm": 27.826766967773438, + "learning_rate": 7.307692307692308e-07, + "loss": 0.9434, + "step": 35868 + }, + { + "epoch": 98.54120879120879, + "grad_norm": 13.866280555725098, + "learning_rate": 7.293956043956044e-07, + "loss": 0.3033, + "step": 35869 + }, + { + "epoch": 98.54395604395604, + "grad_norm": 9.658427238464355, + "learning_rate": 7.28021978021978e-07, + "loss": 0.1509, + "step": 35870 + }, + { + "epoch": 98.5467032967033, + "grad_norm": 10.70002555847168, + "learning_rate": 7.266483516483516e-07, + "loss": 0.1287, + "step": 35871 + }, + { + "epoch": 98.54945054945055, + "grad_norm": 20.16991424560547, + "learning_rate": 7.252747252747254e-07, + "loss": 0.4294, + "step": 35872 + }, + { + "epoch": 98.5521978021978, + "grad_norm": 17.513710021972656, + "learning_rate": 7.23901098901099e-07, + "loss": 0.2548, + "step": 35873 + }, + { + "epoch": 98.55494505494505, + "grad_norm": 10.960710525512695, + "learning_rate": 7.225274725274725e-07, + "loss": 0.1792, + "step": 35874 + }, + { + "epoch": 98.5576923076923, + "grad_norm": 8.369366645812988, + "learning_rate": 7.211538461538462e-07, + "loss": 0.095, + "step": 35875 + }, + { + "epoch": 98.56043956043956, + "grad_norm": 12.011744499206543, + "learning_rate": 7.197802197802198e-07, + "loss": 0.2276, + "step": 35876 + }, + { + "epoch": 98.56318681318682, + "grad_norm": 11.76032829284668, + "learning_rate": 7.184065934065935e-07, + "loss": 0.3288, + "step": 35877 + }, + { + "epoch": 98.56593406593407, + "grad_norm": 11.457657814025879, + "learning_rate": 7.170329670329671e-07, + "loss": 0.2832, + "step": 35878 + }, + { + "epoch": 98.56868131868131, + "grad_norm": 12.137657165527344, + "learning_rate": 7.156593406593407e-07, + "loss": 0.1792, + "step": 35879 + }, + { + "epoch": 98.57142857142857, + "grad_norm": 4.241464138031006, + "learning_rate": 7.142857142857143e-07, + "loss": 0.0282, + "step": 35880 + }, + { + "epoch": 98.57417582417582, + "grad_norm": 12.381719589233398, + "learning_rate": 7.129120879120879e-07, + "loss": 0.2467, + "step": 35881 + }, + { + "epoch": 98.57692307692308, + "grad_norm": 9.111223220825195, + "learning_rate": 7.115384615384616e-07, + "loss": 0.2098, + "step": 35882 + }, + { + "epoch": 98.57967032967034, + "grad_norm": 12.067022323608398, + "learning_rate": 7.101648351648352e-07, + "loss": 0.1433, + "step": 35883 + }, + { + "epoch": 98.58241758241758, + "grad_norm": 6.433961391448975, + "learning_rate": 7.087912087912089e-07, + "loss": 0.0978, + "step": 35884 + }, + { + "epoch": 98.58516483516483, + "grad_norm": 3.724534034729004, + "learning_rate": 7.074175824175824e-07, + "loss": 0.0378, + "step": 35885 + }, + { + "epoch": 98.58791208791209, + "grad_norm": 20.407983779907227, + "learning_rate": 7.06043956043956e-07, + "loss": 0.5085, + "step": 35886 + }, + { + "epoch": 98.59065934065934, + "grad_norm": 17.71092987060547, + "learning_rate": 7.046703296703297e-07, + "loss": 0.4236, + "step": 35887 + }, + { + "epoch": 98.5934065934066, + "grad_norm": 21.421180725097656, + "learning_rate": 7.032967032967034e-07, + "loss": 0.4121, + "step": 35888 + }, + { + "epoch": 98.59615384615384, + "grad_norm": 13.655939102172852, + "learning_rate": 7.019230769230769e-07, + "loss": 0.3614, + "step": 35889 + }, + { + "epoch": 98.5989010989011, + "grad_norm": 14.093056678771973, + "learning_rate": 7.005494505494506e-07, + "loss": 0.3537, + "step": 35890 + }, + { + "epoch": 98.60164835164835, + "grad_norm": 11.299469947814941, + "learning_rate": 6.991758241758242e-07, + "loss": 0.2752, + "step": 35891 + }, + { + "epoch": 98.6043956043956, + "grad_norm": 14.270483016967773, + "learning_rate": 6.978021978021978e-07, + "loss": 0.1169, + "step": 35892 + }, + { + "epoch": 98.60714285714286, + "grad_norm": 10.79732894897461, + "learning_rate": 6.964285714285715e-07, + "loss": 0.2239, + "step": 35893 + }, + { + "epoch": 98.60989010989012, + "grad_norm": 0.9482425451278687, + "learning_rate": 6.950549450549451e-07, + "loss": 0.0112, + "step": 35894 + }, + { + "epoch": 98.61263736263736, + "grad_norm": 15.228436470031738, + "learning_rate": 6.936813186813187e-07, + "loss": 0.0722, + "step": 35895 + }, + { + "epoch": 98.61538461538461, + "grad_norm": 2.3076586723327637, + "learning_rate": 6.923076923076923e-07, + "loss": 0.0306, + "step": 35896 + }, + { + "epoch": 98.61813186813187, + "grad_norm": 13.303350448608398, + "learning_rate": 6.909340659340659e-07, + "loss": 0.1776, + "step": 35897 + }, + { + "epoch": 98.62087912087912, + "grad_norm": 15.625624656677246, + "learning_rate": 6.895604395604396e-07, + "loss": 0.6156, + "step": 35898 + }, + { + "epoch": 98.62362637362638, + "grad_norm": 7.9427385330200195, + "learning_rate": 6.881868131868133e-07, + "loss": 0.2859, + "step": 35899 + }, + { + "epoch": 98.62637362637362, + "grad_norm": 20.359893798828125, + "learning_rate": 6.868131868131868e-07, + "loss": 0.6319, + "step": 35900 + }, + { + "epoch": 98.62912087912088, + "grad_norm": 14.610960006713867, + "learning_rate": 6.854395604395604e-07, + "loss": 0.3007, + "step": 35901 + }, + { + "epoch": 98.63186813186813, + "grad_norm": 14.154444694519043, + "learning_rate": 6.840659340659341e-07, + "loss": 0.1826, + "step": 35902 + }, + { + "epoch": 98.63461538461539, + "grad_norm": 13.394380569458008, + "learning_rate": 6.826923076923078e-07, + "loss": 0.2861, + "step": 35903 + }, + { + "epoch": 98.63736263736264, + "grad_norm": 9.03153133392334, + "learning_rate": 6.813186813186814e-07, + "loss": 0.1016, + "step": 35904 + }, + { + "epoch": 98.64010989010988, + "grad_norm": 24.24247169494629, + "learning_rate": 6.79945054945055e-07, + "loss": 0.4429, + "step": 35905 + }, + { + "epoch": 98.64285714285714, + "grad_norm": 18.90939712524414, + "learning_rate": 6.785714285714286e-07, + "loss": 0.3956, + "step": 35906 + }, + { + "epoch": 98.6456043956044, + "grad_norm": 6.075458526611328, + "learning_rate": 6.771978021978022e-07, + "loss": 0.1055, + "step": 35907 + }, + { + "epoch": 98.64835164835165, + "grad_norm": 9.93674087524414, + "learning_rate": 6.758241758241758e-07, + "loss": 0.1759, + "step": 35908 + }, + { + "epoch": 98.6510989010989, + "grad_norm": 15.320306777954102, + "learning_rate": 6.744505494505495e-07, + "loss": 0.2904, + "step": 35909 + }, + { + "epoch": 98.65384615384616, + "grad_norm": 10.485993385314941, + "learning_rate": 6.730769230769232e-07, + "loss": 0.1645, + "step": 35910 + }, + { + "epoch": 98.6565934065934, + "grad_norm": 23.8050537109375, + "learning_rate": 6.717032967032967e-07, + "loss": 0.2661, + "step": 35911 + }, + { + "epoch": 98.65934065934066, + "grad_norm": 3.702636241912842, + "learning_rate": 6.703296703296703e-07, + "loss": 0.0346, + "step": 35912 + }, + { + "epoch": 98.66208791208791, + "grad_norm": 7.914104461669922, + "learning_rate": 6.68956043956044e-07, + "loss": 0.1359, + "step": 35913 + }, + { + "epoch": 98.66483516483517, + "grad_norm": 9.86230754852295, + "learning_rate": 6.675824175824177e-07, + "loss": 0.1838, + "step": 35914 + }, + { + "epoch": 98.66758241758242, + "grad_norm": 25.18010711669922, + "learning_rate": 6.662087912087912e-07, + "loss": 0.8161, + "step": 35915 + }, + { + "epoch": 98.67032967032966, + "grad_norm": 14.712102890014648, + "learning_rate": 6.648351648351649e-07, + "loss": 0.2974, + "step": 35916 + }, + { + "epoch": 98.67307692307692, + "grad_norm": 7.3559699058532715, + "learning_rate": 6.634615384615385e-07, + "loss": 0.153, + "step": 35917 + }, + { + "epoch": 98.67582417582418, + "grad_norm": 14.48775577545166, + "learning_rate": 6.620879120879121e-07, + "loss": 0.2521, + "step": 35918 + }, + { + "epoch": 98.67857142857143, + "grad_norm": 1.318237543106079, + "learning_rate": 6.607142857142858e-07, + "loss": 0.0143, + "step": 35919 + }, + { + "epoch": 98.68131868131869, + "grad_norm": 14.64842700958252, + "learning_rate": 6.593406593406594e-07, + "loss": 0.222, + "step": 35920 + }, + { + "epoch": 98.68406593406593, + "grad_norm": 5.525327205657959, + "learning_rate": 6.57967032967033e-07, + "loss": 0.1508, + "step": 35921 + }, + { + "epoch": 98.68681318681318, + "grad_norm": 2.148707389831543, + "learning_rate": 6.565934065934066e-07, + "loss": 0.039, + "step": 35922 + }, + { + "epoch": 98.68956043956044, + "grad_norm": 11.843165397644043, + "learning_rate": 6.552197802197802e-07, + "loss": 0.1224, + "step": 35923 + }, + { + "epoch": 98.6923076923077, + "grad_norm": 5.129721164703369, + "learning_rate": 6.538461538461538e-07, + "loss": 0.0525, + "step": 35924 + }, + { + "epoch": 98.69505494505495, + "grad_norm": 15.948704719543457, + "learning_rate": 6.524725274725276e-07, + "loss": 0.2181, + "step": 35925 + }, + { + "epoch": 98.6978021978022, + "grad_norm": 5.096789836883545, + "learning_rate": 6.510989010989011e-07, + "loss": 0.0885, + "step": 35926 + }, + { + "epoch": 98.70054945054945, + "grad_norm": 10.340960502624512, + "learning_rate": 6.497252747252747e-07, + "loss": 0.1837, + "step": 35927 + }, + { + "epoch": 98.7032967032967, + "grad_norm": 1.5186679363250732, + "learning_rate": 6.483516483516484e-07, + "loss": 0.0221, + "step": 35928 + }, + { + "epoch": 98.70604395604396, + "grad_norm": 9.102492332458496, + "learning_rate": 6.46978021978022e-07, + "loss": 0.0964, + "step": 35929 + }, + { + "epoch": 98.70879120879121, + "grad_norm": 13.823291778564453, + "learning_rate": 6.456043956043956e-07, + "loss": 0.167, + "step": 35930 + }, + { + "epoch": 98.71153846153847, + "grad_norm": 13.241250991821289, + "learning_rate": 6.442307692307693e-07, + "loss": 0.2607, + "step": 35931 + }, + { + "epoch": 98.71428571428571, + "grad_norm": 5.786032199859619, + "learning_rate": 6.428571428571429e-07, + "loss": 0.082, + "step": 35932 + }, + { + "epoch": 98.71703296703296, + "grad_norm": 5.344667434692383, + "learning_rate": 6.414835164835165e-07, + "loss": 0.084, + "step": 35933 + }, + { + "epoch": 98.71978021978022, + "grad_norm": 18.082433700561523, + "learning_rate": 6.401098901098901e-07, + "loss": 0.3522, + "step": 35934 + }, + { + "epoch": 98.72252747252747, + "grad_norm": 23.731334686279297, + "learning_rate": 6.387362637362638e-07, + "loss": 0.4867, + "step": 35935 + }, + { + "epoch": 98.72527472527473, + "grad_norm": 19.787065505981445, + "learning_rate": 6.373626373626375e-07, + "loss": 0.34, + "step": 35936 + }, + { + "epoch": 98.72802197802197, + "grad_norm": 12.978287696838379, + "learning_rate": 6.35989010989011e-07, + "loss": 0.3418, + "step": 35937 + }, + { + "epoch": 98.73076923076923, + "grad_norm": 6.1094207763671875, + "learning_rate": 6.346153846153846e-07, + "loss": 0.102, + "step": 35938 + }, + { + "epoch": 98.73351648351648, + "grad_norm": 15.265348434448242, + "learning_rate": 6.332417582417583e-07, + "loss": 0.2606, + "step": 35939 + }, + { + "epoch": 98.73626373626374, + "grad_norm": 16.817874908447266, + "learning_rate": 6.31868131868132e-07, + "loss": 0.2811, + "step": 35940 + }, + { + "epoch": 98.73901098901099, + "grad_norm": 12.812905311584473, + "learning_rate": 6.304945054945055e-07, + "loss": 0.3183, + "step": 35941 + }, + { + "epoch": 98.74175824175825, + "grad_norm": 34.05274200439453, + "learning_rate": 6.291208791208792e-07, + "loss": 0.215, + "step": 35942 + }, + { + "epoch": 98.74450549450549, + "grad_norm": 4.845636367797852, + "learning_rate": 6.277472527472528e-07, + "loss": 0.0728, + "step": 35943 + }, + { + "epoch": 98.74725274725274, + "grad_norm": 12.69144058227539, + "learning_rate": 6.263736263736264e-07, + "loss": 0.1942, + "step": 35944 + }, + { + "epoch": 98.75, + "grad_norm": 5.5958099365234375, + "learning_rate": 6.25e-07, + "loss": 0.0762, + "step": 35945 + }, + { + "epoch": 98.75274725274726, + "grad_norm": 13.964580535888672, + "learning_rate": 6.236263736263737e-07, + "loss": 0.281, + "step": 35946 + }, + { + "epoch": 98.75549450549451, + "grad_norm": 4.054106712341309, + "learning_rate": 6.222527472527473e-07, + "loss": 0.0635, + "step": 35947 + }, + { + "epoch": 98.75824175824175, + "grad_norm": 24.02134895324707, + "learning_rate": 6.208791208791209e-07, + "loss": 0.4316, + "step": 35948 + }, + { + "epoch": 98.76098901098901, + "grad_norm": 7.41500186920166, + "learning_rate": 6.195054945054945e-07, + "loss": 0.0841, + "step": 35949 + }, + { + "epoch": 98.76373626373626, + "grad_norm": 8.346625328063965, + "learning_rate": 6.181318681318681e-07, + "loss": 0.0828, + "step": 35950 + }, + { + "epoch": 98.76648351648352, + "grad_norm": 10.89573860168457, + "learning_rate": 6.167582417582419e-07, + "loss": 0.1157, + "step": 35951 + }, + { + "epoch": 98.76923076923077, + "grad_norm": 7.17901086807251, + "learning_rate": 6.153846153846154e-07, + "loss": 0.131, + "step": 35952 + }, + { + "epoch": 98.77197802197803, + "grad_norm": 14.930095672607422, + "learning_rate": 6.14010989010989e-07, + "loss": 0.4032, + "step": 35953 + }, + { + "epoch": 98.77472527472527, + "grad_norm": 21.074560165405273, + "learning_rate": 6.126373626373627e-07, + "loss": 0.4482, + "step": 35954 + }, + { + "epoch": 98.77747252747253, + "grad_norm": 12.228622436523438, + "learning_rate": 6.112637362637362e-07, + "loss": 0.2339, + "step": 35955 + }, + { + "epoch": 98.78021978021978, + "grad_norm": 5.593270778656006, + "learning_rate": 6.098901098901099e-07, + "loss": 0.0746, + "step": 35956 + }, + { + "epoch": 98.78296703296704, + "grad_norm": 17.38653564453125, + "learning_rate": 6.085164835164836e-07, + "loss": 0.2381, + "step": 35957 + }, + { + "epoch": 98.78571428571429, + "grad_norm": 4.549907684326172, + "learning_rate": 6.071428571428572e-07, + "loss": 0.094, + "step": 35958 + }, + { + "epoch": 98.78846153846153, + "grad_norm": 17.591386795043945, + "learning_rate": 6.057692307692307e-07, + "loss": 0.2148, + "step": 35959 + }, + { + "epoch": 98.79120879120879, + "grad_norm": 13.48025131225586, + "learning_rate": 6.043956043956044e-07, + "loss": 0.3378, + "step": 35960 + }, + { + "epoch": 98.79395604395604, + "grad_norm": 12.248613357543945, + "learning_rate": 6.030219780219781e-07, + "loss": 0.127, + "step": 35961 + }, + { + "epoch": 98.7967032967033, + "grad_norm": 24.056228637695312, + "learning_rate": 6.016483516483517e-07, + "loss": 0.3714, + "step": 35962 + }, + { + "epoch": 98.79945054945055, + "grad_norm": 2.3203072547912598, + "learning_rate": 6.002747252747253e-07, + "loss": 0.026, + "step": 35963 + }, + { + "epoch": 98.8021978021978, + "grad_norm": 13.65876579284668, + "learning_rate": 5.989010989010989e-07, + "loss": 0.279, + "step": 35964 + }, + { + "epoch": 98.80494505494505, + "grad_norm": 9.539896965026855, + "learning_rate": 5.975274725274725e-07, + "loss": 0.1303, + "step": 35965 + }, + { + "epoch": 98.8076923076923, + "grad_norm": 10.37640380859375, + "learning_rate": 5.961538461538461e-07, + "loss": 0.1256, + "step": 35966 + }, + { + "epoch": 98.81043956043956, + "grad_norm": 18.253746032714844, + "learning_rate": 5.947802197802198e-07, + "loss": 0.2986, + "step": 35967 + }, + { + "epoch": 98.81318681318682, + "grad_norm": 8.30717658996582, + "learning_rate": 5.934065934065935e-07, + "loss": 0.0865, + "step": 35968 + }, + { + "epoch": 98.81593406593407, + "grad_norm": 13.07914924621582, + "learning_rate": 5.920329670329671e-07, + "loss": 0.1542, + "step": 35969 + }, + { + "epoch": 98.81868131868131, + "grad_norm": 3.6638824939727783, + "learning_rate": 5.906593406593406e-07, + "loss": 0.0444, + "step": 35970 + }, + { + "epoch": 98.82142857142857, + "grad_norm": 15.75901985168457, + "learning_rate": 5.892857142857143e-07, + "loss": 0.3016, + "step": 35971 + }, + { + "epoch": 98.82417582417582, + "grad_norm": 1.1657949686050415, + "learning_rate": 5.87912087912088e-07, + "loss": 0.0137, + "step": 35972 + }, + { + "epoch": 98.82692307692308, + "grad_norm": 11.915109634399414, + "learning_rate": 5.865384615384616e-07, + "loss": 0.1705, + "step": 35973 + }, + { + "epoch": 98.82967032967034, + "grad_norm": 3.0256528854370117, + "learning_rate": 5.851648351648352e-07, + "loss": 0.0319, + "step": 35974 + }, + { + "epoch": 98.83241758241758, + "grad_norm": 15.402430534362793, + "learning_rate": 5.837912087912088e-07, + "loss": 0.1998, + "step": 35975 + }, + { + "epoch": 98.83516483516483, + "grad_norm": 4.154520511627197, + "learning_rate": 5.824175824175824e-07, + "loss": 0.0681, + "step": 35976 + }, + { + "epoch": 98.83791208791209, + "grad_norm": 10.521150588989258, + "learning_rate": 5.810439560439561e-07, + "loss": 0.1035, + "step": 35977 + }, + { + "epoch": 98.84065934065934, + "grad_norm": 5.2782440185546875, + "learning_rate": 5.796703296703297e-07, + "loss": 0.1035, + "step": 35978 + }, + { + "epoch": 98.8434065934066, + "grad_norm": 9.133931159973145, + "learning_rate": 5.782967032967033e-07, + "loss": 0.1573, + "step": 35979 + }, + { + "epoch": 98.84615384615384, + "grad_norm": 20.234182357788086, + "learning_rate": 5.76923076923077e-07, + "loss": 0.5006, + "step": 35980 + }, + { + "epoch": 98.8489010989011, + "grad_norm": 20.37043571472168, + "learning_rate": 5.755494505494505e-07, + "loss": 0.5082, + "step": 35981 + }, + { + "epoch": 98.85164835164835, + "grad_norm": 3.4470512866973877, + "learning_rate": 5.741758241758242e-07, + "loss": 0.0389, + "step": 35982 + }, + { + "epoch": 98.8543956043956, + "grad_norm": 0.7930830121040344, + "learning_rate": 5.728021978021979e-07, + "loss": 0.008, + "step": 35983 + }, + { + "epoch": 98.85714285714286, + "grad_norm": 14.372601509094238, + "learning_rate": 5.714285714285715e-07, + "loss": 0.4441, + "step": 35984 + }, + { + "epoch": 98.85989010989012, + "grad_norm": 20.172555923461914, + "learning_rate": 5.70054945054945e-07, + "loss": 0.3781, + "step": 35985 + }, + { + "epoch": 98.86263736263736, + "grad_norm": 9.37933349609375, + "learning_rate": 5.686813186813187e-07, + "loss": 0.1375, + "step": 35986 + }, + { + "epoch": 98.86538461538461, + "grad_norm": 6.194093227386475, + "learning_rate": 5.673076923076923e-07, + "loss": 0.1359, + "step": 35987 + }, + { + "epoch": 98.86813186813187, + "grad_norm": 7.245911598205566, + "learning_rate": 5.65934065934066e-07, + "loss": 0.0908, + "step": 35988 + }, + { + "epoch": 98.87087912087912, + "grad_norm": 15.996275901794434, + "learning_rate": 5.645604395604396e-07, + "loss": 0.2784, + "step": 35989 + }, + { + "epoch": 98.87362637362638, + "grad_norm": 7.80645751953125, + "learning_rate": 5.631868131868132e-07, + "loss": 0.0676, + "step": 35990 + }, + { + "epoch": 98.87637362637362, + "grad_norm": 5.3117475509643555, + "learning_rate": 5.618131868131868e-07, + "loss": 0.0588, + "step": 35991 + }, + { + "epoch": 98.87912087912088, + "grad_norm": 7.883950710296631, + "learning_rate": 5.604395604395604e-07, + "loss": 0.1281, + "step": 35992 + }, + { + "epoch": 98.88186813186813, + "grad_norm": 6.779755592346191, + "learning_rate": 5.590659340659341e-07, + "loss": 0.0947, + "step": 35993 + }, + { + "epoch": 98.88461538461539, + "grad_norm": 7.301064491271973, + "learning_rate": 5.576923076923077e-07, + "loss": 0.1643, + "step": 35994 + }, + { + "epoch": 98.88736263736264, + "grad_norm": 9.03727912902832, + "learning_rate": 5.563186813186814e-07, + "loss": 0.142, + "step": 35995 + }, + { + "epoch": 98.89010989010988, + "grad_norm": 18.63078498840332, + "learning_rate": 5.549450549450549e-07, + "loss": 0.3713, + "step": 35996 + }, + { + "epoch": 98.89285714285714, + "grad_norm": 6.539907932281494, + "learning_rate": 5.535714285714285e-07, + "loss": 0.0619, + "step": 35997 + }, + { + "epoch": 98.8956043956044, + "grad_norm": 21.36613655090332, + "learning_rate": 5.521978021978023e-07, + "loss": 0.6219, + "step": 35998 + }, + { + "epoch": 98.89835164835165, + "grad_norm": 1.6943327188491821, + "learning_rate": 5.508241758241759e-07, + "loss": 0.0223, + "step": 35999 + }, + { + "epoch": 98.9010989010989, + "grad_norm": 4.812258720397949, + "learning_rate": 5.494505494505495e-07, + "loss": 0.0635, + "step": 36000 + }, + { + "epoch": 98.90384615384616, + "grad_norm": 21.713924407958984, + "learning_rate": 5.480769230769231e-07, + "loss": 0.3564, + "step": 36001 + }, + { + "epoch": 98.9065934065934, + "grad_norm": 15.76959228515625, + "learning_rate": 5.467032967032967e-07, + "loss": 0.4824, + "step": 36002 + }, + { + "epoch": 98.90934065934066, + "grad_norm": 24.483959197998047, + "learning_rate": 5.453296703296704e-07, + "loss": 0.5609, + "step": 36003 + }, + { + "epoch": 98.91208791208791, + "grad_norm": 16.59882164001465, + "learning_rate": 5.43956043956044e-07, + "loss": 0.2405, + "step": 36004 + }, + { + "epoch": 98.91483516483517, + "grad_norm": 9.14758586883545, + "learning_rate": 5.425824175824176e-07, + "loss": 0.0722, + "step": 36005 + }, + { + "epoch": 98.91758241758242, + "grad_norm": 8.786958694458008, + "learning_rate": 5.412087912087913e-07, + "loss": 0.1012, + "step": 36006 + }, + { + "epoch": 98.92032967032966, + "grad_norm": 2.642780303955078, + "learning_rate": 5.398351648351648e-07, + "loss": 0.0376, + "step": 36007 + }, + { + "epoch": 98.92307692307692, + "grad_norm": 24.830066680908203, + "learning_rate": 5.384615384615384e-07, + "loss": 0.6337, + "step": 36008 + }, + { + "epoch": 98.92582417582418, + "grad_norm": 15.63856029510498, + "learning_rate": 5.370879120879122e-07, + "loss": 0.2684, + "step": 36009 + }, + { + "epoch": 98.92857142857143, + "grad_norm": 7.7862229347229, + "learning_rate": 5.357142857142858e-07, + "loss": 0.1516, + "step": 36010 + }, + { + "epoch": 98.93131868131869, + "grad_norm": 8.812053680419922, + "learning_rate": 5.343406593406593e-07, + "loss": 0.1263, + "step": 36011 + }, + { + "epoch": 98.93406593406593, + "grad_norm": 12.406439781188965, + "learning_rate": 5.32967032967033e-07, + "loss": 0.1605, + "step": 36012 + }, + { + "epoch": 98.93681318681318, + "grad_norm": 34.488609313964844, + "learning_rate": 5.315934065934066e-07, + "loss": 0.8252, + "step": 36013 + }, + { + "epoch": 98.93956043956044, + "grad_norm": 14.624531745910645, + "learning_rate": 5.302197802197803e-07, + "loss": 0.4758, + "step": 36014 + }, + { + "epoch": 98.9423076923077, + "grad_norm": 8.102089881896973, + "learning_rate": 5.288461538461539e-07, + "loss": 0.1612, + "step": 36015 + }, + { + "epoch": 98.94505494505495, + "grad_norm": 10.604991912841797, + "learning_rate": 5.274725274725275e-07, + "loss": 0.1807, + "step": 36016 + }, + { + "epoch": 98.9478021978022, + "grad_norm": 6.752920150756836, + "learning_rate": 5.260989010989011e-07, + "loss": 0.0879, + "step": 36017 + }, + { + "epoch": 98.95054945054945, + "grad_norm": 10.54150676727295, + "learning_rate": 5.247252747252747e-07, + "loss": 0.2587, + "step": 36018 + }, + { + "epoch": 98.9532967032967, + "grad_norm": 8.8434419631958, + "learning_rate": 5.233516483516484e-07, + "loss": 0.0747, + "step": 36019 + }, + { + "epoch": 98.95604395604396, + "grad_norm": 6.14363956451416, + "learning_rate": 5.21978021978022e-07, + "loss": 0.0791, + "step": 36020 + }, + { + "epoch": 98.95879120879121, + "grad_norm": 14.152007102966309, + "learning_rate": 5.206043956043957e-07, + "loss": 0.3436, + "step": 36021 + }, + { + "epoch": 98.96153846153847, + "grad_norm": 2.068924903869629, + "learning_rate": 5.192307692307692e-07, + "loss": 0.0242, + "step": 36022 + }, + { + "epoch": 98.96428571428571, + "grad_norm": 15.761652946472168, + "learning_rate": 5.178571428571428e-07, + "loss": 0.2551, + "step": 36023 + }, + { + "epoch": 98.96703296703296, + "grad_norm": 24.501373291015625, + "learning_rate": 5.164835164835166e-07, + "loss": 0.4895, + "step": 36024 + }, + { + "epoch": 98.96978021978022, + "grad_norm": 6.652297496795654, + "learning_rate": 5.151098901098902e-07, + "loss": 0.131, + "step": 36025 + }, + { + "epoch": 98.97252747252747, + "grad_norm": 5.566038608551025, + "learning_rate": 5.137362637362637e-07, + "loss": 0.0704, + "step": 36026 + }, + { + "epoch": 98.97527472527473, + "grad_norm": 9.845020294189453, + "learning_rate": 5.123626373626374e-07, + "loss": 0.1532, + "step": 36027 + }, + { + "epoch": 98.97802197802197, + "grad_norm": 16.527116775512695, + "learning_rate": 5.10989010989011e-07, + "loss": 0.5266, + "step": 36028 + }, + { + "epoch": 98.98076923076923, + "grad_norm": 5.833383083343506, + "learning_rate": 5.096153846153846e-07, + "loss": 0.0984, + "step": 36029 + }, + { + "epoch": 98.98351648351648, + "grad_norm": 12.265679359436035, + "learning_rate": 5.082417582417583e-07, + "loss": 0.2795, + "step": 36030 + }, + { + "epoch": 98.98626373626374, + "grad_norm": 3.6694765090942383, + "learning_rate": 5.068681318681319e-07, + "loss": 0.0386, + "step": 36031 + }, + { + "epoch": 98.98901098901099, + "grad_norm": 17.79267692565918, + "learning_rate": 5.054945054945056e-07, + "loss": 0.2296, + "step": 36032 + }, + { + "epoch": 98.99175824175825, + "grad_norm": 6.886349678039551, + "learning_rate": 5.041208791208791e-07, + "loss": 0.0468, + "step": 36033 + }, + { + "epoch": 98.99450549450549, + "grad_norm": 5.549317359924316, + "learning_rate": 5.027472527472527e-07, + "loss": 0.0761, + "step": 36034 + }, + { + "epoch": 98.99725274725274, + "grad_norm": 5.574146747589111, + "learning_rate": 5.013736263736265e-07, + "loss": 0.0736, + "step": 36035 + }, + { + "epoch": 99.0, + "grad_norm": 82.45153045654297, + "learning_rate": 5.000000000000001e-07, + "loss": 2.2577, + "step": 36036 + }, + { + "epoch": 99.0, + "eval_accuracy": 0.7920110192837465, + "eval_f1": 0.7984381463027445, + "eval_f1_DuraRiadoRio_64x64": 0.7975460122699386, + "eval_f1_Mole_64x64": 0.862453531598513, + "eval_f1_Quebrado_64x64": 0.7867647058823529, + "eval_f1_RiadoRio_64x64": 0.662613981762918, + "eval_f1_RioFechado_64x64": 0.8828125, + "eval_loss": 0.7777783274650574, + "eval_precision": 0.8170540987001402, + "eval_precision_DuraRiadoRio_64x64": 0.7142857142857143, + "eval_precision_Mole_64x64": 0.928, + "eval_precision_Quebrado_64x64": 0.8359375, + "eval_precision_RiadoRio_64x64": 0.615819209039548, + "eval_precision_RioFechado_64x64": 0.9912280701754386, + "eval_recall": 0.7928537599868216, + "eval_recall_DuraRiadoRio_64x64": 0.9027777777777778, + "eval_recall_Mole_64x64": 0.8055555555555556, + "eval_recall_Quebrado_64x64": 0.7430555555555556, + "eval_recall_RiadoRio_64x64": 0.7171052631578947, + "eval_recall_RioFechado_64x64": 0.795774647887324, + "eval_runtime": 1.6982, + "eval_samples_per_second": 427.511, + "eval_steps_per_second": 27.087, + "step": 36036 + }, + { + "epoch": 99.00274725274726, + "grad_norm": 8.560612678527832, + "learning_rate": 4.986263736263736e-07, + "loss": 0.0931, + "step": 36037 + }, + { + "epoch": 99.00549450549451, + "grad_norm": 2.2027881145477295, + "learning_rate": 4.972527472527473e-07, + "loss": 0.0237, + "step": 36038 + }, + { + "epoch": 99.00824175824175, + "grad_norm": 13.091856956481934, + "learning_rate": 4.958791208791209e-07, + "loss": 0.1376, + "step": 36039 + }, + { + "epoch": 99.01098901098901, + "grad_norm": 13.62516975402832, + "learning_rate": 4.945054945054946e-07, + "loss": 0.1928, + "step": 36040 + }, + { + "epoch": 99.01373626373626, + "grad_norm": 15.215377807617188, + "learning_rate": 4.931318681318682e-07, + "loss": 0.2709, + "step": 36041 + }, + { + "epoch": 99.01648351648352, + "grad_norm": 4.915593147277832, + "learning_rate": 4.917582417582418e-07, + "loss": 0.0789, + "step": 36042 + }, + { + "epoch": 99.01923076923077, + "grad_norm": 8.024323463439941, + "learning_rate": 4.903846153846154e-07, + "loss": 0.1444, + "step": 36043 + }, + { + "epoch": 99.02197802197803, + "grad_norm": 16.053442001342773, + "learning_rate": 4.89010989010989e-07, + "loss": 0.4885, + "step": 36044 + }, + { + "epoch": 99.02472527472527, + "grad_norm": 16.9539737701416, + "learning_rate": 4.876373626373627e-07, + "loss": 0.3166, + "step": 36045 + }, + { + "epoch": 99.02747252747253, + "grad_norm": 9.042043685913086, + "learning_rate": 4.862637362637363e-07, + "loss": 0.1009, + "step": 36046 + }, + { + "epoch": 99.03021978021978, + "grad_norm": 12.59238052368164, + "learning_rate": 4.8489010989011e-07, + "loss": 0.2457, + "step": 36047 + }, + { + "epoch": 99.03296703296704, + "grad_norm": 13.692191123962402, + "learning_rate": 4.835164835164835e-07, + "loss": 0.1405, + "step": 36048 + }, + { + "epoch": 99.03571428571429, + "grad_norm": 9.099863052368164, + "learning_rate": 4.821428571428571e-07, + "loss": 0.1267, + "step": 36049 + }, + { + "epoch": 99.03846153846153, + "grad_norm": 13.773425102233887, + "learning_rate": 4.807692307692308e-07, + "loss": 0.2267, + "step": 36050 + }, + { + "epoch": 99.04120879120879, + "grad_norm": 5.421537399291992, + "learning_rate": 4.793956043956045e-07, + "loss": 0.09, + "step": 36051 + }, + { + "epoch": 99.04395604395604, + "grad_norm": 9.794853210449219, + "learning_rate": 4.78021978021978e-07, + "loss": 0.1305, + "step": 36052 + }, + { + "epoch": 99.0467032967033, + "grad_norm": 10.37033748626709, + "learning_rate": 4.766483516483517e-07, + "loss": 0.1795, + "step": 36053 + }, + { + "epoch": 99.04945054945055, + "grad_norm": 7.196095943450928, + "learning_rate": 4.7527472527472527e-07, + "loss": 0.101, + "step": 36054 + }, + { + "epoch": 99.0521978021978, + "grad_norm": 13.712298393249512, + "learning_rate": 4.739010989010989e-07, + "loss": 0.2503, + "step": 36055 + }, + { + "epoch": 99.05494505494505, + "grad_norm": 7.54055643081665, + "learning_rate": 4.7252747252747256e-07, + "loss": 0.1317, + "step": 36056 + }, + { + "epoch": 99.0576923076923, + "grad_norm": 17.63608741760254, + "learning_rate": 4.711538461538462e-07, + "loss": 0.3345, + "step": 36057 + }, + { + "epoch": 99.06043956043956, + "grad_norm": 18.233619689941406, + "learning_rate": 4.6978021978021974e-07, + "loss": 0.2941, + "step": 36058 + }, + { + "epoch": 99.06318681318682, + "grad_norm": 8.401341438293457, + "learning_rate": 4.6840659340659347e-07, + "loss": 0.2291, + "step": 36059 + }, + { + "epoch": 99.06593406593407, + "grad_norm": 3.5425949096679688, + "learning_rate": 4.6703296703296704e-07, + "loss": 0.0298, + "step": 36060 + }, + { + "epoch": 99.06868131868131, + "grad_norm": 15.498878479003906, + "learning_rate": 4.6565934065934065e-07, + "loss": 0.3914, + "step": 36061 + }, + { + "epoch": 99.07142857142857, + "grad_norm": 6.711222171783447, + "learning_rate": 4.642857142857143e-07, + "loss": 0.0877, + "step": 36062 + }, + { + "epoch": 99.07417582417582, + "grad_norm": 10.340673446655273, + "learning_rate": 4.6291208791208794e-07, + "loss": 0.1948, + "step": 36063 + }, + { + "epoch": 99.07692307692308, + "grad_norm": 14.798056602478027, + "learning_rate": 4.615384615384616e-07, + "loss": 0.1583, + "step": 36064 + }, + { + "epoch": 99.07967032967034, + "grad_norm": 26.361778259277344, + "learning_rate": 4.601648351648352e-07, + "loss": 0.6717, + "step": 36065 + }, + { + "epoch": 99.08241758241758, + "grad_norm": 4.949146270751953, + "learning_rate": 4.587912087912088e-07, + "loss": 0.0523, + "step": 36066 + }, + { + "epoch": 99.08516483516483, + "grad_norm": 14.59709644317627, + "learning_rate": 4.5741758241758247e-07, + "loss": 0.3025, + "step": 36067 + }, + { + "epoch": 99.08791208791209, + "grad_norm": 13.727688789367676, + "learning_rate": 4.560439560439561e-07, + "loss": 0.143, + "step": 36068 + }, + { + "epoch": 99.09065934065934, + "grad_norm": 30.874835968017578, + "learning_rate": 4.5467032967032966e-07, + "loss": 0.2441, + "step": 36069 + }, + { + "epoch": 99.0934065934066, + "grad_norm": 13.132891654968262, + "learning_rate": 4.532967032967034e-07, + "loss": 0.0965, + "step": 36070 + }, + { + "epoch": 99.09615384615384, + "grad_norm": 3.3051044940948486, + "learning_rate": 4.5192307692307695e-07, + "loss": 0.0379, + "step": 36071 + }, + { + "epoch": 99.0989010989011, + "grad_norm": 2.034081220626831, + "learning_rate": 4.5054945054945056e-07, + "loss": 0.0209, + "step": 36072 + }, + { + "epoch": 99.10164835164835, + "grad_norm": 15.146288871765137, + "learning_rate": 4.4917582417582424e-07, + "loss": 0.47, + "step": 36073 + }, + { + "epoch": 99.1043956043956, + "grad_norm": 8.776626586914062, + "learning_rate": 4.4780219780219785e-07, + "loss": 0.0562, + "step": 36074 + }, + { + "epoch": 99.10714285714286, + "grad_norm": 12.619806289672852, + "learning_rate": 4.464285714285714e-07, + "loss": 0.1337, + "step": 36075 + }, + { + "epoch": 99.10989010989012, + "grad_norm": 7.574588298797607, + "learning_rate": 4.450549450549451e-07, + "loss": 0.1015, + "step": 36076 + }, + { + "epoch": 99.11263736263736, + "grad_norm": 9.47201919555664, + "learning_rate": 4.436813186813187e-07, + "loss": 0.0802, + "step": 36077 + }, + { + "epoch": 99.11538461538461, + "grad_norm": 4.774754524230957, + "learning_rate": 4.423076923076923e-07, + "loss": 0.0307, + "step": 36078 + }, + { + "epoch": 99.11813186813187, + "grad_norm": 7.805152893066406, + "learning_rate": 4.40934065934066e-07, + "loss": 0.0674, + "step": 36079 + }, + { + "epoch": 99.12087912087912, + "grad_norm": 16.759815216064453, + "learning_rate": 4.3956043956043957e-07, + "loss": 0.1574, + "step": 36080 + }, + { + "epoch": 99.12362637362638, + "grad_norm": 4.554288864135742, + "learning_rate": 4.381868131868132e-07, + "loss": 0.0425, + "step": 36081 + }, + { + "epoch": 99.12637362637362, + "grad_norm": 7.563496112823486, + "learning_rate": 4.3681318681318686e-07, + "loss": 0.1379, + "step": 36082 + }, + { + "epoch": 99.12912087912088, + "grad_norm": 8.093609809875488, + "learning_rate": 4.354395604395605e-07, + "loss": 0.1176, + "step": 36083 + }, + { + "epoch": 99.13186813186813, + "grad_norm": 5.64961576461792, + "learning_rate": 4.3406593406593404e-07, + "loss": 0.0427, + "step": 36084 + }, + { + "epoch": 99.13461538461539, + "grad_norm": 1.8907042741775513, + "learning_rate": 4.326923076923077e-07, + "loss": 0.0258, + "step": 36085 + }, + { + "epoch": 99.13736263736264, + "grad_norm": 7.985534191131592, + "learning_rate": 4.3131868131868133e-07, + "loss": 0.1767, + "step": 36086 + }, + { + "epoch": 99.14010989010988, + "grad_norm": 13.152621269226074, + "learning_rate": 4.2994505494505495e-07, + "loss": 0.2969, + "step": 36087 + }, + { + "epoch": 99.14285714285714, + "grad_norm": 15.591580390930176, + "learning_rate": 4.285714285714286e-07, + "loss": 0.1928, + "step": 36088 + }, + { + "epoch": 99.1456043956044, + "grad_norm": 8.311159133911133, + "learning_rate": 4.271978021978022e-07, + "loss": 0.0985, + "step": 36089 + }, + { + "epoch": 99.14835164835165, + "grad_norm": 5.600790023803711, + "learning_rate": 4.258241758241758e-07, + "loss": 0.1512, + "step": 36090 + }, + { + "epoch": 99.1510989010989, + "grad_norm": 13.475166320800781, + "learning_rate": 4.244505494505495e-07, + "loss": 0.3682, + "step": 36091 + }, + { + "epoch": 99.15384615384616, + "grad_norm": 4.5854811668396, + "learning_rate": 4.230769230769231e-07, + "loss": 0.0698, + "step": 36092 + }, + { + "epoch": 99.1565934065934, + "grad_norm": 17.789236068725586, + "learning_rate": 4.2170329670329666e-07, + "loss": 0.4574, + "step": 36093 + }, + { + "epoch": 99.15934065934066, + "grad_norm": 16.20972442626953, + "learning_rate": 4.203296703296704e-07, + "loss": 0.2212, + "step": 36094 + }, + { + "epoch": 99.16208791208791, + "grad_norm": 4.058244705200195, + "learning_rate": 4.1895604395604395e-07, + "loss": 0.0403, + "step": 36095 + }, + { + "epoch": 99.16483516483517, + "grad_norm": 17.25463104248047, + "learning_rate": 4.175824175824176e-07, + "loss": 0.7567, + "step": 36096 + }, + { + "epoch": 99.16758241758242, + "grad_norm": 12.02657699584961, + "learning_rate": 4.1620879120879124e-07, + "loss": 0.2626, + "step": 36097 + }, + { + "epoch": 99.17032967032966, + "grad_norm": 14.16895866394043, + "learning_rate": 4.1483516483516486e-07, + "loss": 0.407, + "step": 36098 + }, + { + "epoch": 99.17307692307692, + "grad_norm": 11.538455963134766, + "learning_rate": 4.1346153846153853e-07, + "loss": 0.1936, + "step": 36099 + }, + { + "epoch": 99.17582417582418, + "grad_norm": 12.504767417907715, + "learning_rate": 4.120879120879121e-07, + "loss": 0.2093, + "step": 36100 + }, + { + "epoch": 99.17857142857143, + "grad_norm": 4.726378917694092, + "learning_rate": 4.107142857142857e-07, + "loss": 0.0564, + "step": 36101 + }, + { + "epoch": 99.18131868131869, + "grad_norm": 13.01971435546875, + "learning_rate": 4.093406593406594e-07, + "loss": 0.4768, + "step": 36102 + }, + { + "epoch": 99.18406593406593, + "grad_norm": 7.717353820800781, + "learning_rate": 4.07967032967033e-07, + "loss": 0.1153, + "step": 36103 + }, + { + "epoch": 99.18681318681318, + "grad_norm": 11.691628456115723, + "learning_rate": 4.0659340659340657e-07, + "loss": 0.1219, + "step": 36104 + }, + { + "epoch": 99.18956043956044, + "grad_norm": 8.067585945129395, + "learning_rate": 4.052197802197803e-07, + "loss": 0.0757, + "step": 36105 + }, + { + "epoch": 99.1923076923077, + "grad_norm": 7.9646100997924805, + "learning_rate": 4.0384615384615386e-07, + "loss": 0.1309, + "step": 36106 + }, + { + "epoch": 99.19505494505495, + "grad_norm": 8.992051124572754, + "learning_rate": 4.024725274725275e-07, + "loss": 0.1301, + "step": 36107 + }, + { + "epoch": 99.1978021978022, + "grad_norm": 5.083507537841797, + "learning_rate": 4.0109890109890115e-07, + "loss": 0.0878, + "step": 36108 + }, + { + "epoch": 99.20054945054945, + "grad_norm": 11.768871307373047, + "learning_rate": 3.9972527472527477e-07, + "loss": 0.1193, + "step": 36109 + }, + { + "epoch": 99.2032967032967, + "grad_norm": 23.299081802368164, + "learning_rate": 3.9835164835164833e-07, + "loss": 0.32, + "step": 36110 + }, + { + "epoch": 99.20604395604396, + "grad_norm": 14.171445846557617, + "learning_rate": 3.96978021978022e-07, + "loss": 0.2573, + "step": 36111 + }, + { + "epoch": 99.20879120879121, + "grad_norm": 14.84920883178711, + "learning_rate": 3.956043956043956e-07, + "loss": 0.1643, + "step": 36112 + }, + { + "epoch": 99.21153846153847, + "grad_norm": 2.104149580001831, + "learning_rate": 3.9423076923076924e-07, + "loss": 0.0225, + "step": 36113 + }, + { + "epoch": 99.21428571428571, + "grad_norm": 12.908647537231445, + "learning_rate": 3.928571428571429e-07, + "loss": 0.1658, + "step": 36114 + }, + { + "epoch": 99.21703296703296, + "grad_norm": 13.899640083312988, + "learning_rate": 3.914835164835165e-07, + "loss": 0.5512, + "step": 36115 + }, + { + "epoch": 99.21978021978022, + "grad_norm": 13.134431838989258, + "learning_rate": 3.9010989010989015e-07, + "loss": 0.351, + "step": 36116 + }, + { + "epoch": 99.22252747252747, + "grad_norm": 1.5814714431762695, + "learning_rate": 3.887362637362637e-07, + "loss": 0.0189, + "step": 36117 + }, + { + "epoch": 99.22527472527473, + "grad_norm": 4.484091281890869, + "learning_rate": 3.873626373626374e-07, + "loss": 0.0651, + "step": 36118 + }, + { + "epoch": 99.22802197802197, + "grad_norm": 10.017102241516113, + "learning_rate": 3.85989010989011e-07, + "loss": 0.1334, + "step": 36119 + }, + { + "epoch": 99.23076923076923, + "grad_norm": 7.90739631652832, + "learning_rate": 3.846153846153847e-07, + "loss": 0.1105, + "step": 36120 + }, + { + "epoch": 99.23351648351648, + "grad_norm": 15.135421752929688, + "learning_rate": 3.8324175824175825e-07, + "loss": 0.4315, + "step": 36121 + }, + { + "epoch": 99.23626373626374, + "grad_norm": 9.907583236694336, + "learning_rate": 3.818681318681319e-07, + "loss": 0.1507, + "step": 36122 + }, + { + "epoch": 99.23901098901099, + "grad_norm": 2.0567991733551025, + "learning_rate": 3.8049450549450554e-07, + "loss": 0.0547, + "step": 36123 + }, + { + "epoch": 99.24175824175825, + "grad_norm": 24.238689422607422, + "learning_rate": 3.7912087912087915e-07, + "loss": 0.5725, + "step": 36124 + }, + { + "epoch": 99.24450549450549, + "grad_norm": 12.855426788330078, + "learning_rate": 3.7774725274725277e-07, + "loss": 0.2331, + "step": 36125 + }, + { + "epoch": 99.24725274725274, + "grad_norm": 3.39194917678833, + "learning_rate": 3.763736263736264e-07, + "loss": 0.0429, + "step": 36126 + }, + { + "epoch": 99.25, + "grad_norm": 7.247493267059326, + "learning_rate": 3.75e-07, + "loss": 0.0759, + "step": 36127 + }, + { + "epoch": 99.25274725274726, + "grad_norm": 9.125633239746094, + "learning_rate": 3.7362637362637363e-07, + "loss": 0.2034, + "step": 36128 + }, + { + "epoch": 99.25549450549451, + "grad_norm": 6.830385208129883, + "learning_rate": 3.722527472527473e-07, + "loss": 0.074, + "step": 36129 + }, + { + "epoch": 99.25824175824175, + "grad_norm": 16.338525772094727, + "learning_rate": 3.7087912087912087e-07, + "loss": 0.413, + "step": 36130 + }, + { + "epoch": 99.26098901098901, + "grad_norm": 11.438555717468262, + "learning_rate": 3.6950549450549454e-07, + "loss": 0.2139, + "step": 36131 + }, + { + "epoch": 99.26373626373626, + "grad_norm": 14.27035140991211, + "learning_rate": 3.6813186813186816e-07, + "loss": 0.3346, + "step": 36132 + }, + { + "epoch": 99.26648351648352, + "grad_norm": 9.808813095092773, + "learning_rate": 3.667582417582418e-07, + "loss": 0.1179, + "step": 36133 + }, + { + "epoch": 99.26923076923077, + "grad_norm": 16.250572204589844, + "learning_rate": 3.653846153846154e-07, + "loss": 0.2432, + "step": 36134 + }, + { + "epoch": 99.27197802197803, + "grad_norm": 20.804203033447266, + "learning_rate": 3.64010989010989e-07, + "loss": 0.3219, + "step": 36135 + }, + { + "epoch": 99.27472527472527, + "grad_norm": 12.383854866027832, + "learning_rate": 3.626373626373627e-07, + "loss": 0.1739, + "step": 36136 + }, + { + "epoch": 99.27747252747253, + "grad_norm": 6.769917964935303, + "learning_rate": 3.6126373626373625e-07, + "loss": 0.0716, + "step": 36137 + }, + { + "epoch": 99.28021978021978, + "grad_norm": 5.576066970825195, + "learning_rate": 3.598901098901099e-07, + "loss": 0.0881, + "step": 36138 + }, + { + "epoch": 99.28296703296704, + "grad_norm": 2.542083263397217, + "learning_rate": 3.5851648351648354e-07, + "loss": 0.0174, + "step": 36139 + }, + { + "epoch": 99.28571428571429, + "grad_norm": 4.089484691619873, + "learning_rate": 3.5714285714285716e-07, + "loss": 0.0237, + "step": 36140 + }, + { + "epoch": 99.28846153846153, + "grad_norm": 10.9143648147583, + "learning_rate": 3.557692307692308e-07, + "loss": 0.1373, + "step": 36141 + }, + { + "epoch": 99.29120879120879, + "grad_norm": 10.097064971923828, + "learning_rate": 3.5439560439560445e-07, + "loss": 0.147, + "step": 36142 + }, + { + "epoch": 99.29395604395604, + "grad_norm": 5.471895217895508, + "learning_rate": 3.53021978021978e-07, + "loss": 0.0594, + "step": 36143 + }, + { + "epoch": 99.2967032967033, + "grad_norm": 11.313962936401367, + "learning_rate": 3.516483516483517e-07, + "loss": 0.2928, + "step": 36144 + }, + { + "epoch": 99.29945054945055, + "grad_norm": 8.711796760559082, + "learning_rate": 3.502747252747253e-07, + "loss": 0.0601, + "step": 36145 + }, + { + "epoch": 99.3021978021978, + "grad_norm": 9.011113166809082, + "learning_rate": 3.489010989010989e-07, + "loss": 0.1751, + "step": 36146 + }, + { + "epoch": 99.30494505494505, + "grad_norm": 3.9098122119903564, + "learning_rate": 3.4752747252747254e-07, + "loss": 0.0488, + "step": 36147 + }, + { + "epoch": 99.3076923076923, + "grad_norm": 18.72500228881836, + "learning_rate": 3.4615384615384616e-07, + "loss": 0.5936, + "step": 36148 + }, + { + "epoch": 99.31043956043956, + "grad_norm": 14.014533996582031, + "learning_rate": 3.447802197802198e-07, + "loss": 0.3485, + "step": 36149 + }, + { + "epoch": 99.31318681318682, + "grad_norm": 26.96919059753418, + "learning_rate": 3.434065934065934e-07, + "loss": 0.5716, + "step": 36150 + }, + { + "epoch": 99.31593406593407, + "grad_norm": 14.42650032043457, + "learning_rate": 3.4203296703296707e-07, + "loss": 0.209, + "step": 36151 + }, + { + "epoch": 99.31868131868131, + "grad_norm": 3.9776933193206787, + "learning_rate": 3.406593406593407e-07, + "loss": 0.0509, + "step": 36152 + }, + { + "epoch": 99.32142857142857, + "grad_norm": 14.910573959350586, + "learning_rate": 3.392857142857143e-07, + "loss": 0.2723, + "step": 36153 + }, + { + "epoch": 99.32417582417582, + "grad_norm": 8.95705795288086, + "learning_rate": 3.379120879120879e-07, + "loss": 0.1601, + "step": 36154 + }, + { + "epoch": 99.32692307692308, + "grad_norm": 13.363083839416504, + "learning_rate": 3.365384615384616e-07, + "loss": 0.249, + "step": 36155 + }, + { + "epoch": 99.32967032967034, + "grad_norm": 12.202005386352539, + "learning_rate": 3.3516483516483516e-07, + "loss": 0.1396, + "step": 36156 + }, + { + "epoch": 99.33241758241758, + "grad_norm": 4.089362144470215, + "learning_rate": 3.3379120879120883e-07, + "loss": 0.0537, + "step": 36157 + }, + { + "epoch": 99.33516483516483, + "grad_norm": 8.98291015625, + "learning_rate": 3.3241758241758245e-07, + "loss": 0.1452, + "step": 36158 + }, + { + "epoch": 99.33791208791209, + "grad_norm": 9.708394050598145, + "learning_rate": 3.3104395604395607e-07, + "loss": 0.1083, + "step": 36159 + }, + { + "epoch": 99.34065934065934, + "grad_norm": 0.9789850115776062, + "learning_rate": 3.296703296703297e-07, + "loss": 0.0078, + "step": 36160 + }, + { + "epoch": 99.3434065934066, + "grad_norm": 20.50356101989746, + "learning_rate": 3.282967032967033e-07, + "loss": 0.32, + "step": 36161 + }, + { + "epoch": 99.34615384615384, + "grad_norm": 3.8481836318969727, + "learning_rate": 3.269230769230769e-07, + "loss": 0.0469, + "step": 36162 + }, + { + "epoch": 99.3489010989011, + "grad_norm": 13.41169261932373, + "learning_rate": 3.2554945054945054e-07, + "loss": 0.2474, + "step": 36163 + }, + { + "epoch": 99.35164835164835, + "grad_norm": 26.395843505859375, + "learning_rate": 3.241758241758242e-07, + "loss": 0.4843, + "step": 36164 + }, + { + "epoch": 99.3543956043956, + "grad_norm": 17.68212127685547, + "learning_rate": 3.228021978021978e-07, + "loss": 0.1387, + "step": 36165 + }, + { + "epoch": 99.35714285714286, + "grad_norm": 3.4127004146575928, + "learning_rate": 3.2142857142857145e-07, + "loss": 0.036, + "step": 36166 + }, + { + "epoch": 99.35989010989012, + "grad_norm": 11.12904167175293, + "learning_rate": 3.2005494505494507e-07, + "loss": 0.148, + "step": 36167 + }, + { + "epoch": 99.36263736263736, + "grad_norm": 19.15099334716797, + "learning_rate": 3.1868131868131874e-07, + "loss": 0.3296, + "step": 36168 + }, + { + "epoch": 99.36538461538461, + "grad_norm": 8.206326484680176, + "learning_rate": 3.173076923076923e-07, + "loss": 0.1816, + "step": 36169 + }, + { + "epoch": 99.36813186813187, + "grad_norm": 7.571292400360107, + "learning_rate": 3.15934065934066e-07, + "loss": 0.1277, + "step": 36170 + }, + { + "epoch": 99.37087912087912, + "grad_norm": 23.946165084838867, + "learning_rate": 3.145604395604396e-07, + "loss": 0.3136, + "step": 36171 + }, + { + "epoch": 99.37362637362638, + "grad_norm": 27.186527252197266, + "learning_rate": 3.131868131868132e-07, + "loss": 0.7663, + "step": 36172 + }, + { + "epoch": 99.37637362637362, + "grad_norm": 19.83953857421875, + "learning_rate": 3.1181318681318683e-07, + "loss": 0.5015, + "step": 36173 + }, + { + "epoch": 99.37912087912088, + "grad_norm": 9.947025299072266, + "learning_rate": 3.1043956043956045e-07, + "loss": 0.166, + "step": 36174 + }, + { + "epoch": 99.38186813186813, + "grad_norm": 13.334787368774414, + "learning_rate": 3.0906593406593407e-07, + "loss": 0.1751, + "step": 36175 + }, + { + "epoch": 99.38461538461539, + "grad_norm": 9.342377662658691, + "learning_rate": 3.076923076923077e-07, + "loss": 0.1013, + "step": 36176 + }, + { + "epoch": 99.38736263736264, + "grad_norm": 6.8664870262146, + "learning_rate": 3.0631868131868136e-07, + "loss": 0.0506, + "step": 36177 + }, + { + "epoch": 99.39010989010988, + "grad_norm": 7.977234363555908, + "learning_rate": 3.0494505494505493e-07, + "loss": 0.1006, + "step": 36178 + }, + { + "epoch": 99.39285714285714, + "grad_norm": 10.445944786071777, + "learning_rate": 3.035714285714286e-07, + "loss": 0.0864, + "step": 36179 + }, + { + "epoch": 99.3956043956044, + "grad_norm": 14.629258155822754, + "learning_rate": 3.021978021978022e-07, + "loss": 0.2187, + "step": 36180 + }, + { + "epoch": 99.39835164835165, + "grad_norm": 7.837794303894043, + "learning_rate": 3.0082417582417584e-07, + "loss": 0.2174, + "step": 36181 + }, + { + "epoch": 99.4010989010989, + "grad_norm": 5.077445030212402, + "learning_rate": 2.9945054945054946e-07, + "loss": 0.0615, + "step": 36182 + }, + { + "epoch": 99.40384615384616, + "grad_norm": 6.982247829437256, + "learning_rate": 2.980769230769231e-07, + "loss": 0.1261, + "step": 36183 + }, + { + "epoch": 99.4065934065934, + "grad_norm": 4.431132793426514, + "learning_rate": 2.9670329670329675e-07, + "loss": 0.0706, + "step": 36184 + }, + { + "epoch": 99.40934065934066, + "grad_norm": 30.619422912597656, + "learning_rate": 2.953296703296703e-07, + "loss": 0.8338, + "step": 36185 + }, + { + "epoch": 99.41208791208791, + "grad_norm": 13.165436744689941, + "learning_rate": 2.93956043956044e-07, + "loss": 0.1713, + "step": 36186 + }, + { + "epoch": 99.41483516483517, + "grad_norm": 7.772636413574219, + "learning_rate": 2.925824175824176e-07, + "loss": 0.1484, + "step": 36187 + }, + { + "epoch": 99.41758241758242, + "grad_norm": 4.291107177734375, + "learning_rate": 2.912087912087912e-07, + "loss": 0.0318, + "step": 36188 + }, + { + "epoch": 99.42032967032966, + "grad_norm": 12.339771270751953, + "learning_rate": 2.8983516483516484e-07, + "loss": 0.0814, + "step": 36189 + }, + { + "epoch": 99.42307692307692, + "grad_norm": 11.274310111999512, + "learning_rate": 2.884615384615385e-07, + "loss": 0.1976, + "step": 36190 + }, + { + "epoch": 99.42582417582418, + "grad_norm": 10.358587265014648, + "learning_rate": 2.870879120879121e-07, + "loss": 0.159, + "step": 36191 + }, + { + "epoch": 99.42857142857143, + "grad_norm": 16.562358856201172, + "learning_rate": 2.8571428571428575e-07, + "loss": 0.2624, + "step": 36192 + }, + { + "epoch": 99.43131868131869, + "grad_norm": 6.066154479980469, + "learning_rate": 2.8434065934065937e-07, + "loss": 0.0383, + "step": 36193 + }, + { + "epoch": 99.43406593406593, + "grad_norm": 13.17604923248291, + "learning_rate": 2.82967032967033e-07, + "loss": 0.3213, + "step": 36194 + }, + { + "epoch": 99.43681318681318, + "grad_norm": 10.248294830322266, + "learning_rate": 2.815934065934066e-07, + "loss": 0.2119, + "step": 36195 + }, + { + "epoch": 99.43956043956044, + "grad_norm": 1.914001703262329, + "learning_rate": 2.802197802197802e-07, + "loss": 0.0262, + "step": 36196 + }, + { + "epoch": 99.4423076923077, + "grad_norm": 6.139658451080322, + "learning_rate": 2.7884615384615384e-07, + "loss": 0.1332, + "step": 36197 + }, + { + "epoch": 99.44505494505495, + "grad_norm": 4.997091770172119, + "learning_rate": 2.7747252747252746e-07, + "loss": 0.0902, + "step": 36198 + }, + { + "epoch": 99.4478021978022, + "grad_norm": 2.7660012245178223, + "learning_rate": 2.7609890109890113e-07, + "loss": 0.0441, + "step": 36199 + }, + { + "epoch": 99.45054945054945, + "grad_norm": 14.006245613098145, + "learning_rate": 2.7472527472527475e-07, + "loss": 0.1592, + "step": 36200 + }, + { + "epoch": 99.4532967032967, + "grad_norm": 3.5168375968933105, + "learning_rate": 2.7335164835164837e-07, + "loss": 0.0524, + "step": 36201 + }, + { + "epoch": 99.45604395604396, + "grad_norm": 19.58148956298828, + "learning_rate": 2.71978021978022e-07, + "loss": 0.3754, + "step": 36202 + }, + { + "epoch": 99.45879120879121, + "grad_norm": 12.329195976257324, + "learning_rate": 2.7060439560439566e-07, + "loss": 0.1875, + "step": 36203 + }, + { + "epoch": 99.46153846153847, + "grad_norm": 6.429544448852539, + "learning_rate": 2.692307692307692e-07, + "loss": 0.0768, + "step": 36204 + }, + { + "epoch": 99.46428571428571, + "grad_norm": 22.549623489379883, + "learning_rate": 2.678571428571429e-07, + "loss": 0.5806, + "step": 36205 + }, + { + "epoch": 99.46703296703296, + "grad_norm": 9.947160720825195, + "learning_rate": 2.664835164835165e-07, + "loss": 0.1068, + "step": 36206 + }, + { + "epoch": 99.46978021978022, + "grad_norm": 17.29942512512207, + "learning_rate": 2.6510989010989013e-07, + "loss": 0.3962, + "step": 36207 + }, + { + "epoch": 99.47252747252747, + "grad_norm": 6.496702671051025, + "learning_rate": 2.6373626373626375e-07, + "loss": 0.1272, + "step": 36208 + }, + { + "epoch": 99.47527472527473, + "grad_norm": 8.85326099395752, + "learning_rate": 2.6236263736263737e-07, + "loss": 0.087, + "step": 36209 + }, + { + "epoch": 99.47802197802197, + "grad_norm": 5.412344455718994, + "learning_rate": 2.60989010989011e-07, + "loss": 0.1383, + "step": 36210 + }, + { + "epoch": 99.48076923076923, + "grad_norm": 16.27985382080078, + "learning_rate": 2.596153846153846e-07, + "loss": 0.2527, + "step": 36211 + }, + { + "epoch": 99.48351648351648, + "grad_norm": 5.6863884925842285, + "learning_rate": 2.582417582417583e-07, + "loss": 0.0534, + "step": 36212 + }, + { + "epoch": 99.48626373626374, + "grad_norm": 17.276596069335938, + "learning_rate": 2.5686813186813184e-07, + "loss": 0.605, + "step": 36213 + }, + { + "epoch": 99.48901098901099, + "grad_norm": 15.093246459960938, + "learning_rate": 2.554945054945055e-07, + "loss": 0.2898, + "step": 36214 + }, + { + "epoch": 99.49175824175825, + "grad_norm": 19.836952209472656, + "learning_rate": 2.5412087912087913e-07, + "loss": 0.2704, + "step": 36215 + }, + { + "epoch": 99.49450549450549, + "grad_norm": 12.636688232421875, + "learning_rate": 2.527472527472528e-07, + "loss": 0.1461, + "step": 36216 + }, + { + "epoch": 99.49725274725274, + "grad_norm": 2.3863439559936523, + "learning_rate": 2.5137362637362637e-07, + "loss": 0.0238, + "step": 36217 + }, + { + "epoch": 99.5, + "grad_norm": 0.7226114869117737, + "learning_rate": 2.5000000000000004e-07, + "loss": 0.0089, + "step": 36218 + }, + { + "epoch": 99.50274725274726, + "grad_norm": 4.233931064605713, + "learning_rate": 2.4862637362637366e-07, + "loss": 0.0666, + "step": 36219 + }, + { + "epoch": 99.50549450549451, + "grad_norm": 10.481966018676758, + "learning_rate": 2.472527472527473e-07, + "loss": 0.2336, + "step": 36220 + }, + { + "epoch": 99.50824175824175, + "grad_norm": 6.768803119659424, + "learning_rate": 2.458791208791209e-07, + "loss": 0.1312, + "step": 36221 + }, + { + "epoch": 99.51098901098901, + "grad_norm": 6.2713141441345215, + "learning_rate": 2.445054945054945e-07, + "loss": 0.0667, + "step": 36222 + }, + { + "epoch": 99.51373626373626, + "grad_norm": 2.036465644836426, + "learning_rate": 2.4313186813186813e-07, + "loss": 0.0324, + "step": 36223 + }, + { + "epoch": 99.51648351648352, + "grad_norm": 4.052172660827637, + "learning_rate": 2.4175824175824175e-07, + "loss": 0.0407, + "step": 36224 + }, + { + "epoch": 99.51923076923077, + "grad_norm": 8.327381134033203, + "learning_rate": 2.403846153846154e-07, + "loss": 0.2036, + "step": 36225 + }, + { + "epoch": 99.52197802197803, + "grad_norm": 9.440239906311035, + "learning_rate": 2.39010989010989e-07, + "loss": 0.1274, + "step": 36226 + }, + { + "epoch": 99.52472527472527, + "grad_norm": 19.512598037719727, + "learning_rate": 2.3763736263736264e-07, + "loss": 0.322, + "step": 36227 + }, + { + "epoch": 99.52747252747253, + "grad_norm": 3.047072172164917, + "learning_rate": 2.3626373626373628e-07, + "loss": 0.0308, + "step": 36228 + }, + { + "epoch": 99.53021978021978, + "grad_norm": 14.862642288208008, + "learning_rate": 2.3489010989010987e-07, + "loss": 0.2869, + "step": 36229 + }, + { + "epoch": 99.53296703296704, + "grad_norm": 7.01422643661499, + "learning_rate": 2.3351648351648352e-07, + "loss": 0.0964, + "step": 36230 + }, + { + "epoch": 99.53571428571429, + "grad_norm": 3.246673583984375, + "learning_rate": 2.3214285714285716e-07, + "loss": 0.027, + "step": 36231 + }, + { + "epoch": 99.53846153846153, + "grad_norm": 19.049461364746094, + "learning_rate": 2.307692307692308e-07, + "loss": 0.4926, + "step": 36232 + }, + { + "epoch": 99.54120879120879, + "grad_norm": 17.574085235595703, + "learning_rate": 2.293956043956044e-07, + "loss": 0.3599, + "step": 36233 + }, + { + "epoch": 99.54395604395604, + "grad_norm": 22.162195205688477, + "learning_rate": 2.2802197802197804e-07, + "loss": 0.4915, + "step": 36234 + }, + { + "epoch": 99.5467032967033, + "grad_norm": 14.832160949707031, + "learning_rate": 2.266483516483517e-07, + "loss": 0.2994, + "step": 36235 + }, + { + "epoch": 99.54945054945055, + "grad_norm": 7.609187126159668, + "learning_rate": 2.2527472527472528e-07, + "loss": 0.0895, + "step": 36236 + }, + { + "epoch": 99.5521978021978, + "grad_norm": 13.683975219726562, + "learning_rate": 2.2390109890109893e-07, + "loss": 0.2478, + "step": 36237 + }, + { + "epoch": 99.55494505494505, + "grad_norm": 8.573114395141602, + "learning_rate": 2.2252747252747255e-07, + "loss": 0.2425, + "step": 36238 + }, + { + "epoch": 99.5576923076923, + "grad_norm": 11.19111442565918, + "learning_rate": 2.2115384615384614e-07, + "loss": 0.2256, + "step": 36239 + }, + { + "epoch": 99.56043956043956, + "grad_norm": 6.6120758056640625, + "learning_rate": 2.1978021978021978e-07, + "loss": 0.1132, + "step": 36240 + }, + { + "epoch": 99.56318681318682, + "grad_norm": 22.339113235473633, + "learning_rate": 2.1840659340659343e-07, + "loss": 0.3933, + "step": 36241 + }, + { + "epoch": 99.56593406593407, + "grad_norm": 15.060606956481934, + "learning_rate": 2.1703296703296702e-07, + "loss": 0.2856, + "step": 36242 + }, + { + "epoch": 99.56868131868131, + "grad_norm": 9.586678504943848, + "learning_rate": 2.1565934065934066e-07, + "loss": 0.1241, + "step": 36243 + }, + { + "epoch": 99.57142857142857, + "grad_norm": 5.015296936035156, + "learning_rate": 2.142857142857143e-07, + "loss": 0.0451, + "step": 36244 + }, + { + "epoch": 99.57417582417582, + "grad_norm": 21.219533920288086, + "learning_rate": 2.129120879120879e-07, + "loss": 0.6461, + "step": 36245 + }, + { + "epoch": 99.57692307692308, + "grad_norm": 16.873764038085938, + "learning_rate": 2.1153846153846155e-07, + "loss": 0.1163, + "step": 36246 + }, + { + "epoch": 99.57967032967034, + "grad_norm": 5.787108898162842, + "learning_rate": 2.101648351648352e-07, + "loss": 0.0735, + "step": 36247 + }, + { + "epoch": 99.58241758241758, + "grad_norm": 14.903824806213379, + "learning_rate": 2.087912087912088e-07, + "loss": 0.378, + "step": 36248 + }, + { + "epoch": 99.58516483516483, + "grad_norm": 27.399566650390625, + "learning_rate": 2.0741758241758243e-07, + "loss": 0.7635, + "step": 36249 + }, + { + "epoch": 99.58791208791209, + "grad_norm": 11.250288963317871, + "learning_rate": 2.0604395604395605e-07, + "loss": 0.0817, + "step": 36250 + }, + { + "epoch": 99.59065934065934, + "grad_norm": 4.364383220672607, + "learning_rate": 2.046703296703297e-07, + "loss": 0.05, + "step": 36251 + }, + { + "epoch": 99.5934065934066, + "grad_norm": 1.055267572402954, + "learning_rate": 2.0329670329670329e-07, + "loss": 0.009, + "step": 36252 + }, + { + "epoch": 99.59615384615384, + "grad_norm": 5.891616344451904, + "learning_rate": 2.0192307692307693e-07, + "loss": 0.0654, + "step": 36253 + }, + { + "epoch": 99.5989010989011, + "grad_norm": 13.61130142211914, + "learning_rate": 2.0054945054945058e-07, + "loss": 0.2684, + "step": 36254 + }, + { + "epoch": 99.60164835164835, + "grad_norm": 12.430188179016113, + "learning_rate": 1.9917582417582417e-07, + "loss": 0.4283, + "step": 36255 + }, + { + "epoch": 99.6043956043956, + "grad_norm": 13.776327133178711, + "learning_rate": 1.978021978021978e-07, + "loss": 0.129, + "step": 36256 + }, + { + "epoch": 99.60714285714286, + "grad_norm": 2.5989620685577393, + "learning_rate": 1.9642857142857146e-07, + "loss": 0.0292, + "step": 36257 + }, + { + "epoch": 99.60989010989012, + "grad_norm": 14.931208610534668, + "learning_rate": 1.9505494505494508e-07, + "loss": 0.1202, + "step": 36258 + }, + { + "epoch": 99.61263736263736, + "grad_norm": 9.567425727844238, + "learning_rate": 1.936813186813187e-07, + "loss": 0.2266, + "step": 36259 + }, + { + "epoch": 99.61538461538461, + "grad_norm": 14.966166496276855, + "learning_rate": 1.9230769230769234e-07, + "loss": 0.1641, + "step": 36260 + }, + { + "epoch": 99.61813186813187, + "grad_norm": 11.943471908569336, + "learning_rate": 1.9093406593406596e-07, + "loss": 0.2783, + "step": 36261 + }, + { + "epoch": 99.62087912087912, + "grad_norm": 11.704645156860352, + "learning_rate": 1.8956043956043958e-07, + "loss": 0.2405, + "step": 36262 + }, + { + "epoch": 99.62362637362638, + "grad_norm": 2.9647862911224365, + "learning_rate": 1.881868131868132e-07, + "loss": 0.0179, + "step": 36263 + }, + { + "epoch": 99.62637362637362, + "grad_norm": 12.441377639770508, + "learning_rate": 1.8681318681318681e-07, + "loss": 0.244, + "step": 36264 + }, + { + "epoch": 99.62912087912088, + "grad_norm": 6.790548324584961, + "learning_rate": 1.8543956043956043e-07, + "loss": 0.089, + "step": 36265 + }, + { + "epoch": 99.63186813186813, + "grad_norm": 20.366863250732422, + "learning_rate": 1.8406593406593408e-07, + "loss": 0.4176, + "step": 36266 + }, + { + "epoch": 99.63461538461539, + "grad_norm": 12.500571250915527, + "learning_rate": 1.826923076923077e-07, + "loss": 0.3084, + "step": 36267 + }, + { + "epoch": 99.63736263736264, + "grad_norm": 8.85504150390625, + "learning_rate": 1.8131868131868134e-07, + "loss": 0.1007, + "step": 36268 + }, + { + "epoch": 99.64010989010988, + "grad_norm": 18.71006965637207, + "learning_rate": 1.7994505494505496e-07, + "loss": 0.3353, + "step": 36269 + }, + { + "epoch": 99.64285714285714, + "grad_norm": 5.8266096115112305, + "learning_rate": 1.7857142857142858e-07, + "loss": 0.0866, + "step": 36270 + }, + { + "epoch": 99.6456043956044, + "grad_norm": 8.962824821472168, + "learning_rate": 1.7719780219780222e-07, + "loss": 0.1162, + "step": 36271 + }, + { + "epoch": 99.64835164835165, + "grad_norm": 13.273179054260254, + "learning_rate": 1.7582417582417584e-07, + "loss": 0.1302, + "step": 36272 + }, + { + "epoch": 99.6510989010989, + "grad_norm": 13.305224418640137, + "learning_rate": 1.7445054945054946e-07, + "loss": 0.2985, + "step": 36273 + }, + { + "epoch": 99.65384615384616, + "grad_norm": 14.72514533996582, + "learning_rate": 1.7307692307692308e-07, + "loss": 0.2453, + "step": 36274 + }, + { + "epoch": 99.6565934065934, + "grad_norm": 11.424314498901367, + "learning_rate": 1.717032967032967e-07, + "loss": 0.2155, + "step": 36275 + }, + { + "epoch": 99.65934065934066, + "grad_norm": 8.734023094177246, + "learning_rate": 1.7032967032967034e-07, + "loss": 0.1462, + "step": 36276 + }, + { + "epoch": 99.66208791208791, + "grad_norm": 8.59506607055664, + "learning_rate": 1.6895604395604396e-07, + "loss": 0.1071, + "step": 36277 + }, + { + "epoch": 99.66483516483517, + "grad_norm": 5.831726551055908, + "learning_rate": 1.6758241758241758e-07, + "loss": 0.0607, + "step": 36278 + }, + { + "epoch": 99.66758241758242, + "grad_norm": 12.107756614685059, + "learning_rate": 1.6620879120879123e-07, + "loss": 0.1369, + "step": 36279 + }, + { + "epoch": 99.67032967032966, + "grad_norm": 18.530738830566406, + "learning_rate": 1.6483516483516484e-07, + "loss": 0.7033, + "step": 36280 + }, + { + "epoch": 99.67307692307692, + "grad_norm": 6.6590352058410645, + "learning_rate": 1.6346153846153846e-07, + "loss": 0.1159, + "step": 36281 + }, + { + "epoch": 99.67582417582418, + "grad_norm": 9.633648872375488, + "learning_rate": 1.620879120879121e-07, + "loss": 0.1839, + "step": 36282 + }, + { + "epoch": 99.67857142857143, + "grad_norm": 5.537992477416992, + "learning_rate": 1.6071428571428573e-07, + "loss": 0.0929, + "step": 36283 + }, + { + "epoch": 99.68131868131869, + "grad_norm": 1.6344705820083618, + "learning_rate": 1.5934065934065937e-07, + "loss": 0.0099, + "step": 36284 + }, + { + "epoch": 99.68406593406593, + "grad_norm": 7.579227924346924, + "learning_rate": 1.57967032967033e-07, + "loss": 0.0921, + "step": 36285 + }, + { + "epoch": 99.68681318681318, + "grad_norm": 6.685769557952881, + "learning_rate": 1.565934065934066e-07, + "loss": 0.09, + "step": 36286 + }, + { + "epoch": 99.68956043956044, + "grad_norm": 11.534246444702148, + "learning_rate": 1.5521978021978023e-07, + "loss": 0.1013, + "step": 36287 + }, + { + "epoch": 99.6923076923077, + "grad_norm": 12.178620338439941, + "learning_rate": 1.5384615384615385e-07, + "loss": 0.1387, + "step": 36288 + }, + { + "epoch": 99.69505494505495, + "grad_norm": 9.222033500671387, + "learning_rate": 1.5247252747252746e-07, + "loss": 0.1244, + "step": 36289 + }, + { + "epoch": 99.6978021978022, + "grad_norm": 5.416730880737305, + "learning_rate": 1.510989010989011e-07, + "loss": 0.0512, + "step": 36290 + }, + { + "epoch": 99.70054945054945, + "grad_norm": 1.8792146444320679, + "learning_rate": 1.4972527472527473e-07, + "loss": 0.0314, + "step": 36291 + }, + { + "epoch": 99.7032967032967, + "grad_norm": 8.342155456542969, + "learning_rate": 1.4835164835164837e-07, + "loss": 0.1761, + "step": 36292 + }, + { + "epoch": 99.70604395604396, + "grad_norm": 23.480876922607422, + "learning_rate": 1.46978021978022e-07, + "loss": 0.428, + "step": 36293 + }, + { + "epoch": 99.70879120879121, + "grad_norm": 4.170354843139648, + "learning_rate": 1.456043956043956e-07, + "loss": 0.037, + "step": 36294 + }, + { + "epoch": 99.71153846153847, + "grad_norm": 20.34920310974121, + "learning_rate": 1.4423076923076925e-07, + "loss": 0.6322, + "step": 36295 + }, + { + "epoch": 99.71428571428571, + "grad_norm": 11.9450101852417, + "learning_rate": 1.4285714285714287e-07, + "loss": 0.2194, + "step": 36296 + }, + { + "epoch": 99.71703296703296, + "grad_norm": 11.689681053161621, + "learning_rate": 1.414835164835165e-07, + "loss": 0.1554, + "step": 36297 + }, + { + "epoch": 99.71978021978022, + "grad_norm": 11.283891677856445, + "learning_rate": 1.401098901098901e-07, + "loss": 0.218, + "step": 36298 + }, + { + "epoch": 99.72252747252747, + "grad_norm": 13.813637733459473, + "learning_rate": 1.3873626373626373e-07, + "loss": 0.1576, + "step": 36299 + }, + { + "epoch": 99.72527472527473, + "grad_norm": 14.870976448059082, + "learning_rate": 1.3736263736263737e-07, + "loss": 0.1541, + "step": 36300 + }, + { + "epoch": 99.72802197802197, + "grad_norm": 16.87459373474121, + "learning_rate": 1.35989010989011e-07, + "loss": 0.4012, + "step": 36301 + }, + { + "epoch": 99.73076923076923, + "grad_norm": 13.856466293334961, + "learning_rate": 1.346153846153846e-07, + "loss": 0.3961, + "step": 36302 + }, + { + "epoch": 99.73351648351648, + "grad_norm": 9.793805122375488, + "learning_rate": 1.3324175824175826e-07, + "loss": 0.1643, + "step": 36303 + }, + { + "epoch": 99.73626373626374, + "grad_norm": 4.0253987312316895, + "learning_rate": 1.3186813186813187e-07, + "loss": 0.0394, + "step": 36304 + }, + { + "epoch": 99.73901098901099, + "grad_norm": 12.889581680297852, + "learning_rate": 1.304945054945055e-07, + "loss": 0.3118, + "step": 36305 + }, + { + "epoch": 99.74175824175825, + "grad_norm": 6.906116485595703, + "learning_rate": 1.2912087912087914e-07, + "loss": 0.0562, + "step": 36306 + }, + { + "epoch": 99.74450549450549, + "grad_norm": 12.46888542175293, + "learning_rate": 1.2774725274725276e-07, + "loss": 0.3228, + "step": 36307 + }, + { + "epoch": 99.74725274725274, + "grad_norm": 17.569448471069336, + "learning_rate": 1.263736263736264e-07, + "loss": 0.8181, + "step": 36308 + }, + { + "epoch": 99.75, + "grad_norm": 7.356938362121582, + "learning_rate": 1.2500000000000002e-07, + "loss": 0.148, + "step": 36309 + }, + { + "epoch": 99.75274725274726, + "grad_norm": 8.895243644714355, + "learning_rate": 1.2362637362637364e-07, + "loss": 0.1823, + "step": 36310 + }, + { + "epoch": 99.75549450549451, + "grad_norm": 4.582645416259766, + "learning_rate": 1.2225274725274726e-07, + "loss": 0.0921, + "step": 36311 + }, + { + "epoch": 99.75824175824175, + "grad_norm": 20.03992462158203, + "learning_rate": 1.2087912087912088e-07, + "loss": 0.4493, + "step": 36312 + }, + { + "epoch": 99.76098901098901, + "grad_norm": 20.599048614501953, + "learning_rate": 1.195054945054945e-07, + "loss": 0.3357, + "step": 36313 + }, + { + "epoch": 99.76373626373626, + "grad_norm": 13.671758651733398, + "learning_rate": 1.1813186813186814e-07, + "loss": 0.2029, + "step": 36314 + }, + { + "epoch": 99.76648351648352, + "grad_norm": 3.293713331222534, + "learning_rate": 1.1675824175824176e-07, + "loss": 0.0405, + "step": 36315 + }, + { + "epoch": 99.76923076923077, + "grad_norm": 19.408681869506836, + "learning_rate": 1.153846153846154e-07, + "loss": 0.6538, + "step": 36316 + }, + { + "epoch": 99.77197802197803, + "grad_norm": 1.5237406492233276, + "learning_rate": 1.1401098901098902e-07, + "loss": 0.0251, + "step": 36317 + }, + { + "epoch": 99.77472527472527, + "grad_norm": 5.472422122955322, + "learning_rate": 1.1263736263736264e-07, + "loss": 0.0632, + "step": 36318 + }, + { + "epoch": 99.77747252747253, + "grad_norm": 8.851137161254883, + "learning_rate": 1.1126373626373627e-07, + "loss": 0.1977, + "step": 36319 + }, + { + "epoch": 99.78021978021978, + "grad_norm": 17.673397064208984, + "learning_rate": 1.0989010989010989e-07, + "loss": 0.2939, + "step": 36320 + }, + { + "epoch": 99.78296703296704, + "grad_norm": 8.430741310119629, + "learning_rate": 1.0851648351648351e-07, + "loss": 0.1776, + "step": 36321 + }, + { + "epoch": 99.78571428571429, + "grad_norm": 3.714064836502075, + "learning_rate": 1.0714285714285716e-07, + "loss": 0.0384, + "step": 36322 + }, + { + "epoch": 99.78846153846153, + "grad_norm": 6.740402698516846, + "learning_rate": 1.0576923076923077e-07, + "loss": 0.0888, + "step": 36323 + }, + { + "epoch": 99.79120879120879, + "grad_norm": 18.340824127197266, + "learning_rate": 1.043956043956044e-07, + "loss": 0.3517, + "step": 36324 + }, + { + "epoch": 99.79395604395604, + "grad_norm": 2.5629079341888428, + "learning_rate": 1.0302197802197802e-07, + "loss": 0.0324, + "step": 36325 + }, + { + "epoch": 99.7967032967033, + "grad_norm": 10.840171813964844, + "learning_rate": 1.0164835164835164e-07, + "loss": 0.1571, + "step": 36326 + }, + { + "epoch": 99.79945054945055, + "grad_norm": 4.191339492797852, + "learning_rate": 1.0027472527472529e-07, + "loss": 0.0632, + "step": 36327 + }, + { + "epoch": 99.8021978021978, + "grad_norm": 6.823727607727051, + "learning_rate": 9.89010989010989e-08, + "loss": 0.1431, + "step": 36328 + }, + { + "epoch": 99.80494505494505, + "grad_norm": 10.277973175048828, + "learning_rate": 9.752747252747254e-08, + "loss": 0.1724, + "step": 36329 + }, + { + "epoch": 99.8076923076923, + "grad_norm": 4.704870223999023, + "learning_rate": 9.615384615384617e-08, + "loss": 0.0506, + "step": 36330 + }, + { + "epoch": 99.81043956043956, + "grad_norm": 8.813828468322754, + "learning_rate": 9.478021978021979e-08, + "loss": 0.127, + "step": 36331 + }, + { + "epoch": 99.81318681318682, + "grad_norm": 8.137977600097656, + "learning_rate": 9.340659340659341e-08, + "loss": 0.165, + "step": 36332 + }, + { + "epoch": 99.81593406593407, + "grad_norm": 25.988733291625977, + "learning_rate": 9.203296703296704e-08, + "loss": 0.4267, + "step": 36333 + }, + { + "epoch": 99.81868131868131, + "grad_norm": 21.908618927001953, + "learning_rate": 9.065934065934067e-08, + "loss": 0.2942, + "step": 36334 + }, + { + "epoch": 99.82142857142857, + "grad_norm": 18.55815887451172, + "learning_rate": 8.928571428571429e-08, + "loss": 0.2477, + "step": 36335 + }, + { + "epoch": 99.82417582417582, + "grad_norm": 16.075307846069336, + "learning_rate": 8.791208791208792e-08, + "loss": 0.3244, + "step": 36336 + }, + { + "epoch": 99.82692307692308, + "grad_norm": 8.321587562561035, + "learning_rate": 8.653846153846154e-08, + "loss": 0.1043, + "step": 36337 + }, + { + "epoch": 99.82967032967034, + "grad_norm": 12.661191940307617, + "learning_rate": 8.516483516483517e-08, + "loss": 0.3074, + "step": 36338 + }, + { + "epoch": 99.83241758241758, + "grad_norm": 15.2633695602417, + "learning_rate": 8.379120879120879e-08, + "loss": 0.4744, + "step": 36339 + }, + { + "epoch": 99.83516483516483, + "grad_norm": 3.9601056575775146, + "learning_rate": 8.241758241758242e-08, + "loss": 0.0559, + "step": 36340 + }, + { + "epoch": 99.83791208791209, + "grad_norm": 11.313227653503418, + "learning_rate": 8.104395604395605e-08, + "loss": 0.1108, + "step": 36341 + }, + { + "epoch": 99.84065934065934, + "grad_norm": 13.055678367614746, + "learning_rate": 7.967032967032969e-08, + "loss": 0.1697, + "step": 36342 + }, + { + "epoch": 99.8434065934066, + "grad_norm": 3.1139674186706543, + "learning_rate": 7.82967032967033e-08, + "loss": 0.0368, + "step": 36343 + }, + { + "epoch": 99.84615384615384, + "grad_norm": 7.360661029815674, + "learning_rate": 7.692307692307692e-08, + "loss": 0.1437, + "step": 36344 + }, + { + "epoch": 99.8489010989011, + "grad_norm": 7.712238788604736, + "learning_rate": 7.554945054945055e-08, + "loss": 0.1479, + "step": 36345 + }, + { + "epoch": 99.85164835164835, + "grad_norm": 9.269844055175781, + "learning_rate": 7.417582417582419e-08, + "loss": 0.1506, + "step": 36346 + }, + { + "epoch": 99.8543956043956, + "grad_norm": 8.931977272033691, + "learning_rate": 7.28021978021978e-08, + "loss": 0.1752, + "step": 36347 + }, + { + "epoch": 99.85714285714286, + "grad_norm": 2.7893669605255127, + "learning_rate": 7.142857142857144e-08, + "loss": 0.0303, + "step": 36348 + }, + { + "epoch": 99.85989010989012, + "grad_norm": 4.656267166137695, + "learning_rate": 7.005494505494506e-08, + "loss": 0.0574, + "step": 36349 + }, + { + "epoch": 99.86263736263736, + "grad_norm": 10.079919815063477, + "learning_rate": 6.868131868131869e-08, + "loss": 0.127, + "step": 36350 + }, + { + "epoch": 99.86538461538461, + "grad_norm": 2.2321691513061523, + "learning_rate": 6.73076923076923e-08, + "loss": 0.0264, + "step": 36351 + }, + { + "epoch": 99.86813186813187, + "grad_norm": 13.261127471923828, + "learning_rate": 6.593406593406594e-08, + "loss": 0.3208, + "step": 36352 + }, + { + "epoch": 99.87087912087912, + "grad_norm": 9.134220123291016, + "learning_rate": 6.456043956043957e-08, + "loss": 0.1346, + "step": 36353 + }, + { + "epoch": 99.87362637362638, + "grad_norm": 3.4801013469696045, + "learning_rate": 6.31868131868132e-08, + "loss": 0.0384, + "step": 36354 + }, + { + "epoch": 99.87637362637362, + "grad_norm": 3.8429317474365234, + "learning_rate": 6.181318681318682e-08, + "loss": 0.0284, + "step": 36355 + }, + { + "epoch": 99.87912087912088, + "grad_norm": 4.59492826461792, + "learning_rate": 6.043956043956044e-08, + "loss": 0.0579, + "step": 36356 + }, + { + "epoch": 99.88186813186813, + "grad_norm": 16.27478790283203, + "learning_rate": 5.906593406593407e-08, + "loss": 0.2921, + "step": 36357 + }, + { + "epoch": 99.88461538461539, + "grad_norm": 20.177671432495117, + "learning_rate": 5.76923076923077e-08, + "loss": 0.1599, + "step": 36358 + }, + { + "epoch": 99.88736263736264, + "grad_norm": 4.8889384269714355, + "learning_rate": 5.631868131868132e-08, + "loss": 0.0995, + "step": 36359 + }, + { + "epoch": 99.89010989010988, + "grad_norm": 7.259263038635254, + "learning_rate": 5.4945054945054946e-08, + "loss": 0.0978, + "step": 36360 + }, + { + "epoch": 99.89285714285714, + "grad_norm": 21.556690216064453, + "learning_rate": 5.357142857142858e-08, + "loss": 0.4572, + "step": 36361 + }, + { + "epoch": 99.8956043956044, + "grad_norm": 10.09170150756836, + "learning_rate": 5.21978021978022e-08, + "loss": 0.1934, + "step": 36362 + }, + { + "epoch": 99.89835164835165, + "grad_norm": 20.954795837402344, + "learning_rate": 5.082417582417582e-08, + "loss": 0.5562, + "step": 36363 + }, + { + "epoch": 99.9010989010989, + "grad_norm": 10.689292907714844, + "learning_rate": 4.945054945054945e-08, + "loss": 0.24, + "step": 36364 + }, + { + "epoch": 99.90384615384616, + "grad_norm": 18.83238410949707, + "learning_rate": 4.8076923076923085e-08, + "loss": 0.2975, + "step": 36365 + }, + { + "epoch": 99.9065934065934, + "grad_norm": 18.687776565551758, + "learning_rate": 4.6703296703296704e-08, + "loss": 0.2684, + "step": 36366 + }, + { + "epoch": 99.90934065934066, + "grad_norm": 15.118876457214355, + "learning_rate": 4.5329670329670335e-08, + "loss": 0.1414, + "step": 36367 + }, + { + "epoch": 99.91208791208791, + "grad_norm": 14.95732593536377, + "learning_rate": 4.395604395604396e-08, + "loss": 0.2155, + "step": 36368 + }, + { + "epoch": 99.91483516483517, + "grad_norm": 9.102581977844238, + "learning_rate": 4.2582417582417586e-08, + "loss": 0.0895, + "step": 36369 + }, + { + "epoch": 99.91758241758242, + "grad_norm": 9.535921096801758, + "learning_rate": 4.120879120879121e-08, + "loss": 0.085, + "step": 36370 + }, + { + "epoch": 99.92032967032966, + "grad_norm": 17.857913970947266, + "learning_rate": 3.983516483516484e-08, + "loss": 0.4535, + "step": 36371 + }, + { + "epoch": 99.92307692307692, + "grad_norm": 13.144466400146484, + "learning_rate": 3.846153846153846e-08, + "loss": 0.4192, + "step": 36372 + }, + { + "epoch": 99.92582417582418, + "grad_norm": 17.862838745117188, + "learning_rate": 3.708791208791209e-08, + "loss": 0.359, + "step": 36373 + }, + { + "epoch": 99.92857142857143, + "grad_norm": 14.928848266601562, + "learning_rate": 3.571428571428572e-08, + "loss": 0.1817, + "step": 36374 + }, + { + "epoch": 99.93131868131869, + "grad_norm": 3.0795204639434814, + "learning_rate": 3.4340659340659344e-08, + "loss": 0.0442, + "step": 36375 + }, + { + "epoch": 99.93406593406593, + "grad_norm": 8.040660858154297, + "learning_rate": 3.296703296703297e-08, + "loss": 0.1245, + "step": 36376 + }, + { + "epoch": 99.93681318681318, + "grad_norm": 11.266860961914062, + "learning_rate": 3.15934065934066e-08, + "loss": 0.0982, + "step": 36377 + }, + { + "epoch": 99.93956043956044, + "grad_norm": 2.4864304065704346, + "learning_rate": 3.021978021978022e-08, + "loss": 0.0295, + "step": 36378 + }, + { + "epoch": 99.9423076923077, + "grad_norm": 13.087908744812012, + "learning_rate": 2.884615384615385e-08, + "loss": 0.2602, + "step": 36379 + }, + { + "epoch": 99.94505494505495, + "grad_norm": 15.717276573181152, + "learning_rate": 2.7472527472527473e-08, + "loss": 0.3146, + "step": 36380 + }, + { + "epoch": 99.9478021978022, + "grad_norm": 7.4243574142456055, + "learning_rate": 2.60989010989011e-08, + "loss": 0.1137, + "step": 36381 + }, + { + "epoch": 99.95054945054945, + "grad_norm": 13.129955291748047, + "learning_rate": 2.4725274725274727e-08, + "loss": 0.2497, + "step": 36382 + }, + { + "epoch": 99.9532967032967, + "grad_norm": 22.77500343322754, + "learning_rate": 2.3351648351648352e-08, + "loss": 0.4103, + "step": 36383 + }, + { + "epoch": 99.95604395604396, + "grad_norm": 13.564032554626465, + "learning_rate": 2.197802197802198e-08, + "loss": 0.1823, + "step": 36384 + }, + { + "epoch": 99.95879120879121, + "grad_norm": 5.473267078399658, + "learning_rate": 2.0604395604395605e-08, + "loss": 0.0616, + "step": 36385 + }, + { + "epoch": 99.96153846153847, + "grad_norm": 11.076255798339844, + "learning_rate": 1.923076923076923e-08, + "loss": 0.1636, + "step": 36386 + }, + { + "epoch": 99.96428571428571, + "grad_norm": 17.10734748840332, + "learning_rate": 1.785714285714286e-08, + "loss": 0.1689, + "step": 36387 + }, + { + "epoch": 99.96703296703296, + "grad_norm": 4.737600803375244, + "learning_rate": 1.6483516483516484e-08, + "loss": 0.0616, + "step": 36388 + }, + { + "epoch": 99.96978021978022, + "grad_norm": 16.574800491333008, + "learning_rate": 1.510989010989011e-08, + "loss": 0.2498, + "step": 36389 + }, + { + "epoch": 99.97252747252747, + "grad_norm": 15.041854858398438, + "learning_rate": 1.3736263736263736e-08, + "loss": 0.1632, + "step": 36390 + }, + { + "epoch": 99.97527472527473, + "grad_norm": 6.597498893737793, + "learning_rate": 1.2362637362637363e-08, + "loss": 0.0775, + "step": 36391 + }, + { + "epoch": 99.97802197802197, + "grad_norm": 14.733123779296875, + "learning_rate": 1.098901098901099e-08, + "loss": 0.3303, + "step": 36392 + }, + { + "epoch": 99.98076923076923, + "grad_norm": 11.318392753601074, + "learning_rate": 9.615384615384615e-09, + "loss": 0.0846, + "step": 36393 + }, + { + "epoch": 99.98351648351648, + "grad_norm": 7.874334335327148, + "learning_rate": 8.241758241758242e-09, + "loss": 0.0899, + "step": 36394 + }, + { + "epoch": 99.98626373626374, + "grad_norm": 10.465459823608398, + "learning_rate": 6.868131868131868e-09, + "loss": 0.3332, + "step": 36395 + }, + { + "epoch": 99.98901098901099, + "grad_norm": 3.8213672637939453, + "learning_rate": 5.494505494505495e-09, + "loss": 0.0447, + "step": 36396 + }, + { + "epoch": 99.99175824175825, + "grad_norm": 14.710310935974121, + "learning_rate": 4.120879120879121e-09, + "loss": 0.1691, + "step": 36397 + }, + { + "epoch": 99.99450549450549, + "grad_norm": 4.792688369750977, + "learning_rate": 2.7472527472527475e-09, + "loss": 0.0755, + "step": 36398 + }, + { + "epoch": 99.99725274725274, + "grad_norm": 12.380110740661621, + "learning_rate": 1.3736263736263738e-09, + "loss": 0.2049, + "step": 36399 + }, + { + "epoch": 100.0, + "grad_norm": 64.2523422241211, + "learning_rate": 0.0, + "loss": 0.8955, + "step": 36400 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.7369146005509641, + "eval_f1": 0.7258646937219186, + "eval_f1_DuraRiadoRio_64x64": 0.5051546391752577, + "eval_f1_Mole_64x64": 0.7381974248927039, + "eval_f1_Quebrado_64x64": 0.88, + "eval_f1_RiadoRio_64x64": 0.6439024390243903, + "eval_f1_RioFechado_64x64": 0.8620689655172413, + "eval_loss": 1.7109801769256592, + "eval_precision": 0.8185139770043168, + "eval_precision_DuraRiadoRio_64x64": 0.98, + "eval_precision_Mole_64x64": 0.9662921348314607, + "eval_precision_Quebrado_64x64": 0.7900552486187845, + "eval_precision_RiadoRio_64x64": 0.5116279069767442, + "eval_precision_RioFechado_64x64": 0.8445945945945946, + "eval_recall": 0.735851659665596, + "eval_recall_DuraRiadoRio_64x64": 0.3402777777777778, + "eval_recall_Mole_64x64": 0.5972222222222222, + "eval_recall_Quebrado_64x64": 0.9930555555555556, + "eval_recall_RiadoRio_64x64": 0.868421052631579, + "eval_recall_RioFechado_64x64": 0.8802816901408451, + "eval_runtime": 1.7804, + "eval_samples_per_second": 407.765, + "eval_steps_per_second": 25.836, + "step": 36400 + }, + { + "epoch": 100.0, + "step": 36400, + "total_flos": 1.5288402814304256e+18, + "train_loss": 0.34031273227807385, + "train_runtime": 2805.7892, + "train_samples_per_second": 207.108, + "train_steps_per_second": 12.973 + } + ], + "logging_steps": 1.0, + "max_steps": 36400, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.5288402814304256e+18, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}